cuDF

https://docs.rapids.ai/api/cudf/stable/ 

https://colab.research.google.com/drive/1_lp1vTeEeJy7v9Xh2gb-QVvvhMYctetC#scrollTo=XwSBXo9p4TEK 

mamba install -c rapidsai -c conda-forge -c nvidia rapids=24.06 python=3.11 cuda-version=12.2 -y

 

import cudf

# Sanity check of the installed cuDF: print its version and the path it was
# imported from (the `=` in the f-string also prints the expression text).
print(f"{cudf.__version__=}")

print(f"{cudf.__file__=}")

# Small cuDF Series; `s` is reused further down as the kernel input.
s = cudf.Series([1, 2, 3])

# Bare expression: the maximum is only displayed in a notebook/REPL session.
s.max()


import pandas as pd

# Round trip between pandas and cuDF, starting from a small pandas frame.
pdf = pd.DataFrame({"a": [0, 1]})

# pandas -> cuDF
gdf = cudf.from_pandas(pdf)

# cuDF -> pandas; note that this rebinds `pdf` to the converted result.
pdf = gdf.to_pandas()

# Export the cuDF frame through to_numpy().
arr = gdf.to_numpy()

 

def f(row):
    """Row-wise UDF: return 0 when column "a" is ``cudf.NA``, else its value plus 1.

    ``row`` is indexed by column name; only the "a" entry is read.
    """
    value = row["a"]
    # Guard clause for the missing-value case; everything else is incremented.
    if value is cudf.NA:
        return 0
    return value + 1

# Sample frame whose middle entry is missing (cudf.NA), to exercise both
# branches of f.
df = cudf.DataFrame({"a": [1, cudf.NA, 3]})

# Apply f row-wise (axis=1). Bare expression: the result is only displayed in
# a notebook/REPL session.
df.apply(f, axis=1)


from numba import cuda

import cupy as cp


@cuda.jit
def multiply_by_5(x, out):
    """CUDA kernel: store ``x[idx] * 5`` in ``out[idx]`` for every in-range index.

    Parameters
    ----------
    x : input array; its ``size`` bounds the indices that are processed.
    out : output array, written at the same index that is read from ``x``.
    """
    idx = cuda.grid(1)
    # Indices at or past the end of x have no element to process.
    if idx >= x.size:
        return
    out[idx] = x[idx] * 5


# Allocate the int32 output with one slot per element of `s` instead of a
# hard-coded 3: the kernel's bounds check only tests against the input size,
# so the output must never be shorter than the input.
out = cudf.Series(cp.zeros(len(s), dtype="int32"))

# forall(n) configures a 1-D launch covering n elements -- here one per row of `s`.
multiply_by_5.forall(s.shape[0])(s, out)

# Bare expression: the filled Series is only displayed in a notebook/REPL session.
out


cudf.pandas

https://colab.research.google.com/drive/12tCzP94zFG2BRduACucn5Q_OcX1TUKY3 

python -m cudf.pandas script.py

In a Jupyter notebook cell:

# Turn on the cudf.pandas accelerator programmatically.
import cudf.pandas

cudf.pandas.install()

# Imported only after install(); presumably pandas must be imported afterwards
# for the accelerator to take effect -- confirm against the cudf.pandas docs.
import pandas as pd

Or load it as an IPython extension:

%load_ext cudf.pandas

 

!wget https://data.rapids.ai/datasets/nyc_parking/nyc_parking_violations_2022.parquet -O /tmp/nyc_parking_violations_2022.parquet

Development (building cuDF from source)

git clone https://github.com/rapidsai/cudf.git

cd cudf

conda env create --solver=libmamba --name cudf_dev --file conda/environments/all_cuda-122_arch-x86_64.yaml

conda activate cudf_dev

./build.sh libcudf cudf

cd docs/cudf

make html

python3 -m http.server

kill -9 $(lsof -t -i:8000)