cuDF
https://docs.rapids.ai/api/cudf/stable/
https://colab.research.google.com/drive/1_lp1vTeEeJy7v9Xh2gb-QVvvhMYctetC#scrollTo=XwSBXo9p4TEK
mamba install -c rapidsai -c conda-forge -c nvidia rapids=24.06 python=3.11 cuda-version=12.2 --y
import cudf
# Sanity check: show which cuDF version is installed and where it was imported from.
print(f"{cudf.__version__=}")
print(f"{cudf.__file__=}")
# Build a small Series and take its maximum.
s = cudf.Series([1, 2, 3])
s.max()
import pandas as pd
# Start from a pandas DataFrame ...
pdf = pd.DataFrame({"a": [0, 1]})
# ... convert it to a cuDF DataFrame ...
gdf = cudf.from_pandas(pdf)
# ... and convert back; note that `pdf` is rebound to the converted result.
pdf = gdf.to_pandas()
# Export the cuDF DataFrame's values as a NumPy array.
arr = gdf.to_numpy()
def f(row):
    """Return 0 when ``row["a"]`` is ``cudf.NA``, otherwise ``row["a"] + 1``."""
    value = row["a"]
    # Guard clause: the missing-value marker is detected by identity.
    if value is cudf.NA:
        return 0
    return value + 1
# Column "a" contains one missing entry (cudf.NA) between two integers.
df = cudf.DataFrame({"a": [1, cudf.NA, 3]})
# Call f once per row (axis=1).
df.apply(f, axis=1)
from numba import cuda
import cupy as cp
@cuda.jit
def multiply_by_5(x, out):
    """CUDA kernel: store ``x[k] * 5`` in ``out[k]`` for every index k below ``x.size``."""
    pos = cuda.grid(1)
    # Indices at or beyond x.size have nothing to process.
    if pos >= x.size:
        return
    out[pos] = x[pos] * 5
# Output buffer: a 3-element int32 Series built from a zero-filled CuPy array.
out = cudf.Series(cp.zeros(3, dtype="int32"))
# Launch the kernel sized for the number of elements in `s`; forall() is used
# here instead of an explicit [blocks, threads] launch configuration.
multiply_by_5.forall(s.shape[0])(s, out)
# Bare expression: presumably there to display `out` when run as a notebook cell.
out
cudf.pandas
https://colab.research.google.com/drive/12tCzP94zFG2BRduACucn5Q_OcX1TUKY3
python -m cudf.pandas script.py
In a Jupyter notebook cell:
# Enable the cudf.pandas accelerator from Python code.
import cudf.pandas
cudf.pandas.install()
# NOTE(review): keep `import pandas` after install(); the cudf.pandas usage docs
# appear to require enabling the accelerator first - confirm against the docs.
import pandas as pd
or
%load_ext cudf.pandas
!wget https://data.rapids.ai/datasets/nyc_parking/nyc_parking_violations_2022.parquet -O /tmp/nyc_parking_violations_2022.parquet
Development
# Fetch the cuDF sources and work from the repository root.
git clone https://github.com/rapidsai/cudf.git
cd cudf
# Create the `cudf_dev` conda environment from the repository's environment
# file (the cuda-122 / x86_64 variant) using the libmamba solver, then activate it.
conda env create --solver=libmamba --name cudf_dev --file conda/environments/all_cuda-122_arch-x86_64.yaml
conda activate cudf_dev
# Build the libcudf and cudf targets.
./build.sh libcudf cudf
# Build the HTML documentation.
cd docs/cudf
make html
# Serve the current directory over HTTP (http.server listens on port 8000 by default).
python3 -m http.server
# Force-kill whatever process is listening on port 8000 (e.g. a leftover server).
kill -9 $(lsof -t -i:8000)