Python

venv

python -m venv venv && source venv/bin/activate

deactivate

Functions

def f(arg, **kwargs):
    """Forward ``arg`` and any keyword arguments unchanged to ``f2``."""
    return f2(arg, **kwargs)


kwargs = {"method": "square"}

# Unpack the dict with ** so its items arrive as keyword arguments.
# Passing the dict positionally raises TypeError: f accepts one positional.
f(3, **kwargs)

Classes

class MyClass:
    """Minimal class: one class attribute and one method that sets state."""

    i: int = 1  # class-level attribute, shared by every instance

    def f(self) -> None:
        """Create the instance attribute ``b`` with the value ``"ran"``."""
        self.b = "ran"


a = MyClass()

# ``b`` is only created by ``f()``, so it is absent on a fresh instance.
if hasattr(a, "b"):
    print("ran")

subclass

class SubClass(MyClass):

    """Subclass of ``MyClass`` whose instances receive ``b`` at construction."""

    def __init__(self, b):

        # Stores the argument as the instance attribute ``b``.
        self.b = b

Dataclasses

import random

import string

from dataclasses import dataclass, field



def generate_id() -> str:
    """Return a random identifier of 12 uppercase ASCII letters."""
    letters = random.choices(string.ascii_uppercase, k=12)
    return "".join(letters)



# slots=True generates __slots__ instead of a per-instance __dict__ (Python 3.10+).
@dataclass(slots=True)

class Person:

    """Example dataclass showing defaults, default factories and init=False fields."""

    name: str

    address: str

    active: bool = True

    # Mutable defaults must go through default_factory so each instance gets its own list.
    email_addresses: list[str] = field(default_factory=list)

    # init=False: not a constructor argument; filled by calling generate_id().
    id: str = field(init=False, default_factory=generate_id)

    # init=False, repr=False: excluded from __init__ and from the generated repr;
    # assigned in __post_init__ below.
    _search_string: str = field(init=False, repr=False)


    def __post_init__(self) -> None:

        # Runs after the generated __init__, so name and address are already set.
        self._search_string = f"{self.name}, {self.address}"



def main() -> None:
    """Build an inactive example ``Person`` and print its repr."""
    print(Person(name="John", address="123 Main St", active=False))

return class

from __future__ import annotations

from dataclasses import dataclass


@dataclass
class StockAnalysis:
    """Column names plus rows of stock data.

    NOTE(review): ``filter_stock`` reads ``self.ticker_index`` and annotates
    ``ticker`` as ``Tickers``; neither is defined in this snippet. Presumably
    both come from the surrounding project; confirm before reusing.
    """

    cols: list[str]
    data: list[list[str | float]]

    def filter_stock(self, ticker: Tickers = "IBM") -> StockAnalysis:
        """Return a new ``StockAnalysis`` holding only the rows for ``ticker``.

        The return annotation names the enclosing class itself; this works
        because ``from __future__ import annotations`` defers evaluation.
        """
        matching_rows = [
            row for row in self.data if row[self.ticker_index] == ticker
        ]
        return StockAnalysis(cols=self.cols, data=matching_rows)

main function

def main():
    """Script entry point placeholder; returns ``None``."""
    return


# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


IO

Read a text file

with open("input.txt") as f:

    inputs = f.readlines()

 

from pathlib import Path

Path("input.txt")

JSON

import json

with open("data.json") as f:
    data = json.load(f)

with open("data.json", "w") as f:
    # Serialize the loaded object. Passing the builtin ``dict`` type here
    # raises "TypeError: Object of type type is not JSON serializable".
    json.dump(data, f)

Requests

import requests


response = requests.get(

    "URL",

    headers={"KEY": "VALUE"},

    params={"KEY": "VALUE", "limit": 1000},

)


# Basic auth takes a (username, password) tuple; the quotes on each string must match.
requests.get(URL, auth=("USERNAME", "PASSWORD"))


# A Session reuses the same auth for every request made through it.
with requests.Session() as client:

    client.auth = (USERNAME, PASSWORD)

    response = client.post(

        CURRENT_URL,

        # NOTE(review): this sends {"Content-Type": ...} as the JSON request body,
        # not as a header. Presumably a real payload belongs here; confirm intent.
        json={

            "Content-Type": "application/json",

        },

    )

    # Decode the response body as JSON.
    parsed = response.json()

Pickle

import pickle


with open("obj.pkl", "wb") as f:

    pickle.dump(obj, f)


import s3fs

fs = s3fs.S3FileSystem()

# Use a context manager so the S3 file is always closed; the buffered
# upload is only flushed and committed when the file is closed.
with fs.open("s3://BUCKET/obj.pkl", "wb") as f:
    pickle.dump(obj, f)


with open("obj.pkl", "rb") as f:

    obj = pickle.load(f)

# Context managers close the handles that the one-liner form leaves open.
# Only unpickle data from trusted sources: loading can execute arbitrary code.
with open("/tmp/ds.pkl", "rb") as f:
    ds = pickle.load(f)

with fs.open("s3://BUCKET/obj.pkl", "rb") as f:
    obj = pickle.load(f)

Tempfile

import tempfile


# NamedTemporaryFile guarantees a real filesystem path in ``f.name``.
# With TemporaryFile the file may have no name (on POSIX ``f.name`` is an integer fd).
with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
    df.to_parquet(f.name)

Utils

See environmental variables

import os

os.environ

CPU count

os.cpu_count()

Size/memory of object

import sys


sys.getsizeof(x)

See memory usage

import resource

# Peak resident set size of this process. The unit is platform-dependent
# (kilobytes on Linux, bytes on macOS), and the name avoids shadowing builtin ``bytes``.
max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

 

import tracemalloc

tracemalloc.start()

# get_traced_memory() returns (current, peak); index 1 is the peak traced size.
# NOTE(review): format_bytes is not defined or imported in this snippet.
# Presumably it is dask.utils.format_bytes; confirm and add the import.
print(f"peak memory usage: {format_bytes(tracemalloc.get_traced_memory()[1])}")

tracemalloc.stop()

Get environmental variable

os.getenv("ENV")

Set environmental variable

os.environ["ENV"] = "1"

Remove file

if os.path.exists("file.txt"):

    os.remove("file.txt")


os.remove("file.txt")

F-strings/f strings

f"{var:.2f}"

f"{x:02}" # leading 0

[f"{x:02}:00" for x in range(0, 24, 3)]

f"${amount:,.2f}"

Regular expressions

import re


# Raw string: "\D" in a normal literal is an invalid escape sequence, which newer Pythons warn about.
re.sub(r"\D", "", "abc1cdf2dfd3dgd")  # returns only digits ("123")

Multiprocessing

import multiprocessing as mp

from multiprocessing import Pool, Process


def inc(x):
    """Return ``x`` incremented by one."""
    return x + 1


# Guard pool creation. Under the "spawn" start method (Windows, macOS default)
# each worker re-imports this module, and an unguarded Pool would try to
# start new pools recursively and fail.
if __name__ == "__main__":
    # processes defaults to os.cpu_count()
    with Pool() as pool:
    # with Pool(processes=2) as pool:
        out = pool.map(inc, [1, 2])

Shell

import shutil

import subprocess


shutil.rmtree("ds.zarr", ignore_errors=True)

subprocess.call("rm -rf /tmp/*.zarr", shell=True)

subprocess.check_output("conda list --json", shell=True)

Itertools

All combinations of two lists

import itertools


list(itertools.product(list1, list2))

combinations of character in string

list(itertools.combinations('AB', 2))

consecutive pairs of characters

s = "abcd"

[''.join(pair) for pair in zip(s[:-1], s[1:])]

list(itertools.pairwise(s))

 consecutive n chars

list(zip(*["ABCD"[i:] for i in range(3)]))

Filter

filter rows in a list of lists

list(filter(lambda x: x[0] == "a", [["a", 1], ["b", 2]]))

Reduce

from functools import reduce


Analytics

from statistics import mean


mean(l)

If-else on one line

out = 'Yes' if fruit == 'Apple' else 'No' 

Check for a type

if isinstance(o, str):

    print("type o is str")

For loops on one line

["{}_{}".format(i, j) for i in list1 for j in list2]

Dictionary keys

list(d.keys())

 Dictionary values

list(d.values())

Dictionary from two lists

dict(zip(l1, l2))

Remove an element from a list

l = ["a", "b"]

l2 = l.copy()

l2.remove("a")

Reverse a list

l.reverse()

Unique list from array of tuples

# Flatten with a set comprehension. sum(arr, start=()) builds a new tuple
# on every addition, which is quadratic in the total number of items.
l = sorted({item for tup in arr for item in tup})

 Sort by length of word

sorted(["aaa", "bb", "c"], key=len)

Sort alphabetically by last letter

sorted(["aaa", "bb", "c"], key=lambda word: word[::-1])

List of tuples from two lists

list(zip(l1, l2))

Unique elements in multiple lists

set(l) | set(l2)

Elements not in another list

list(set(long_list) - set(short_list))

Datetime

https://strftime.org/

from datetime import datetime, timedelta, timezone


datetime(2019, 5, 18, 15, 17, tzinfo=timezone.utc) # '2019-05-18T15:17:00+00:00'

datetime.now().strftime(format="%Y-%m-%dT%H:%M:%S")

datetime.now().isoformat()

print(f"{datetime.now():%Y%m%d_%H}")

print(f"{datetime.now():%Y%m%dT%H0000Z}")


yyyy_mm = datetime.now().strftime("%Y-%m")

# Previous calendar month: step back one day from the first of this month.
# Subtracting a fixed 30 days is wrong near month edges: Mar 31 lands on
# Mar 1 (still March), and Mar 1 lands on Jan 30 (skipping February).
last_yyyy_mm = (datetime.now().replace(day=1) - timedelta(days=1)).strftime("%Y-%m")


td_seconds = timedelta(hours=1).total_seconds()

td_hours = timedelta(hours=1).total_seconds() / 3600


from dateutil import tz

datetime.now(tz.tzlocal()).strftime('%Y-%m-%dT%H:%M:%S%z')

Days in month

from calendar import monthrange


ndays_in_month = monthrange(2020, 1)[1]

Timeit

import timeit


# timeit has no ``timer`` attribute; ``default_timer`` is the clock to use.
start_timer = timeit.default_timer()

starttime = timeit.default_timer()

...

# Elapsed wall-clock time in minutes, rounded to two decimals.
time_took_mins = round((timeit.default_timer() - start_timer) / 60, 2)


python -m timeit '"-".join(str(n) for n in range(100))'

python -m timeit "x = 12345; x**2"

python -m timeit "x = 12345; x * x"

Suppress warnings

import warnings


warnings.filterwarnings("ignore")

Exceptions / Raise Errors

try:

    func(arg1, arg2)

except ValueError:

    # Only ValueError is handled here; any other exception still propagates.
    print("func didn't finish. continuing")


# A bare ``raise`` re-raises the exception currently being handled, so it belongs
# inside an ``except`` block; with no active exception it fails with RuntimeError.
raise 

Inspect

import inspect


frameinfo = inspect.getframeinfo(inspect.currentframe())

print(frameinfo.filename, frameinfo.lineno)

Argparse

from argparse import ArgumentParser


parser = ArgumentParser(description='Script')

parser.add_argument('--year', type=str, help="year str as YYYY", required=True)

parser.add_argument('--month', type=str, help="mon str as MM", required=True)

args = parser.parse_args()


year=args.year

month=args.month


python file.py --year 2020 --month 01 

Logging

import logging

import sys  # required for sys.stderr below; the snippet is otherwise not self-contained

# Configure the root logger to emit INFO and above to stderr with a file prefix.
logger = logging.getLogger()
logger.setLevel("INFO")

_handler = logging.StreamHandler(sys.stderr)
_handler.setLevel("INFO")

_formatter = logging.Formatter("[file.py] %(message)s")
_handler.setFormatter(_formatter)

logger.addHandler(_handler)

# Drop the temporary names; the logger keeps its own reference to the handler.
del _handler, _formatter

Using asv

$ cd asv_bench

$ asv run

$ asv show master

$ asv publish

$ asv preview

Update version of package

Change VERSION in setup.py

Upload a package to pypi

$ python -m pip install --user --upgrade setuptools wheel twine

$ python setup.py sdist bdist_wheel

$ twine upload dist/*

$ !Add your username and password.