# /// script
# requires-python = ">=3.14"
# dependencies = [
# "msal>=1.34.0",
# "requests>=2.32.5",
# "pypandoc>=1.16.2",
# "pypandoc_binary>=1.16.2",
# ]
# ///
import argparse
import atexit
import json
import os
import random
import time
import tempfile
from pathlib import Path
import msal
import requests
import pypandoc
CLIENT_CONFIG_FILE = Path.home() / ".azure_client.json"
TOKEN_CACHE_FILE = Path.home() / ".msal_token_cache.json"
SCOPES = ["Files.ReadWrite"]
def get_client_config() -> dict:
    """Load the Azure app registration config from the user's home directory.

    Returns:
        Parsed JSON dict expected to contain 'client_id' and 'authority'.

    Raises:
        FileNotFoundError: if the config file is missing, with setup instructions.
    """
    try:
        raw = CLIENT_CONFIG_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Configuration file not found at {CLIENT_CONFIG_FILE}. "
            "Please create it with {'client_id': '...', 'authority': '...'}"
        ) from None
    return json.loads(raw)
def build_msal_app() -> msal.PublicClientApplication:
    """Build an MSAL public-client app backed by an on-disk token cache.

    The cache is loaded from TOKEN_CACHE_FILE if present and written back at
    interpreter exit, but only when MSAL reports the cache state changed.
    """
    cfg = get_client_config()
    client_id = cfg.get("client_id")
    authority = cfg.get("authority")
    if not (client_id and authority):
        raise ValueError("Config file must contain both 'client_id' and 'authority'")

    token_cache = msal.SerializableTokenCache()
    if TOKEN_CACHE_FILE.exists():
        token_cache.deserialize(TOKEN_CACHE_FILE.read_text(encoding="utf-8"))

    def _save_cache():
        # Avoid touching disk unless MSAL actually mutated the cache.
        if token_cache.has_state_changed:
            TOKEN_CACHE_FILE.write_text(token_cache.serialize(), encoding="utf-8")

    atexit.register(_save_cache)
    return msal.PublicClientApplication(
        client_id=client_id,
        authority=authority,
        token_cache=token_cache,
    )
def get_access_token() -> str:
    """Acquire a Microsoft Graph access token, preferring the silent cache path.

    Falls back to interactive browser login when no cached token is usable.

    Raises:
        RuntimeError: if authentication fails outright.
    """
    app = build_msal_app()
    accounts = app.get_accounts()
    print(f"Cached accounts: {len(accounts)}")

    result = app.acquire_token_silent(SCOPES, account=accounts[0]) if accounts else None
    if not result:
        print("No suitable token found in cache. Opening browser to login...")
        result = app.acquire_token_interactive(SCOPES)

    if "access_token" in result:
        return result["access_token"]
    error_msg = (
        result.get("error_description") or result.get("error") or "Unknown error"
    )
    raise RuntimeError(f"Could not authenticate: {error_msg}")
def convert_md_to_docx_bytes(md_text: str) -> bytes:
    """
    Convert Markdown text -> DOCX bytes using Pandoc via pypandoc.
    pypandoc_binary supplies a bundled pandoc binary, so this works without a system install.
    """
    # A TemporaryDirectory cleans itself up, so no manual unlink is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "render.docx"
        pypandoc.convert_text(
            md_text,
            to="docx",
            format="md",
            outputfile=str(out_path),
            extra_args=["--wrap=preserve"],
        )
        return out_path.read_bytes()
def upload_docx_to_onedrive(
    token: str,
    docx_bytes: bytes,
    file_name: str,
    folder: str = "",
) -> dict:
    """
    Upload DOCX bytes to OneDrive root (or a folder under root) using simple upload (PUT /content).

    Args:
        token: OAuth bearer token for Microsoft Graph.
        docx_bytes: File content to upload.
        file_name: Remote file name.
        folder: Optional folder path under the drive root ('' = root).

    Returns:
        The created DriveItem JSON.

    Raises:
        RuntimeError: on a non-2xx Graph response.
        requests.Timeout: if the request stalls past the timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    }
    # Normalize folder path
    folder = folder.strip().strip("/")
    remote_path = f"{folder}/{file_name}" if folder else file_name
    upload_url = (
        f"https://graph.microsoft.com/v1.0/me/drive/root:/{remote_path}:/content"
    )
    # Without a timeout, requests can hang indefinitely on a stalled connection.
    resp = requests.put(upload_url, headers=headers, data=docx_bytes, timeout=120)
    if not resp.ok:
        raise RuntimeError(f"Upload failed ({resp.status_code}): {resp.text}")
    return resp.json()
def delete_with_retry(token: str, file_id: str, max_tries: int = 8) -> None:
    """
    Delete a DriveItem, retrying on transient lock/conflict conditions.
    Common right-after-upload scenario: 423 Locked / 409 Conflict due to indexing/preview generation.

    Args:
        token: OAuth bearer token for Microsoft Graph.
        file_id: DriveItem id to delete.
        max_tries: Maximum delete attempts before giving up.

    Raises:
        RuntimeError: on a non-retryable HTTP error, or when still locked
            after max_tries attempts.
    """
    url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "If-Match": "*",
    }
    for attempt in range(1, max_tries + 1):
        # Timeout prevents an indefinite hang on a stalled connection.
        resp = requests.delete(url, headers=headers, timeout=30)
        if resp.status_code in (204, 200):
            return
        if resp.status_code not in (423, 409):
            raise RuntimeError(f"Delete failed ({resp.status_code}): {resp.text}")
        if attempt == max_tries:
            # Bug fix: the original slept (up to ~30s) even after the final
            # attempt, then raised anyway — skip the pointless final wait.
            break
        # Exponential backoff with jitter, capped at 30s.
        sleep_s = min(30.0, 2.0**attempt) + random.random()
        print(
            f"Delete locked/conflict (HTTP {resp.status_code}). "
            f"Retry {attempt}/{max_tries} in {sleep_s:.1f}s..."
        )
        time.sleep(sleep_s)
    raise RuntimeError(f"Could not delete after {max_tries} tries; still locked.")
def main():
    """CLI entry point: convert Markdown to DOCX and upload it to OneDrive."""
    parser = argparse.ArgumentParser(
        description="Convert Markdown to DOCX and upload to OneDrive."
    )
    parser.add_argument(
        "--md-file", type=str, help="Path to a Markdown file to upload."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="Markdown text to upload (ignored if --md-file is provided).",
    )
    parser.add_argument(
        "--name", type=str, default="test_render.docx", help="Remote DOCX filename."
    )
    parser.add_argument(
        "--folder",
        type=str,
        default="",
        help="Optional OneDrive folder under root (e.g., 'Notes/Rendered').",
    )
    parser.add_argument(
        "--delete-after",
        type=int,
        default=0,
        help="If > 0, wait N seconds then delete the uploaded file (with retry).",
    )
    args = parser.parse_args()

    # Pick the markdown source: file beats inline text beats built-in sample.
    fallback_md = "This is markdown.\n\n# Header 1\n\n## Header 2\n\n- list item\n\n**bold** and *italic*"
    if args.md_file:
        src = Path(args.md_file)
        if not src.exists():
            raise FileNotFoundError(f"Markdown file not found: {src}")
        markdown_text = src.read_text(encoding="utf-8")
    else:
        markdown_text = args.text or fallback_md

    token = get_access_token()

    print("Converting Markdown to DOCX (pandoc via pypandoc)...")
    docx_bytes = convert_md_to_docx_bytes(markdown_text)
    print(f"Converted DOCX size: {len(docx_bytes)} bytes")

    print(f"Uploading '{args.name}' to OneDrive...")
    item = upload_docx_to_onedrive(
        token, docx_bytes, file_name=args.name, folder=args.folder
    )
    file_id = item.get("id")
    web_url = item.get("webUrl")
    print(f"Success!\nID: {file_id}\nLink: {web_url}")

    if args.delete_after > 0:
        print(f"Deleting in {args.delete_after} seconds...")
        time.sleep(args.delete_after)
        print("Deleting (with retry on lock/conflict)...")
        delete_with_retry(token, file_id)
        print("Deleted.")
# NOTE(review): this guard runs main() the moment the interpreter reaches this
# line — before the duplicate set of definitions later in this file is loaded.
# The file appears to be two standalone scripts concatenated; split them.
if __name__ == "__main__":
    main()
# ----------------------------------------------------------------------------
# NOTE(review): a SECOND PEP 723 metadata block and a duplicate set of
# definitions begin here — this file is two standalone scripts concatenated.
# The definitions below shadow those above; move this half to its own file.
# ----------------------------------------------------------------------------
# /// script
# requires-python = ">=3.14"
# dependencies = [
# "msal>=1.34.0",
# "requests>=2.32.5",
# "polars>=0.20.0",
# "xlsxwriter>=3.1.9",
# ]
# ///
import argparse
import atexit
import io
import json
import random
import time
from pathlib import Path
import msal
import requests
import polars as pl
CLIENT_CONFIG_FILE = Path.home() / ".azure_client.json"
TOKEN_CACHE_FILE = Path.home() / ".msal_token_cache.json"
SCOPES = ["Files.ReadWrite"]
def get_client_config() -> dict:
    """Read and parse the Azure client config JSON from the home directory.

    Raises:
        FileNotFoundError: if the config file is missing, with setup guidance.
    """
    if CLIENT_CONFIG_FILE.exists():
        return json.loads(CLIENT_CONFIG_FILE.read_text(encoding="utf-8"))
    raise FileNotFoundError(
        f"Configuration file not found at {CLIENT_CONFIG_FILE}. "
        "Please create it with {'client_id': '...', 'authority': '...'}"
    )
def build_msal_app() -> msal.PublicClientApplication:
    """Construct an MSAL public-client application with a persisted token cache.

    Loads the serialized cache from TOKEN_CACHE_FILE when present, and
    registers an atexit hook that writes it back only if it changed.
    """
    settings = get_client_config()
    app_id = settings.get("client_id")
    tenant_authority = settings.get("authority")
    if not (app_id and tenant_authority):
        raise ValueError("Config file must contain both 'client_id' and 'authority'")

    serializable_cache = msal.SerializableTokenCache()
    if TOKEN_CACHE_FILE.exists():
        serializable_cache.deserialize(TOKEN_CACHE_FILE.read_text(encoding="utf-8"))

    def _flush_cache():
        # Write back only when MSAL marked the cache dirty.
        if serializable_cache.has_state_changed:
            TOKEN_CACHE_FILE.write_text(serializable_cache.serialize(), encoding="utf-8")

    atexit.register(_flush_cache)
    return msal.PublicClientApplication(
        client_id=app_id,
        authority=tenant_authority,
        token_cache=serializable_cache,
    )
def get_access_token() -> str:
    """Return a Graph access token, trying the silent cache before the browser.

    Raises:
        RuntimeError: if neither path yields an access token.
    """
    app = build_msal_app()
    accounts = app.get_accounts()
    print(f"Cached accounts: {len(accounts)}")

    result = None
    if accounts:
        result = app.acquire_token_silent(SCOPES, account=accounts[0])
    if result is None or not result:
        print("No suitable token found in cache. Opening browser to login...")
        result = app.acquire_token_interactive(SCOPES)

    token = result.get("access_token")
    if token is None:
        error_msg = (
            result.get("error_description") or result.get("error") or "Unknown error"
        )
        raise RuntimeError(f"Could not authenticate: {error_msg}")
    return token
def convert_csv_to_xlsx_bytes(csv_text: str) -> bytes:
    """
    Convert CSV text -> XLSX bytes using Polars.
    Uses io.BytesIO to avoid writing a temp file to disk.
    """
    # Parse the CSV string in memory; read_csv accepts file-like objects.
    frame = pl.read_csv(io.StringIO(csv_text))
    # Render to an in-memory Excel workbook (Polars delegates to xlsxwriter).
    buffer = io.BytesIO()
    frame.write_excel(buffer)
    return buffer.getvalue()
def upload_xlsx_to_onedrive(
    token: str,
    xlsx_bytes: bytes,
    file_name: str,
    folder: str = "",
) -> dict:
    """
    Upload XLSX bytes to OneDrive root (or a folder under root).

    Args:
        token: OAuth bearer token for Microsoft Graph.
        xlsx_bytes: Workbook content to upload.
        file_name: Remote file name.
        folder: Optional folder path under the drive root ('' = root).

    Returns:
        The created DriveItem JSON.

    Raises:
        RuntimeError: on a non-2xx Graph response.
        requests.Timeout: if the request stalls past the timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
    # Normalize folder path
    folder = folder.strip().strip("/")
    remote_path = f"{folder}/{file_name}" if folder else file_name
    upload_url = (
        f"https://graph.microsoft.com/v1.0/me/drive/root:/{remote_path}:/content"
    )
    # Without a timeout, requests can hang indefinitely on a stalled connection.
    resp = requests.put(upload_url, headers=headers, data=xlsx_bytes, timeout=120)
    if not resp.ok:
        raise RuntimeError(f"Upload failed ({resp.status_code}): {resp.text}")
    return resp.json()
def delete_with_retry(token: str, file_id: str, max_tries: int = 8) -> None:
    """
    Delete a DriveItem, retrying on transient lock/conflict conditions.

    Args:
        token: OAuth bearer token for Microsoft Graph.
        file_id: DriveItem id to delete.
        max_tries: Maximum delete attempts before giving up.

    Raises:
        RuntimeError: on a non-retryable HTTP error, or when still locked
            after max_tries attempts.
    """
    url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "If-Match": "*",
    }
    for attempt in range(1, max_tries + 1):
        # Timeout prevents an indefinite hang on a stalled connection.
        resp = requests.delete(url, headers=headers, timeout=30)
        if resp.status_code in (204, 200):
            return
        if resp.status_code not in (423, 409):
            raise RuntimeError(f"Delete failed ({resp.status_code}): {resp.text}")
        if attempt == max_tries:
            # Bug fix: the original slept (up to ~30s) even after the final
            # attempt, then raised anyway — skip the pointless final wait.
            break
        # Exponential backoff with jitter, capped at 30s.
        sleep_s = min(30.0, 2.0**attempt) + random.random()
        print(
            f"Delete locked/conflict (HTTP {resp.status_code}). "
            f"Retry {attempt}/{max_tries} in {sleep_s:.1f}s..."
        )
        time.sleep(sleep_s)
    raise RuntimeError(f"Could not delete after {max_tries} tries; still locked.")
def main():
    """CLI entry point: convert CSV to XLSX with Polars and upload to OneDrive."""
    parser = argparse.ArgumentParser(
        description="Convert CSV to XLSX (via Polars) and upload to OneDrive."
    )
    parser.add_argument(
        "--csv-file", type=str, help="Path to a CSV file to upload."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="CSV text to upload (ignored if --csv-file is provided).",
    )
    parser.add_argument(
        "--name", type=str, default="polars_export.xlsx", help="Remote XLSX filename."
    )
    parser.add_argument(
        "--folder",
        type=str,
        default="",
        help="Optional OneDrive folder under root (e.g., 'Reports').",
    )
    parser.add_argument(
        "--delete-after",
        type=int,
        default=0,
        help="If > 0, wait N seconds then delete the uploaded file (with retry).",
    )
    args = parser.parse_args()

    # Pick the CSV source: file beats inline text beats built-in sample.
    fallback_csv = "ID,Name,Role,Stack\n1,Alice,Engineer,Rust\n2,Bob,Designer,Figma\n3,Charlie,Manager,Jira"
    if args.csv_file:
        src = Path(args.csv_file)
        if not src.exists():
            raise FileNotFoundError(f"CSV file not found: {src}")
        csv_text = src.read_text(encoding="utf-8")
    else:
        csv_text = args.text or fallback_csv

    token = get_access_token()

    print("Converting CSV to XLSX (via Polars)...")
    xlsx_bytes = convert_csv_to_xlsx_bytes(csv_text)
    print(f"Converted XLSX size: {len(xlsx_bytes)} bytes")

    print(f"Uploading '{args.name}' to OneDrive...")
    item = upload_xlsx_to_onedrive(
        token, xlsx_bytes, file_name=args.name, folder=args.folder
    )
    file_id = item.get("id")
    web_url = item.get("webUrl")
    print(f"Success!\nID: {file_id}\nLink: {web_url}")

    if args.delete_after > 0:
        print(f"Deleting in {args.delete_after} seconds...")
        time.sleep(args.delete_after)
        print("Deleting (with retry on lock/conflict)...")
        delete_with_retry(token, file_id)
        print("Deleted.")
# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    main()