# /// script
# requires-python = ">=3.14"
# dependencies = [
# "msal>=1.34.0",
# "requests>=2.32.5",
# "pypandoc>=1.16.2",
# "pypandoc_binary>=1.16.2",
# ]
# ///
import argparse
import atexit
import json
import os
import random
import time
import tempfile
from pathlib import Path
import msal
import requests
import pypandoc
CLIENT_CONFIG_FILE = Path.home() / ".azure_client.json"
TOKEN_CACHE_FILE = Path.home() / ".msal_token_cache.json"
SCOPES = ["Files.ReadWrite"]
def get_client_config() -> dict:
    """Load the Azure app registration config from the user's home directory.

    Returns:
        Parsed JSON dict expected to contain 'client_id' and 'authority'.

    Raises:
        FileNotFoundError: if the config file is missing, with setup instructions.
    """
    try:
        raw = CLIENT_CONFIG_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Configuration file not found at {CLIENT_CONFIG_FILE}. "
            "Please create it with {'client_id': '...', 'authority': '...'}"
        ) from None
    return json.loads(raw)
def build_msal_app() -> msal.PublicClientApplication:
    """Build an MSAL public-client app backed by an on-disk token cache.

    The cache is loaded from TOKEN_CACHE_FILE if present and written back at
    interpreter exit, but only when MSAL reports the cache state changed.
    """
    cfg = get_client_config()
    client_id = cfg.get("client_id")
    authority = cfg.get("authority")
    if not (client_id and authority):
        raise ValueError("Config file must contain both 'client_id' and 'authority'")

    token_cache = msal.SerializableTokenCache()
    if TOKEN_CACHE_FILE.exists():
        token_cache.deserialize(TOKEN_CACHE_FILE.read_text(encoding="utf-8"))

    def _save_cache():
        # Avoid touching disk unless MSAL actually mutated the cache.
        if token_cache.has_state_changed:
            TOKEN_CACHE_FILE.write_text(token_cache.serialize(), encoding="utf-8")

    atexit.register(_save_cache)
    return msal.PublicClientApplication(
        client_id=client_id,
        authority=authority,
        token_cache=token_cache,
    )
def get_access_token() -> str:
    """Acquire a Microsoft Graph access token, preferring the silent cache path.

    Falls back to interactive browser login when no cached token is usable.

    Raises:
        RuntimeError: if authentication fails outright.
    """
    app = build_msal_app()
    accounts = app.get_accounts()
    print(f"Cached accounts: {len(accounts)}")

    result = app.acquire_token_silent(SCOPES, account=accounts[0]) if accounts else None
    if not result:
        print("No suitable token found in cache. Opening browser to login...")
        result = app.acquire_token_interactive(SCOPES)

    if "access_token" in result:
        return result["access_token"]
    error_msg = (
        result.get("error_description") or result.get("error") or "Unknown error"
    )
    raise RuntimeError(f"Could not authenticate: {error_msg}")
def convert_md_to_docx_bytes(md_text: str) -> bytes:
    """
    Convert Markdown text -> DOCX bytes using Pandoc via pypandoc.
    pypandoc_binary supplies a bundled pandoc binary, so this works without a system install.
    """
    # A TemporaryDirectory cleans itself up, so no manual unlink is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "render.docx"
        pypandoc.convert_text(
            md_text,
            to="docx",
            format="md",
            outputfile=str(out_path),
            extra_args=["--wrap=preserve"],
        )
        return out_path.read_bytes()
def upload_docx_to_onedrive(
    token: str,
    docx_bytes: bytes,
    file_name: str,
    folder: str = "",
) -> dict:
    """
    Upload DOCX bytes to OneDrive root (or a folder under root) using simple upload (PUT /content).

    Args:
        token: OAuth bearer token for Microsoft Graph.
        docx_bytes: File content to upload.
        file_name: Remote file name.
        folder: Optional folder path under the drive root ('' = root).

    Returns:
        The created DriveItem JSON.

    Raises:
        RuntimeError: on a non-2xx Graph response.
        requests.Timeout: if the request stalls past the timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    }
    # Normalize folder path
    folder = folder.strip().strip("/")
    remote_path = f"{folder}/{file_name}" if folder else file_name
    upload_url = (
        f"https://graph.microsoft.com/v1.0/me/drive/root:/{remote_path}:/content"
    )
    # Without a timeout, requests can hang indefinitely on a stalled connection.
    resp = requests.put(upload_url, headers=headers, data=docx_bytes, timeout=120)
    if not resp.ok:
        raise RuntimeError(f"Upload failed ({resp.status_code}): {resp.text}")
    return resp.json()
def delete_with_retry(token: str, file_id: str, max_tries: int = 8) -> None:
    """
    Delete a DriveItem, retrying on transient lock/conflict conditions.
    Common right-after-upload scenario: 423 Locked / 409 Conflict due to indexing/preview generation.

    Args:
        token: OAuth bearer token for Microsoft Graph.
        file_id: DriveItem id to delete.
        max_tries: Maximum delete attempts before giving up.

    Raises:
        RuntimeError: on a non-retryable HTTP error, or when still locked
            after max_tries attempts.
    """
    url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "If-Match": "*",
    }
    for attempt in range(1, max_tries + 1):
        # Timeout prevents an indefinite hang on a stalled connection.
        resp = requests.delete(url, headers=headers, timeout=30)
        if resp.status_code in (204, 200):
            return
        if resp.status_code not in (423, 409):
            raise RuntimeError(f"Delete failed ({resp.status_code}): {resp.text}")
        if attempt == max_tries:
            # Bug fix: the original slept (up to ~30s) even after the final
            # attempt, then raised anyway — skip the pointless final wait.
            break
        # Exponential backoff with jitter, capped at 30s.
        sleep_s = min(30.0, 2.0**attempt) + random.random()
        print(
            f"Delete locked/conflict (HTTP {resp.status_code}). "
            f"Retry {attempt}/{max_tries} in {sleep_s:.1f}s..."
        )
        time.sleep(sleep_s)
    raise RuntimeError(f"Could not delete after {max_tries} tries; still locked.")
def main():
    """CLI entry point: convert Markdown to DOCX and upload it to OneDrive."""
    parser = argparse.ArgumentParser(
        description="Convert Markdown to DOCX and upload to OneDrive."
    )
    parser.add_argument(
        "--md-file", type=str, help="Path to a Markdown file to upload."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="Markdown text to upload (ignored if --md-file is provided).",
    )
    parser.add_argument(
        "--name", type=str, default="test_render.docx", help="Remote DOCX filename."
    )
    parser.add_argument(
        "--folder",
        type=str,
        default="",
        help="Optional OneDrive folder under root (e.g., 'Notes/Rendered').",
    )
    parser.add_argument(
        "--delete-after",
        type=int,
        default=0,
        help="If > 0, wait N seconds then delete the uploaded file (with retry).",
    )
    args = parser.parse_args()

    # Pick the markdown source: file beats inline text beats built-in sample.
    fallback_md = "This is markdown.\n\n# Header 1\n\n## Header 2\n\n- list item\n\n**bold** and *italic*"
    if args.md_file:
        src = Path(args.md_file)
        if not src.exists():
            raise FileNotFoundError(f"Markdown file not found: {src}")
        markdown_text = src.read_text(encoding="utf-8")
    else:
        markdown_text = args.text or fallback_md

    token = get_access_token()

    print("Converting Markdown to DOCX (pandoc via pypandoc)...")
    docx_bytes = convert_md_to_docx_bytes(markdown_text)
    print(f"Converted DOCX size: {len(docx_bytes)} bytes")

    print(f"Uploading '{args.name}' to OneDrive...")
    item = upload_docx_to_onedrive(
        token, docx_bytes, file_name=args.name, folder=args.folder
    )
    file_id = item.get("id")
    web_url = item.get("webUrl")
    print(f"Success!\nID: {file_id}\nLink: {web_url}")

    if args.delete_after > 0:
        print(f"Deleting in {args.delete_after} seconds...")
        time.sleep(args.delete_after)
        print("Deleting (with retry on lock/conflict)...")
        delete_with_retry(token, file_id)
        print("Deleted.")
# NOTE(review): this guard runs main() the moment the interpreter reaches this
# line — before the duplicate set of definitions later in this file is loaded.
# The file appears to be two standalone scripts concatenated; split them.
if __name__ == "__main__":
    main()
# ----------------------------------------------------------------------------
# NOTE(review): a SECOND PEP 723 metadata block and a duplicate set of
# definitions begin here — this file is two standalone scripts concatenated.
# The definitions below shadow those above; move this half to its own file.
# ----------------------------------------------------------------------------
# /// script
# requires-python = ">=3.14"
# dependencies = [
# "msal>=1.34.0",
# "requests>=2.32.5",
# "polars>=0.20.0",
# "xlsxwriter>=3.1.9",
# ]
# ///
import argparse
import atexit
import io
import json
import random
import time
from pathlib import Path
import msal
import requests
import polars as pl
CLIENT_CONFIG_FILE = Path.home() / ".azure_client.json"
TOKEN_CACHE_FILE = Path.home() / ".msal_token_cache.json"
SCOPES = ["Files.ReadWrite"]
def get_client_config() -> dict:
    """Read and parse the Azure client config JSON from the home directory.

    Raises:
        FileNotFoundError: if the config file is missing, with setup guidance.
    """
    if CLIENT_CONFIG_FILE.exists():
        return json.loads(CLIENT_CONFIG_FILE.read_text(encoding="utf-8"))
    raise FileNotFoundError(
        f"Configuration file not found at {CLIENT_CONFIG_FILE}. "
        "Please create it with {'client_id': '...', 'authority': '...'}"
    )
def build_msal_app() -> msal.PublicClientApplication:
    """Construct an MSAL public-client application with a persisted token cache.

    Loads the serialized cache from TOKEN_CACHE_FILE when present, and
    registers an atexit hook that writes it back only if it changed.
    """
    settings = get_client_config()
    app_id = settings.get("client_id")
    tenant_authority = settings.get("authority")
    if not (app_id and tenant_authority):
        raise ValueError("Config file must contain both 'client_id' and 'authority'")

    serializable_cache = msal.SerializableTokenCache()
    if TOKEN_CACHE_FILE.exists():
        serializable_cache.deserialize(TOKEN_CACHE_FILE.read_text(encoding="utf-8"))

    def _flush_cache():
        # Write back only when MSAL marked the cache dirty.
        if serializable_cache.has_state_changed:
            TOKEN_CACHE_FILE.write_text(serializable_cache.serialize(), encoding="utf-8")

    atexit.register(_flush_cache)
    return msal.PublicClientApplication(
        client_id=app_id,
        authority=tenant_authority,
        token_cache=serializable_cache,
    )
def get_access_token() -> str:
    """Return a Graph access token, trying the silent cache before the browser.

    Raises:
        RuntimeError: if neither path yields an access token.
    """
    app = build_msal_app()
    accounts = app.get_accounts()
    print(f"Cached accounts: {len(accounts)}")

    result = None
    if accounts:
        result = app.acquire_token_silent(SCOPES, account=accounts[0])
    if result is None or not result:
        print("No suitable token found in cache. Opening browser to login...")
        result = app.acquire_token_interactive(SCOPES)

    token = result.get("access_token")
    if token is None:
        error_msg = (
            result.get("error_description") or result.get("error") or "Unknown error"
        )
        raise RuntimeError(f"Could not authenticate: {error_msg}")
    return token
def convert_csv_to_xlsx_bytes(csv_text: str) -> bytes:
    """
    Convert CSV text -> XLSX bytes using Polars.
    Uses io.BytesIO to avoid writing a temp file to disk.
    """
    # Parse the CSV string in memory; read_csv accepts file-like objects.
    frame = pl.read_csv(io.StringIO(csv_text))
    # Render to an in-memory Excel workbook (Polars delegates to xlsxwriter).
    buffer = io.BytesIO()
    frame.write_excel(buffer)
    return buffer.getvalue()
def upload_xlsx_to_onedrive(
    token: str,
    xlsx_bytes: bytes,
    file_name: str,
    folder: str = "",
) -> dict:
    """
    Upload XLSX bytes to OneDrive root (or a folder under root).

    Args:
        token: OAuth bearer token for Microsoft Graph.
        xlsx_bytes: Workbook content to upload.
        file_name: Remote file name.
        folder: Optional folder path under the drive root ('' = root).

    Returns:
        The created DriveItem JSON.

    Raises:
        RuntimeError: on a non-2xx Graph response.
        requests.Timeout: if the request stalls past the timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
    # Normalize folder path
    folder = folder.strip().strip("/")
    remote_path = f"{folder}/{file_name}" if folder else file_name
    upload_url = (
        f"https://graph.microsoft.com/v1.0/me/drive/root:/{remote_path}:/content"
    )
    # Without a timeout, requests can hang indefinitely on a stalled connection.
    resp = requests.put(upload_url, headers=headers, data=xlsx_bytes, timeout=120)
    if not resp.ok:
        raise RuntimeError(f"Upload failed ({resp.status_code}): {resp.text}")
    return resp.json()
def delete_with_retry(token: str, file_id: str, max_tries: int = 8) -> None:
    """
    Delete a DriveItem, retrying on transient lock/conflict conditions.

    Args:
        token: OAuth bearer token for Microsoft Graph.
        file_id: DriveItem id to delete.
        max_tries: Maximum delete attempts before giving up.

    Raises:
        RuntimeError: on a non-retryable HTTP error, or when still locked
            after max_tries attempts.
    """
    url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "If-Match": "*",
    }
    for attempt in range(1, max_tries + 1):
        # Timeout prevents an indefinite hang on a stalled connection.
        resp = requests.delete(url, headers=headers, timeout=30)
        if resp.status_code in (204, 200):
            return
        if resp.status_code not in (423, 409):
            raise RuntimeError(f"Delete failed ({resp.status_code}): {resp.text}")
        if attempt == max_tries:
            # Bug fix: the original slept (up to ~30s) even after the final
            # attempt, then raised anyway — skip the pointless final wait.
            break
        # Exponential backoff with jitter, capped at 30s.
        sleep_s = min(30.0, 2.0**attempt) + random.random()
        print(
            f"Delete locked/conflict (HTTP {resp.status_code}). "
            f"Retry {attempt}/{max_tries} in {sleep_s:.1f}s..."
        )
        time.sleep(sleep_s)
    raise RuntimeError(f"Could not delete after {max_tries} tries; still locked.")
def main():
    """CLI entry point: convert CSV to XLSX with Polars and upload to OneDrive."""
    parser = argparse.ArgumentParser(
        description="Convert CSV to XLSX (via Polars) and upload to OneDrive."
    )
    parser.add_argument(
        "--csv-file", type=str, help="Path to a CSV file to upload."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="CSV text to upload (ignored if --csv-file is provided).",
    )
    parser.add_argument(
        "--name", type=str, default="polars_export.xlsx", help="Remote XLSX filename."
    )
    parser.add_argument(
        "--folder",
        type=str,
        default="",
        help="Optional OneDrive folder under root (e.g., 'Reports').",
    )
    parser.add_argument(
        "--delete-after",
        type=int,
        default=0,
        help="If > 0, wait N seconds then delete the uploaded file (with retry).",
    )
    args = parser.parse_args()

    # Pick the CSV source: file beats inline text beats built-in sample.
    fallback_csv = "ID,Name,Role,Stack\n1,Alice,Engineer,Rust\n2,Bob,Designer,Figma\n3,Charlie,Manager,Jira"
    if args.csv_file:
        src = Path(args.csv_file)
        if not src.exists():
            raise FileNotFoundError(f"CSV file not found: {src}")
        csv_text = src.read_text(encoding="utf-8")
    else:
        csv_text = args.text or fallback_csv

    token = get_access_token()

    print("Converting CSV to XLSX (via Polars)...")
    xlsx_bytes = convert_csv_to_xlsx_bytes(csv_text)
    print(f"Converted XLSX size: {len(xlsx_bytes)} bytes")

    print(f"Uploading '{args.name}' to OneDrive...")
    item = upload_xlsx_to_onedrive(
        token, xlsx_bytes, file_name=args.name, folder=args.folder
    )
    file_id = item.get("id")
    web_url = item.get("webUrl")
    print(f"Success!\nID: {file_id}\nLink: {web_url}")

    if args.delete_after > 0:
        print(f"Deleting in {args.delete_after} seconds...")
        time.sleep(args.delete_after)
        print("Deleting (with retry on lock/conflict)...")
        delete_with_retry(token, file_id)
        print("Deleted.")
# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    main()