https://developers.cloudflare.com/browser-rendering/
Token verification
Fetch HTML REST API
curl -X 'POST' "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/browser-rendering/content" \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer ${CLOUDFLARE_API_KEY}" \
-d '{"url": "https://example.com"}'
URL to markdown REST API
curl -X 'POST' "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/browser-rendering/markdown" \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer ${CLOUDFLARE_BROWSER_RENDERING_API_KEY}" \
-d '{"url": "https://services.mde.maryland.gov/Application/SearchPermitTypes"}'
URL to Markdown (Python)
account_number = os.getenv('CLOUDFLARE_ACCOUNT_NUMBER')
api_key = os.getenv('CLOUDFLARE_BROWSER_RENDERING_API_KEY')
endpoint_url = f"https://api.cloudflare.com/client/v4/accounts/{account_number}/browser-rendering/markdown"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
data = {
"url": "https://services.mde.maryland.gov/Application/SearchPermitTypes"
}
try:
response = requests.post(endpoint_url, headers=headers, json=data)
response.raise_for_status()
markdown = response.json()["result"]
print(markdown)
except requests.exceptions.HTTPError as err:
print(f"HTTP error occurred: {err}")
except Exception as err:
print(f"An error occurred: {err}")
AI task
curl https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/ai/run/@cf/meta/llama-3-8b-instruct \
-H "Authorization: Bearer ${CLOUDFLARE_WORKERS_AI_API_KEY}" \
-d '{"messages":[{"role":"user","content":"test"}]}'
Local PDF to Markdown
https://developers.cloudflare.com/workers-ai/features/markdown-conversion/
curl "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/ai/tomarkdown" \
-X POST \
-H "Authorization: Bearer ${CLOUDFLARE_WORKERS_AI_API_KEY}" \
-F "files=@sb0818E.pdf"
Crawl REST API
curl -X POST "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/browser-rendering/crawl" \
-H "Authorization: Bearer ${CLOUDFLARE_BROWSER_RENDERING_API_KEY}" \
-H 'Content-Type: application/json' \
-d '{"url": "https://ai.maryland.gov/"}'
curl -X GET "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_NUMBER}/browser-rendering/crawl/{{ ID }}" \
-H "Authorization: Bearer ${CLOUDFLARE_BROWSER_RENDERING_API_KEY}"
Crawl (Python): collect crawled URLs
account_number = os.environ.get('CLOUDFLARE_ACCOUNT_NUMBER')
api_key = os.environ.get('CLOUDFLARE_BROWSER_RENDERING_API_KEY')
endpoint_url = f"https://api.cloudflare.com/client/v4/accounts/{account_number}/browser-rendering/crawl"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"url": "https://ai.maryland.gov/",
"limit": 100,
"options": {
"includePatterns":[
"https://ai.maryland.gov/**"
]
}
}
print("Starting crawl job...")
response = requests.post(endpoint_url, headers=headers, json=payload)
response.raise_for_status()
job_id = response.json()["result"]
print(f"Job ID: {job_id}")
get_url = f"https://api.cloudflare.com/client/v4/accounts/{account_number}/browser-rendering/crawl/{job_id}"
get_headers = {"Authorization": f"Bearer {api_key}"}
poll_url = f"{get_url}?limit=1"
while True:
get_response = requests.get(poll_url, headers=get_headers)
get_response.raise_for_status()
result_data = get_response.json().get("result", {})
status = result_data.get("status")
if status in [
"completed",
"cancelled_due_to_limits",
"cancelled_due_to_timeout",
"errored",
]:
print(f"Crawl finished with status: {status}")
break
print(f"Crawl status: {status}... waiting 5 seconds.")
sleep(5)
crawled_urls = set()
skipped_urls = []
other_status_urls = []
cursor = None
page_number = 1
print("\nFetching all records...")
while True:
paginated_url = f"{get_url}?cursor={cursor}" if cursor else get_url
page_response = requests.get(paginated_url, headers=get_headers)
page_response.raise_for_status()
page_data = page_response.json().get("result", {})
records = page_data.get("records", [])
print(f"Fetched page {page_number}: found {len(records)} records.")
for record in records:
status = record.get("status")
url = record.get("url")
if status in ["completed", "cancelled"]:
crawled_urls.add(url)
cursor = page_data.get("cursor")
if not cursor:
break
page_number += 1
print("\n--- Final Results ---")
sorted_urls = sorted(crawled_urls)
pprint.pprint(sorted_urls)
crawled_df = pd.DataFrame(sorted_urls, columns=['url'])
print(f"\nTotal unique completed URLs extracted across all pages: {len(crawled_urls)}")