Spaces:
Sleeping
Sleeping
import json
import os
from time import sleep
from typing import Callable, Optional

import requests
# Base API URL
BASE_URL = "https://api.openalex.org/authors"
FILTER = "last_known_institutions.country_code:NO,x_concepts.id:C41008148"
PER_PAGE = 200
OUTPUT_DIR = "C41008148_authors"
REQUEST_TIMEOUT = 30  # Seconds to wait on the API before abandoning a request
REQUEST_DELAY = 1  # Seconds to pause between network requests (rate-limiting)


def page_filename(page_count: int, output_dir: str = OUTPUT_DIR) -> str:
    """Return the path of the JSON file for page *page_count*.

    Page numbers are zero-padded to 10 digits so that a lexical sort of the
    directory listing matches download order.
    """
    return os.path.join(output_dir, f"{page_count:010}.json")


def next_cursor(data: dict) -> Optional[str]:
    """Return ``data["meta"]["next_cursor"]``, or ``None`` if it is absent.

    A missing or non-dict ``meta`` is treated the same as a missing cursor.
    """
    meta = data.get("meta")
    if not isinstance(meta, dict):
        return None
    return meta.get("next_cursor")


def load_saved_page(filename: str) -> Optional[dict]:
    """Load a previously saved page from disk.

    Returns the parsed page when *filename* exists, parses as JSON and
    contains a ``meta`` object. Returns ``None`` when the file is missing,
    unreadable, truncated or lacks ``meta``; the caller then downloads it
    again.
    """
    try:
        with open(filename, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"File {filename} is unreadable ({e}), re-downloading...")
        return None
    if not isinstance(data, dict) or not isinstance(data.get("meta"), dict):
        print(f"File {filename} has no pagination metadata, re-downloading...")
        return None
    return data


def save_page(filename: str, data: dict) -> None:
    """Write *data* to *filename* as indented UTF-8 JSON.

    The data goes to a sibling ``.tmp`` file first and is then renamed over
    the target, so an interrupted run does not leave a half-written page
    under the final name.
    """
    tmp_name = filename + ".tmp"
    with open(tmp_name, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    os.replace(tmp_name, filename)


def fetch_page(cursor: str) -> dict:
    """Request one page of results from the API for the given *cursor*.

    Query parameters are passed via ``params`` so that the cursor and filter
    are URL-encoded by ``requests`` instead of being pasted into the URL.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
        ValueError: if the response body is not a JSON object.
    """
    params = {"filter": FILTER, "per-page": PER_PAGE, "cursor": cursor}
    response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()
    if not isinstance(data, dict):
        raise ValueError("unexpected response: top-level JSON is not an object")
    return data


def download_all(
    output_dir: str = OUTPUT_DIR,
    fetch: Callable[[str], dict] = fetch_page,
    delay: float = REQUEST_DELAY,
) -> bool:
    """Download every result page into *output_dir* using cursor pagination.

    Pages are stored one per file, numbered from 1. A page that is already on
    disk is not requested again: its stored ``next_cursor`` is used to
    continue, so a re-run resumes at the first missing page.
    NOTE(review): this assumes a cursor saved by an earlier run is still
    accepted by the API -- confirm against the OpenAlex paging documentation.

    Args:
        output_dir: Directory for the page files; created if missing.
        fetch: Callable that takes a cursor string and returns the parsed
            page. Defaults to :func:`fetch_page`.
        delay: Seconds to sleep after each page obtained via *fetch*.

    Returns:
        ``True`` if the last page was reached, ``False`` if a network, JSON
        or filesystem error stopped the run early.
    """
    os.makedirs(output_dir, exist_ok=True)

    cursor = "*"  # "*" asks the API for the first page
    page_count = 1  # Track page numbers for saving files

    while True:
        filename = page_filename(page_count, output_dir)
        data = load_saved_page(filename)
        from_disk = data is not None

        if from_disk:
            print(f"File {filename} already exists, skipping...")
        else:
            print(f"Fetching page {page_count} with cursor...")
            try:
                data = fetch(cursor)
                save_page(filename, data)
            except (requests.RequestException, ValueError, OSError) as e:
                print(f"Error on page {page_count}: {e}")
                print("Download stopped early; re-run to resume from the last saved page.")
                return False

        cursor = next_cursor(data)
        if not cursor:
            print("No more results.")
            break

        page_count += 1
        if not from_disk:
            # Only pause after real network traffic; pages read from disk
            # cost the API nothing.
            sleep(delay)

    print("Download complete using cursor pagination.")
    return True


if __name__ == "__main__":
    download_all()