Spaces:
Sleeping
Sleeping
DVampire
committed on
Commit
·
a878541
1
Parent(s):
49c88c9
update website
Browse files
src/crawl/huggingface_daily.py
CHANGED
|
@@ -18,10 +18,10 @@ class HuggingFaceDailyPapers:
|
|
| 18 |
"""Extract arXiv ID from a URL"""
|
| 19 |
if not url:
|
| 20 |
return None
|
| 21 |
-
#
|
| 22 |
-
m = re.search(r"
|
| 23 |
if m:
|
| 24 |
-
return m.group(
|
| 25 |
return None
|
| 26 |
|
| 27 |
def extract_json_data(self, html: str) -> Dict[str, Any]:
|
|
|
|
| 18 |
"""Extract arXiv ID from a URL"""
|
| 19 |
if not url:
|
| 20 |
return None
|
| 21 |
+
# matches https://huggingface.co/papers/2508.10711
|
| 22 |
+
m = re.search(r"huggingface\.co/papers/(\d{4,5}\.\d+)(v\d+)?", url)
|
| 23 |
if m:
|
| 24 |
+
return m.group(1)
|
| 25 |
return None
|
| 26 |
|
| 27 |
def extract_json_data(self, html: str) -> Dict[str, Any]:
|