|
|
|
|
|
""" |
|
|
Pre-launch script to fix HuggingFace model cache. |
|
|
This copies necessary tokenizer files to the HuggingFace cache directory |
|
|
so the downloaded models can find them. |
|
|
""" |
|
|
|
|
|
import os |
|
|
import shutil |
|
|
from pathlib import Path |
|
|
|
|
|
def setup_hf_cache():
    """Copy tokenizer files to HuggingFace cache locations.

    Models downloaded with ``trust_remote_code`` are cached under
    ``~/.cache/huggingface/modules/transformers_modules/<org>/<model>/<revision>/``;
    this function copies the local ``stripedhyena/tokenizer.py`` and
    ``stripedhyena/utils.py`` into each cached revision directory so the
    remote model code can import them.

    Best-effort: missing source files or failed copies only print a
    warning; nothing is raised.
    """
    hf_cache = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"

    src_dir = Path(__file__).parent / "stripedhyena"
    local_tokenizer = src_dir / "tokenizer.py"
    local_utils = src_dir / "utils.py"

    # Bail out early if either source file is missing; otherwise every
    # copy attempt below would fail and emit one warning per cached
    # revision directory. (The original only checked tokenizer.py.)
    for src in (local_tokenizer, local_utils):
        if not src.exists():
            print(f"Warning: Local tokenizer not found at {src}")
            return

    model_dirs = [
        "togethercomputer/evo-1-8k-base",
        "togethercomputer/evo-1-131k-base",
    ]

    for model_dir in model_dirs:
        model_path = hf_cache / model_dir
        if not model_path.exists():
            # Model was never downloaded on this machine — nothing to patch.
            continue
        # Each subdirectory is one cached revision (commit hash) of the model.
        for version_dir in model_path.iterdir():
            if not version_dir.is_dir():
                continue
            try:
                shutil.copy2(local_tokenizer, version_dir / "tokenizer.py")
                shutil.copy2(local_utils, version_dir / "utils.py")
                print(f"✓ Copied tokenizer to {version_dir}")
            except OSError as e:
                # Narrowed from `except Exception`: shutil.copy2 failures
                # (permissions, disk, races) surface as OSError subclasses.
                print(f"Warning: Could not copy to {version_dir}: {e}")
|
|
|
|
|
# Script entry point: patch the HF cache when executed directly
# (does nothing when this module is imported).
if __name__ == "__main__":
    setup_hf_cache()
|
|
|