#!/usr/bin/env python3
"""
Pre-launch script to fix the HuggingFace model cache.

Copies the necessary tokenizer files into the HuggingFace cache directory
so the downloaded models can find them.
"""

import shutil
from pathlib import Path


def setup_hf_cache():
    """Copy tokenizer files to HuggingFace cache locations."""
    # Default HuggingFace cache base for trust_remote_code modules
    # (a custom HF_HOME is not handled here).
    hf_cache = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"

    # Our local stripedhyena tokenizer and its helper module
    local_tokenizer = Path(__file__).parent / "stripedhyena" / "tokenizer.py"
    local_utils = Path(__file__).parent / "stripedhyena" / "utils.py"

    if not local_tokenizer.exists() or not local_utils.exists():
        print(f"Warning: Local tokenizer files not found under {local_tokenizer.parent}")
        return

    # Model cache locations that might be created
    model_dirs = [
        "togethercomputer/evo-1-8k-base",
        "togethercomputer/evo-1-131k-base",
    ]

    for model_dir in model_dirs:
        model_path = hf_cache / model_dir
        if not model_path.exists():
            continue
        # Each downloaded revision gets its own subdirectory; patch all of them.
        for version_dir in model_path.iterdir():
            if version_dir.is_dir():
                try:
                    shutil.copy2(local_tokenizer, version_dir / "tokenizer.py")
                    shutil.copy2(local_utils, version_dir / "utils.py")
                    print(f"✓ Copied tokenizer to {version_dir}")
                except OSError as e:
                    print(f"Warning: Could not copy to {version_dir}: {e}")


if __name__ == "__main__":
    setup_hf_cache()
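
# Usage sketch (an assumption, not part of this script's contract): the cache
# directories above only exist after a first download has populated them, so
# setup_hf_cache() would typically be called before (re)loading the model with
# trust_remote_code=True. The model name below matches the model_dirs list;
# any other arguments are illustrative.
#
#     from transformers import AutoModelForCausalLM
#
#     setup_hf_cache()  # patch cached module dirs first
#     model = AutoModelForCausalLM.from_pretrained(
#         "togethercomputer/evo-1-8k-base",
#         trust_remote_code=True,
#     )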