File size: 1,801 Bytes
784595b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
"""
Pre-launch script to fix HuggingFace model cache.
This copies necessary tokenizer files to the HuggingFace cache directory
so the downloaded models can find them.
"""

import os
import shutil
from pathlib import Path

def _default_modules_cache():
    """Return the ``transformers_modules`` directory for this environment.

    Resolution order: ``$HF_MODULES_CACHE``, then ``$HF_HOME/modules``, then
    ``$XDG_CACHE_HOME/huggingface/modules``, then
    ``~/.cache/huggingface/modules``. Empty variables are treated as unset.

    NOTE(review): this mirrors the environment variables documented by
    HuggingFace; confirm against the transformers version actually installed.
    """
    explicit = os.environ.get("HF_MODULES_CACHE")
    if explicit:
        modules_root = Path(explicit).expanduser()
    else:
        hf_home = os.environ.get("HF_HOME")
        if hf_home:
            home = Path(hf_home).expanduser()
        else:
            xdg = os.environ.get("XDG_CACHE_HOME")
            cache_root = Path(xdg).expanduser() if xdg else Path.home() / ".cache"
            home = cache_root / "huggingface"
        modules_root = home / "modules"
    return modules_root / "transformers_modules"


def setup_hf_cache(*, hf_cache=None, source_dir=None):
    """Copy the local tokenizer files into HuggingFace dynamic-module caches.

    For every existing version subdirectory of the known Evo model cache
    directories, ``tokenizer.py`` (required) and ``utils.py`` (copied when
    present) are copied from ``source_dir``. The operation is best-effort:
    problems are reported on stdout as warnings and never raised.

    Args:
        hf_cache: Path of the ``transformers_modules`` cache directory.
            Defaults to the location derived from ``HF_MODULES_CACHE`` /
            ``HF_HOME`` / ``XDG_CACHE_HOME``, falling back to
            ``~/.cache/huggingface/modules/transformers_modules``.
        source_dir: Directory holding ``tokenizer.py`` and ``utils.py``.
            Defaults to the ``stripedhyena`` directory next to this file.

    Returns:
        None.
    """
    cache_root = Path(hf_cache) if hf_cache is not None else _default_modules_cache()
    if source_dir is not None:
        local_dir = Path(source_dir)
    else:
        local_dir = Path(__file__).parent / "stripedhyena"

    local_tokenizer = local_dir / "tokenizer.py"
    local_utils = local_dir / "utils.py"

    if not local_tokenizer.is_file():
        print(f"Warning: Local tokenizer not found at {local_tokenizer}")
        return

    # utils.py is copied alongside the tokenizer when available; report its
    # absence once up front instead of failing in every version directory.
    sources = [local_tokenizer]
    if local_utils.is_file():
        sources.append(local_utils)
    else:
        print(f"Warning: Local utils not found at {local_utils}")

    # Model cache locations that might be created.
    # NOTE(review): some transformers versions may rewrite repo names when
    # building these paths; confirm the directory names match on disk.
    model_dirs = [
        "togethercomputer/evo-1-8k-base",
        "togethercomputer/evo-1-131k-base",
    ]

    for model_dir in model_dirs:
        model_path = cache_root / model_dir
        # is_dir() also rejects a stray regular file, which iterdir() would
        # otherwise turn into an uncaught NotADirectoryError.
        if not model_path.is_dir():
            continue

        # Each subdirectory is one cached version of the model's code.
        for version_dir in sorted(model_path.iterdir()):
            if not version_dir.is_dir():
                continue
            try:
                for source in sources:
                    shutil.copy2(source, version_dir / source.name)
                print(f"✓ Copied tokenizer to {version_dir}")
            except Exception as e:
                # Deliberately broad: this is a best-effort pre-launch step
                # and must not prevent the application from starting.
                print(f"Warning: Could not copy to {version_dir}: {e}")

# Run the cache fix only when this file is executed as a script, not on import.
if __name__ == "__main__":
    setup_hf_cache()