File size: 769 Bytes
6e07610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# data_sync.py
import json
import os
from datasets import Dataset
from huggingface_hub import login
from dotenv import load_dotenv

# load_dotenv() must run before the environment lookups below so that values
# supplied through a .env file are visible to them.
load_dotenv()

# Hugging Face access token passed to login(); None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Local JSON file that sync_to_hub() reads and uploads.
FEEDBACK_FILE = "feedback.json"
# Target dataset repository on the Hub; override with the HF_DATASET_REPO
# environment variable.
HF_DATASET_REPO = os.environ.get(
    "HF_DATASET_REPO",
    "modular-ai/rlhf_feedback_dataset",
)

def sync_to_hub():
    """Upload the records stored in FEEDBACK_FILE to HF_DATASET_REPO.

    The file is parsed as JSON and must contain a list of records. When the
    file is missing or holds no records, a message is printed and nothing is
    uploaded (and no Hub login is attempted). Otherwise the records are
    turned into a ``Dataset`` and pushed with ``private=True``.

    Raises:
        json.JSONDecodeError: If FEEDBACK_FILE is not valid JSON.
        ValueError: If the JSON top-level value is not a list.
    """
    # EAFP: open directly instead of checking os.path.exists first, which
    # avoids a race between the check and the open.
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print("No feedback file.")
        return

    if not data:
        print("No data to sync.")
        return

    # Dataset.from_list needs a list of records; fail with a clear message
    # rather than an opaque error from inside the library.
    if not isinstance(data, list):
        raise ValueError(
            f"{FEEDBACK_FILE} must contain a JSON list of records, "
            f"got {type(data).__name__}"
        )

    # Authenticate only once there is something to push, so an empty run
    # never touches the network or prompts for credentials.
    login(token=HF_TOKEN)

    dataset = Dataset.from_list(data)
    dataset.push_to_hub(HF_DATASET_REPO, private=True)
    print(f"Pushed {len(data)} samples to {HF_DATASET_REPO}")

# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    sync_to_hub()