File size: 769 Bytes
6e07610 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# data_sync.py
import json
import os
from datasets import Dataset
from huggingface_hub import login
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment; this must run before the os.getenv() lookups below.
load_dotenv()
# Hugging Face access token; None when HF_TOKEN is not set in the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
# JSON file read by sync_to_hub(); a relative path, so it is resolved against
# the current working directory.
FEEDBACK_FILE = "feedback.json"
# Destination dataset repo on the Hub; the HF_DATASET_REPO environment
# variable overrides the default.
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "modular-ai/rlhf_feedback_dataset")
def sync_to_hub():
    """Push the contents of FEEDBACK_FILE to the Hugging Face Hub.

    Reads FEEDBACK_FILE as UTF-8 JSON, builds a ``Dataset`` from it with
    ``Dataset.from_list`` and pushes it to HF_DATASET_REPO as a private
    dataset. Prints a short status message and returns ``None`` in every
    case.

    Nothing is uploaded (and no login is attempted) when the file is
    missing or its JSON content is empty.

    Raises:
        json.JSONDecodeError: If FEEDBACK_FILE is not valid JSON.
        Exception: Errors from ``login`` or the dataset build/upload
            propagate unchanged.
    """
    # EAFP: open directly instead of checking os.path.exists() first, so
    # there is no window between the check and the open.
    try:
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale-dependent default encoding.
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print("No feedback file.")
        return

    if not data:
        print("No data to sync.")
        return

    # Authenticate only once there is something to push, so a run with
    # nothing to sync makes no call to the Hub.
    login(token=HF_TOKEN)

    # NOTE(review): Dataset.from_list presumably needs `data` to be a list
    # of dict records -- confirm against whatever writes FEEDBACK_FILE.
    dataset = Dataset.from_list(data)
    dataset.push_to_hub(HF_DATASET_REPO, private=True)
    print(f"Pushed {len(data)} samples to {HF_DATASET_REPO}")
# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    sync_to_hub()