naturalwellness-rlhf / data_sync.py
tarnava's picture
Upload folder using huggingface_hub
6e07610 verified
raw
history blame contribute delete
769 Bytes
# data_sync.py
import json
import os
from datasets import Dataset
from huggingface_hub import login
from dotenv import load_dotenv
# Let python-dotenv populate the process environment first, so the settings
# below can be supplied through a local .env file.
load_dotenv()

# Local JSON file holding the feedback records to upload.
FEEDBACK_FILE = "feedback.json"

# Hub access token; None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Destination dataset repository; the environment may override the default.
HF_DATASET_REPO = os.environ.get(
    "HF_DATASET_REPO", "modular-ai/rlhf_feedback_dataset"
)
def sync_to_hub():
    """Upload the local feedback file to the Hub as a private dataset.

    Reads ``FEEDBACK_FILE`` as JSON and, when it contains any records,
    logs in with ``HF_TOKEN`` and pushes the records to ``HF_DATASET_REPO``.
    A missing or empty feedback file is reported on stdout and nothing is
    uploaded.

    Raises:
        json.JSONDecodeError: If the feedback file is not valid JSON.
    """
    # Open directly and handle absence, rather than checking for existence
    # first: the file could disappear between the check and the open.
    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    try:
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print("No feedback file.")
        return

    if not data:
        print("No data to sync.")
        return

    # Authenticate only once there is something to upload, so a run with no
    # feedback does not touch credentials at all.
    login(token=HF_TOKEN)

    dataset = Dataset.from_list(data)
    dataset.push_to_hub(HF_DATASET_REPO, private=True)
    print(f"Pushed {len(data)} samples to {HF_DATASET_REPO}")
# Run a single sync when executed as a script; importing the module does not
# trigger an upload.
if __name__ == "__main__":
    sync_to_hub()