cy0307's picture
Publish Ropedia Xperience-10M task baseline cards
eeac43c verified
{
"title": "Ropedia Xperience-10M Project Brief",
"summary": "A concise first-reader brief for the public-sample embodied-AI task lab, the first Qwen3-Omni diagnostic pilot, and the multi-model scale-up path.",
"research_intent": "Treat the public Xperience-10M sample as a small but real research system, then connect it to verified multi-episode experiments without presenting weak diagnostic results as final model quality.",
"capability_map": [
{
"capability": "Data understanding",
"evidence": "feature_manifest.json, available_modalities.json, modality atlas, and the episode-window HF viewer"
},
{
"capability": "Task design",
"evidence": "12 task contracts, task cards, case-study walkthroughs, and four research-direction extension probes"
},
{
"capability": "Evaluation rigor",
"evidence": "chronological split, per-task metrics, predictions, confusion matrices, leakage notes, and generated takeaways"
},
{
"capability": "Scale-up planning",
"evidence": "final verified 96/16/16 Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, Cosmos3-Nano compatibility branch, and policy-model candidates after action-space conversion"
}
],
"current_artifacts": [
{
"layer": "Data unit",
"status": "1 public sample episode, 5,821 frames, 1,161 synchronized 20-frame windows"
},
{
"layer": "Modalities",
"status": "Video-derived features, audio, depth, pose/SLAM, mocap, IMU, calibration, and language-derived features"
},
{
"layer": "Task suite",
"status": "12 embodied-AI task contracts with inputs, targets, metrics, predictions, and case-study walkthroughs"
},
{
"layer": "Models",
"status": "Minimal linear/ridge/logistic baselines plus compact PyTorch MLP heads for the same 12 tasks"
},
{
"layer": "Research map",
"status": "Four Ropedia research directions with direct, proxy, diagnostic, and extension-task coverage"
},
{
"layer": "Scale-up path",
"status": "A selected 96/16/16 Qwen3-Omni LoRA final diagnostic result is verified; strict-JSON validity meets target, while weak action/subtask metrics guide the next error-analysis pass"
}
],
"reading_order": [
"Start with the website or PROJECT_BRIEF.md to understand the project shape.",
"Open RESEARCH_ROADMAP.md to see how the work scales from the public sample to multi-episode modeling.",
"Open EVALUATION_PROTOCOL.md before comparing task scores.",
"Use RESEARCH_TAKEAWAYS.md for the current metric interpretation.",
"Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
],
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
"next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
"entry_points": {
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
"artifact_dataset": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
"baseline_model_bundle": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
"official_xperience10m_dataset": "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
}
}