André Oliveira committed · Commit 59e6760 · Parent(s): 499d53b

Initial MCP Space push
- .gitignore +76 -0
- LICENSE +19 -0
- README.md +70 -10
- api.py +347 -0
- app.py +50 -0
- models.py +133 -0
- requirements.txt +6 -0
- server.py +7 -0
.gitignore
ADDED
@@ -0,0 +1,76 @@
+# ---- System files ----
+.DS_Store
+.idea/
+.vscode/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.so
+*.egg
+*.egg-info/
+.Python
+.env
+.venv
+env/
+venv/
+ENV/
+.ipynb_checkpoints/
+
+# ---- Build / packaging ----
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.manifest
+*.spec
+
+# ---- Logs and temp ----
+*.log
+pip-log.txt
+pip-delete-this-directory.txt
+coverage.xml
+htmlcov/
+.tox/
+.nox/
+.cache/
+.pytest_cache/
+.mypy_cache/
+.dmypy.json
+.pyre/
+
+# ---- IDEs ----
+# Already added:
+# .idea/
+# .vscode/
+
+# ---- Configs ----
+*.env.local
+*.env.production
+*.env.development
+
+# ---- RAGMint specific ----
+# Ignore raw datasets and local embeddings
+data/raw/
+data/interim/
+data/tmp/
+outputs/
+models/
+notebooks/
+data/docs/
+data/
+
+# ---- OS ----
+Thumbs.db
+
+structure.txt
+.pypirc
+leaderboard.jsonl
+archive
+experiments
LICENSE
ADDED
@@ -0,0 +1,19 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+Copyright 2025 André Oliveira
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
README.md
CHANGED
@@ -1,13 +1,73 @@
-[previous 13-line README removed; its content is collapsed in this view]
+# Ragmint MCP HF Space
+
+This project is a **Ragmint MCP + Gradio Dashboard** designed for Hugging Face Spaces.
+It allows users to:
+
+- Optimize RAG pipelines
+- Run autotune for RAG parameters
+- Generate QA datasets
+- Monitor corpus stats and leaderboard
+
+The MCP backend handles all computations, and the Gradio frontend communicates with it via async HTTP requests.
+
+---
+
+## Features
+
+1. **Health Check** → Confirm the MCP backend is running.
+2. **Optimize RAG** → Run RAG optimization using user-defined parameters.
+3. **Autotune RAG** → Automatically tune chunk sizes, overlaps, and embedding models.
+4. **Generate QA** → Generate validation QA sets dynamically using an LLM.
+
+---
+
+## Usage
+
+### MCP Server (backend)
+
+Install dependencies and start the MCP server:
+
+```bash
+pip install -r requirements.txt
+python ragmint_mcp.py
+```
+
+The server runs on `http://127.0.0.1:8000`.
+
+
+### Gradio Dashboard (frontend)
+
+Install dependencies (if not already):
+```
+pip install -r requirements.txt
+```
+
+
+### Launch the Gradio frontend:
+
+```
+python app.py
+```
+
+
+The dashboard runs on `http://127.0.0.1:7860`.
+
+---
+## File Structure
+```
+.
+├── app.py            # Gradio frontend
+├── ragmint_mcp.py    # MCP server
+├── models.py         # Pydantic models
+├── README.md
+├── requirements.txt
+└── data/docs         # Example documents and QA sets
+```
+---
+## License
+
+Apache 2.0
+
+<p align="center">
+<sub>Built with ❤️ by <a href="https://andyolivers.com">André Oliveira</a> | Apache 2.0 License</sub>
+</p>
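Since the README describes the Gradio frontend driving the MCP backend over HTTP, here is a minimal sketch of calling the backend directly. It assumes the FastAPI server from this commit is running locally on port 8000 and that a `data/docs` folder with documents exists; field names mirror `OptimizeRequest` in `models.py`, and the specific values are illustrative only.

```python
# Minimal sketch: call the MCP backend directly over HTTP.
# Assumes api.py's server is listening on http://127.0.0.1:8000
# and that ./data/docs contains documents to optimize over.
import json
import requests

BASE = "http://127.0.0.1:8000"

# 1) Health check: confirms the server is up and whether ragmint imported cleanly.
print(requests.get(f"{BASE}/health", timeout=10).json())

# 2) Optimization request; field names mirror OptimizeRequest in models.py.
payload = {
    "docs_path": "data/docs",
    "retriever": ["faiss"],
    "embedding_model": ["sentence-transformers/all-MiniLM-L6-v2"],
    "chunk_sizes": [200, 400],
    "overlaps": [50],
    "trials": 3,
    "metric": "faithfulness",
    "validation_choice": "generate",
}
resp = requests.post(f"{BASE}/optimize_rag", json=payload, timeout=3600)
print(json.dumps(resp.json(), indent=2)[:2000])  # truncated preview of the result
```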
api.py
ADDED
@@ -0,0 +1,347 @@
+from __future__ import annotations
+import os
+import json
+import logging
+import time
+
+from models import OptimizeRequest, QARequest, AutotuneRequest
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+
+try:
+    from ragmint.autotuner import AutoRAGTuner
+    from ragmint.qa_generator import generate_validation_qa
+    from ragmint.explainer import explain_results
+    from ragmint.leaderboard import Leaderboard
+    from ragmint.tuner import RAGMint
+except Exception as e:
+    AutoRAGTuner = None
+    generate_validation_qa = None
+    explain_results = None
+    Leaderboard = None
+    RAGMint = None
+    _import_error = e
+else:
+    _import_error = None
+
+from dotenv import load_dotenv
+load_dotenv()
+
+# Logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("ragmint_mcp_server")
+
+# FastAPI
+app = FastAPI(title="Ragmint MCP Server", version="0.1.0")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+DEFAULT_DATA_DIR = "../data/docs"
+LEADERBOARD_STORAGE = "experiments/leaderboard.jsonl"
+os.makedirs("../experiments", exist_ok=True)
+
+
+@app.get("/health")
+def health():
+    return {
+        "status": "ok",
+        "ragmint_imported": _import_error is None,
+        "import_error": str(_import_error) if _import_error else None,
+    }
+
+
+@app.post("/optimize_rag")
+def optimize_rag(req: OptimizeRequest):
+    logger.info("Received optimize_rag request: %s", req.json())
+
+    if RAGMint is None:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Ragmint imports failed or RAGMint unavailable: {_import_error}"
+        )
+
+    docs_path = req.docs_path or DEFAULT_DATA_DIR
+    if not os.path.isdir(docs_path):
+        raise HTTPException(status_code=400, detail=f"docs_path does not exist: {docs_path}")
+
+    try:
+        # Build RAGMint exactly from request
+        rag = RAGMint(
+            docs_path=docs_path,
+            retrievers=req.retriever,
+            embeddings=req.embedding_model,
+            rerankers=(req.rerankers or ["mmr"]),
+            chunk_sizes=req.chunk_sizes,
+            overlaps=req.overlaps,
+            strategies=req.strategy,
+        )
+
+        # Validation selection
+        validation_set = None
+        validation_choice = (req.validation_choice or "").strip()
+        default_val_path = os.path.join(docs_path, "validation_qa.json")
+
+        # Auto
+        if not validation_choice:
+            if os.path.exists(default_val_path):
+                validation_set = default_val_path
+                logger.info("Using default validation set: %s", validation_set)
+            else:
+                logger.warning("No validation_choice provided and no default found.")
+                validation_set = None
+
+        # Remote HF dataset
+        elif "/" in validation_choice and not os.path.exists(validation_choice):
+            validation_set = validation_choice
+            logger.info("Using Hugging Face validation dataset: %s", validation_set)
+
+        # Local file
+        elif os.path.exists(validation_choice):
+            validation_set = validation_choice
+            logger.info("Using local validation dataset: %s", validation_set)
+
+        # Generate
+        elif validation_choice.lower() == "generate":
+            try:
+                gen_path = os.path.join(docs_path, "validation_qa.json")
+                generate_validation_qa(
+                    docs_path=docs_path,
+                    output_path=gen_path,
+                    llm_model=req.llm_model if hasattr(req, "llm_model") else "gemini-2.5-flash-lite"
+                )
+                validation_set = gen_path
+                logger.info("Generated new validation QA set at: %s", validation_set)
+            except Exception as e:
+                logger.exception("Failed to generate validation QA dataset: %s", e)
+                raise HTTPException(status_code=500, detail=f"Failed to generate validation QA dataset: {e}")
+
+        # Optimize
+        start_time = time.time()
+        best, results = rag.optimize(
+            validation_set=validation_set,
+            metric=req.metric,
+            trials=req.trials,
+            search_type=req.search_type
+        )
+        elapsed = time.time() - start_time
+
+        run_id = f"opt_{int(time.time())}"
+
+        # Corpus stats
+        try:
+            corpus_stats = {
+                "num_docs": len(rag.documents),
+                "avg_len": sum(len(d.split()) for d in rag.documents) / max(1, len(rag.documents)),
+                "corpus_size": sum(len(d) for d in rag.documents),
+            }
+        except Exception:
+            corpus_stats = None
+
+        # Leaderboard
+        try:
+            if Leaderboard:
+                lb = Leaderboard()
+                lb.upload(
+                    run_id=run_id,
+                    best_config=best,
+                    best_score=best.get("faithfulness", best.get("score", 0.0)),
+                    all_results=results,
+                    documents=os.listdir(docs_path),
+                    model=best.get("embedding_model", req.embedding_model),
+                    corpus_stats=corpus_stats,
+                )
+        except Exception:
+            logger.exception("Leaderboard persistence failed for optimize_rag")
+
+        return {
+            "status": "finished",
+            "run_id": run_id,
+            "elapsed_seconds": elapsed,
+            "best_config": best,
+            "results": results,
+            "corpus_stats": corpus_stats,
+        }
+
+    except Exception as exc:
+        logger.exception("optimize_rag failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@app.post("/autotune_rag")
+def autotune_rag(req: AutotuneRequest):
+    logger.info("Received autotune_rag request: %s", req.json())
+
+    if AutoRAGTuner is None or RAGMint is None:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Ragmint autotuner/RAGMint imports failed: {_import_error}"
+        )
+
+    docs_path = req.docs_path or DEFAULT_DATA_DIR
+    if not os.path.isdir(docs_path):
+        raise HTTPException(status_code=400, detail=f"docs_path does not exist: {docs_path}")
+
+    try:
+        start_time = time.time()
+
+        tuner = AutoRAGTuner(docs_path=docs_path)
+        rec = tuner.recommend(
+            embedding_model=req.embedding_model,
+            num_chunk_pairs=req.num_chunk_pairs
+        )
+
+        chunk_candidates = tuner.suggest_chunk_sizes(
+            model_name=rec.get("embedding_model"),
+            num_pairs=int(req.num_chunk_pairs),
+            step=20
+        )
+
+        chunk_sizes = sorted({c for c, _ in chunk_candidates})
+        overlaps = sorted({o for _, o in chunk_candidates})
+
+        rag = RAGMint(
+            docs_path=docs_path,
+            retrievers=[rec["retriever"]],
+            embeddings=[rec["embedding_model"]],
+            rerankers=["mmr"],
+            chunk_sizes=chunk_sizes,
+            overlaps=overlaps,
+            strategies=[rec["strategy"]],
+        )
+
+        # Validation selection
+        validation_set = None
+        validation_choice = (req.validation_choice or "").strip()
+        default_val_path = os.path.join(docs_path, "validation_qa.jsonl")
+
+        if not validation_choice:
+            if os.path.exists(default_val_path):
+                validation_set = default_val_path
+                logger.info("Using default validation set: %s", validation_set)
+            else:
+                logger.warning("No validation_choice provided and no default found.")
+                validation_set = None
+
+        elif "/" in validation_choice and not os.path.exists(validation_choice):
+            validation_set = validation_choice
+
+        elif os.path.exists(validation_choice):
+            validation_set = validation_choice
+
+        elif validation_choice.lower() == "generate":
+            try:
+                gen_path = os.path.join(docs_path, "validation_qa.json")
+                generate_validation_qa(
+                    docs_path=docs_path,
+                    output_path=gen_path,
+                    llm_model=req.llm_model if hasattr(req, "llm_model") else "gemini-2.5-flash-lite",
+                )
+                validation_set = gen_path
+            except Exception as e:
+                logger.exception("Failed to generate validation QA dataset: %s", e)
+                raise HTTPException(status_code=500, detail=f"Failed to generate validation QA dataset: {e}")
+
+        # Full optimize
+        best, results = rag.optimize(
+            validation_set=validation_set,
+            metric=req.metric,
+            search_type=req.search_type,
+            trials=req.trials,
+        )
+        elapsed = time.time() - start_time
+
+        run_id = f"autotune_{int(time.time())}"
+
+        # Corpus stats
+        try:
+            corpus_stats = {
+                "num_docs": len(rag.documents),
+                "avg_len": sum(len(d.split()) for d in rag.documents) / max(1, len(rag.documents)),
+                "corpus_size": sum(len(d) for d in rag.documents),
+            }
+        except Exception:
+            corpus_stats = None
+
+        # Leaderboard
+        try:
+            if Leaderboard:
+                lb = Leaderboard()
+                lb.upload(
+                    run_id=run_id,
+                    best_config=best,
+                    best_score=best.get("faithfulness", best.get("score", 0.0)),
+                    all_results=results,
+                    documents=os.listdir(docs_path),
+                    model=best.get("embedding_model", rec.get("embedding_model")),
+                    corpus_stats=corpus_stats,
+                )
+        except Exception:
+            logger.exception("Leaderboard persistence failed for autotune_rag")
+
+        return {
+            "status": "finished",
+            "run_id": run_id,
+            "elapsed_seconds": elapsed,
+            "recommendation": rec,
+            "chunk_candidates": chunk_candidates,
+            "best_config": best,
+            "results": results,
+            "corpus_stats": corpus_stats,
+        }
+
+    except Exception as exc:
+        logger.exception("autotune_rag failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@app.post("/generate_validation_qa")
+def generate_qa(req: QARequest):
+    logger.info("Received generate_validation_qa request: %s", req.json())
+
+    if generate_validation_qa is None:
+        raise HTTPException(status_code=500, detail=f"Ragmint imports failed: {_import_error}")
+
+    try:
+        out_path = "data/docs/validation_qa.json"
+        os.makedirs(os.path.dirname(out_path), exist_ok=True)
+
+        generate_validation_qa(
+            docs_path=req.docs_path,
+            output_path=out_path,
+            llm_model=req.llm_model,
+            batch_size=req.batch_size,
+            min_q=req.min_q,
+            max_q=req.max_q,
+        )
+
+        with open(out_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        return {
+            "status": "finished",
+            "output_path": out_path,
+            "preview_count": len(data),
+            "sample": data[:5],
+        }
+
+    except Exception as exc:
+        logger.exception("generate_validation_qa failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+# -----------------------
+# FastAPI launch
+# -----------------------
+
+def main():
+    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
+
+
+if __name__ == "__main__":
+    main()
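The most intricate part of `api.py` is how `validation_choice` is resolved before `rag.optimize()` runs. The sketch below restates that decision order as a standalone helper; it is an illustration only, not part of the commit, and the dataset ID in the comment is a made-up example.

```python
# Illustration only (not part of the commit): the validation_choice resolution
# order used by /optimize_rag and /autotune_rag, extracted into one function.
import os
from typing import Optional


def resolve_validation_set(validation_choice: Optional[str], docs_path: str) -> Optional[str]:
    choice = (validation_choice or "").strip()
    default_path = os.path.join(docs_path, "validation_qa.json")

    if not choice:
        # Blank -> use the default file next to the docs, if it exists.
        return default_path if os.path.exists(default_path) else None
    if "/" in choice and not os.path.exists(choice):
        # Treated as a Hugging Face dataset ID (contains "/" and is not a local path).
        return choice
    if os.path.exists(choice):
        # A local JSON/JSONL file.
        return choice
    if choice.lower() == "generate":
        # In api.py this branch calls generate_validation_qa() and returns the
        # generated file's path; here we just return the target path.
        return default_path
    return None


print(resolve_validation_set("generate", "data/docs"))
```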
app.py
ADDED
@@ -0,0 +1,50 @@
+import gradio as gr
+import requests
+import json
+import server
+
+API_URL = "http://localhost:8000"
+
+def optimize_rag_tool(payload: str) -> str:
+    """Run RAGMint full optimization workflow.
+
+    Args:
+        payload: JSON string containing OptimizeRequest parameters.
+
+    Returns:
+        JSON result with best config and leaderboard stats.
+    """
+    r = requests.post(f"{API_URL}/optimize_rag", json=json.loads(payload))
+    return json.dumps(r.json(), indent=2)
+
+def autotune_tool(payload: str) -> str:
+    """Run AutoRAG tuner to recommend best configs and optimize.
+
+    Args:
+        payload: JSON string for AutotuneRequest
+
+    Returns:
+        JSON result for tuning and full optimization.
+    """
+    r = requests.post(f"{API_URL}/autotune_rag", json=json.loads(payload))
+    return json.dumps(r.json(), indent=2)
+
+def generate_qa_tool(payload: str) -> str:
+    """Generate validation QA set automatically with Gemini or Anthropic.
+
+    Args:
+        payload: JSON string for QARequest
+
+    Returns:
+        JSON preview of generated dataset
+    """
+    r = requests.post(f"{API_URL}/generate_validation_qa", json=json.loads(payload))
+    return json.dumps(r.json(), indent=2)
+
+demo = gr.Interface(
+    fn=optimize_rag_tool,
+    inputs=gr.Textbox(lines=12, label="OptimizeRequest JSON"),
+    outputs=gr.Textbox(label="Response")
+)
+
+demo.launch(mcp_server=True)
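`app.py` also defines `autotune_tool` and `generate_qa_tool`, although only the optimize tool is wired into the `gr.Interface`. The sketch below shows the kind of AutotuneRequest-shaped payload that tool would forward; it assumes the backend from this commit is listening on localhost:8000, and the values are illustrative rather than recommended settings.

```python
# Sketch: calling the autotune endpoint that autotune_tool() wraps.
# Assumes api.py's server is listening on http://localhost:8000.
import requests

payload = {
    "docs_path": "data/docs",
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "num_chunk_pairs": 3,
    "trials": 2,
    "validation_choice": "generate",
}
resp = requests.post("http://localhost:8000/autotune_rag", json=payload, timeout=3600)
result = resp.json()
print(result.get("recommendation"))    # retriever/embedding/strategy suggested by AutoRAGTuner
print(result.get("chunk_candidates"))  # (chunk_size, overlap) pairs that were evaluated
```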
models.py
ADDED
@@ -0,0 +1,133 @@
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel, Field
+
+
+
+# Models
+class OptimizeRequest(BaseModel):
+    """
+    Explicit optimization request: user provides all pipeline configs manually.
+    """
+    docs_path: Optional[str] = Field(
+        default="data/docs",
+        description="Folder containing your documents for RAG optimization. Example: 'data/docs'"
+    )
+    retriever: Optional[List[str]] = Field(
+        description="Retriever type to use. Example: 'bm25', 'faiss', 'chroma'",
+        default=['faiss']
+    )
+    embedding_model: Optional[List[str]] = Field(
+        description="Embedding model name or path. Example: 'sentence-transformers/all-MiniLM-L6-v2'",
+        default=['sentence-transformers/all-MiniLM-L6-v2']
+    )
+    strategy: Optional[List[str]] = Field(
+        description="RAG strategy name. Example: 'fixed', 'token', 'sentence'",
+        default=['fixed']
+    )
+    chunk_sizes: Optional[List[int]] = Field(
+        description="List of chunk sizes to evaluate. Example: [200, 400, 600]",
+        default=[200, 400, 600]
+    )
+    overlaps: Optional[List[int]] = Field(
+        description="List of overlap values to test. Example: [50, 100, 200]",
+        default=[50, 100, 200]
+    )
+    rerankers: Optional[List[str]] = Field(
+        default=["mmr"],
+        description="Rerankers to apply after retrieval. Default: ['mmr']"
+    )
+    search_type: Optional[str] = Field(
+        default="grid",
+        description="Search method to explore parameter space. Options: 'grid', 'random', 'bayesian'"
+    )
+    trials: Optional[int] = Field(
+        default=5,
+        description="Number of optimization trials to run."
+    )
+    metric: Optional[str] = Field(
+        default="faithfulness",
+        description="Evaluation metric for optimization. Options: 'faithfulness'"
+    )
+    validation_choice: Optional[str] = Field(
+        default='generate',
+        description=(
+            "Validation data source. Options:\n"
+            " - Leave blank → use default 'validation_qa.json' if available\n"
+            " - 'generate' → auto-generate a validation QA file from your docs\n"
+            " - Path to a local JSON file (e.g. 'data/validation_qa.json')\n"
+            " - Hugging Face dataset ID (e.g. 'squad')"
+        )
+    )
+    llm_model: Optional[str] = Field(
+        default="gemini-2.5-flash-lite",
+        description="LLM used to generate QA dataset when validation_choice='generate'. Example: 'gemini-pro', 'gpt-4o-mini'"
+    )
+
+
+
+class AutotuneRequest(BaseModel):
+    docs_path: Optional[str] = Field(
+        default="data/docs",
+        description="Folder containing your documents for RAG optimization. Example: 'data/docs'"
+    )
+    embedding_model: Optional[str] = Field(
+        default="sentence-transformers/all-MiniLM-L6-v2",
+        description="Embedding model name or path. Example: 'sentence-transformers/all-MiniLM-L6-v2'"
+    )
+    num_chunk_pairs: Optional[int] = Field(
+        default=5,
+        description="Number of chunk pairs to analyze for tuning."
+    )
+    metric: Optional[str] = Field(
+        default="faithfulness",
+        description="Evaluation metric for optimization. Options: 'faithfulness'"
+    )
+    search_type: Optional[str] = Field(
+        default="grid",
+        description="Search method to explore parameter space. Options: 'grid', 'random', 'bayesian'"
+    )
+    trials: Optional[int] = Field(
+        default=5,
+        description="Number of optimization trials to run."
+    )
+    validation_choice: Optional[str] = Field(
+        default='generate',
+        description=(
+            "Validation data source. Options:\n"
+            " - Leave blank → use default 'validation_qa.jsonl' if available\n"
+            " - 'generate' → auto-generate a validation QA file from your docs\n"
+            " - Path to a local JSON file (e.g. 'data/validation_qa.json')\n"
+            " - Hugging Face dataset ID (e.g. 'squad')"
+        )
+    )
+    llm_model: Optional[str] = Field(
+        default="gemini-2.5-flash-lite",
+        description="LLM used to generate QA dataset when validation_choice='generate'. Example: 'gemini-pro', 'gpt-4o-mini'"
+    )
+
+
+class QARequest(BaseModel):
+    """
+    Generates a validation QA dataset for RAG evaluation.
+    """
+    docs_path: str = Field(
+        description="Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
+        default='data/docs'
+    )
+    llm_model: str = Field(
+        default="gemini-2.5-flash-lite",
+        description="LLM model used for question generation. Example: 'gemini-2.5-flash-lite', 'gpt-4o-mini'"
+    )
+    batch_size: int = Field(
+        default=5,
+        description="Number of documents processed per generation batch."
+    )
+    min_q: int = Field(
+        default=3,
+        description="Minimum number of questions per document."
+    )
+    max_q: int = Field(
+        default=25,
+        description="Maximum number of questions per document."
+    )
+
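Every field in these models carries a default, so the full payload the dashboard falls back to can be inspected by instantiating the models and dumping them. A sketch, assuming it is run from the Space's root folder so `models.py` is importable; it uses Pydantic's v1-style `.dict()`, which still works (with a deprecation warning) under Pydantic v2.

```python
# Sketch: inspect the default request payloads defined by models.py.
import json

from models import OptimizeRequest, AutotuneRequest, QARequest

for model_cls in (OptimizeRequest, AutotuneRequest, QARequest):
    defaults = model_cls().dict()  # v1-style dump; deprecated but available in v2
    print(model_cls.__name__)
    print(json.dumps(defaults, indent=2))
```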
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+gradio[mcp]
+fastapi
+uvicorn
+requests
+ragmint
+pydantic
server.py
ADDED
@@ -0,0 +1,7 @@
+import threading
+from api import main
+
+def start():
+    threading.Thread(target=main, daemon=True).start()
+
+start()
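`server.py` starts uvicorn in a daemon thread as a side effect of being imported (which is what `import server` in `app.py` relies on), so the Gradio UI and the FastAPI backend share one process on the Space. Because the thread comes up asynchronously, the first request can land before port 8000 is listening; the sketch below polls `/health` first. The helper name and retry timing are assumptions for illustration, not part of the commit.

```python
# Sketch: wait for the background FastAPI server (started by importing server.py)
# to become reachable before sending the first request.
import time
import requests

import server  # side effect: launches uvicorn on port 8000 in a daemon thread


def wait_for_backend(url: str = "http://127.0.0.1:8000/health", retries: int = 20) -> dict:
    for _ in range(retries):
        try:
            return requests.get(url, timeout=2).json()
        except requests.exceptions.ConnectionError:
            time.sleep(0.5)  # server thread not listening yet; retry
    raise RuntimeError("MCP backend did not come up in time")


print(wait_for_backend())
```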