Spaces:

MCP-1st-Birthday
/

ragmint-mcp-server

Running

File size: 19,894 Bytes

59e6760
 
 
9d761b8
f7d462d
4f9b2d4
f7d462d
ec21b79
188a5d8
 
170863a
f7d462d
 
4f1961d
f7d462d
59e6760
4f9b2d4
9d761b8
 
434392c
9d761b8
 
 
59e6760
4f9b2d4
07b4f45
30720a5
709c564
a529c4d
30720a5
a529c4d
 
 
30720a5
 
 
 
 
 
 
 
 
 
07b4f45
30720a5
 
9d761b8
4f9b2d4
170863a
a529c4d
 
 
 
4f9b2d4
188a5d8
170863a
9d761b8
170863a
94a9110
188a5d8
94a9110
170863a
188a5d8
 
62897a2
 
 
 
 
 
 
 
 
 
 
 
 
188a5d8
 
62897a2
188a5d8
 
 
 
 
 
 
 
 
 
170863a
 
 
 
 
188a5d8
170863a
188a5d8
170863a
 
 
 
 
188a5d8
 
 
 
 
 
 
59e6760
c2fcdce
4f1961d
434392c
9d761b8
4f9b2d4
c2fcdce
4f1961d
434392c
59e6760
4f9b2d4
c2fcdce
4f1961d
434392c
9d761b8
4f9b2d4
 
9d761b8
188a5d8
7f8656a
 
4f9b2d4
9d761b8
 
 
 
 
170863a
c813369
 
709c564
c2fcdce
 
 
c40bfd2
c2fcdce
 
 
 
a529c4d
c2fcdce
 
 
 
 
ddfe5f5
b514ecb
ddfe5f5
 
c2fcdce
 
 
c813369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2fcdce
 
 
 
 
 
 
709c564
c2fcdce
709c564
 
 
 
 
 
c2fcdce
 
 
 
 
709c564
 
 
 
 
c2fcdce
 
 
 
 
709c564
c2fcdce
709c564
 
 
 
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709c564
 
 
 
c2fcdce
 
 
 
 
 
 
 
 
 
 
709c564
c2fcdce
 
c813369
c2fcdce
 
 
 
 
 
709c564
c2fcdce
 
 
 
 
 
a529c4d
c2fcdce
a529c4d
 
 
 
 
c2fcdce
 
 
a529c4d
c2fcdce
 
c813369
c2fcdce
 
 
 
 
 
 
dc0d368
9d761b8
 
 
59e6760
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813369
c2fcdce
 
 
a529c4d
c2fcdce
 
a529c4d
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a529c4d
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63d9cf4
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813369
c2fcdce
 
 
 
a529c4d
c2fcdce
 
 
a529c4d
c2fcdce
 
 
449e9fd
c2fcdce
82d43c9
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a529c4d
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813369
c2fcdce
 
 
a529c4d
c2fcdce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a529c4d
c2fcdce
 
 
 
709c564
c2fcdce
 
 
709c564
c2fcdce
c813369
c2fcdce
 
 
30720a5
434392c
7f8656a
f7d462d
 
 
4f9b2d4

import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api



# Run the API server (FastAPI, per start_api's origin in api.py) on a daemon
# thread so it lives alongside the Gradio UI and never blocks interpreter exit.
_api_thread = threading.Thread(target=start_api)
_api_thread.daemon = True
_api_thread.start()

# Root URL the helper functions below use to reach that server.
# NOTE(review): assumes start_api() listens on port 8000 — confirm in api.py.
BASE_INTERNAL = "http://127.0.0.1:8000"


def call_api(endpoint: str, payload: dict) -> str:
    """
    POST a JSON payload to the internal API and return the reply as text.

    Args:
        endpoint (str): Path appended to BASE_INTERNAL, e.g. "/optimize_rag".
        payload (dict): Request body, sent as JSON.

    Returns:
        str: The decoded JSON reply pretty-printed with a 2-space indent.
        If the reply body is not valid JSON, "HTTP <status>: <raw body>" is
        returned instead. If the request itself fails (connection error,
        timeout, ...), the exception message is returned.
    """
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
    except Exception as e:
        # UI boundary: callers display the returned string, so never raise here.
        return str(e)

    try:
        body = r.json()
    except ValueError:
        # A non-JSON reply (e.g. a plain-text 500 page) used to surface only as
        # an opaque JSON decode message; show the status and raw body instead.
        return f"HTTP {r.status_code}: {r.text}"

    return json.dumps(body, indent=2)


def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.

    Asks the server to delete every file and directory inside docs_path.

    Args:
        docs_path (str): The folder on the server to clear. Defaults to 'data/docs'.

    Returns:
        The server's decoded JSON reply, or {"error": <message>} if the request fails.
    """
    url = f"{BASE_INTERNAL}/clear_cache"
    form_fields = {"docs_path": docs_path}

    try:
        response = requests.post(url, data=form_fields, timeout=60)
        response.raise_for_status()
        result = response.json()
    except Exception as exc:
        # Report the failure as data so the JSON output component can show it.
        result = {"error": str(exc)}

    return result


def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
    """
    import shutil, tempfile

    os.makedirs(docs_path, exist_ok=True)
    files_payload = []

    temp_files = []

    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download URL to a temp file (txt aware)
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()

                # create temp file with proper extension
                ext = os.path.splitext(f)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)

                if "text" in resp.headers.get("Content-Type", "").lower():
                    tmp.write(resp.text.encode("utf-8"))
                else:
                    tmp.write(resp.content)

                tmp.close()
                temp_files.append(tmp.name)

                files_payload.append(("files", open(tmp.name, "rb")))

            elif isinstance(f, str):
                # Local file path
                files_payload.append(("files", open(f, "rb")))

            else:
                # File-like object
                files_payload.append(("files", f))

        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path}
        )
        resp.raise_for_status()
        return resp.json()

    finally:
        # Close all file handles
        for _, file_obj in files_payload:
            if not file_obj.closed:
                file_obj.close()
        # Clean up temp files
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except Exception:
                pass


def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization: decode the caller's JSON pipeline config and POST it to /optimize_rag."""
    request_body = json.loads(payload)
    return call_api("/optimize_rag", request_body)


def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: decode the JSON request text and POST it to /autotune_rag."""
    request_body = json.loads(payload)
    return call_api("/autotune_rag", request_body)


def generate_qa_tool_(payload: str) -> str:
    """🧩 Validation QA dataset: decode the JSON request text and POST it to /generate_validation_qa."""
    request_body = json.loads(payload)
    return call_api("/generate_validation_qa", request_body)



def model_to_json(model_cls) -> str:
    """Return the `.default` of every entry in model_cls.model_fields as 2-space-indented JSON."""
    defaults = {}
    for field_name, field_info in model_cls.model_fields.items():
        defaults[field_name] = field_info.default
    return json.dumps(defaults, indent=2)


# Default docs folder shown in the path text boxes.
DEFAULT_UPLOAD_PATH = "data/docs"

# JSON templates built from each request model's field defaults.
DEFAULT_OPTIMIZE_JSON, DEFAULT_AUTOTUNE_JSON, DEFAULT_QA_JSON = (
    model_to_json(request_model)
    for request_model in (OptimizeRequest, AutotuneRequest, QARequest)
)


from claude_theme import Claude
with gr.Blocks(theme=Claude()) as demo:
    gr.Markdown("# 🧠 Ragmint MCP Server")

    gr.HTML("""
    <div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
      <a href="https://huggingface.co/spaces/MCP-1st-Birthday/ragmint-mcp-server">
        <img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
      </a>
      <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
      <a href="https://pypi.org/project/ragmint/">
        <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
      </a>
      <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
      <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
      <img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
      <img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
      <img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">   
      <a href="https://www.linkedin.com/posts/andyolivers_ragmint-mcp-server-a-hugging-face-space-activity-7399028674261348352-P5wy?utm_source=share&utm_medium=member_desktop&rcm=ACoAABanwk4Bp0A-FVwO9wyzwVp0g_yqZoRDptI">
        <img src="https://img.shields.io/badge/LinkedIn-Post-blue" alt="LinkedIn">
      </a>
    </div>
    """)

    gr.HTML("""
    <style>
    .center-wrapper {
        display: flex;
        justify-content: center;
        align-items: center;
    }
    .center-wrapper img {
        height: 100px !important;
    }
    </style>

    <div class="center-wrapper">
      <img src="https://raw.githubusercontent.com/andyolivers/ragmint/main/src/ragmint/assets/img/ragmint_logo.png" alt="Ragmint Banner">
    </div>
    """)

    gr.Markdown("""
    **AI-Powered Optimization for RAG Pipelines**
    
    This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
    
    <br>
    
    ## 🔧 MCP Tools
    
    - 📄 **Upload Docs**: Upload .txt files to workspace for evaluation using `upload_docs`.
    - 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
    - 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics on `optimize_rag`.
    - ⚡️ **Autotune RAG**: Automated recommendations for best chunking and embeddings with `autotune`.
    - 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
    - 🗑️ **Clear Cache**: Reset workspace and delete stored docs with `clear_cache`.
    
    <br>
    
    ## 🧠 What Ragmint Solves
    
    - Automated RAG hyperparameter optimization.
    - Retriever, embedding, reranker selection.
    - Synthetic validation QA generation.
    - Evaluation metrics (faithfulness, latency, etc.).
    - Experiment tracking & reproducible pipeline comparison.
    
    🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
    
    <br>
    
    ## ⚙ Powered by
    
    - Optuna (Bayesian Optimization).
    - Google Gemini 2.5 Flash Lite/Pro.
    - FAISS, Chroma, BM25, scikit-learn retrievers.
    - Sentence-Transformers/BGE embeddings.
    
    <br>
    
    ## 🌐 MCP Connection
    
    **HuggingFace Space**  
    https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
    
    **MCP Endpoint (SSE — Recommended)**  
    https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
    
    <br>
    
    ## 📦 Example MCP Use Cases
    
    - Run Auto-Optimization for RAG pipelines.
    - Compare embedding + retriever combinations.  
    - Automatically generate QA validation datasets.
    - Rapid experiment iteration inside Claude/Cursor.
    
    
    ---
    
    """)

    with gr.Tab("📂 Upload"):
        with gr.Row():
            # Upload Documents
            with gr.Column(scale=1):
                gr.Markdown("## Upload Documents")
                gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
                upload_files = gr.File(file_count="multiple", type="filepath")
                upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                upload_btn = gr.Button("Upload", variant="primary")
                upload_out = gr.JSON(label="Response")
                upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)

            # Upload MCP Documents (no file uploader)
            with gr.Column(scale=1):
                gr.Markdown("## Upload Documents from URLs")
                gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")

                upload_mcp_input = gr.TextArea(
                    placeholder="Paste URLs (one per line without commas)",
                    label="URLs"
                )

                def upload_urls_tool(text, docs_path="data/docs"):
                    """
                    Upload documents from a list of URLs to the server's docs folder.

                    Args:
                        text (str): A newline-separated string of document URLs to download.
                        docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.

                    Returns:
                        The upload response, or {"error": "No URLs provided."} when text holds no URL.
                    """
                    # Tolerate None / blank input instead of failing on .split or
                    # sending an upload request with no files.
                    urls = [u.strip() for u in (text or "").split("\n") if u.strip()]
                    if not urls:
                        return {"error": "No URLs provided."}
                    return upload_docs_tool(urls, docs_path)

                upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                upload_mcp_btn = gr.Button("Upload", variant="primary")
                upload_mcp_out = gr.JSON(label="Response")

                upload_mcp_btn.click(
                    upload_urls_tool,
                    inputs=[upload_mcp_input, upload_mcp_path],
                    outputs=upload_mcp_out
                )

        gr.Markdown("---")



    with gr.Tab("⚡ Autotune"):
        # Autotune RAG
        with gr.Column():
            gr.Markdown("## Autotune RAG")
            gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")

            with gr.Accordion("⚙ Settings", open=False):
                docs_path = gr.Textbox(value="data/docs", label="Docs Path")

                embedding_model = gr.Textbox(
                    value="sentence-transformers/all-MiniLM-L6-v2",
                    label="Embedding Model"
                )

                num_chunk_pairs = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5, label="Number of chunk pairs"
                )

                metric = gr.Dropdown(
                    choices=["faithfulness"],
                    value="faithfulness",
                    label="Metric"
                )

                search_type = gr.Dropdown(
                    choices=["grid", "random", "bayesian"],
                    value="grid",
                    label="Search Type"
                )

                trials = gr.Slider(
                    minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
                )

                validation_choice = gr.Dropdown(
                    choices=["generate", ""],
                    value="generate",
                    label="Validation Choice"
                )

                llm_model = gr.Textbox(
                    value="gemini-2.5-flash-lite",
                    label="LLM Model"
                )

            autotune_btn = gr.Button("Autotune", variant="primary")
            autotune_out = gr.Textbox(label="Response", lines=15)


            def autotune_tool(
                docs_path,
                embedding_model,
                num_chunk_pairs,
                metric,
                search_type,
                trials,
                validation_choice,
                llm_model,
            ):
                # No docstring here: __doc__ is assigned from AutotuneRequest
                # right after this definition.
                # Gather the form values into the request body, then hand the
                # serialized JSON to the autotune API wrapper.
                request_body = dict(
                    docs_path=docs_path,
                    embedding_model=embedding_model,
                    num_chunk_pairs=num_chunk_pairs,
                    metric=metric,
                    search_type=search_type,
                    trials=trials,
                    validation_choice=validation_choice,
                    llm_model=llm_model,
                )
                serialized = json.dumps(request_body)
                return autotune_tool_(serialized)


            autotune_tool.__doc__ = AutotuneRequest.__doc__
            autotune_btn.click(
                autotune_tool,
                inputs=[
                    docs_path, embedding_model, num_chunk_pairs, metric,
                    search_type, trials, validation_choice, llm_model
                ],
                outputs=autotune_out
            )

            with gr.Accordion("➕ More Information", open=False):
                gr.Markdown(AutotuneRequest.__doc__ or "No description available.")

            gr.Markdown("---")


    with gr.Tab("🔧 Optimize"):
        # Optimize RAG
        with gr.Column():
            gr.Markdown("## Optimize RAG")
            gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")

            # Parameters accordion
            with gr.Accordion("⚙ Settings", open=False):
                docs_path = gr.Textbox(value="data/docs", label="Docs Path")

                # Retriever backends to include in the search. The label used
                # to read "Search Type", duplicating the search-type dropdown
                # further down in this accordion.
                retriever = gr.CheckboxGroup(
                    choices=["faiss", "chroma", "numpy","bm25"],
                    value="faiss",
                    label="Retriever"
                )

                embedding_model = gr.Textbox(
                    value="sentence-transformers/all-MiniLM-L6-v2",
                    label="Embedding Model(s) (comma-separated)"
                )

                strategy = gr.CheckboxGroup(
                    choices=["fixed","token","sentence"],
                    value="fixed",
                    label="RAG Strategy"
                )

                chunk_sizes = gr.Textbox(
                    value="200,400,600",
                    label="Chunk Sizes (comma-separated integers)"
                )

                overlaps = gr.Textbox(
                    value="50,100,200",
                    label="Overlaps (comma-separated integers)"
                )

                rerankers = gr.Dropdown(
                    choices=["mmr"],
                    value="mmr",
                    label="Rerankers"
                )

                search_type = gr.Dropdown(
                    choices=["grid", "random", "bayesian"],
                    value="grid",
                    label="Search Type"
                )

                trials = gr.Slider(
                    minimum=1, maximum=100, step=1, value=5,
                    label="Number of Trials"
                )

                metric = gr.Dropdown(
                    choices=["faithfulness"],
                    value="faithfulness",
                    label="Metric"
                )

                validation_choice = gr.Dropdown(
                    choices=["generate", ""],
                    value="generate",
                    label="Validation Choice"
                )

                llm_model = gr.Textbox(
                    value="gemini-2.5-flash-lite",
                    label="LLM Model"
                )

            optimize_btn = gr.Button("Optimize", variant="primary")
            optimize_out = gr.Textbox(label="Response", lines=15)


            # Function to convert inputs into payload and call API
            def optimize_rag_tool(
                    docs_path, retriever, embedding_model, strategy, chunk_sizes,
                    overlaps, rerankers, search_type, trials, metric,
                    validation_choice, llm_model
                ):
                # No docstring here: __doc__ is assigned from OptimizeRequest
                # right after this definition.

                def _as_list(value):
                    # Normalize a comma-separated string, a list/tuple, a lone
                    # scalar, or None into a list of non-empty stripped strings.
                    # The UI sends strings; other callers may send lists, and a
                    # cleared dropdown yields None.
                    if value is None:
                        return []
                    if isinstance(value, str):
                        items = value.split(",")
                    elif isinstance(value, (list, tuple)):
                        items = value
                    else:
                        items = [value]
                    cleaned = (str(item).strip() for item in items)
                    return [item for item in cleaned if item]

                payload = {
                    "docs_path": docs_path,
                    "retriever": retriever,
                    "embedding_model": _as_list(embedding_model),
                    "strategy": strategy,
                    # int() raises ValueError on non-numeric entries, as before.
                    "chunk_sizes": [int(c) for c in _as_list(chunk_sizes)],
                    "overlaps": [int(o) for o in _as_list(overlaps)],
                    "rerankers": _as_list(rerankers),
                    "search_type": search_type,
                    "trials": trials,
                    "metric": metric,
                    "validation_choice": validation_choice,
                    "llm_model": llm_model
                }

                return optimize_rag_tool_(json.dumps(payload))


            optimize_rag_tool.__doc__ = OptimizeRequest.__doc__

            optimize_btn.click(
                optimize_rag_tool,
                inputs=[
                    docs_path, retriever, embedding_model, strategy, chunk_sizes,
                    overlaps, rerankers, search_type, trials, metric,
                    validation_choice, llm_model
                ],
                outputs=optimize_out
            )


            with gr.Accordion("➕ More Information", open=False):
                gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
            gr.Markdown("---")


    with gr.Tab("🧩 Generate QA"):
        # Generate QA
        with gr.Column():
            gr.Markdown("## Generate QA")
            gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")

            # Controls sit directly in the column, as in the other tabs. The
            # former inner gr.Tab repeated this tab's title and rendered a
            # one-entry nested tab bar.
            with gr.Accordion("⚙ Settings", open=False):
                docs_path = gr.Textbox(value="data/docs", label="Docs Path")
                llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
                batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
                min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
                max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")

            qa_btn = gr.Button("Generate QA", variant="primary")
            qa_out = gr.Textbox(lines=15, label="Response")


            def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
                # No docstring here: __doc__ is assigned from QARequest below.
                # Bundle the form values into the request body and send it as
                # JSON text through the QA-generation API wrapper.
                return generate_qa_tool_(json.dumps({
                    "docs_path": docs_path,
                    "llm_model": llm_model,
                    "batch_size": batch_size,
                    "min_q": min_q,
                    "max_q": max_q
                }))


            generate_qa_tool.__doc__ = QARequest.__doc__

            qa_btn.click(
                generate_qa_tool,
                inputs=[docs_path, llm_model, batch_size, min_q, max_q],
                outputs=qa_out
            )

            with gr.Accordion("➕ More Information", open=False):
                gr.Markdown(QARequest.__doc__ or "No description available.")

            gr.Markdown("---")

    with gr.Tab("🗑️ Clear Cache"):
        # Clear Cache
        with gr.Column():
            gr.Markdown("## Clear Cache")
            gr.Markdown("🗑️ Deletes all files and directories inside docs_path on the server.")
            clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
            clear_btn = gr.Button("Clear Cache", variant="primary")
            clear_out = gr.JSON(label="Response")
            clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
            gr.Markdown("---")

if __name__ == "__main__":
    # Launch only when run as a script: listen on every interface at port 7860,
    # with the MCP server enabled and errors shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
        "show_error": True,
    }
    demo.launch(**launch_options)