import gradio as gr
import requests
import json
import os
import tempfile
import threading

from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
from claude_theme import Claude

# Start the FastAPI server in a background thread
threading.Thread(target=start_api, daemon=True).start()

# Base URL for internal calls
BASE_INTERNAL = "http://127.0.0.1:8000"


def call_api(endpoint: str, payload: dict) -> str:
    """POST a JSON payload to the internal FastAPI server; return the response as pretty-printed JSON."""
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        return json.dumps(r.json(), indent=2)
    except Exception as e:
        return str(e)


def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.
    Deletes all files and directories inside docs_path on the server.

    Args:
        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
    """
    try:
        r = requests.post(
            f"{BASE_INTERNAL}/clear_cache",
            data={"docs_path": docs_path},
            timeout=60
        )
        r.raise_for_status()
        return r.json()
    except Exception as e:
        return {"error": str(e)}


def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
    """
    os.makedirs(docs_path, exist_ok=True)
    files_payload = []
    temp_files = []
    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download URL to a temp file (text-aware)
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()
                # Create the temp file with a proper extension
                ext = os.path.splitext(f)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
                if "text" in resp.headers.get("Content-Type", "").lower():
                    tmp.write(resp.text.encode("utf-8"))
                else:
                    tmp.write(resp.content)
                tmp.close()
                temp_files.append(tmp.name)
                files_payload.append(("files", open(tmp.name, "rb")))
            elif isinstance(f, str):
                # Local file path
                files_payload.append(("files", open(f, "rb")))
            else:
                # File-like object
                files_payload.append(("files", f))

        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path}
        )
        resp.raise_for_status()
        return resp.json()
    finally:
        # Close all file handles (skip objects that expose no close/closed API)
        for _, file_obj in files_payload:
            if hasattr(file_obj, "close") and not getattr(file_obj, "closed", True):
                file_obj.close()
        # Clean up temp files
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except Exception:
                pass


def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization request: user provides all pipeline configs manually."""
    return call_api("/optimize_rag", json.loads(payload))


def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
    return call_api("/autotune_rag", json.loads(payload))


def generate_qa_tool_(payload: str) -> str:
    """🧩 Generates a validation QA dataset for RAG evaluation."""
    return call_api("/generate_validation_qa", json.loads(payload))


def model_to_json(model_cls) -> str:
    """Serialize a pydantic model's field defaults to pretty-printed JSON."""
    return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)


# Default inputs
DEFAULT_UPLOAD_PATH = "data/docs"
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)

with gr.Blocks(theme=Claude()) as demo:
    gr.Markdown("# 🧠 Ragmint MCP Server")
    gr.HTML("""
    <!-- Badge row (image markup not preserved in this copy): HF Space, Python,
         PyPI, License, MCP Status, Optuna, Google Gemini 2.5, LinkedIn -->
""") gr.HTML("""
    <!-- Ragmint banner image (markup not preserved in this copy); alt text: "Ragmint Banner" -->
""") gr.Markdown(""" **AI-Powered Optimization for RAG Pipelines** This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
## 🔧 MCP Tools

- 📄 **Upload Docs**: Upload .txt files to the workspace for evaluation using `upload_docs`.
- 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
- 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics via `optimize_rag` (example payload below).
- ⚡️ **Autotune RAG**: Automated recommendations for the best chunking and embeddings with `autotune`.
- 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
- 🗑️ **Clear Cache**: Reset the workspace and delete stored docs with `clear_cache`.
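
As an illustration, an `optimize_rag` request payload that mirrors the UI defaults in the Optimize tab looks like this:

```json
{
  "docs_path": "data/docs",
  "retriever": ["faiss"],
  "embedding_model": ["sentence-transformers/all-MiniLM-L6-v2"],
  "strategy": ["fixed"],
  "chunk_sizes": [200, 400, 600],
  "overlaps": [50, 100, 200],
  "rerankers": ["mmr"],
  "search_type": "grid",
  "trials": 5,
  "metric": "faithfulness",
  "validation_choice": "generate",
  "llm_model": "gemini-2.5-flash-lite"
}
```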
## 🧠 What Ragmint Solves

- Automated RAG hyperparameter optimization.
- Retriever, embedding, reranker selection.
- Synthetic validation QA generation.
- Evaluation metrics (faithfulness, latency, etc.).
- Experiment tracking & reproducible pipeline comparison.

🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
## ⚙ Powered by

- Optuna (Bayesian Optimization).
- Google Gemini 2.5 Flash Lite/Pro.
- FAISS, Chroma, BM25, scikit-learn retrievers.
- Sentence-Transformers/BGE embeddings.
## 🌐 MCP Connection

**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server

**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
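
To connect, point your MCP client at the SSE endpoint. As a sketch (the exact config file and keys vary by client; check your client's MCP documentation), an SSE-capable client configuration might look like:

```json
{
  "mcpServers": {
    "ragmint": {
      "url": "https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse"
    }
  }
}
```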
## 📦 Example MCP Use Cases

- Run Auto-Optimization for RAG pipelines.
- Compare embedding + retriever combinations.
- Automatically generate QA validation datasets.
- Rapid experiment iteration inside Claude/Cursor.

---
    """)

    with gr.Tab("📂 Upload"):
        with gr.Row():
            # Upload Documents
            with gr.Column(scale=1):
                gr.Markdown("## Upload Documents")
                gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
                upload_files = gr.File(file_count="multiple", type="filepath")
                upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                upload_btn = gr.Button("Upload", variant="primary")
                upload_out = gr.JSON(label="Response")
                upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)

            # Upload documents from URLs (no file uploader)
            with gr.Column(scale=1):
                gr.Markdown("## Upload Documents from URLs")
                gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")
                upload_mcp_input = gr.TextArea(
                    placeholder="Paste URLs (one per line, without commas)",
                    label="URLs"
                )

                def upload_urls_tool(text, docs_path):
                    """
                    Upload documents from a list of URLs to the server's docs folder.

                    Args:
                        text (str): A newline-separated string of document URLs to download.
                        docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
                    """
                    urls = [u.strip() for u in text.split("\n") if u.strip()]
                    return upload_docs_tool(urls, docs_path)

                upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                upload_mcp_btn = gr.Button("Upload", variant="primary")
                upload_mcp_out = gr.JSON(label="Response")
                upload_mcp_btn.click(
                    upload_urls_tool,
                    inputs=[upload_mcp_input, upload_mcp_path],
                    outputs=upload_mcp_out
                )

    gr.Markdown("---")

    with gr.Tab("⚡ Autotune"):
        # Autotune RAG
        with gr.Column():
            gr.Markdown("## Autotune RAG")
            gr.Markdown("⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
            with gr.Accordion("⚙ Settings", open=False):
                docs_path = gr.Textbox(value="data/docs", label="Docs Path")
                embedding_model = gr.Textbox(
                    value="sentence-transformers/all-MiniLM-L6-v2",
                    label="Embedding Model"
                )
                num_chunk_pairs = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5, label="Number of Chunk Pairs"
                )
                metric = gr.Dropdown(
                    choices=["faithfulness"], value="faithfulness", label="Metric"
                )
                search_type = gr.Dropdown(
                    choices=["grid", "random", "bayesian"], value="grid", label="Search Type"
                )
                trials = gr.Slider(
                    minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
                )
                validation_choice = gr.Dropdown(
                    choices=["generate", ""], value="generate", label="Validation Choice"
                )
                llm_model = gr.Textbox(
                    value="gemini-2.5-flash-lite", label="LLM Model"
                )
            autotune_btn = gr.Button("Autotune", variant="primary")
            autotune_out = gr.Textbox(label="Response", lines=15)

            def autotune_tool(
                docs_path, embedding_model, num_chunk_pairs, metric,
                search_type, trials, validation_choice, llm_model
            ):
                payload = {
                    "docs_path": docs_path,
                    "embedding_model": embedding_model,
                    "num_chunk_pairs": num_chunk_pairs,
                    "metric": metric,
                    "search_type": search_type,
                    "trials": trials,
                    "validation_choice": validation_choice,
                    "llm_model": llm_model
                }
                return autotune_tool_(json.dumps(payload))

            autotune_tool.__doc__ = AutotuneRequest.__doc__

            autotune_btn.click(
                autotune_tool,
                inputs=[
                    docs_path, embedding_model, num_chunk_pairs, metric,
                    search_type, trials, validation_choice, llm_model
                ],
                outputs=autotune_out
            )

            with gr.Accordion("➕ More Information", open=False):
                gr.Markdown(AutotuneRequest.__doc__ or "No description available.")

    gr.Markdown("---")

    with gr.Tab("🔧 Optimize"):
Optimize"): # Optimize RAG with gr.Column(): gr.Markdown("## Optimize RAG") gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.") # Parameters accordion with gr.Accordion("⚙ Settings", open=False): docs_path = gr.Textbox(value="data/docs", label="Docs Path") retriever = gr.CheckboxGroup( choices=["faiss", "chroma", "numpy","bm25"], value="faiss", label="Search Type" ) embedding_model = gr.Textbox( value="sentence-transformers/all-MiniLM-L6-v2", label="Embedding Model(s) (comma-separated)" ) strategy = gr.CheckboxGroup( choices=["fixed","token","sentence"], value="fixed", label="RAG Strategy" ) chunk_sizes = gr.Textbox( value="200,400,600", label="Chunk Sizes (comma-separated integers)" ) overlaps = gr.Textbox( value="50,100,200", label="Overlaps (comma-separated integers)" ) rerankers = gr.Dropdown( choices=["mmr"], value="mmr", label="Rerankers" ) search_type = gr.Dropdown( choices=["grid", "random", "bayesian"], value="grid", label="Search Type" ) trials = gr.Slider( minimum=1, maximum=100, step=1, value=5, label="Number of Trials" ) metric = gr.Dropdown( choices=["faithfulness"], value="faithfulness", label="Metric" ) validation_choice = gr.Dropdown( choices=["generate", ""], value="generate", label="Validation Choice" ) llm_model = gr.Textbox( value="gemini-2.5-flash-lite", label="LLM Model" ) optimize_btn = gr.Button("Optimize", variant="primary") optimize_out = gr.Textbox(label="Response", lines=15) # Function to convert inputs into payload and call API def optimize_rag_tool( docs_path, retriever, embedding_model, strategy, chunk_sizes, overlaps, rerankers, search_type, trials, metric, validation_choice, llm_model ): payload = { "docs_path": docs_path, "retriever": retriever, "embedding_model": [e.strip() for e in embedding_model.split(",") if e.strip()], "strategy": strategy, "chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()], "overlaps": [int(o) for o in overlaps.split(",") if o.strip()], "rerankers": [r.strip() for r in rerankers.split(",") if r.strip()], "search_type": search_type, "trials": trials, "metric": metric, "validation_choice": validation_choice, "llm_model": llm_model } return optimize_rag_tool_(json.dumps(payload)) optimize_rag_tool.__doc__ = OptimizeRequest.__doc__ optimize_btn.click( optimize_rag_tool, inputs=[ docs_path, retriever, embedding_model, strategy, chunk_sizes, overlaps, rerankers, search_type, trials, metric, validation_choice, llm_model ], outputs=optimize_out ) with gr.Accordion("➕ More Information", open=False): gr.Markdown(OptimizeRequest.__doc__ or "No description available.") gr.Markdown("---") with gr.Tab("🧩 Generate QA"): # Generate QA with gr.Column(): gr.Markdown("## Generate QA") gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.") with gr.Tab("🧩 Generate QA"): with gr.Accordion("⚙ Settings", open=False): docs_path = gr.Textbox(value="data/docs", label="Docs Path") llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model") batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size") min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions") max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions") qa_btn = gr.Button("Generate QA", variant="primary") qa_out = gr.Textbox(lines=15, label="Response") def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q): return generate_qa_tool_(json.dumps({ "docs_path": docs_path, "llm_model": llm_model, "batch_size": batch_size, "min_q": min_q, "max_q": max_q })) 
            generate_qa_tool.__doc__ = QARequest.__doc__

            qa_btn.click(
                generate_qa_tool,
                inputs=[docs_path, llm_model, batch_size, min_q, max_q],
                outputs=qa_out
            )

            with gr.Accordion("➕ More Information", open=False):
                gr.Markdown(QARequest.__doc__ or "No description available.")

    gr.Markdown("---")

    with gr.Tab("🗑️ Clear Cache"):
        # Clear Cache
        with gr.Column():
            gr.Markdown("## Clear Cache")
            gr.Markdown("🗑️ Deletes all files and directories inside `docs_path` on the server.")
            clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
            clear_btn = gr.Button("Clear Cache", variant="primary")
            clear_out = gr.JSON(label="Response")
            clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)

    gr.Markdown("---")

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )
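
# Local usage sketch (assumes the FastAPI backend in api.py starts cleanly on port 8000):
#   python app.py
# The Gradio UI is then served on http://localhost:7860 and, because
# mcp_server=True, Gradio exposes the MCP SSE endpoint at
#   http://localhost:7860/gradio_api/mcp/sse
# (the same path as the hosted endpoint advertised above).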