import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
# Start FastAPI server in background
threading.Thread(target=start_api, daemon=True).start()
# Base URL for internal calls
BASE_INTERNAL = "http://127.0.0.1:8000"
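
# Sketch (assumption): the FastAPI server may still be starting when the first
# request arrives. A minimal readiness poll (requires `import time`) could be:
#   for _ in range(20):
#       try:
#           requests.get(f"{BASE_INTERNAL}/docs", timeout=1)
#           break
#       except requests.exceptions.ConnectionError:
#           time.sleep(0.5)
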
def call_api(endpoint: str, payload: dict) -> str:
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        return json.dumps(r.json(), indent=2)
    except Exception as e:
        return str(e)
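
# Example (sketch): a hypothetical call against the local FastAPI app:
#   call_api("/generate_validation_qa", {"docs_path": "data/docs"})
# returns the endpoint's JSON response pretty-printed, or the error message on failure.
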
def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.

    Deletes all files and directories inside docs_path on the server.

    Args:
        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
    """
    try:
        r = requests.post(
            f"{BASE_INTERNAL}/clear_cache",
            data={"docs_path": docs_path},
            timeout=60
        )
        r.raise_for_status()
        return r.json()
    except Exception as e:
        return {"error": str(e)}
def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
    """
    import tempfile
    os.makedirs(docs_path, exist_ok=True)
    files_payload = []
    temp_files = []
    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download the URL to a temp file, decoding text responses as UTF-8
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()
                # Create a temp file with the proper extension
                ext = os.path.splitext(f)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
                if "text" in resp.headers.get("Content-Type", "").lower():
                    tmp.write(resp.text.encode("utf-8"))
                else:
                    tmp.write(resp.content)
                tmp.close()
                temp_files.append(tmp.name)
                files_payload.append(("files", open(tmp.name, "rb")))
            elif isinstance(f, str):
                # Local file path
                files_payload.append(("files", open(f, "rb")))
            else:
                # File-like object
                files_payload.append(("files", f))
        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120
        )
        resp.raise_for_status()
        return resp.json()
    finally:
        # Close all file handles (skip objects that don't expose .closed)
        for _, file_obj in files_payload:
            if hasattr(file_obj, "close") and not getattr(file_obj, "closed", True):
                file_obj.close()
        # Clean up temp files
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except OSError:
                pass
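
# Example (sketch): local paths and remote URLs both end up at /upload_docs:
#   upload_docs_tool(["data/docs/report.txt"])               # hypothetical local file
#   upload_docs_tool(["https://example.com/notes.txt"])      # remote URL, downloaded first
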
def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization request: user provides all pipeline configs manually."""
    return call_api("/optimize_rag", json.loads(payload))

def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
    return call_api("/autotune_rag", json.loads(payload))

def generate_qa_tool_(payload: str) -> str:
    """🧩 Generates a validation QA dataset for RAG evaluation."""
    return call_api("/generate_validation_qa", json.loads(payload))
def model_to_json(model_cls) -> str:
    # default=str guards against non-JSON-serializable defaults (e.g. PydanticUndefined)
    return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2, default=str)
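
# Sketch: model_to_json renders a pydantic v2 model's field defaults as a JSON
# template. A hypothetical model with docs_path="data/docs" and trials=5 would
# yield '{"docs_path": "data/docs", "trials": 5}' (pretty-printed).
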
# Default inputs
DEFAULT_UPLOAD_PATH = "data/docs"
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)
from claude_theme import Claude
with gr.Blocks(theme=Claude()) as demo:
gr.Markdown("# 🧠 Ragmint MCP Server")
gr.HTML("""
<div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
<a href="https://huggingface.co/spaces/MCP-1st-Birthday/ragmint-mcp-server">
<img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
<a href="https://pypi.org/project/ragmint/">
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
</a>
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
<img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
<img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
<img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
<a href="https://www.linkedin.com/posts/andyolivers_ragmint-mcp-server-a-hugging-face-space-activity-7399028674261348352-P5wy?utm_source=share&utm_medium=member_desktop&rcm=ACoAABanwk4Bp0A-FVwO9wyzwVp0g_yqZoRDptI">
<img src="https://img.shields.io/badge/LinkedIn-Post-blue" alt="LinkedIn">
</a>
</div>
""")
gr.HTML("""
<style>
.center-wrapper {
display: flex;
justify-content: center;
align-items: center;
}
.center-wrapper img {
height: 100px !important;
}
</style>
<div class="center-wrapper">
<img src="https://raw.githubusercontent.com/andyolivers/ragmint/main/src/ragmint/assets/img/ragmint_logo.png" alt="Ragmint Banner">
</div>
""")
gr.Markdown("""
**AI-Powered Optimization for RAG Pipelines**
This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
<br>
## 🔧 MCP Tools
- 📄 **Upload Docs**: Upload .txt files to workspace for evaluation using `upload_docs`.
- 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
- 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics on `optimize_rag`.
- ⚡️ **Autotune RAG**: Automated recommendations for best chunking and embeddings with `autotune`.
- 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
- 🗑️ **Clear Cache**: Reset workspace and delete stored docs with `clear_cache`.
<br>
## 🧠 What Ragmint Solves
- Automated RAG hyperparameter optimization.
- Retriever, embedding, reranker selection.
- Synthetic validation QA generation.
- Evaluation metrics (faithfulness, latency, etc.).
- Experiment tracking & reproducible pipeline comparison.
🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
<br>
## ⚙ Powered by
- Optuna (Bayesian Optimization).
- Google Gemini 2.5 Flash Lite/Pro.
- FAISS, Chroma, BM25, scikit-learn retrievers.
- Sentence-Transformers/BGE embeddings.
<br>
## 🌐 MCP Connection
**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
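
For example, a hypothetical Claude Desktop entry (assuming the `mcp-remote` bridge, which many MCP clients use to reach remote SSE servers) could look like:

```json
{
  "mcpServers": {
    "ragmint": {
      "command": "npx",
      "args": ["mcp-remote", "https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse"]
    }
  }
}
```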
<br>

## 📦 Example MCP Use Cases
- Run auto-optimization for RAG pipelines.
- Compare embedding + retriever combinations.
- Automatically generate QA validation datasets.
- Rapid experiment iteration inside Claude/Cursor.

---
    """)
with gr.Tab("📂 Upload"):
with gr.Row():
# Upload Documents
with gr.Column(scale=1):
gr.Markdown("## Upload Documents")
gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
upload_files = gr.File(file_count="multiple", type="filepath")
upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_btn = gr.Button("Upload", variant="primary")
upload_out = gr.JSON(label="Response")
upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
# Upload MCP Documents (no file uploader)
with gr.Column(scale=1):
gr.Markdown("## Upload Documents from URLs")
gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")
upload_mcp_input = gr.TextArea(
placeholder="Paste URLs (one per line without commas)",
label="URLs"
)
def upload_urls_tool(text, docs_path):
"""
Upload documents from a list of URLs to the server's docs folder.
Args:
text (str): A newline-separated string of document URLs to download.
docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
"""
urls = [u.strip() for u in text.split("\n") if u.strip()]
return upload_docs_tool(urls, docs_path)
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_mcp_btn = gr.Button("Upload", variant="primary")
upload_mcp_out = gr.JSON(label="Response")
upload_mcp_btn.click(
upload_urls_tool,
inputs=[upload_mcp_input, upload_mcp_path],
outputs=upload_mcp_out
)
gr.Markdown("---")
with gr.Tab("⚡ Autotune"):
# Autotune RAG
with gr.Column():
gr.Markdown("## Autotune RAG")
gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model"
)
num_chunk_pairs = gr.Slider(
minimum=1, maximum=20, step=1, value=5, label="Number of chunk pairs"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
autotune_btn = gr.Button("Autotune", variant="primary")
autotune_out = gr.Textbox(label="Response", lines=15)
def autotune_tool(
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
):
payload = {
"docs_path": docs_path,
"embedding_model": embedding_model,
"num_chunk_pairs": num_chunk_pairs,
"metric": metric,
"search_type": search_type,
"trials": trials,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return autotune_tool_(json.dumps(payload))
autotune_tool.__doc__ = AutotuneRequest.__doc__
autotune_btn.click(
autotune_tool,
inputs=[
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
],
outputs=autotune_out
)
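
            # Example (sketch): with the defaults above, the JSON sent to /autotune_rag
            # would look roughly like:
            #   {"docs_path": "data/docs",
            #    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
            #    "num_chunk_pairs": 5, "metric": "faithfulness", "search_type": "grid",
            #    "trials": 5, "validation_choice": "generate",
            #    "llm_model": "gemini-2.5-flash-lite"}
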
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🔧 Optimize"):
# Optimize RAG
with gr.Column():
gr.Markdown("## Optimize RAG")
gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")
# Parameters accordion
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
retriever = gr.CheckboxGroup(
choices=["faiss", "chroma", "numpy","bm25"],
value="faiss",
label="Search Type"
)
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model(s) (comma-separated)"
)
strategy = gr.CheckboxGroup(
choices=["fixed","token","sentence"],
value="fixed",
label="RAG Strategy"
)
chunk_sizes = gr.Textbox(
value="200,400,600",
label="Chunk Sizes (comma-separated integers)"
)
overlaps = gr.Textbox(
value="50,100,200",
label="Overlaps (comma-separated integers)"
)
rerankers = gr.Dropdown(
choices=["mmr"],
value="mmr",
label="Rerankers"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5,
label="Number of Trials"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
optimize_btn = gr.Button("Optimize", variant="primary")
optimize_out = gr.Textbox(label="Response", lines=15)
# Function to convert inputs into payload and call API
def optimize_rag_tool(
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
):
payload = {
"docs_path": docs_path,
"retriever": retriever,
"embedding_model": [e.strip() for e in embedding_model.split(",") if e.strip()],
"strategy": strategy,
"chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()],
"overlaps": [int(o) for o in overlaps.split(",") if o.strip()],
"rerankers": [r.strip() for r in rerankers.split(",") if r.strip()],
"search_type": search_type,
"trials": trials,
"metric": metric,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return optimize_rag_tool_(json.dumps(payload))
optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
optimize_btn.click(
optimize_rag_tool,
inputs=[
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
],
outputs=optimize_out
)
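
            # Example (sketch): with the defaults above, the JSON sent to /optimize_rag
            # would look roughly like (CheckboxGroup values arrive as lists):
            #   {"docs_path": "data/docs", "retriever": ["faiss"],
            #    "embedding_model": ["sentence-transformers/all-MiniLM-L6-v2"],
            #    "strategy": ["fixed"], "chunk_sizes": [200, 400, 600],
            #    "overlaps": [50, 100, 200], "rerankers": ["mmr"],
            #    "search_type": "grid", "trials": 5, "metric": "faithfulness",
            #    "validation_choice": "generate", "llm_model": "gemini-2.5-flash-lite"}
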
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧩 Generate QA"):
# Generate QA
with gr.Column():
gr.Markdown("## Generate QA")
gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")
with gr.Tab("🧩 Generate QA"):
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")
qa_btn = gr.Button("Generate QA", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
return generate_qa_tool_(json.dumps({
"docs_path": docs_path,
"llm_model": llm_model,
"batch_size": batch_size,
"min_q": min_q,
"max_q": max_q
}))
generate_qa_tool.__doc__ = QARequest.__doc__
qa_btn.click(
generate_qa_tool,
inputs=[docs_path, llm_model, batch_size, min_q, max_q],
outputs=qa_out
)
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(QARequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🗑️ Clear Cache"):
# Clear Cache
with gr.Column():
gr.Markdown("## Clear Cache")
gr.Markdown("🗑️ Deletes all files and directories inside docs_path on the server.")
clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
clear_btn = gr.Button("Clear Cache", variant="primary")
clear_out = gr.JSON(label="Response")
clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
gr.Markdown("---")
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )