import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
# Start FastAPI server in background
threading.Thread(target=start_api, daemon=True).start()
# Base URL for internal calls
BASE_INTERNAL = "http://127.0.0.1:8000"
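
# Sketch (assumption): the FastAPI server may still be starting when the first
# request arrives. A minimal readiness poll (requires `import time`) could be:
#   for _ in range(20):
#       try:
#           requests.get(f"{BASE_INTERNAL}/docs", timeout=1)
#           break
#       except requests.exceptions.ConnectionError:
#           time.sleep(0.5)
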
def call_api(endpoint: str, payload: dict) -> str:
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        return json.dumps(r.json(), indent=2)
    except Exception as e:
        return str(e)
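
# Example (sketch): a hypothetical call against the local FastAPI app:
#   call_api("/generate_validation_qa", {"docs_path": "data/docs"})
# returns the endpoint's JSON response pretty-printed, or the error message on failure.
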
def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.

    Deletes all files and directories inside docs_path on the server.

    Args:
        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
    """
    try:
        r = requests.post(
            f"{BASE_INTERNAL}/clear_cache",
            data={"docs_path": docs_path},
            timeout=60
        )
        r.raise_for_status()
        return r.json()
    except Exception as e:
        return {"error": str(e)}
def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
    """
    import tempfile
    os.makedirs(docs_path, exist_ok=True)
    files_payload = []
    temp_files = []
    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download the URL to a temp file, decoding text responses as UTF-8
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()
                # Create a temp file with the proper extension
                ext = os.path.splitext(f)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
                if "text" in resp.headers.get("Content-Type", "").lower():
                    tmp.write(resp.text.encode("utf-8"))
                else:
                    tmp.write(resp.content)
                tmp.close()
                temp_files.append(tmp.name)
                files_payload.append(("files", open(tmp.name, "rb")))
            elif isinstance(f, str):
                # Local file path
                files_payload.append(("files", open(f, "rb")))
            else:
                # File-like object
                files_payload.append(("files", f))
        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120
        )
        resp.raise_for_status()
        return resp.json()
    finally:
        # Close all file handles (skip objects that don't expose .closed)
        for _, file_obj in files_payload:
            if hasattr(file_obj, "close") and not getattr(file_obj, "closed", True):
                file_obj.close()
        # Clean up temp files
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except OSError:
                pass
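
# Example (sketch): local paths and remote URLs both end up at /upload_docs:
#   upload_docs_tool(["data/docs/report.txt"])               # hypothetical local file
#   upload_docs_tool(["https://example.com/notes.txt"])      # remote URL, downloaded first
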
def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization request: user provides all pipeline configs manually."""
    return call_api("/optimize_rag", json.loads(payload))

def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
    return call_api("/autotune_rag", json.loads(payload))

def generate_qa_tool_(payload: str) -> str:
    """🧩 Generates a validation QA dataset for RAG evaluation."""
    return call_api("/generate_validation_qa", json.loads(payload))
def model_to_json(model_cls) -> str:
    # default=str guards against non-JSON-serializable defaults (e.g. PydanticUndefined)
    return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2, default=str)
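
# Sketch: model_to_json renders a pydantic v2 model's field defaults as a JSON
# template. A hypothetical model with docs_path="data/docs" and trials=5 would
# yield '{"docs_path": "data/docs", "trials": 5}' (pretty-printed).
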
# Default inputs
DEFAULT_UPLOAD_PATH = "data/docs"
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)
from claude_theme import Claude
with gr.Blocks(theme=Claude()) as demo:
gr.Markdown("# 🧠 Ragmint MCP Server")
gr.HTML("""
<div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
<a href="https://huggingface.co/spaces/MCP-1st-Birthday/ragmint-mcp-server">
<img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
<a href="https://pypi.org/project/ragmint/">
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
</a>
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
<img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
<img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
<img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
<a href="https://www.linkedin.com/posts/andyolivers_ragmint-mcp-server-a-hugging-face-space-activity-7399028674261348352-P5wy?utm_source=share&utm_medium=member_desktop&rcm=ACoAABanwk4Bp0A-FVwO9wyzwVp0g_yqZoRDptI">
<img src="https://img.shields.io/badge/LinkedIn-Post-blue" alt="LinkedIn">
</a>
</div>
""")
gr.HTML("""
<style>
.center-wrapper {
display: flex;
justify-content: center;
align-items: center;
}
.center-wrapper img {
height: 100px !important;
}
</style>
<div class="center-wrapper">
<img src="https://raw.githubusercontent.com/andyolivers/ragmint/main/src/ragmint/assets/img/ragmint_logo.png" alt="Ragmint Banner">
</div>
""")
gr.Markdown("""
**AI-Powered Optimization for RAG Pipelines**
This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
<br>
## 🔧 MCP Tools
- 📄 **Upload Docs**: Upload .txt files to workspace for evaluation using `upload_docs`.
- 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
- 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics on `optimize_rag`.
- ⚡️ **Autotune RAG**: Automated recommendations for best chunking and embeddings with `autotune`.
- 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
- 🗑️ **Clear Cache**: Reset workspace and delete stored docs with `clear_cache`.
<br>
## 🧠 What Ragmint Solves
- Automated RAG hyperparameter optimization.
- Retriever, embedding, reranker selection.
- Synthetic validation QA generation.
- Evaluation metrics (faithfulness, latency, etc.).
- Experiment tracking & reproducible pipeline comparison.
🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
<br>
## ⚙ Powered by
- Optuna (Bayesian Optimization).
- Google Gemini 2.5 Flash Lite/Pro.
- FAISS, Chroma, BM25, scikit-learn retrievers.
- Sentence-Transformers/BGE embeddings.
<br>
## 🌐 MCP Connection
**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
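
For example, a hypothetical Claude Desktop entry (assuming the `mcp-remote` bridge, which many MCP clients use to reach remote SSE servers) could look like:

```json
{
  "mcpServers": {
    "ragmint": {
      "command": "npx",
      "args": ["mcp-remote", "https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse"]
    }
  }
}
```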
<br>

## 📦 Example MCP Use Cases
- Run auto-optimization for RAG pipelines.
- Compare embedding + retriever combinations.
- Automatically generate QA validation datasets.
- Rapid experiment iteration inside Claude/Cursor.

---
    """)
with gr.Tab("📂 Upload"):
with gr.Row():
# Upload Documents
with gr.Column(scale=1):
gr.Markdown("## Upload Documents")
gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
upload_files = gr.File(file_count="multiple", type="filepath")
upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_btn = gr.Button("Upload", variant="primary")
upload_out = gr.JSON(label="Response")
upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
# Upload MCP Documents (no file uploader)
with gr.Column(scale=1):
gr.Markdown("## Upload Documents from URLs")
gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")
upload_mcp_input = gr.TextArea(
placeholder="Paste URLs (one per line without commas)",
label="URLs"
)
def upload_urls_tool(text, docs_path):
"""
Upload documents from a list of URLs to the server's docs folder.
Args:
text (str): A newline-separated string of document URLs to download.
docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
"""
urls = [u.strip() for u in text.split("\n") if u.strip()]
return upload_docs_tool(urls, docs_path)
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_mcp_btn = gr.Button("Upload", variant="primary")
upload_mcp_out = gr.JSON(label="Response")
upload_mcp_btn.click(
upload_urls_tool,
inputs=[upload_mcp_input, upload_mcp_path],
outputs=upload_mcp_out
)
gr.Markdown("---")
with gr.Tab("⚡ Autotune"):
# Autotune RAG
with gr.Column():
gr.Markdown("## Autotune RAG")
gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model"
)
num_chunk_pairs = gr.Slider(
minimum=1, maximum=20, step=1, value=5, label="Number of chunk pairs"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
autotune_btn = gr.Button("Autotune", variant="primary")
autotune_out = gr.Textbox(label="Response", lines=15)
def autotune_tool(
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
):
payload = {
"docs_path": docs_path,
"embedding_model": embedding_model,
"num_chunk_pairs": num_chunk_pairs,
"metric": metric,
"search_type": search_type,
"trials": trials,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return autotune_tool_(json.dumps(payload))
autotune_tool.__doc__ = AutotuneRequest.__doc__
autotune_btn.click(
autotune_tool,
inputs=[
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
],
outputs=autotune_out
)
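
            # Example (sketch): with the defaults above, the JSON sent to /autotune_rag
            # would look roughly like:
            #   {"docs_path": "data/docs",
            #    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
            #    "num_chunk_pairs": 5, "metric": "faithfulness", "search_type": "grid",
            #    "trials": 5, "validation_choice": "generate",
            #    "llm_model": "gemini-2.5-flash-lite"}
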
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🔧 Optimize"):
# Optimize RAG
with gr.Column():
gr.Markdown("## Optimize RAG")
gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")
# Parameters accordion
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
retriever = gr.CheckboxGroup(
choices=["faiss", "chroma", "numpy","bm25"],
value="faiss",
label="Search Type"
)
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model(s) (comma-separated)"
)
strategy = gr.CheckboxGroup(
choices=["fixed","token","sentence"],
value="fixed",
label="RAG Strategy"
)
chunk_sizes = gr.Textbox(
value="200,400,600",
label="Chunk Sizes (comma-separated integers)"
)
overlaps = gr.Textbox(
value="50,100,200",
label="Overlaps (comma-separated integers)"
)
rerankers = gr.Dropdown(
choices=["mmr"],
value="mmr",
label="Rerankers"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5,
label="Number of Trials"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
optimize_btn = gr.Button("Optimize", variant="primary")
optimize_out = gr.Textbox(label="Response", lines=15)
# Function to convert inputs into payload and call API
def optimize_rag_tool(
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
):
payload = {
"docs_path": docs_path,
"retriever": retriever,
"embedding_model": [e.strip() for e in embedding_model.split(",") if e.strip()],
"strategy": strategy,
"chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()],
"overlaps": [int(o) for o in overlaps.split(",") if o.strip()],
"rerankers": [r.strip() for r in rerankers.split(",") if r.strip()],
"search_type": search_type,
"trials": trials,
"metric": metric,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return optimize_rag_tool_(json.dumps(payload))
optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
optimize_btn.click(
optimize_rag_tool,
inputs=[
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
],
outputs=optimize_out
)
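
            # Example (sketch): with the defaults above, the JSON sent to /optimize_rag
            # would look roughly like (CheckboxGroup values arrive as lists):
            #   {"docs_path": "data/docs", "retriever": ["faiss"],
            #    "embedding_model": ["sentence-transformers/all-MiniLM-L6-v2"],
            #    "strategy": ["fixed"], "chunk_sizes": [200, 400, 600],
            #    "overlaps": [50, 100, 200], "rerankers": ["mmr"],
            #    "search_type": "grid", "trials": 5, "metric": "faithfulness",
            #    "validation_choice": "generate", "llm_model": "gemini-2.5-flash-lite"}
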
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧩 Generate QA"):
# Generate QA
with gr.Column():
gr.Markdown("## Generate QA")
gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")
with gr.Tab("🧩 Generate QA"):
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")
qa_btn = gr.Button("Generate QA", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
return generate_qa_tool_(json.dumps({
"docs_path": docs_path,
"llm_model": llm_model,
"batch_size": batch_size,
"min_q": min_q,
"max_q": max_q
}))
generate_qa_tool.__doc__ = QARequest.__doc__
qa_btn.click(
generate_qa_tool,
inputs=[docs_path, llm_model, batch_size, min_q, max_q],
outputs=qa_out
)
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(QARequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🗑️ Clear Cache"):
# Clear Cache
with gr.Column():
gr.Markdown("## Clear Cache")
gr.Markdown("🗑️ Deletes all files and directories inside docs_path on the server.")
clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
clear_btn = gr.Button("Clear Cache", variant="primary")
clear_out = gr.JSON(label="Response")
clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
gr.Markdown("---")
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )