André Oliveira committed on
Commit
4f9b2d4
·
1 Parent(s): ab0a773

refactor: added docstrings and verbose

Browse files
Files changed (2) hide show
  1. app.py +42 -6
  2. models.py +37 -2
app.py CHANGED
@@ -3,16 +3,16 @@ import requests
3
  import json
4
  import os
5
  import shutil
6
- from models import OptimizeRequest, AutotuneRequest, QARequest
7
  import threading
 
8
  from api import start_api
9
 
10
  threading.Thread(target=start_api, daemon=True).start()
11
 
12
-
13
  # Base URL for internal calls
14
  BASE_INTERNAL = "http://127.0.0.1:8000"
15
 
 
16
  def call_api(endpoint: str, payload: dict) -> str:
17
  try:
18
  r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
@@ -20,7 +20,18 @@ def call_api(endpoint: str, payload: dict) -> str:
20
  except Exception as e:
21
  return str(e)
22
 
 
23
  def upload_docs_tool(files, docs_path="data/docs"):
 
 
 
 
 
 
 
 
 
 
24
  os.makedirs(docs_path, exist_ok=True)
25
  saved = []
26
  for f in files:
@@ -30,18 +41,33 @@ def upload_docs_tool(files, docs_path="data/docs"):
30
  saved.append(fname)
31
  return {"status": "ok", "uploaded_files": saved, "docs_path": docs_path}
32
 
 
33
  def optimize_rag_tool(payload: str) -> str:
 
34
  return call_api("/optimize_rag", json.loads(payload))
35
 
 
36
  def autotune_tool(payload: str) -> str:
 
37
  return call_api("/autotune_rag", json.loads(payload))
38
 
 
39
  def generate_qa_tool(payload: str) -> str:
 
40
  return call_api("/generate_validation_qa", json.loads(payload))
41
 
 
 
 
 
 
 
 
42
  def model_to_json(model_cls) -> str:
43
  return json.dumps({k: v.default for k, v in model_cls.__fields__.items()}, indent=2)
44
 
 
 
45
  DEFAULT_UPLOAD_PATH = "data/docs"
46
  DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
47
  DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
@@ -49,8 +75,11 @@ DEFAULT_QA_JSON = model_to_json(QARequest)
49
 
50
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
51
  gr.Markdown("# Ragmint MCP Client")
 
 
52
  with gr.Column():
53
  gr.Markdown("## Upload Documents")
 
54
  upload_files = gr.File(file_count="multiple", type="filepath")
55
  upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
56
  upload_btn = gr.Button("Upload", variant="primary")
@@ -58,27 +87,33 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
58
  upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
59
  gr.Markdown("---")
60
 
 
61
  with gr.Column():
62
  gr.Markdown("## Optimize RAG")
 
63
  optimize_input = gr.Textbox(lines=12, value=DEFAULT_OPTIMIZE_JSON, label="OptimizeRequest JSON")
64
  optimize_btn = gr.Button("Submit", variant="primary")
65
- optimize_out = gr.Textbox(lines=15,label="Response")
66
  optimize_btn.click(optimize_rag_tool, inputs=optimize_input, outputs=optimize_out)
67
  gr.Markdown("---")
68
 
 
69
  with gr.Column():
70
  gr.Markdown("## Autotune RAG")
 
71
  autotune_input = gr.Textbox(lines=12, value=DEFAULT_AUTOTUNE_JSON, label="AutotuneRequest JSON")
72
  autotune_btn = gr.Button("Submit", variant="primary")
73
  autotune_out = gr.Textbox(lines=15)
74
  autotune_btn.click(autotune_tool, inputs=autotune_input, outputs=autotune_out)
75
  gr.Markdown("---")
76
 
 
77
  with gr.Column():
78
  gr.Markdown("## Generate QA")
 
79
  qa_input = gr.Textbox(lines=12, value=DEFAULT_QA_JSON, label="QARequest JSON")
80
  qa_btn = gr.Button("Submit", variant="primary")
81
- qa_out = gr.Textbox(lines=15,label="Response")
82
  qa_btn.click(generate_qa_tool, inputs=qa_input, outputs=qa_out)
83
  gr.Markdown("---")
84
 
@@ -86,5 +121,6 @@ if __name__ == "__main__":
86
  demo.launch(
87
  server_name="0.0.0.0",
88
  server_port=7860,
89
- mcp_server=True
90
- )
 
 
3
  import json
4
  import os
5
  import shutil
 
6
  import threading
7
+ from models import OptimizeRequest, AutotuneRequest, QARequest
8
  from api import start_api
9
 
10
  threading.Thread(target=start_api, daemon=True).start()
11
 
 
12
  # Base URL for internal calls
13
  BASE_INTERNAL = "http://127.0.0.1:8000"
14
 
15
+
16
  def call_api(endpoint: str, payload: dict) -> str:
17
  try:
18
  r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
 
20
  except Exception as e:
21
  return str(e)
22
 
23
+
24
  def upload_docs_tool(files, docs_path="data/docs"):
25
+ """
26
+ Upload documents to the server's docs folder.
27
+
28
+ Parameters:
29
+ - files: List of file paths to upload
30
+ - docs_path: Destination folder (default: data/docs)
31
+
32
+ Returns:
33
+ - dict with uploaded filenames and docs_path
34
+ """
35
  os.makedirs(docs_path, exist_ok=True)
36
  saved = []
37
  for f in files:
 
41
  saved.append(fname)
42
  return {"status": "ok", "uploaded_files": saved, "docs_path": docs_path}
43
 
44
+
45
  def optimize_rag_tool(payload: str) -> str:
46
+ """🔧 Explicit optimization request: user provides all pipeline configs manually."""
47
  return call_api("/optimize_rag", json.loads(payload))
48
 
49
+
50
  def autotune_tool(payload: str) -> str:
51
+ """🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
52
  return call_api("/autotune_rag", json.loads(payload))
53
 
54
+
55
  def generate_qa_tool(payload: str) -> str:
56
+ """🧩 Generates a validation QA dataset for RAG evaluation."""
57
  return call_api("/generate_validation_qa", json.loads(payload))
58
 
59
+
60
+ # Dynamically assign Pydantic model docstrings to MCP tool functions
61
+ optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
62
+ autotune_tool.__doc__ = AutotuneRequest.__doc__
63
+ generate_qa_tool.__doc__ = QARequest.__doc__
64
+
65
+
66
  def model_to_json(model_cls) -> str:
67
  return json.dumps({k: v.default for k, v in model_cls.__fields__.items()}, indent=2)
68
 
69
+
70
+ # Default inputs
71
  DEFAULT_UPLOAD_PATH = "data/docs"
72
  DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
73
  DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
 
75
 
76
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
77
  gr.Markdown("# Ragmint MCP Client")
78
+
79
+ # Upload Documents
80
  with gr.Column():
81
  gr.Markdown("## Upload Documents")
82
+ gr.Markdown("📂 Upload files to your `data/docs` folder")
83
  upload_files = gr.File(file_count="multiple", type="filepath")
84
  upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
85
  upload_btn = gr.Button("Upload", variant="primary")
 
87
  upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
88
  gr.Markdown("---")
89
 
90
+ # Optimize RAG
91
  with gr.Column():
92
  gr.Markdown("## Optimize RAG")
93
+ gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
94
  optimize_input = gr.Textbox(lines=12, value=DEFAULT_OPTIMIZE_JSON, label="OptimizeRequest JSON")
95
  optimize_btn = gr.Button("Submit", variant="primary")
96
+ optimize_out = gr.Textbox(lines=15, label="Response")
97
  optimize_btn.click(optimize_rag_tool, inputs=optimize_input, outputs=optimize_out)
98
  gr.Markdown("---")
99
 
100
+ # Autotune RAG
101
  with gr.Column():
102
  gr.Markdown("## Autotune RAG")
103
+ gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
104
  autotune_input = gr.Textbox(lines=12, value=DEFAULT_AUTOTUNE_JSON, label="AutotuneRequest JSON")
105
  autotune_btn = gr.Button("Submit", variant="primary")
106
  autotune_out = gr.Textbox(lines=15)
107
  autotune_btn.click(autotune_tool, inputs=autotune_input, outputs=autotune_out)
108
  gr.Markdown("---")
109
 
110
+ # Generate QA
111
  with gr.Column():
112
  gr.Markdown("## Generate QA")
113
+ gr.Markdown(QARequest.__doc__ or "No description available.")
114
  qa_input = gr.Textbox(lines=12, value=DEFAULT_QA_JSON, label="QARequest JSON")
115
  qa_btn = gr.Button("Submit", variant="primary")
116
+ qa_out = gr.Textbox(lines=15, label="Response")
117
  qa_btn.click(generate_qa_tool, inputs=qa_input, outputs=qa_out)
118
  gr.Markdown("---")
119
 
 
121
  demo.launch(
122
  server_name="0.0.0.0",
123
  server_port=7860,
124
+ mcp_server=True,
125
+ show_error=True
126
+ )
models.py CHANGED
@@ -6,7 +6,21 @@ from pydantic import BaseModel, Field
6
  # Models
7
  class OptimizeRequest(BaseModel):
8
  """
9
- 🔧 Explicit optimization request: user provides all pipeline configs manually.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  """
11
  docs_path: Optional[str] = Field(
12
  default="data/docs",
@@ -66,6 +80,20 @@ class OptimizeRequest(BaseModel):
66
 
67
 
68
  class AutotuneRequest(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  docs_path: Optional[str] = Field(
70
  default="data/docs",
71
  description="📂 Folder containing your documents for RAG optimization. Example: 'data/docs'"
@@ -108,7 +136,14 @@ class AutotuneRequest(BaseModel):
108
 
109
  class QARequest(BaseModel):
110
  """
111
- 🧩 Generates a validation QA dataset for RAG evaluation.
 
 
 
 
 
 
 
112
  """
113
  docs_path: str = Field(
114
  description="📂 Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
 
6
  # Models
7
  class OptimizeRequest(BaseModel):
8
  """
9
+ 🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
10
+
11
+ Parameters:
12
+ - docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
13
+ - retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
14
+ - embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
15
+ - strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
16
+ - chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
17
+ - overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
18
+ - rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
19
+ - search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
20
+ - trials (int, optional): 🧪 Number of optimization trials. Default: 5
21
+ - metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
22
+ - validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
23
+ - llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
24
  """
25
  docs_path: Optional[str] = Field(
26
  default="data/docs",
 
80
 
81
 
82
  class AutotuneRequest(BaseModel):
83
+ """
84
+ ⚡ Automatically tunes RAG pipeline parameters based on document analysis.
85
+
86
+ Parameters:
87
+ - docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
88
+ - embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
89
+ - num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
90
+ - metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
91
+ - search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
92
+ - trials (int, optional): 🧪 Number of optimization trials. Default: 5
93
+ - validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
94
+ - llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
95
+ """
96
+
97
  docs_path: Optional[str] = Field(
98
  default="data/docs",
99
  description="📂 Folder containing your documents for RAG optimization. Example: 'data/docs'"
 
136
 
137
  class QARequest(BaseModel):
138
  """
139
+ 🧩 Generate a validation QA dataset from documents for RAG evaluation.
140
+
141
+ Parameters:
142
+ - docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
143
+ - llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
144
+ - batch_size (int): 📦 Number of documents per batch. Default: 5
145
+ - min_q (int): ❓ Minimum number of questions per document. Default: 3
146
+ - max_q (int): ❓ Maximum number of questions per document. Default: 25
147
  """
148
  docs_path: str = Field(
149
  description="📂 Folder containing your documents to generate QA pairs from. Example: 'data/docs'",