scott12355 committed
Commit f1a8641 · 1 Parent(s): dbc3ed8

first test - working locally

Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the Python 3.9 base image
+ FROM python:3.9
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies from requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Supabase.py ADDED
@@ -0,0 +1,65 @@
+ import os
+ from dotenv import load_dotenv
+ from supabase import create_client, Client
+ from fastapi import HTTPException
+ from typing import List, Dict
+
+ def initSupabase():
+     # Load environment variables from .env file
+     load_dotenv()
+
+     url: str = os.environ.get("SUPABASE_URL")
+     key: str = os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
+
+     # Add error checking
+     if not url or not key:
+         raise ValueError("Supabase URL and key must be set in environment variables")
+
+     supabase: Client = create_client(url, key)
+     print("Supabase client initialized")
+     return supabase
+
+
+ def updateSupabaseChatHistory(generated_text: List[Dict[str, str]], chat_id: int, supabase: Client, status: bool = False):
+     """
+     Updates the chat history in Supabase.
+
+     Args:
+         generated_text: The generated text to add to the chat history.
+         chat_id: The ID of the chat to update.
+         supabase: The Supabase client to use.
+         status: Value to set for the chat's awaiting_response flag.
+
+     Raises:
+         HTTPException: If there is an error updating Supabase.
+     """
+     try:
+         response = supabase.table("Chats").update({"chat_history": generated_text, "awaiting_response": status}).eq("id", chat_id).execute()
+         if hasattr(response, 'error') and response.error:
+             raise HTTPException(
+                 status_code=500, detail=f"Error updating chat history: {response.error}"
+             )
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error updating Supabase: {str(e)}"
+         ) from e
+
+ def updateSupabaseChatStatus(status: bool, chat_id: int, supabase: Client):
+     """
+     Updates the status of a chat in Supabase.
+
+     Args:
+         status: The status to update the chat to.
+         chat_id: The ID of the chat to update.
+         supabase: The Supabase client to use.
+
+     Raises:
+         HTTPException: If there is an error updating Supabase.
+     """
+     try:
+         response = supabase.table("Chats").update({"awaiting_response": status}).eq("id", chat_id).execute()
+         if hasattr(response, 'error') and response.error:
+             raise HTTPException(
+                 status_code=500, detail=f"Error updating chat status: {response.error}"
+             )
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error updating Supabase: {str(e)}"
+         ) from e
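A minimal usage sketch of these helpers, assuming SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY are set (in the environment or a .env file) and that a "Chats" table with id, chat_history and awaiting_response columns already exists; the chat ID below is a made-up UUID string, passed the same way main.py passes its chatID despite the int type hints:

from Supabase import initSupabase, updateSupabaseChatHistory, updateSupabaseChatStatus

supabase = initSupabase()  # raises ValueError if the env vars are missing

example_chat_id = "123e4567-e89b-12d3-a456-426614174000"  # hypothetical chat row UUID

# Mark the chat as awaiting a response, then write back the new history
updateSupabaseChatStatus(True, example_chat_id, supabase)
updateSupabaseChatHistory(
    [{"role": "user", "content": "hi"},
     {"role": "assistant", "content": "Hello!"}],
    example_chat_id,
    supabase,
    status=False,  # clear the awaiting_response flag once the reply is stored
)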
VectorDB.py ADDED
@@ -0,0 +1,87 @@
+ from sentence_transformers import SentenceTransformer
+ from config import RAG_CONFIG
+ import os
+ from PyPDF2 import PdfReader
+ import chromadb
+
+ # Initialize the embeddings model
+ embeddings_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
+
+ # Initialize the ChromaDB persistent client
+ chroma_client = chromadb.PersistentClient(path="./chroma_db")
+ # Create or get the collection
+ collection = chroma_client.get_or_create_collection(
+     name="RagDocuments",
+     metadata={
+         "hnsw:space": "cosine"
+     },  # cosine similarity will be used to measure the distance between vectors
+ )
+
+ def initRAG(device):
+     # Initialize documents if the collection is empty
+     if collection.count() == 0:
+         print("Loading documents into ChromaDB...")
+         texts = load_pdfs(RAG_CONFIG["path"])
+         all_chunks = []
+         for text in texts:
+             all_chunks.extend(chunk_text(text, chunk_size=100, overlap=5))
+
+         # Generate embeddings and add to ChromaDB
+         embeddings = embeddings_model.encode(all_chunks)
+         collection.add(
+             embeddings=embeddings.tolist(),
+             documents=all_chunks,
+             ids=[f"doc_{i}" for i in range(len(all_chunks))],
+         )
+
+
+ def load_pdfs(directory):
+     texts = []
+     for filename in os.listdir(directory):
+         if filename.endswith(".pdf"):
+             filepath = os.path.join(directory, filename)
+             with open(filepath, "rb") as file:
+                 pdf = PdfReader(file)
+                 for page in pdf.pages:
+                     texts.append(page.extract_text())
+     return texts
+
+
+ def chunk_text(text, chunk_size=100, overlap=10):
+     words = text.split()
+     chunks = []
+     i = 0
+
+     while i < len(words):
+         # Calculate the end index for the current chunk
+         end = min(i + chunk_size, len(words))
+         # Create the chunk from the words in the window
+         chunk = " ".join(words[i:end])
+         chunks.append(chunk)
+         # Move the index forward by chunk_size - overlap
+         i += chunk_size - overlap
+
+         # Stop early if the leftover words are fewer than the overlap
+         if i < len(words) and len(words) - i < overlap:
+             break
+
+     # Add a final chunk if there are remaining words
+     if i < len(words):
+         chunks.append(" ".join(words[i:]))
+
+     return chunks
+
+
+ def search_docs(query, top_k=3):
+     query_embedding = embeddings_model.encode(query)
+     results = collection.query(
+         query_embeddings=[query_embedding.tolist()], n_results=top_k
+     )
+
+     return "".join(
+         f"Result {i + 1}:\n{doc}\n\n" for i, doc in enumerate(results["documents"][0])  # type: ignore
+     )
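A short usage sketch of the chunking and retrieval helpers, assuming the BAAI/bge-base-en-v1.5 model can be downloaded and the PDFs in documents/ are present (importing VectorDB loads the embedding model and opens ./chroma_db as side effects); the sample text and query are made up:

from VectorDB import chunk_text, search_docs, initRAG

# Sliding-window chunking: 100-word windows that overlap by 10 words
sample = "word " * 250  # hypothetical 250-word document
chunks = chunk_text(sample, chunk_size=100, overlap=10)
print(len(chunks), [len(c.split()) for c in chunks])  # 3 chunks of 100, 100 and 70 words

# Populate the collection from the PDFs in RAG_CONFIG["path"], then query it
initRAG(device=None)  # the device argument is currently unused by initRAG
print(search_docs("employment support in Manchester", top_k=3))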
__pycache__/InitRAG.cpython-39.pyc ADDED
Binary file (1.45 kB)
__pycache__/Supabase.cpython-39.pyc ADDED
Binary file (2.27 kB)
__pycache__/VectorDB.cpython-39.pyc ADDED
Binary file (2.52 kB)
__pycache__/api_schemas.cpython-39.pyc ADDED
Binary file (676 Bytes)
__pycache__/config.cpython-39.pyc ADDED
Binary file (352 Bytes)
__pycache__/main.cpython-39.pyc ADDED
Binary file (7.06 kB)
__pycache__/model.cpython-39.pyc ADDED
Binary file (3.77 kB)
api_schemas.py ADDED
@@ -0,0 +1,35 @@
+ API_RESPONSES = {
+     200: {
+         "description": "Successful response",
+         "content": {
+             "application/json": {
+                 "example": {
+                     "status": "success",
+                     "generated_text": [
+                         {"role": "user", "content": "hey"},
+                         {
+                             "role": "assistant",
+                             "content": "Hello! How can I assist you today?",
+                         },
+                     ],
+                 }
+             }
+         },
+     },
+     400: {
+         "description": "Invalid input",
+         "content": {
+             "application/json": {"example": {"detail": "Input text cannot be empty"}}
+         },
+     },
+     500: {
+         "description": "Server error",
+         "content": {
+             "application/json": {
+                 "example": {
+                     "detail": "Error generating response: Model failed to generate"
+                 }
+             }
+         },
+     },
+ }
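API_RESPONSES is keyed by HTTP status code, so individual entries can be passed to FastAPI's responses= parameter to enrich the generated OpenAPI docs, which is how main.py reuses the 400 and 500 examples. A minimal sketch with a hypothetical /ping route:

from fastapi import FastAPI
from api_schemas import API_RESPONSES

app = FastAPI()

@app.get("/ping", responses={400: API_RESPONSES[400], 500: API_RESPONSES[500]})
async def ping():
    # The responses dict only documents the endpoint in the OpenAPI schema;
    # it does not change runtime behaviour.
    return {"status": "success"}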
app.py ADDED
@@ -0,0 +1,28 @@
+ import threading
+ import uvicorn
+ from fastapi import FastAPI
+ import gradio as gr
+
+ # Initialize FastAPI
+ app = FastAPI()
+
+ @app.get("/status")
+ async def status():
+     return {"status": "success", "message": "Service is running"}
+
+ # Function to run FastAPI in a separate thread
+ def run_fastapi():
+     uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", reload=False)
+
+ # Start FastAPI in a separate thread
+ fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
+ fastapi_thread.start()
+
+ # Gradio Interface
+ def chatbot_interface(user_input, history):  # ChatInterface passes the message and the chat history
+     return f"You said: {user_input}"  # Replace with actual chatbot logic
+
+ demo = gr.ChatInterface(chatbot_interface)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
chroma_db/.DS_Store ADDED
Binary file (6.15 kB)
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
+ size 3212000
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
+ size 100
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20ba334d8ebdfac8bcd8cc32ea3be61109caabe0333365e6ca734b50350f713a
+ size 4000
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/link_lists.bin ADDED
File without changes
chroma_db/chroma.sqlite3 ADDED
Binary file (307 kB)
config.py ADDED
@@ -0,0 +1,13 @@
+ MODEL_CONFIG = {
+     # "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",  # "Qwen/Qwen2.5-1.5B-Instruct",
+     # "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+     "model_name": "qwen/Qwen2.5-0.5B-Instruct",
+     "max_new_tokens": 250,
+     "num_return_sequences": 1,
+     "batch_size": 8,
+     "max_conversation_history_size": 100
+ }
+
+ RAG_CONFIG = {
+     "path": "documents"
+ }
documents/Northwest.pdf ADDED
Binary file (66.8 kB)
documents/Southwest - London.pdf ADDED
Binary file (68.4 kB)
main.py ADDED
@@ -0,0 +1,345 @@
+ from uuid import UUID
+ from fastapi import FastAPI, HTTPException
+ from transformers import pipeline
+ import torch
+ import gradio as gr
+ from Supabase import initSupabase, updateSupabaseChatHistory, updateSupabaseChatStatus
+ from supabase import Client
+ from config import MODEL_CONFIG
+ from typing import Dict, Any, List
+ from api_schemas import API_RESPONSES
+ from VectorDB import *
+ from pydantic import BaseModel
+ import threading
+ import uvicorn
+
+ # Pick the best available device - MPS (Mac), CUDA (NVIDIA), or CPU
+ if torch.backends.mps.is_available():
+     device = torch.device("mps")
+ elif torch.cuda.is_available():
+     device = torch.device("cuda")
+ else:
+     device = torch.device("cpu")
+ # print(device)
+
+ initRAG(device)
+ supabase: Client = initSupabase()
+
+ # print(search_docs("how much employment in manchester"))
+
+ # Initialize the LLM
+ try:
+     pipe = pipeline(
+         "text-generation",
+         model=MODEL_CONFIG["model_name"],
+         device=device,
+         max_new_tokens=256,
+         temperature=0.3,
+         do_sample=True,  # Allow sampling to generate diverse responses; more conversational and human-like
+         top_k=50,  # Limit the top-k tokens to sample from
+         top_p=0.95,  # Limit the cumulative probability distribution for sampling
+         # num_beams=2,  # Use beam search to generate multiple responses... too slow
+     )
+ except Exception as e:
+     print(f"Error loading model: {str(e)}")
+     raise RuntimeError("Failed to initialize the model")
+
+ # Define the system prompt that sets the behavior and role of the LLM
+ SYSTEM_PROMPT = """Your name is SophiaAI.
+ You are a friendly chatbot designed to assist refugee women with their questions.
+ You should always be friendly. Use emoji in all of your responses to be relatable. You may consider 😊😌🤗 """
+
+ # Serve the API docs as our landing page
+ app = FastAPI(docs_url="/", title="SophiaAi - 21312701", version="1", description="SophiaAi is a chatbot created for a university final project.\nDesigned to empower refugee women, it connects a RAG pipeline of resources that support refugees to a fine-tuned LLM.")
+ print("App Startup Complete!")
+
+
+ class ChatRequest(BaseModel):
+     conversationHistory: List[Dict[str, str]]
+     chatID: UUID
+     model_config = {
+         "json_schema_extra": {
+             "example": {
+                 "conversationHistory": [
+                     {
+                         "role": "user",
+                         "content": "hi"
+                     },
+                     {
+                         "role": "assistant",
+                         "content": "Hello! How can I assist you today?"
+                     },
+                     {
+                         "role": "user",
+                         "content": "whats the weather in MCR"
+                     }
+                 ],
+                 "chatID": "123e4567-e89b-12d3-a456-426614174000"
+             }
+         }
+     }
+
+
+ @app.post(
+     "/generateFromChatHistory",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "generated_text": {
+                             "role": "assistant",
+                             "content": "I don't have real-time weather data for Manchester. To get accurate information, please check a weather service like BBC Weather or the Met Office website."
+                         }
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def generateFromChatHistory(input: ChatRequest):
+     """
+     Generate an AI response based on a given conversation history and
+     update the corresponding chat record in Supabase.
+
+     Args:
+         input (ChatRequest): Structured request containing the previous messages and the chat ID.
+     """
+     # Input validation
+     if not input.conversationHistory or len(input.conversationHistory) == 0:
+         raise HTTPException(status_code=400, detail="Conversation history cannot be empty")
+
+     if len(input.conversationHistory) > MODEL_CONFIG["max_conversation_history_size"]:  # Arbitrary limit to avoid overloading the LLM, adjust as needed
+         raise HTTPException(status_code=400, detail="Conversation history too long")
+
+     try:
+         # Map the conversation history, prepending the system prompt
+         content = [
+             {
+                 "role": "system",
+                 "content": SYSTEM_PROMPT,
+             }
+         ]
+         content.extend(
+             {"role": message["role"], "content": message["content"]}
+             for message in input.conversationHistory
+         )
+         # Notify the database that a response is being generated so the user cannot update the chat
+         updateSupabaseChatHistory(content[1:], input.chatID, supabase, True)
+
+         # Retrieve RAG results for the user's most recent question
+         LastQuestion = input.conversationHistory[-1]["content"]  # User's last question
+         RAG_Results = search_docs(LastQuestion, 3)  # Search the vector database
+
+         RagPrompt = f"""_RAG_
+         Use the following information to assist in answering the user's most recent question. Do not make anything up or guess.
+         Relevant information retrieved: {RAG_Results}
+
+         If you don't know, simply let the user know, or ask for more detail. The user has not seen this message; it is for your reference only."""
+
+         # Append the RAG results with a dedicated role
+         rag_message = {
+             "role": "user",
+             "content": RagPrompt
+         }
+         content.append(rag_message)
+
+         # print(content)
+         # Generate response
+         output = pipe(content, num_return_sequences=1, max_new_tokens=250)
+         generated_text = output[0]["generated_text"]  # The entire conversation history including the newly generated message
+         generated_text.pop(0)  # Remove the system prompt from the generated text
+
+         updateSupabaseChatHistory(generated_text, input.chatID, supabase)  # Update Supabase and clear the awaiting_response flag
+         return {
+             "status": "success",
+             "generated_text": generated_text  # generated_text[-1] would return only the newly generated response
+         }
+     except Exception as e:
+         updateSupabaseChatStatus(False, input.chatID, supabase)  # Notify the database that the chat is no longer being processed
+         raise HTTPException(
+             status_code=500, detail=f"Error generating response: {str(e)}"
+         ) from e
+
+ @app.get(
+     "/test-searchRAG",
+     responses={
+         200: {
+             "description": "Successful RAG search results",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "results": [
+                             {"content": "Example content 1", "metadata": {"source": "doc1.pdf"}},
+                             {"content": "Example content 2", "metadata": {"source": "doc2.pdf"}}
+                         ]
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def search_rag(query: str, limit: int = 3):
+     """
+     Search the RAG system directly with a query.
+
+     Args:
+         query (str): The search query
+         limit (int): Maximum number of results to return (default: 3)
+     Returns:
+         Dict: Search results with the relevant documents
+     Raises:
+         HTTPException: If the query is invalid or the search fails
+     """
+     # Input validation
+     if not query or not query.strip():
+         raise HTTPException(status_code=400, detail="Search query cannot be empty")
+     if len(query) > 1000:  # Arbitrary limit
+         raise HTTPException(status_code=400, detail="Query text too long")
+     try:
+         # Get results from the vector database
+         results = search_docs(query, limit)
+
+         return {
+             "status": "success",
+             "results": results
+         }
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error searching documents: {str(e)}"
+         ) from e
+
+ @app.get(
+     "/test-generateSingleResponse",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "generated_text": [
+                             {
+                                 "role": "user",
+                                 "content": "hey"
+                             },
+                             {
+                                 "role": "assistant",
+                                 "content": "Hello! How can I assist you today? Is there something specific you'd like to talk about or learn more about?"
+                             }
+                         ]
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def generateSingleResponse(input: str):
+     """
+     Generate a single AI response.
+
+     Args:
+         input (str): The user's question or prompt
+
+     Returns:
+         Dict[str, str]: Structured response containing the generated text
+
+     Raises:
+         HTTPException: If input is invalid or generation fails
+     """
+     # Input validation
+     if not input or not input.strip():
+         raise HTTPException(status_code=400, detail="Input text cannot be empty")
+
+     if len(input) > 1000:  # Arbitrary limit, adjust as needed
+         raise HTTPException(status_code=400, detail="Input text too long")
+
+     # Search the vector database for the user input
+     RAG_Results = search_docs(input, 3)
+     # print(RAG_Results)
+
+     combined_input = f"""
+     Here is the user's question: {input}.
+
+     Use the following information to assist in answering the user's question. Do not make anything up or guess.
+     If you don't know, simply let the user know.
+     {RAG_Results}
+     """
+
+     try:
+         # Combine the system prompt with the user input
+         content = [
+             {"role": "system", "content": SYSTEM_PROMPT},
+             {"role": "user", "content": combined_input},
+         ]
+
+         # Generate response
+         output = pipe(content, num_return_sequences=1, max_new_tokens=250)
+
+         # Extract the conversation text from the output
+         generated_text = output[0]["generated_text"]
+         print(generated_text)
+         # Structure the response
+         return {
+             "status": "success",
+             "generated_text": generated_text[-1],  # Return only the newly generated response
+         }
+
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error generating response: {str(e)}"
+         ) from e
+
+
+ @app.get(
+     "/status",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "message": "Service is running"
+                     }
+                 }
+             }
+         }
+     }
+ )
+ async def status():
+     """
+     Check the service status
+     """
+     return {"status": "success", "message": "Service is running"}
+
+
+ def run_fastapi():
+     print("Starting FastAPI server on http://0.0.0.0:8000")
+     uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", reload=False)
+
+ # Start FastAPI in a separate thread
+ fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
+ fastapi_thread.start()
+
+ # Gradio Interface
+ def chatbot_interface(user_input, history):  # ChatInterface passes the message and the chat history
+     return f"You said: {user_input}"  # Replace with actual chatbot logic
+
+ demo = gr.ChatInterface(chatbot_interface)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
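For local testing, a sketch of how a client might call the main endpoint once the service is running; it assumes the container is reachable on port 7860 as set by the Dockerfile CMD, reuses the example payload from the ChatRequest schema with a made-up chatID, and uses requests, which is not in requirements.txt:

import requests

payload = {
    "conversationHistory": [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "Hello! How can I assist you today?"},
        {"role": "user", "content": "whats the weather in MCR"},
    ],
    "chatID": "123e4567-e89b-12d3-a456-426614174000",
}

resp = requests.post("http://localhost:7860/generateFromChatHistory", json=payload)
resp.raise_for_status()
print(resp.json()["generated_text"])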
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi
+ transformers
+ torch
+ sentence-transformers
+ numpy
+ PyPDF2
+ chromadb
+ uvicorn
+ supabase
+ python-dotenv
+ gradio  # imported by main.py and app.py