scott12355 committed
Commit f1a8641 · 1 Parent(s): dbc3ed8

first test - working locally

Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the Python 3.9 base image
+ FROM python:3.9
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies from requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Supabase.py ADDED
@@ -0,0 +1,65 @@
+ import os
+ from dotenv import load_dotenv
+ from supabase import create_client, Client
+ from fastapi import HTTPException
+ from typing import List, Dict
+
+ def initSupabase():
+     # Load environment variables from .env file
+     load_dotenv()
+
+     url: str = os.environ.get("SUPABASE_URL")
+     key: str = os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
+
+     # Add error checking
+     if not url or not key:
+         raise ValueError("Supabase URL and key must be set in environment variables")
+
+     supabase: Client = create_client(url, key)
+     print("Supabase client initialized")
+     return supabase
+
+
+ def updateSupabaseChatHistory(generated_text: List[Dict[str, str]], chat_id: int, supabase: Client, status: bool = False):
+     """
+     Updates the chat history in Supabase.
+
+     Args:
+         generated_text: The generated text to add to the chat history.
+         chat_id: The ID of the chat to update.
+         supabase: The Supabase client to use.
+         status: Value to set for the chat's awaiting_response flag.
+
+     Raises:
+         HTTPException: If there is an error updating Supabase.
+     """
+     try:
+         response = supabase.table("Chats").update({"chat_history": generated_text, "awaiting_response": status}).eq("id", chat_id).execute()
+         if hasattr(response, 'error') and response.error:
+             raise HTTPException(
+                 status_code=500, detail=f"Error updating chat history: {response.error}"
+             )
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error updating Supabase: {str(e)}"
+         ) from e
+
+ def updateSupabaseChatStatus(status: bool, chat_id: int, supabase: Client):
+     """
+     Updates the status of a chat in Supabase.
+
+     Args:
+         status: The status to update the chat to.
+         chat_id: The ID of the chat to update.
+         supabase: The Supabase client to use.
+
+     Raises:
+         HTTPException: If there is an error updating Supabase.
+     """
+     try:
+         response = supabase.table("Chats").update({"awaiting_response": status}).eq("id", chat_id).execute()
+         if hasattr(response, 'error') and response.error:
+             raise HTTPException(
+                 status_code=500, detail=f"Error updating chat status: {response.error}"
+             )
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error updating Supabase: {str(e)}"
+         ) from e
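A minimal usage sketch of these helpers, assuming SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY are set (in the environment or a .env file) and that a "Chats" table with id, chat_history and awaiting_response columns already exists; the chat ID below is a made-up UUID string, passed the same way main.py passes its chatID despite the int type hints:

from Supabase import initSupabase, updateSupabaseChatHistory, updateSupabaseChatStatus

supabase = initSupabase()  # raises ValueError if the env vars are missing

example_chat_id = "123e4567-e89b-12d3-a456-426614174000"  # hypothetical chat row UUID

# Mark the chat as awaiting a response, then write back the new history
updateSupabaseChatStatus(True, example_chat_id, supabase)
updateSupabaseChatHistory(
    [{"role": "user", "content": "hi"},
     {"role": "assistant", "content": "Hello!"}],
    example_chat_id,
    supabase,
    status=False,  # clear the awaiting_response flag once the reply is stored
)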
VectorDB.py ADDED
@@ -0,0 +1,87 @@
+ from sentence_transformers import SentenceTransformer
+ from config import RAG_CONFIG
+ import os
+ from PyPDF2 import PdfReader
+ import chromadb
+
+ # Initialize the embeddings model
+ embeddings_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
+
+ # Initialize the ChromaDB persistent client
+ chroma_client = chromadb.PersistentClient(path="./chroma_db")
+ # Create or get the collection
+ collection = chroma_client.get_or_create_collection(
+     name="RagDocuments",
+     metadata={
+         "hnsw:space": "cosine"
+     },  # cosine similarity will be used to measure the distance between vectors
+ )
+
+ def initRAG(device):
+     # Initialize documents if the collection is empty
+     if collection.count() == 0:
+         print("Loading documents into ChromaDB...")
+         texts = load_pdfs(RAG_CONFIG["path"])
+         all_chunks = []
+         for text in texts:
+             all_chunks.extend(chunk_text(text, chunk_size=100, overlap=5))
+
+         # Generate embeddings and add to ChromaDB
+         embeddings = embeddings_model.encode(all_chunks)
+         collection.add(
+             embeddings=embeddings.tolist(),
+             documents=all_chunks,
+             ids=[f"doc_{i}" for i in range(len(all_chunks))],
+         )
+
+
+ def load_pdfs(directory):
+     texts = []
+     for filename in os.listdir(directory):
+         if filename.endswith(".pdf"):
+             filepath = os.path.join(directory, filename)
+             with open(filepath, "rb") as file:
+                 pdf = PdfReader(file)
+                 for page in pdf.pages:
+                     texts.append(page.extract_text())
+     return texts
+
+
+ def chunk_text(text, chunk_size=100, overlap=10):
+     words = text.split()
+     chunks = []
+     i = 0
+
+     while i < len(words):
+         # Calculate the end index for the current chunk
+         end = min(i + chunk_size, len(words))
+         # Create the chunk from the words in the window
+         chunk = " ".join(words[i:end])
+         chunks.append(chunk)
+         # Move the index forward by chunk_size - overlap
+         i += chunk_size - overlap
+
+         # Stop early if the leftover words are fewer than the overlap
+         if i < len(words) and len(words) - i < overlap:
+             break
+
+     # Add a final chunk if there are remaining words
+     if i < len(words):
+         chunks.append(" ".join(words[i:]))
+
+     return chunks
+
+
+ def search_docs(query, top_k=3):
+     query_embedding = embeddings_model.encode(query)
+     results = collection.query(
+         query_embeddings=[query_embedding.tolist()], n_results=top_k
+     )
+
+     return "".join(
+         f"Result {i + 1}:\n{doc}\n\n" for i, doc in enumerate(results["documents"][0])  # type: ignore
+     )
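A short usage sketch of the chunking and retrieval helpers, assuming the BAAI/bge-base-en-v1.5 model can be downloaded and the PDFs in documents/ are present (importing VectorDB loads the embedding model and opens ./chroma_db as side effects); the sample text and query are made up:

from VectorDB import chunk_text, search_docs, initRAG

# Sliding-window chunking: 100-word windows that overlap by 10 words
sample = "word " * 250  # hypothetical 250-word document
chunks = chunk_text(sample, chunk_size=100, overlap=10)
print(len(chunks), [len(c.split()) for c in chunks])  # 3 chunks of 100, 100 and 70 words

# Populate the collection from the PDFs in RAG_CONFIG["path"], then query it
initRAG(device=None)  # the device argument is currently unused by initRAG
print(search_docs("employment support in Manchester", top_k=3))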
__pycache__/InitRAG.cpython-39.pyc ADDED
Binary file (1.45 kB)
__pycache__/Supabase.cpython-39.pyc ADDED
Binary file (2.27 kB)
__pycache__/VectorDB.cpython-39.pyc ADDED
Binary file (2.52 kB)
__pycache__/api_schemas.cpython-39.pyc ADDED
Binary file (676 Bytes)
__pycache__/config.cpython-39.pyc ADDED
Binary file (352 Bytes)
__pycache__/main.cpython-39.pyc ADDED
Binary file (7.06 kB)
__pycache__/model.cpython-39.pyc ADDED
Binary file (3.77 kB)
api_schemas.py ADDED
@@ -0,0 +1,35 @@
+ API_RESPONSES = {
+     200: {
+         "description": "Successful response",
+         "content": {
+             "application/json": {
+                 "example": {
+                     "status": "success",
+                     "generated_text": [
+                         {"role": "user", "content": "hey"},
+                         {
+                             "role": "assistant",
+                             "content": "Hello! How can I assist you today?",
+                         },
+                     ],
+                 }
+             }
+         },
+     },
+     400: {
+         "description": "Invalid input",
+         "content": {
+             "application/json": {"example": {"detail": "Input text cannot be empty"}}
+         },
+     },
+     500: {
+         "description": "Server error",
+         "content": {
+             "application/json": {
+                 "example": {
+                     "detail": "Error generating response: Model failed to generate"
+                 }
+             }
+         },
+     },
+ }
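API_RESPONSES is keyed by HTTP status code, so individual entries can be passed to FastAPI's responses= parameter to enrich the generated OpenAPI docs, which is how main.py reuses the 400 and 500 examples. A minimal sketch with a hypothetical /ping route:

from fastapi import FastAPI
from api_schemas import API_RESPONSES

app = FastAPI()

@app.get("/ping", responses={400: API_RESPONSES[400], 500: API_RESPONSES[500]})
async def ping():
    # The responses dict only documents the endpoint in the OpenAPI schema;
    # it does not change runtime behaviour.
    return {"status": "success"}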
app.py ADDED
@@ -0,0 +1,28 @@
+ import threading
+ import uvicorn
+ from fastapi import FastAPI
+ import gradio as gr
+
+ # Initialize FastAPI
+ app = FastAPI()
+
+ @app.get("/status")
+ async def status():
+     return {"status": "success", "message": "Service is running"}
+
+ # Function to run FastAPI in a separate thread
+ def run_fastapi():
+     uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", reload=False)
+
+ # Start FastAPI in a separate thread
+ fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
+ fastapi_thread.start()
+
+ # Gradio Interface
+ def chatbot_interface(user_input, history):  # ChatInterface passes the message and the chat history
+     return f"You said: {user_input}"  # Replace with actual chatbot logic
+
+ demo = gr.ChatInterface(chatbot_interface)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
chroma_db/.DS_Store ADDED
Binary file (6.15 kB)
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
+ size 3212000
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
+ size 100
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20ba334d8ebdfac8bcd8cc32ea3be61109caabe0333365e6ca734b50350f713a
+ size 4000
chroma_db/7fcd22e3-358a-4deb-a923-2709fc544c61/link_lists.bin ADDED
File without changes
chroma_db/chroma.sqlite3 ADDED
Binary file (307 kB)
config.py ADDED
@@ -0,0 +1,13 @@
+ MODEL_CONFIG = {
+     # "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",  # "Qwen/Qwen2.5-1.5B-Instruct",
+     # "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+     "model_name": "qwen/Qwen2.5-0.5B-Instruct",
+     "max_new_tokens": 250,
+     "num_return_sequences": 1,
+     "batch_size": 8,
+     "max_conversation_history_size": 100
+ }
+
+ RAG_CONFIG = {
+     "path": "documents"
+ }
documents/Northwest.pdf ADDED
Binary file (66.8 kB)
documents/Southwest - London.pdf ADDED
Binary file (68.4 kB)
main.py ADDED
@@ -0,0 +1,345 @@
+ from uuid import UUID
+ from fastapi import FastAPI, HTTPException
+ from transformers import pipeline
+ import torch
+ import gradio as gr
+ from Supabase import initSupabase, updateSupabaseChatHistory, updateSupabaseChatStatus
+ from supabase import Client
+ from config import MODEL_CONFIG
+ from typing import Dict, Any, List
+ from api_schemas import API_RESPONSES
+ from VectorDB import *
+ from pydantic import BaseModel
+ import threading
+ import uvicorn
+
+ # Pick the best available device - MPS (Mac), CUDA (NVIDIA), or CPU
+ if torch.backends.mps.is_available():
+     device = torch.device("mps")
+ elif torch.cuda.is_available():
+     device = torch.device("cuda")
+ else:
+     device = torch.device("cpu")
+ # print(device)
+
+ initRAG(device)
+ supabase: Client = initSupabase()
+
+ # print(search_docs("how much employment in manchester"))
+
+ # Initialize the LLM
+ try:
+     pipe = pipeline(
+         "text-generation",
+         model=MODEL_CONFIG["model_name"],
+         device=device,
+         max_new_tokens=256,
+         temperature=0.3,
+         do_sample=True,  # Allow sampling to generate diverse responses; more conversational and human-like
+         top_k=50,  # Limit the top-k tokens to sample from
+         top_p=0.95,  # Limit the cumulative probability distribution for sampling
+         # num_beams=2,  # Use beam search to generate multiple responses... too slow
+     )
+ except Exception as e:
+     print(f"Error loading model: {str(e)}")
+     raise RuntimeError("Failed to initialize the model")
+
+ # Define the system prompt that sets the behavior and role of the LLM
+ SYSTEM_PROMPT = """Your name is SophiaAI.
+ You are a friendly chatbot designed to assist refugee women with their questions.
+ You should always be friendly. Use emoji in all of your responses to be relatable. You may consider 😊😌🤗 """
+
+ # Serve the API docs as our landing page
+ app = FastAPI(docs_url="/", title="SophiaAi - 21312701", version="1", description="SophiaAi is a chatbot created for a university final project.\nDesigned to empower refugee women, it connects a RAG pipeline of resources that support refugees to a fine-tuned LLM.")
+ print("App Startup Complete!")
+
+
+ class ChatRequest(BaseModel):
+     conversationHistory: List[Dict[str, str]]
+     chatID: UUID
+     model_config = {
+         "json_schema_extra": {
+             "example": {
+                 "conversationHistory": [
+                     {
+                         "role": "user",
+                         "content": "hi"
+                     },
+                     {
+                         "role": "assistant",
+                         "content": "Hello! How can I assist you today?"
+                     },
+                     {
+                         "role": "user",
+                         "content": "whats the weather in MCR"
+                     }
+                 ],
+                 "chatID": "123e4567-e89b-12d3-a456-426614174000"
+             }
+         }
+     }
+
+
+ @app.post(
+     "/generateFromChatHistory",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "generated_text": {
+                             "role": "assistant",
+                             "content": "I don't have real-time weather data for Manchester. To get accurate information, please check a weather service like BBC Weather or the Met Office website."
+                         }
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def generateFromChatHistory(input: ChatRequest):
+     """
+     Generate an AI response based on a given conversation history and
+     update the corresponding chat record in Supabase.
+
+     Args:
+         input (ChatRequest): Structured request containing the previous messages and the chat ID.
+     """
+     # Input validation
+     if not input.conversationHistory or len(input.conversationHistory) == 0:
+         raise HTTPException(status_code=400, detail="Conversation history cannot be empty")
+
+     if len(input.conversationHistory) > MODEL_CONFIG["max_conversation_history_size"]:  # Arbitrary limit to avoid overloading the LLM, adjust as needed
+         raise HTTPException(status_code=400, detail="Conversation history too long")
+
+     try:
+         # Map the conversation history, prepending the system prompt
+         content = [
+             {
+                 "role": "system",
+                 "content": SYSTEM_PROMPT,
+             }
+         ]
+         content.extend(
+             {"role": message["role"], "content": message["content"]}
+             for message in input.conversationHistory
+         )
+         # Notify the database that a response is being generated so the user cannot update the chat
+         updateSupabaseChatHistory(content[1:], input.chatID, supabase, True)
+
+         # Retrieve RAG results for the user's most recent question
+         LastQuestion = input.conversationHistory[-1]["content"]  # User's last question
+         RAG_Results = search_docs(LastQuestion, 3)  # Search the vector database
+
+         RagPrompt = f"""_RAG_
+         Use the following information to assist in answering the user's most recent question. Do not make anything up or guess.
+         Relevant information retrieved: {RAG_Results}
+
+         If you don't know, simply let the user know, or ask for more detail. The user has not seen this message; it is for your reference only."""
+
+         # Append the RAG results with a dedicated role
+         rag_message = {
+             "role": "user",
+             "content": RagPrompt
+         }
+         content.append(rag_message)
+
+         # print(content)
+         # Generate response
+         output = pipe(content, num_return_sequences=1, max_new_tokens=250)
+         generated_text = output[0]["generated_text"]  # The entire conversation history including the newly generated message
+         generated_text.pop(0)  # Remove the system prompt from the generated text
+
+         updateSupabaseChatHistory(generated_text, input.chatID, supabase)  # Update Supabase and clear the awaiting_response flag
+         return {
+             "status": "success",
+             "generated_text": generated_text  # generated_text[-1] would return only the newly generated response
+         }
+     except Exception as e:
+         updateSupabaseChatStatus(False, input.chatID, supabase)  # Notify the database that the chat is no longer being processed
+         raise HTTPException(
+             status_code=500, detail=f"Error generating response: {str(e)}"
+         ) from e
+
+ @app.get(
+     "/test-searchRAG",
+     responses={
+         200: {
+             "description": "Successful RAG search results",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "results": [
+                             {"content": "Example content 1", "metadata": {"source": "doc1.pdf"}},
+                             {"content": "Example content 2", "metadata": {"source": "doc2.pdf"}}
+                         ]
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def search_rag(query: str, limit: int = 3):
+     """
+     Search the RAG system directly with a query.
+
+     Args:
+         query (str): The search query
+         limit (int): Maximum number of results to return (default: 3)
+     Returns:
+         Dict: Search results with the relevant documents
+     Raises:
+         HTTPException: If the query is invalid or the search fails
+     """
+     # Input validation
+     if not query or not query.strip():
+         raise HTTPException(status_code=400, detail="Search query cannot be empty")
+     if len(query) > 1000:  # Arbitrary limit
+         raise HTTPException(status_code=400, detail="Query text too long")
+     try:
+         # Get results from the vector database
+         results = search_docs(query, limit)
+
+         return {
+             "status": "success",
+             "results": results
+         }
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error searching documents: {str(e)}"
+         ) from e
+
+ @app.get(
+     "/test-generateSingleResponse",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "generated_text": [
+                             {
+                                 "role": "user",
+                                 "content": "hey"
+                             },
+                             {
+                                 "role": "assistant",
+                                 "content": "Hello! How can I assist you today? Is there something specific you'd like to talk about or learn more about?"
+                             }
+                         ]
+                     }
+                 }
+             }
+         },
+         400: API_RESPONSES[400],
+         500: API_RESPONSES[500]
+     }
+ )
+ async def generateSingleResponse(input: str):
+     """
+     Generate a single AI response.
+
+     Args:
+         input (str): The user's question or prompt
+
+     Returns:
+         Dict[str, str]: Structured response containing the generated text
+
+     Raises:
+         HTTPException: If input is invalid or generation fails
+     """
+     # Input validation
+     if not input or not input.strip():
+         raise HTTPException(status_code=400, detail="Input text cannot be empty")
+
+     if len(input) > 1000:  # Arbitrary limit, adjust as needed
+         raise HTTPException(status_code=400, detail="Input text too long")
+
+     # Search the vector database for the user input
+     RAG_Results = search_docs(input, 3)
+     # print(RAG_Results)
+
+     combined_input = f"""
+     Here is the user's question: {input}.
+
+     Use the following information to assist in answering the user's question. Do not make anything up or guess.
+     If you don't know, simply let the user know.
+     {RAG_Results}
+     """
+
+     try:
+         # Combine the system prompt with the user input
+         content = [
+             {"role": "system", "content": SYSTEM_PROMPT},
+             {"role": "user", "content": combined_input},
+         ]
+
+         # Generate response
+         output = pipe(content, num_return_sequences=1, max_new_tokens=250)
+
+         # Extract the conversation text from the output
+         generated_text = output[0]["generated_text"]
+         print(generated_text)
+         # Structure the response
+         return {
+             "status": "success",
+             "generated_text": generated_text[-1],  # Return only the newly generated response
+         }
+
+     except Exception as e:
+         raise HTTPException(
+             status_code=500, detail=f"Error generating response: {str(e)}"
+         ) from e
+
+
+ @app.get(
+     "/status",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "application/json": {
+                     "example": {
+                         "status": "success",
+                         "message": "Service is running"
+                     }
+                 }
+             }
+         }
+     }
+ )
+ async def status():
+     """
+     Check the service status
+     """
+     return {"status": "success", "message": "Service is running"}
+
+
+ def run_fastapi():
+     print("Starting FastAPI server on http://0.0.0.0:8000")
+     uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", reload=False)
+
+ # Start FastAPI in a separate thread
+ fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
+ fastapi_thread.start()
+
+ # Gradio Interface
+ def chatbot_interface(user_input, history):  # ChatInterface passes the message and the chat history
+     return f"You said: {user_input}"  # Replace with actual chatbot logic
+
+ demo = gr.ChatInterface(chatbot_interface)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
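For local testing, a sketch of how a client might call the main endpoint once the service is running; it assumes the container is reachable on port 7860 as set by the Dockerfile CMD, reuses the example payload from the ChatRequest schema with a made-up chatID, and uses requests, which is not in requirements.txt:

import requests

payload = {
    "conversationHistory": [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "Hello! How can I assist you today?"},
        {"role": "user", "content": "whats the weather in MCR"},
    ],
    "chatID": "123e4567-e89b-12d3-a456-426614174000",
}

resp = requests.post("http://localhost:7860/generateFromChatHistory", json=payload)
resp.raise_for_status()
print(resp.json()["generated_text"])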
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi
+ transformers
+ torch
+ sentence-transformers
+ numpy
+ PyPDF2
+ chromadb
+ uvicorn
+ supabase
+ python-dotenv
+ gradio  # imported by main.py and app.py