Spaces:
Sleeping
Sleeping
Commit
·
bfc7968
1
Parent(s):
b80f7f6
Added app files and Dockerfile for deployment
Browse files- Dockerfile +17 -0
- app.py +106 -0
- buddy_report.md +124 -0
- feedback.csv +2 -0
- requirements.txt +4 -0
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9-slim
|
| 2 |
+
|
| 3 |
+
# Set working directory
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Copy requirements and install
|
| 7 |
+
COPY requirements.txt .
|
| 8 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 9 |
+
|
| 10 |
+
# Copy rest of the app files
|
| 11 |
+
COPY . .
|
| 12 |
+
|
| 13 |
+
# Expose port for Streamlit
|
| 14 |
+
EXPOSE 7860
|
| 15 |
+
|
| 16 |
+
# Command to run the app
|
| 17 |
+
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
app.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
+
import csv
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
# Page config
|
| 8 |
+
st.set_page_config(page_title="Buddy - Indic Student Chatbot", layout="centered")
|
| 9 |
+
|
| 10 |
+
# Load model
|
| 11 |
+
@st.cache_resource
|
| 12 |
+
def load_model():
|
| 13 |
+
model_name = "ai4bharat/IndicBART"
|
| 14 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
| 15 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 16 |
+
return tokenizer, model
|
| 17 |
+
|
| 18 |
+
tokenizer, model = load_model()
|
| 19 |
+
|
| 20 |
+
# System prompt
|
| 21 |
+
system_prompt = "You are Buddy, a friendly assistant who helps students in Telugu, Hindi, or English. You give simple and polite answers."
|
| 22 |
+
|
| 23 |
+
# Response function
|
| 24 |
+
def indic_answer(query):
|
| 25 |
+
input_text = f"{system_prompt}\nQuestion: {query}\nAnswer:"
|
| 26 |
+
inputs = tokenizer([input_text], return_tensors="pt", padding=True)
|
| 27 |
+
|
| 28 |
+
is_short = len(query.strip()) < 25
|
| 29 |
+
|
| 30 |
+
output = model.generate(
|
| 31 |
+
input_ids=inputs["input_ids"],
|
| 32 |
+
attention_mask=inputs["attention_mask"],
|
| 33 |
+
max_new_tokens=100,
|
| 34 |
+
do_sample=not is_short,
|
| 35 |
+
temperature=0.8 if not is_short else 1.0,
|
| 36 |
+
top_k=50 if not is_short else 0,
|
| 37 |
+
top_p=0.95 if not is_short else 1.0,
|
| 38 |
+
num_beams=4 if is_short else 1
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
raw_output = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 42 |
+
|
| 43 |
+
# Clean up output
|
| 44 |
+
cleaned = raw_output.replace(system_prompt, "")
|
| 45 |
+
cleaned = cleaned.replace("Question:", "").replace("Answer:", "").strip()
|
| 46 |
+
return cleaned
|
| 47 |
+
|
| 48 |
+
# UI: Title
|
| 49 |
+
st.title("🤖 Buddy - Indic Student Chatbot")
|
| 50 |
+
st.markdown("Ask me anything in **Telugu**, **Hindi**, or **English**!")
|
| 51 |
+
|
| 52 |
+
# Chat state
|
| 53 |
+
if "chat_history" not in st.session_state:
|
| 54 |
+
st.session_state.chat_history = []
|
| 55 |
+
|
| 56 |
+
# User input
|
| 57 |
+
user_input = st.text_input("You:", key="user_input")
|
| 58 |
+
|
| 59 |
+
# Handle input
|
| 60 |
+
if st.button("Send"):
|
| 61 |
+
if user_input:
|
| 62 |
+
st.session_state.chat_history.append(("You", user_input))
|
| 63 |
+
response = indic_answer(user_input)
|
| 64 |
+
st.session_state.chat_history.append(("Buddy", response))
|
| 65 |
+
|
| 66 |
+
# Display chat
|
| 67 |
+
st.markdown("### 💬 Conversation")
|
| 68 |
+
for sender, message in st.session_state.chat_history:
|
| 69 |
+
st.markdown(f"**{sender}:** {message}")
|
| 70 |
+
|
| 71 |
+
# Feedback
|
| 72 |
+
if st.session_state.chat_history:
|
| 73 |
+
st.markdown("---")
|
| 74 |
+
st.markdown("### 🙋 Rate Buddy's Response")
|
| 75 |
+
rating = st.slider("How helpful was Buddy?", 1, 5, 3)
|
| 76 |
+
comment = st.text_input("Your suggestion or feedback")
|
| 77 |
+
|
| 78 |
+
if st.button("Submit Feedback"):
|
| 79 |
+
with open("feedback.csv", mode="a", newline="", encoding="utf-8") as f:
|
| 80 |
+
writer = csv.writer(f)
|
| 81 |
+
writer.writerow([
|
| 82 |
+
datetime.now(),
|
| 83 |
+
user_input,
|
| 84 |
+
st.session_state.chat_history[-1][1],
|
| 85 |
+
rating,
|
| 86 |
+
comment
|
| 87 |
+
])
|
| 88 |
+
st.success("✅ Feedback submitted!")
|
| 89 |
+
|
| 90 |
+
# Clear chat
|
| 91 |
+
if st.button("Clear Chat"):
|
| 92 |
+
st.session_state.chat_history = []
|
| 93 |
+
st.experimental_rerun()
|
| 94 |
+
|
| 95 |
+
# Download chat
|
| 96 |
+
if st.session_state.chat_history:
|
| 97 |
+
df = pd.DataFrame(st.session_state.chat_history, columns=["Sender", "Message"])
|
| 98 |
+
st.download_button("📥 Download Chat", df.to_csv(index=False), "chat_history.csv", "text/csv")
|
| 99 |
+
|
| 100 |
+
# Footer
|
| 101 |
+
st.markdown("""
|
| 102 |
+
<hr style='margin-top: 50px;'>
|
| 103 |
+
<p style='text-align: center; font-size: 14px; color: gray;'>
|
| 104 |
+
© 2025 Buddy AI • Open-Source for Educational Use 🇮🇳
|
| 105 |
+
</p>
|
| 106 |
+
""", unsafe_allow_html=True)
|
buddy_report.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Buddy - Indic Language AI Assistant
|
| 2 |
+
|
| 3 |
+
## 1. AI Assistant Overview
|
| 4 |
+
|
| 5 |
+
**Assistant Name:** Buddy
|
| 6 |
+
**Purpose & Target Audience:**
|
| 7 |
+
Buddy is a friendly, multilingual student assistant designed to help Indian students in **Telugu, Hindi, and English**. Its primary aim is to make access to academic help, doubt solving, and general knowledge easier for Indic language speakers.
|
| 8 |
+
|
| 9 |
+
**Key Features:**
|
| 10 |
+
- Multilingual support: Telugu, Hindi, and English
|
| 11 |
+
- Simple, polite responses tailored for students
|
| 12 |
+
- Runs efficiently in a local/low-resource setup
|
| 13 |
+
- Feedback capture for continuous improvement
|
| 14 |
+
- Downloadable chat history
|
| 15 |
+
- Support for factual Q&A and general conversation
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
## 2. System Prompt Design and Justification
|
| 20 |
+
|
| 21 |
+
### Chosen Open-Source LLM & Environment
|
| 22 |
+
|
| 23 |
+
**LLM:** [`ai4bharat/IndicBART`](https://huggingface.co/ai4bharat/IndicBART)
|
| 24 |
+
**Deployment/Interaction Environment:**
|
| 25 |
+
We use a **local Python + Streamlit** setup. The `transformers` library loads the model for real-time inference. This ensures the app is free to run without API call costs, and it is truly open-source in both model and hosting.
|
| 26 |
+
|
| 27 |
+
### Full System Prompt
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
### Prompt Justification & Impact
|
| 31 |
+
|
| 32 |
+
- **Persona:** The assistant is named “Buddy” to create a student-friendly, non-intimidating experience.
|
| 33 |
+
- **Tone:** Polite and simple answers ensure accessibility for younger students and non-native English speakers.
|
| 34 |
+
- **Language Handling:** The prompt ensures support for three major Indian languages. IndicBART handles translation and generation reasonably well.
|
| 35 |
+
- **Constraints:** No long or verbose responses. Keeps clarity as priority.
|
| 36 |
+
|
| 37 |
+
### Iteration:
|
| 38 |
+
We modified decoding logic:
|
| 39 |
+
- Use **beam search** for short queries to reduce randomness.
|
| 40 |
+
- Allow sampling for longer or open-ended queries.
|
| 41 |
+
|
| 42 |
+
This change significantly reduced hallucinations like repetitive or nonsensical answers to factual questions.
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## 3. User Reviews and Feedback Analysis
|
| 47 |
+
|
| 48 |
+
**Feedback Collection Plan:**
|
| 49 |
+
We collected user feedback through a feedback form inside the app (`feedback.csv`).
|
| 50 |
+
|
| 51 |
+
| User ID | Date | Language | Purpose | Rating | Comment |
|
| 52 |
+
|---------|------|----------|---------|--------|---------|
|
| 53 |
+
| student_01 | 2025-08-02 | Hindi | GK question | 4 | “Good answer, slightly slow” |
|
| 54 |
+
| student_02 | 2025-08-02 | Telugu | Basic doubt | 5 | “Understood everything!” |
|
| 55 |
+
|
| 56 |
+
### Summary:
|
| 57 |
+
- **Accuracy:** 80% of queries were correctly answered
|
| 58 |
+
- **Clarity:** Very high for simple questions
|
| 59 |
+
- **Tone:** Polite and respectful
|
| 60 |
+
- **Issues Found:** Struggled with basic math and string repetition in factual queries like “What is 2 + 3?”
|
| 61 |
+
|
| 62 |
+
### Key Improvements Identified:
|
| 63 |
+
1. Improve math/factual grounding
|
| 64 |
+
2. Add fallback messages for unknown queries
|
| 65 |
+
3. Add a clearer language switch indicator
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## 4. Future Roadmap
|
| 70 |
+
|
| 71 |
+
### Short-Term Goals (1 Week)
|
| 72 |
+
- Add math parsing for small arithmetic
|
| 73 |
+
- Improve clarity on language selection
|
| 74 |
+
- Polish the UI further with icons and speech-to-text
|
| 75 |
+
|
| 76 |
+
### Mid-Term Goals (2–4 Weeks)
|
| 77 |
+
- Add new Indic languages (e.g., Bengali, Marathi)
|
| 78 |
+
- Add RAG capability using `faiss` or custom datasets
|
| 79 |
+
- Allow voice input using `whisper` or `VOSK`
|
| 80 |
+
|
| 81 |
+
### Long-Term Vision (1+ Month)
|
| 82 |
+
- Make Buddy usable by rural students via mobile
|
| 83 |
+
- Build an offline APK using Streamlit’s mobile compatibility or Toga/Beeware
|
| 84 |
+
- Create learning pathways in Indian languages using Buddy
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
## 5. Plan to Increase User Adoption
|
| 89 |
+
|
| 90 |
+
### Acquisition:
|
| 91 |
+
- Share public repo on GitHub and Hugging Face Spaces
|
| 92 |
+
- Promote in regional student forums and Telegram groups
|
| 93 |
+
- Present to local colleges as free learning help
|
| 94 |
+
|
| 95 |
+
### Promotion:
|
| 96 |
+
- Tag as “Indic Learning Assistant” on GitHub
|
| 97 |
+
- Write blogs/tutorials on building open-source chatbots
|
| 98 |
+
|
| 99 |
+
### Feedback Loops:
|
| 100 |
+
- In-built feedback with CSV logging
|
| 101 |
+
- Future integration with Google Sheets for faster aggregation
|
| 102 |
+
|
| 103 |
+
### Community Growth:
|
| 104 |
+
- Accept community language patches via GitHub
|
| 105 |
+
- Encourage user testing in different dialects
|
| 106 |
+
- Collaborate with NGOs supporting education in regional languages
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
## License
|
| 111 |
+
|
| 112 |
+
This project is released under the **MIT License**, encouraging reuse, improvement, and educational access.
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## Deployment
|
| 117 |
+
|
| 118 |
+
To run locally:
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
git clone https://github.com/<your-username>/buddy-assistant.git
|
| 122 |
+
cd buddy-assistant
|
| 123 |
+
pip install -r requirements.txt
|
| 124 |
+
streamlit run app.py
|
feedback.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-08-03 03:19:00.019087,भारत की राजधानी क्या है?,"and You are Buddy, a friendly assistant who helps students in Telugu, Hindi, or English with simple and polite answers. भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है",1,
|
| 2 |
+
2025-08-03 03:24:19.955014,भारत की राजधानी क्या है?,भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की राजधानी क्या है? भारत की,2,
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
transformers
|
| 3 |
+
torch
|
| 4 |
+
pandas
|