Spaces:
Runtime error
Runtime error
Upload 9 files
Browse files
- app.py +64 -64
- data_manager.py +42 -0
- main.py +5 -0
- stt_engine.py +35 -0
- translation.py +12 -1
- tts_engine.py +61 -61
app.py
CHANGED
|
@@ -1,64 +1,64 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
from tts_engine import TTSEngine
|
| 3 |
-
from translation import Translator, CustomTranslator
|
| 4 |
-
from data_manager import save_uploaded_file, convert_to_jsonl
|
| 5 |
-
from training.train_translation import train_from_jsonl
|
| 6 |
-
from stt_engine import STTEngine
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# Language names offered in both speaker dropdowns.
LANGUAGES = [
    "english",
    "yoruba",
    "igbo",
    "hausa",
    "pidgin",
    "esan",
    "tiv",
    "calabar",
    "benin",
]

# Engines are built once at import time and shared by every request.
stt_engine = STTEngine()
tts_engine = TTSEngine(use_coqui=True)

# Use the fine-tuned translator when a trained model directory is present;
# otherwise fall back to the stock Translator.
if os.path.exists("./training/outputs/model"):
    translator = CustomTranslator()
else:
    translator = Translator()
|
| 15 |
-
|
| 16 |
-
def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
    """Run one turn of the voice pipeline: transcribe, translate, speak.

    Args:
        audio: Recording handed to ``stt_engine.transcribe``, or None when
            there is nothing to process.
        src_lang: Language name of the speaker.
        tgt_lang: Language name to translate into.
        clone_voice: Forwarded to ``tts_engine.speak`` as ``voice_clone``.

    Returns:
        A ``(translated_text, audio_path)`` tuple, or ``("", None)`` when
        ``audio`` is None.
    """
    if audio is not None:
        # Speech -> text -> translated text -> synthesized speech.
        heard = stt_engine.transcribe(audio, language=src_lang)
        rendered = translator.translate(heard, src_lang, tgt_lang)
        spoken_path = tts_engine.speak(rendered, lang=tgt_lang, voice_clone=clone_voice)
        return rendered, spoken_path

    return "", None
|
| 30 |
-
|
| 31 |
-
def admin_upload(file):
    """Store an uploaded dataset, normalize it to JSONL and retrain.

    Args:
        file: Value delivered by the upload component, or None when the
            button is pressed before anything was uploaded.

    Returns:
        A status message for the "Training Status" textbox.
    """
    if file is None:
        return "⚠️ Please upload a dataset before training."

    # The upload's ``name`` is normally a full temp-file path. Only the final
    # component may be used as the stored filename; joining an absolute path
    # onto the data directory would point back at the upload itself.
    source_name = getattr(file, "name", None) or str(file)
    file_path = save_uploaded_file(file, os.path.basename(source_name))

    try:
        jsonl_path = convert_to_jsonl(file_path)
    except ValueError as err:
        # Unsupported extension or too few columns: report instead of crashing.
        return f"❌ {err}"

    train_from_jsonl(jsonl_path)
    return "✅ Training done. Model updated!"
|
| 36 |
-
|
| 37 |
-
# Two-tab Gradio interface: live translation and admin-side training.
# NOTE(review): nesting below is reconstructed from blank-line grouping — confirm.
with gr.Blocks(title="🌍 Two-Way Voice Translator") as demo:
    gr.Markdown("# 🌍 Nigerian Two-Way Voice Translator")
    with gr.Tab("Translator"):
        # Language pair for the current turn.
        with gr.Row():
            src_lang = gr.Dropdown(LANGUAGES, value="english", label="Speaker A Language")
            tgt_lang = gr.Dropdown(LANGUAGES, value="hausa", label="Speaker B Language")

        # Microphone input next to the translated text and synthesized audio.
        with gr.Row():
            audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
            translated = gr.Textbox(label="Translated Text", interactive=False)
            audio_out = gr.Audio(label="🔊 Translation Audio")

        clone_voice = gr.Checkbox(value=False, label="🎙️ Use my cloned voice (if my_voice.wav exists)")

        # Run the whole pipeline whenever the recorded audio value changes.
        audio_in.change(
            handle_conversation,
            inputs=[audio_in, src_lang, tgt_lang, clone_voice],
            outputs=[translated, audio_out]
        )

    with gr.Tab("Admin (Training)"):
        gr.Markdown("Upload Hausa ↔ English data (.csv, .xlsx, .tsv, .jsonl)")
        file_in = gr.File(label="Upload dataset")
        train_btn = gr.Button("🚀 Train Model")
        output_box = gr.Textbox(label="Training Status")
        # admin_upload's return string is shown in the status box.
        train_btn.click(admin_upload, inputs=file_in, outputs=output_box)

# Start the web server as soon as the module is executed.
demo.launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from tts_engine import TTSEngine
|
| 3 |
+
from translation import Translator, CustomTranslator
|
| 4 |
+
from data_manager import save_uploaded_file, convert_to_jsonl
|
| 5 |
+
from training.train_translation import train_from_jsonl
|
| 6 |
+
from stt_engine import STTEngine
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Language names offered in both speaker dropdowns.
LANGUAGES = [
    "english",
    "yoruba",
    "igbo",
    "hausa",
    "pidgin",
    "esan",
    "tiv",
    "calabar",
    "benin",
]

# Engines are built once at import time and shared by every request.
stt_engine = STTEngine()
tts_engine = TTSEngine(use_coqui=True)

# Use the fine-tuned translator when a trained model directory is present;
# otherwise fall back to the stock Translator.
if os.path.exists("./training/outputs/model"):
    translator = CustomTranslator()
else:
    translator = Translator()
|
| 15 |
+
|
| 16 |
+
def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
    """Run one turn of the voice pipeline: transcribe, translate, speak.

    Args:
        audio: Recording handed to ``stt_engine.transcribe``, or None when
            there is nothing to process.
        src_lang: Language name of the speaker.
        tgt_lang: Language name to translate into.
        clone_voice: Forwarded to ``tts_engine.speak`` as ``voice_clone``.

    Returns:
        A ``(translated_text, audio_path)`` tuple, or ``("", None)`` when
        ``audio`` is None.
    """
    if audio is not None:
        # Speech -> text -> translated text -> synthesized speech.
        heard = stt_engine.transcribe(audio, language=src_lang)
        rendered = translator.translate(heard, src_lang, tgt_lang)
        spoken_path = tts_engine.speak(rendered, lang=tgt_lang, voice_clone=clone_voice)
        return rendered, spoken_path

    return "", None
|
| 30 |
+
|
| 31 |
+
def admin_upload(file):
    """Store an uploaded dataset, normalize it to JSONL and retrain.

    Args:
        file: Value delivered by the upload component, or None when the
            button is pressed before anything was uploaded.

    Returns:
        A status message for the "Training Status" textbox.
    """
    if file is None:
        return "⚠️ Please upload a dataset before training."

    # The upload's ``name`` is normally a full temp-file path. Only the final
    # component may be used as the stored filename; joining an absolute path
    # onto the data directory would point back at the upload itself.
    source_name = getattr(file, "name", None) or str(file)
    file_path = save_uploaded_file(file, os.path.basename(source_name))

    try:
        jsonl_path = convert_to_jsonl(file_path)
    except ValueError as err:
        # Unsupported extension or too few columns: report instead of crashing.
        return f"❌ {err}"

    train_from_jsonl(jsonl_path)
    return "✅ Training done. Model updated!"
|
| 36 |
+
|
| 37 |
+
# Two-tab Gradio interface: live translation and admin-side training.
# NOTE(review): nesting below is reconstructed from blank-line grouping — confirm.
with gr.Blocks(title="🌍 Two-Way Voice Translator") as demo:
    gr.Markdown("# 🌍 Nigerian Two-Way Voice Translator")
    with gr.Tab("Translator"):
        # Language pair for the current turn.
        with gr.Row():
            src_lang = gr.Dropdown(LANGUAGES, value="english", label="Speaker A Language")
            tgt_lang = gr.Dropdown(LANGUAGES, value="hausa", label="Speaker B Language")

        # Microphone input next to the translated text and synthesized audio.
        with gr.Row():
            audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
            translated = gr.Textbox(label="Translated Text", interactive=False)
            audio_out = gr.Audio(label="🔊 Translation Audio")

        clone_voice = gr.Checkbox(value=False, label="🎙️ Use my cloned voice (if my_voice.wav exists)")

        # Run the whole pipeline whenever the recorded audio value changes.
        audio_in.change(
            handle_conversation,
            inputs=[audio_in, src_lang, tgt_lang, clone_voice],
            outputs=[translated, audio_out]
        )

    with gr.Tab("Admin (Training)"):
        gr.Markdown("Upload Hausa ↔ English data (.csv, .xlsx, .tsv, .jsonl)")
        file_in = gr.File(label="Upload dataset")
        train_btn = gr.Button("🚀 Train Model")
        output_box = gr.Textbox(label="Training Status")
        # admin_upload's return string is shown in the status box.
        train_btn.click(admin_upload, inputs=file_in, outputs=output_box)

# Start the web server as soon as the module is executed.
demo.launch()
|
data_manager.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
# Directory that receives uploaded training datasets; created on import.
DATA_DIR = "./training/data"
os.makedirs(DATA_DIR, exist_ok=True)


def save_uploaded_file(file, filename):
    """Save an uploaded file into ``DATA_DIR`` and return the stored path.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``, which is
            what path-style upload widgets deliver) or a binary file-like
            object exposing ``read()``.
        filename: Name to store the file under. Only its final path
            component is used, so absolute paths and ``..`` segments cannot
            place the file outside ``DATA_DIR``.

    Returns:
        Path of the stored file inside ``DATA_DIR``.

    Raises:
        ValueError: If ``filename`` has no usable final component.
        TypeError: If ``file`` is neither a path nor a readable object.
    """
    import shutil  # local import: only this function needs it

    safe_name = os.path.basename(str(filename))
    if not safe_name:
        raise ValueError("filename must name a file")
    path = os.path.join(DATA_DIR, safe_name)

    # Path-like uploads are copied; they have no read() method to call.
    if isinstance(file, (str, os.PathLike)):
        try:
            shutil.copyfile(os.fspath(file), path)
        except shutil.SameFileError:
            pass  # the upload already lives at the destination
        return path

    if hasattr(file, "read"):
        payload = file.read()
        if isinstance(payload, str):
            # Text-mode handles yield str; the destination is written as bytes.
            payload = payload.encode("utf-8")
        with open(path, "wb") as f:
            f.write(payload)
        return path

    raise TypeError("file must be a path or an object with a read() method")
|
| 14 |
+
|
| 15 |
+
def convert_to_jsonl(file_path, src_col="src", tgt_col="tgt"):
    """Normalize a parallel-text dataset (csv, xlsx, tsv, jsonl) to JSONL.

    Args:
        file_path: Path of the dataset; the extension selects the reader.
        src_col: Header of the source-text column.
        tgt_col: Header of the target-text column.

    Returns:
        Path of the JSONL file. A ``.jsonl`` input is returned unchanged.
        Otherwise the output is written next to the input with one
        ``{"src": ..., "tgt": ...}`` object per line.

    Raises:
        ValueError: For an unsupported extension or fewer than two columns.
    """
    stem, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext == ".jsonl":
        return file_path  # already JSONL
    if ext == ".csv":
        data = pd.read_csv(file_path)
    elif ext == ".xlsx":
        data = pd.read_excel(file_path)
    elif ext == ".tsv":
        data = pd.read_csv(file_path, sep="\t")
    else:
        raise ValueError("Unsupported file format")

    # Ensure we have two columns: src (Hausa) and tgt (English)
    if len(data.columns) < 2:
        raise ValueError("Dataset must have at least two columns")

    # Prefer the named columns when the file has them, whatever their order;
    # otherwise treat the first two columns as (source, target).
    if src_col in data.columns and tgt_col in data.columns:
        pairs = data[[src_col, tgt_col]]
    else:
        pairs = data.iloc[:, :2]

    # Rows with a missing side would otherwise be written as the text "nan".
    pairs = pairs.dropna()

    jsonl_path = stem + ".jsonl"
    with open(jsonl_path, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps({"src": str(src_text), "tgt": str(tgt_text)}, ensure_ascii=False) + "\n"
            for src_text, tgt_text in pairs.itertuples(index=False, name=None)
        )

    return jsonl_path
|
main.py
CHANGED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from assistant import VoiceTranslatorAssistant
|
| 2 |
+
|
| 3 |
+
if __name__ == "__main__":
    # Script entry point: build the assistant from config.yaml and start it.
    VoiceTranslatorAssistant("config.yaml").run()
|
stt_engine.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
|
| 4 |
+
class STTEngine:
    """Speech-to-text wrapper around a Hugging Face Whisper ASR pipeline."""

    # App language name -> Whisper language id.
    # NOTE(review): Whisper's language list appears to have no Igbo ("ig") or
    # Nigerian Pidgin ("pcm") entry, and unknown ids make generation fail.
    # Pidgin is decoded as English; Igbo is left to Whisper's own language
    # detection (None = send no language hint). Confirm against the model.
    _WHISPER_LANG = {
        "english": "en",
        "hausa": "ha",
        "yoruba": "yo",
        "igbo": None,
        "pidgin": "en",
    }
    # Ids callers may pass directly instead of a language name.
    _WHISPER_CODES = frozenset({"en", "ha", "yo"})

    def __init__(self, model_name="openai/whisper-small"):
        """Load the ASR pipeline, on GPU 0 when CUDA is available.

        Args:
            model_name: Model identifier passed to ``pipeline``.
        """
        device = 0 if torch.cuda.is_available() else -1
        self.asr = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device
        )

    def transcribe(self, audio_path, language="en"):
        """Transcribe an audio file to text using Whisper.

        Args:
            audio_path (str): Path to the audio file, or None.
            language (str): A language name used by the app ("english",
                "hausa", ...) or a supported code ("en", "ha", "yo").
                Unrecognised values and None fall back to English.

        Returns:
            The stripped transcription, or "" when ``audio_path`` is None.
        """
        if audio_path is None:
            return ""

        key = (language or "en").strip().lower()
        if key in self._WHISPER_CODES:
            whisper_lang = key  # already a usable id
        else:
            whisper_lang = self._WHISPER_LANG.get(key, "en")

        # An empty dict sends no language hint at all.
        generate_kwargs = {"language": whisper_lang} if whisper_lang else {}
        result = self.asr(audio_path, generate_kwargs=generate_kwargs)
        return result["text"].strip()
|
translation.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import yaml
|
| 2 |
-
from transformers import MarianMTModel, MarianTokenizer
|
| 3 |
|
| 4 |
# ---------------- Load config ----------------
|
| 5 |
CONFIG_FILE = "config.yaml"
|
|
@@ -79,3 +79,14 @@ class Translator:
|
|
| 79 |
return text
|
| 80 |
else:
|
| 81 |
return f"(⚠️ Dictionary doesn't support {input_lang}→{output_lang})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import yaml
|
| 2 |
+
from transformers import MarianMTModel, MarianTokenizer, pipeline
|
| 3 |
|
| 4 |
# ---------------- Load config ----------------
|
| 5 |
CONFIG_FILE = "config.yaml"
|
|
|
|
| 79 |
return text
|
| 80 |
else:
|
| 81 |
return f"(⚠️ Dictionary doesn't support {input_lang}→{output_lang})"
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class CustomTranslator:
    """Translator backed by a fine-tuned MarianMT model loaded from disk."""

    def __init__(self, model_dir="./training/outputs/model"):
        """Load tokenizer and model from ``model_dir`` into a pipeline.

        Args:
            model_dir: Directory holding the saved tokenizer and model.
        """
        self.tokenizer = MarianTokenizer.from_pretrained(model_dir)
        self.model = MarianMTModel.from_pretrained(model_dir)
        self.pipeline = pipeline("translation", model=self.model, tokenizer=self.tokenizer)

    def translate(self, text, input_lang=None, output_lang=None):
        """Translate ``text`` with the fine-tuned model.

        Args:
            text: Text to translate.
            input_lang: Accepted so this class can be called like
                ``Translator.translate(text, input_lang, output_lang)``.
                Unused: the model's direction is fixed by its training data.
            output_lang: Accepted for the same reason; unused.

        Returns:
            The translated text, or "" for empty or whitespace-only input.
        """
        if not text or not text.strip():
            return ""  # nothing to translate; skip the model call
        return self.pipeline(text)[0]["translation_text"]
|
tts_engine.py
CHANGED
|
@@ -1,61 +1,61 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from TTS.api import TTS
|
| 3 |
-
|
| 4 |
-
# App language name -> language id passed to the Coqui model.
# NOTE(review): every Nigerian language maps to "en", presumably because the
# model only offers en / fr-fr / pt-br voices — confirm.
LANG_MAP = {
    "english": "en",
    "yoruba": "en",
    "hausa": "en",
    "igbo": "en",
    "pidgin": "en",
    "esan": "en",
    "tiv": "en",
    "calabar": "en",
    "benin": "en",
    "french": "fr-fr",
    "portuguese": "pt-br"
}


class TTSEngine:
    """Text-to-speech via Coqui YourTTS, or pyttsx3 when Coqui is disabled."""

    def __init__(self, use_coqui=False):
        """Create the engine, loading the Coqui model only when requested.

        Args:
            use_coqui: When true, load the YourTTS model on CPU.
        """
        self.use_coqui = use_coqui
        self.tts = None

        if self.use_coqui:
            self.tts = TTS(
                "tts_models/multilingual/multi-dataset/your_tts",
                progress_bar=False,
                gpu=False
            )

    def _default_speaker(self):
        """Return the model's first built-in speaker name, or None."""
        speakers = getattr(self.tts, "speakers", None)
        return speakers[0] if speakers else None

    def speak(self, text, lang="english", voice_clone=False):
        """Synthesize ``text`` to ``output.wav`` and return that path.

        Args:
            text: Text to speak; empty text produces no audio.
            lang: App language name, looked up in ``LANG_MAP``. Unknown
                names and None fall back to "en".
            voice_clone: Use ``my_voice.wav`` as the reference voice when
                that file exists.

        Returns:
            "output.wav", or None when ``text`` is empty.
        """
        if not text:
            return None

        # NOTE(review): every call writes the same file, so overlapping
        # requests can overwrite each other's audio.
        out_file = "output.wav"

        if self.use_coqui:
            synth_args = {
                "text": text,
                "file_path": out_file,
                "language": LANG_MAP.get((lang or "english").lower(), "en"),
            }
            if voice_clone and os.path.exists("my_voice.wav"):
                # clone your own voice
                synth_args["speaker_wav"] = "my_voice.wav"
            else:
                # A multi-speaker model needs a speaker or a reference wav,
                # so the fallback names one of the built-in speakers.
                speaker = self._default_speaker()
                if speaker is not None:
                    synth_args["speaker"] = speaker
            self.tts.tts_to_file(**synth_args)
        else:
            import pyttsx3
            engine = pyttsx3.init()
            engine.save_to_file(text, out_file)
            engine.runAndWait()

        return out_file
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from TTS.api import TTS
|
| 3 |
+
|
| 4 |
+
# App language name -> language id passed to the Coqui model.
# NOTE(review): every Nigerian language maps to "en", presumably because the
# model only offers en / fr-fr / pt-br voices — confirm.
LANG_MAP = {
    "english": "en",
    "yoruba": "en",
    "hausa": "en",
    "igbo": "en",
    "pidgin": "en",
    "esan": "en",
    "tiv": "en",
    "calabar": "en",
    "benin": "en",
    "french": "fr-fr",
    "portuguese": "pt-br"
}


class TTSEngine:
    """Text-to-speech via Coqui YourTTS, or pyttsx3 when Coqui is disabled."""

    def __init__(self, use_coqui=False):
        """Create the engine, loading the Coqui model only when requested.

        Args:
            use_coqui: When true, load the YourTTS model on CPU.
        """
        self.use_coqui = use_coqui
        self.tts = None

        if self.use_coqui:
            self.tts = TTS(
                "tts_models/multilingual/multi-dataset/your_tts",
                progress_bar=False,
                gpu=False
            )

    def _default_speaker(self):
        """Return the model's first built-in speaker name, or None."""
        speakers = getattr(self.tts, "speakers", None)
        return speakers[0] if speakers else None

    def speak(self, text, lang="english", voice_clone=False):
        """Synthesize ``text`` to ``output.wav`` and return that path.

        Args:
            text: Text to speak; empty text produces no audio.
            lang: App language name, looked up in ``LANG_MAP``. Unknown
                names and None fall back to "en".
            voice_clone: Use ``my_voice.wav`` as the reference voice when
                that file exists.

        Returns:
            "output.wav", or None when ``text`` is empty.
        """
        if not text:
            return None

        # NOTE(review): every call writes the same file, so overlapping
        # requests can overwrite each other's audio.
        out_file = "output.wav"

        if self.use_coqui:
            synth_args = {
                "text": text,
                "file_path": out_file,
                "language": LANG_MAP.get((lang or "english").lower(), "en"),
            }
            if voice_clone and os.path.exists("my_voice.wav"):
                # clone your own voice
                synth_args["speaker_wav"] = "my_voice.wav"
            else:
                # A multi-speaker model needs a speaker or a reference wav,
                # so the fallback names one of the built-in speakers.
                speaker = self._default_speaker()
                if speaker is not None:
                    synth_args["speaker"] = speaker
            self.tts.tts_to_file(**synth_args)
        else:
            import pyttsx3
            engine = pyttsx3.init()
            engine.save_to_file(text, out_file)
            engine.runAndWait()

        return out_file
|