innocentpeter committed on
Commit c3a047c · verified · 1 Parent(s): 64547af

Upload 9 files

Files changed (6)
  1. app.py +64 -64
  2. data_manager.py +42 -0
  3. main.py +5 -0
  4. stt_engine.py +35 -0
  5. translation.py +12 -1
  6. tts_engine.py +61 -61
app.py CHANGED
@@ -1,64 +1,64 @@
- import gradio as gr
- from tts_engine import TTSEngine
- from translation import Translator, CustomTranslator
- from data_manager import save_uploaded_file, convert_to_jsonl
- from training.train_translation import train_from_jsonl
- from stt_engine import STTEngine
- import os
-
- # Init engines
- stt_engine = STTEngine()
- tts_engine = TTSEngine(use_coqui=True)
- translator = CustomTranslator() if os.path.exists("./training/outputs/model") else Translator()
-
- LANGUAGES = ["english", "yoruba", "igbo", "hausa", "pidgin", "esan", "tiv", "calabar", "benin"]
-
- def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
-     if audio is None:
-         return "", None
-
-     # Step 1: Speech to Text
-     text = stt_engine.transcribe(audio, language=src_lang)
-
-     # Step 2: Translate
-     translated = translator.translate(text, src_lang, tgt_lang)
-
-     # Step 3: Text to Speech
-     audio_path = tts_engine.speak(translated, lang=tgt_lang, voice_clone=clone_voice)
-
-     return translated, audio_path
-
- def admin_upload(file):
-     file_path = save_uploaded_file(file, file.name)
-     jsonl_path = convert_to_jsonl(file_path)
-     train_from_jsonl(jsonl_path)
-     return "✅ Training done. Model updated!"
-
- with gr.Blocks(title="🌍 Two-Way Voice Translator") as demo:
-     gr.Markdown("# 🌍 Nigerian Two-Way Voice Translator")
-     with gr.Tab("Translator"):
-         with gr.Row():
-             src_lang = gr.Dropdown(LANGUAGES, value="english", label="Speaker A Language")
-             tgt_lang = gr.Dropdown(LANGUAGES, value="hausa", label="Speaker B Language")
-
-         with gr.Row():
-             audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
-             translated = gr.Textbox(label="Translated Text", interactive=False)
-             audio_out = gr.Audio(label="🔊 Translation Audio")
-
-         clone_voice = gr.Checkbox(value=False, label="🎙️ Use my cloned voice (if my_voice.wav exists)")
-
-         audio_in.change(
-             handle_conversation,
-             inputs=[audio_in, src_lang, tgt_lang, clone_voice],
-             outputs=[translated, audio_out]
-         )
-
-     with gr.Tab("Admin (Training)"):
-         gr.Markdown("Upload Hausa ↔ English data (.csv, .xlsx, .tsv, .jsonl)")
-         file_in = gr.File(label="Upload dataset")
-         train_btn = gr.Button("🚀 Train Model")
-         output_box = gr.Textbox(label="Training Status")
-         train_btn.click(admin_upload, inputs=file_in, outputs=output_box)
-
- demo.launch()
 
+ import gradio as gr
+ from tts_engine import TTSEngine
+ from translation import Translator, CustomTranslator
+ from data_manager import save_uploaded_file, convert_to_jsonl
+ from training.train_translation import train_from_jsonl
+ from stt_engine import STTEngine
+ import os
+
+ # Init engines
+ stt_engine = STTEngine()
+ tts_engine = TTSEngine(use_coqui=True)
+ translator = CustomTranslator() if os.path.exists("./training/outputs/model") else Translator()
+
+ LANGUAGES = ["english", "yoruba", "igbo", "hausa", "pidgin", "esan", "tiv", "calabar", "benin"]
+
+ def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
+     if audio is None:
+         return "", None
+
+     # Step 1: Speech to Text
+     text = stt_engine.transcribe(audio, language=src_lang)
+
+     # Step 2: Translate
+     translated = translator.translate(text, src_lang, tgt_lang)
+
+     # Step 3: Text to Speech
+     audio_path = tts_engine.speak(translated, lang=tgt_lang, voice_clone=clone_voice)
+
+     return translated, audio_path
+
+ def admin_upload(file):
+     file_path = save_uploaded_file(file, file.name)
+     jsonl_path = convert_to_jsonl(file_path)
+     train_from_jsonl(jsonl_path)
+     return "✅ Training done. Model updated!"
+
+ with gr.Blocks(title="🌍 Two-Way Voice Translator") as demo:
+     gr.Markdown("# 🌍 Nigerian Two-Way Voice Translator")
+     with gr.Tab("Translator"):
+         with gr.Row():
+             src_lang = gr.Dropdown(LANGUAGES, value="english", label="Speaker A Language")
+             tgt_lang = gr.Dropdown(LANGUAGES, value="hausa", label="Speaker B Language")
+
+         with gr.Row():
+             audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
+             translated = gr.Textbox(label="Translated Text", interactive=False)
+             audio_out = gr.Audio(label="🔊 Translation Audio")
+
+         clone_voice = gr.Checkbox(value=False, label="🎙️ Use my cloned voice (if my_voice.wav exists)")
+
+         audio_in.change(
+             handle_conversation,
+             inputs=[audio_in, src_lang, tgt_lang, clone_voice],
+             outputs=[translated, audio_out]
+         )
+
+     with gr.Tab("Admin (Training)"):
+         gr.Markdown("Upload Hausa ↔ English data (.csv, .xlsx, .tsv, .jsonl)")
+         file_in = gr.File(label="Upload dataset")
+         train_btn = gr.Button("🚀 Train Model")
+         output_box = gr.Textbox(label="Training Status")
+         train_btn.click(admin_upload, inputs=file_in, outputs=output_box)
+
+ demo.launch()
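The handler chains the three engines (Whisper STT, then translation, then Coqui TTS). A minimal sketch of exercising that chain headlessly, outside the Gradio UI; "sample.wav" is a hypothetical recording, and the engine classes are the ones from this commit:

    # Headless smoke test of the STT -> translate -> TTS chain (a sketch;
    # "sample.wav" is a hypothetical recording, not part of this commit).
    from stt_engine import STTEngine
    from translation import Translator
    from tts_engine import TTSEngine

    stt = STTEngine()
    translator = Translator()
    tts = TTSEngine(use_coqui=True)

    text = stt.transcribe("sample.wav", language="english")
    translated = translator.translate(text, "english", "hausa")
    print(translated, tts.speak(translated, lang="hausa"))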
data_manager.py ADDED
@@ -0,0 +1,42 @@
+ import os
+ import pandas as pd
+ import json
+
+ DATA_DIR = "./training/data"
+ os.makedirs(DATA_DIR, exist_ok=True)
+
+ def save_uploaded_file(file, filename):
+     """Save uploaded file to data folder"""
+     path = os.path.join(DATA_DIR, filename)
+     with open(path, "wb") as f:
+         f.write(file.read())
+     return path
+
+ def convert_to_jsonl(file_path, src_col="src", tgt_col="tgt"):
+     """Detect file type (csv, xlsx, tsv, jsonl) and normalize to JSONL"""
+     ext = os.path.splitext(file_path)[-1].lower()
+     data = None
+
+     if ext == ".csv":
+         data = pd.read_csv(file_path)
+     elif ext == ".xlsx":
+         data = pd.read_excel(file_path)
+     elif ext == ".tsv":
+         data = pd.read_csv(file_path, sep="\t")
+     elif ext == ".jsonl":
+         return file_path  # already JSONL
+     else:
+         raise ValueError("Unsupported file format")
+
+     # Ensure we have two columns: src (Hausa) and tgt (English)
+     if len(data.columns) < 2:
+         raise ValueError("Dataset must have at least two columns")
+
+     data = data.rename(columns={data.columns[0]: "src", data.columns[1]: "tgt"})
+     jsonl_path = file_path.rsplit(".", 1)[0] + ".jsonl"
+
+     with open(jsonl_path, "w", encoding="utf-8") as f:
+         for _, row in data.iterrows():
+             f.write(json.dumps({"src": str(row["src"]), "tgt": str(row["tgt"])}, ensure_ascii=False) + "\n")
+
+     return jsonl_path
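Note that convert_to_jsonl accepts src_col and tgt_col but never uses them: it always takes the first two columns as src and tgt. A quick sketch of the expected round trip, with a hypothetical two-column CSV:

    # Sketch: build a tiny two-column CSV and normalize it to JSONL.
    # The file name "sample.csv" is hypothetical.
    import pandas as pd
    from data_manager import convert_to_jsonl

    pd.DataFrame({"hausa": ["sannu"], "english": ["hello"]}).to_csv(
        "./training/data/sample.csv", index=False
    )
    path = convert_to_jsonl("./training/data/sample.csv")
    print(open(path, encoding="utf-8").read())
    # expected: {"src": "sannu", "tgt": "hello"}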
main.py CHANGED
@@ -0,0 +1,5 @@
+ from assistant import VoiceTranslatorAssistant
+
+ if __name__ == "__main__":
+     assistant = VoiceTranslatorAssistant("config.yaml")
+     assistant.run()
stt_engine.py ADDED
@@ -0,0 +1,35 @@
+ import torch
+ from transformers import pipeline
+
+ class STTEngine:
+     def __init__(self, model_name="openai/whisper-small"):
+         # Load Whisper pipeline for automatic speech recognition
+         device = 0 if torch.cuda.is_available() else -1
+         self.asr = pipeline(
+             "automatic-speech-recognition",
+             model=model_name,
+             device=device
+         )
+
+     def transcribe(self, audio_path, language="en"):
+         """
+         Transcribe audio file to text using Whisper.
+         Args:
+             audio_path (str): path to .wav file
+             language (str): ISO code ('en', 'ha', 'yo', 'ig')
+         """
+         if audio_path is None:
+             return ""
+
+         # Map Nigerian language names to Whisper codes
+         lang_map = {
+             "english": "en",
+             "hausa": "ha",
+             "yoruba": "yo",
+             "igbo": "ig",
+             "pidgin": "pcm",
+         }
+         whisper_lang = lang_map.get(language.lower(), "en")
+
+         result = self.asr(audio_path, generate_kwargs={"language": whisper_lang})
+         return result["text"].strip()
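A caveat on the language mapping: the pipeline forwards generate_kwargs={"language": ...} straight to Whisper's generate, and Whisper's language list includes "ha" and "yo" but, as far as I know, neither "ig" nor "pcm", so those two entries will likely raise instead of transcribing. A guarded sketch that falls back to auto-detection:

    # Sketch: only pass a language code Whisper is known to accept;
    # otherwise let it auto-detect. The supported subset is an assumption.
    from stt_engine import STTEngine

    KNOWN_WHISPER_CODES = {"en", "ha", "yo"}

    def safe_transcribe(engine: STTEngine, audio_path: str, lang_code: str) -> str:
        if lang_code in KNOWN_WHISPER_CODES:
            result = engine.asr(audio_path, generate_kwargs={"language": lang_code})
        else:
            result = engine.asr(audio_path)  # auto-detect
        return result["text"].strip()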
translation.py CHANGED
@@ -1,5 +1,5 @@
  import yaml
- from transformers import MarianMTModel, MarianTokenizer
+ from transformers import MarianMTModel, MarianTokenizer, pipeline

  # ---------------- Load config ----------------
  CONFIG_FILE = "config.yaml"
@@ -79,3 +79,14 @@ class Translator:
              return text
          else:
              return f"(⚠️ Dictionary doesn't support {input_lang}→{output_lang})"
+
+
+
+ class CustomTranslator:
+     def __init__(self, model_dir="./training/outputs/model"):
+         self.tokenizer = MarianTokenizer.from_pretrained(model_dir)
+         self.model = MarianMTModel.from_pretrained(model_dir)
+         self.pipeline = pipeline("translation", model=self.model, tokenizer=self.tokenizer)
+
+     def translate(self, text):
+         return self.pipeline(text)[0]["translation_text"]
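One mismatch worth flagging: app.py calls translator.translate(text, src_lang, tgt_lang) with three arguments, so this CustomTranslator.translate will raise a TypeError as soon as a trained model exists. A minimal signature-compatible sketch, assuming the fine-tuned Marian model is single-direction so the language arguments can simply be ignored:

    # Sketch: same imports as translation.py; accept and ignore the language
    # arguments so the call in app.py (text, src_lang, tgt_lang) works.
    # Assumes the fine-tuned Marian model covers a single direction.
    from transformers import MarianMTModel, MarianTokenizer, pipeline

    class CustomTranslator:
        def __init__(self, model_dir="./training/outputs/model"):
            self.tokenizer = MarianTokenizer.from_pretrained(model_dir)
            self.model = MarianMTModel.from_pretrained(model_dir)
            self.pipeline = pipeline("translation", model=self.model, tokenizer=self.tokenizer)

        def translate(self, text, input_lang=None, output_lang=None):
            return self.pipeline(text)[0]["translation_text"]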
tts_engine.py CHANGED
@@ -1,61 +1,61 @@
- import os
- from TTS.api import TTS
-
- LANG_MAP = {
-     "english": "en",
-     "yoruba": "en",
-     "hausa": "en",
-     "igbo": "en",
-     "pidgin": "en",
-     "esan": "en",
-     "tiv": "en",
-     "calabar": "en",
-     "benin": "en",
-     "french": "fr-fr",
-     "portuguese": "pt-br"
- }
-
- class TTSEngine:
-     def __init__(self, use_coqui=False):
-         self.use_coqui = use_coqui
-         self.tts = None
-
-         if self.use_coqui:
-             self.tts = TTS(
-                 "tts_models/multilingual/multi-dataset/your_tts",
-                 progress_bar=False,
-                 gpu=False
-             )
-
-     def speak(self, text, lang="english", voice_clone=False):
-         if not text:
-             return None
-
-         out_file = "output.wav"
-
-         if self.use_coqui:
-             lang_code = LANG_MAP.get(lang.lower(), "en")
-
-             if voice_clone and os.path.exists("my_voice.wav"):
-                 # clone your own voice
-                 self.tts.tts_to_file(
-                     text=text,
-                     file_path=out_file,
-                     speaker_wav="my_voice.wav",
-                     language=lang_code
-                 )
-             else:
-                 # fallback to a demo synthetic voice (pretrained speaker)
-                 self.tts.tts_to_file(
-                     text=text,
-                     file_path=out_file,
-                     speaker="female-en-5",  # <-- safe default
-                     language=lang_code
-                 )
-         else:
-             import pyttsx3
-             engine = pyttsx3.init()
-             engine.save_to_file(text, out_file)
-             engine.runAndWait()
-
-         return out_file
 
+ import os
+ from TTS.api import TTS
+
+ LANG_MAP = {
+     "english": "en",
+     "yoruba": "en",
+     "hausa": "en",
+     "igbo": "en",
+     "pidgin": "en",
+     "esan": "en",
+     "tiv": "en",
+     "calabar": "en",
+     "benin": "en",
+     "french": "fr-fr",
+     "portuguese": "pt-br"
+ }
+
+ class TTSEngine:
+     def __init__(self, use_coqui=False):
+         self.use_coqui = use_coqui
+         self.tts = None
+
+         if self.use_coqui:
+             self.tts = TTS(
+                 "tts_models/multilingual/multi-dataset/your_tts",
+                 progress_bar=False,
+                 gpu=False
+             )
+
+     def speak(self, text, lang="english", voice_clone=False):
+         if not text:
+             return None
+
+         out_file = "output.wav"
+
+         if self.use_coqui:
+             lang_code = LANG_MAP.get(lang.lower(), "en")
+
+             if voice_clone and os.path.exists("my_voice.wav"):
+                 # clone your own voice
+                 self.tts.tts_to_file(
+                     text=text,
+                     file_path=out_file,
+                     speaker_wav="my_voice.wav",
+                     language=lang_code
+                 )
+             else:
+                 # fallback to a neutral synthetic voice
+                 self.tts.tts_to_file(
+                     text=text,
+                     file_path=out_file,
+                     speaker_wav=None,  # Let model pick default embedding
+                     language=lang_code
+                 )
+         else:
+             import pyttsx3
+             engine = pyttsx3.init()
+             engine.save_to_file(text, out_file)
+             engine.runAndWait()
+
+         return out_file
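On the changed fallback: YourTTS is a multi-speaker model, and in my experience Coqui TTS raises an error when neither speaker nor speaker_wav is given, which is presumably why the previous version pinned "female-en-5". A hedged sketch that keeps a named speaker as the no-clone default (YourTTS itself only ships "en", "fr-fr" and "pt-br", hence every Nigerian language mapping to "en"):

    # Sketch: keep a named pretrained speaker as the no-clone fallback.
    # "female-en-5" is the speaker the previous version used; inspect
    # tts.speakers to confirm valid names for the installed model.
    from TTS.api import TTS

    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
    print(tts.speakers)
    tts.tts_to_file(
        text="Sannu da zuwa",
        file_path="output.wav",
        speaker="female-en-5",
        language="en",
    )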