# imports
import os

import gradio as gr
import requests
import torch
from transformers import pipeline
# Set your FastAPI backend endpoint
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"
# Language-to-model map
model_map = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}
# Create storage directory
os.makedirs("responses", exist_ok=True)
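
# A minimal pipeline cache — a sketch added here, not part of the original design:
# each language's model is loaded once and reused across requests instead of being
# reloaded on every click. Assumes the host has enough memory to keep loaded models
# resident; falls back to CPU when no GPU is available.
_asr_cache = {}

def _get_asr(language):
    if language not in _asr_cache:
        _asr_cache[language] = pipeline(
            "automatic-speech-recognition",
            model=model_map[language],
            device=0 if torch.cuda.is_available() else -1,
        )
    return _asr_cache[language]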
# Transcription function
def transcribe(audio, language):
    if audio is None:
        return "⚠️ Please upload or record audio first."
    asr = _get_asr(language)
    return asr(audio)["text"]
# Save feedback by sending it to the FastAPI backend
def save_feedback(audio_file, transcription, lang, age_group, gender, speak_level, write_level,
                  native, native_language, education_level, multilingual, other_languages,
                  regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance, improvement,
                  usability, technical_issues_bool, technical_issues_desc, final_comments, email):
    if audio_file is None:
        return "⚠️ Please transcribe an audio clip before submitting feedback."
    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
        metadata = {
            "transcription": transcription,
            "age_group": age_group,
            "gender": gender,
            "evaluated_language": lang,
            "speak_level": speak_level,
            "write_level": write_level,
            "native": native,
            "native_language": native_language,
            "education_level": education_level,
            "multilingual": multilingual,
            "other_languages": other_languages,
            "regional_accent": regional_accent,
            "accent_description": accent_desc,
            "environment": env,
            "device": device,
            "domain": domain,
            "accuracy": accuracy,
            "orthography": orthography,
            "orthography_issues": orthography_issues,
            "meaning": meaning,
            "meaning_loss": meaning_loss,
            "errors": ",".join(errors) if errors else "",
            "error_examples": error_examples,
            "performance": performance,
            "improvement": improvement,
            "usability": usability,
            "technical_issues": technical_issues_bool,
            "technical_issues_desc": technical_issues_desc,
            "final_comments": final_comments,
            "email": email
        }
        files = {
            "audio_file": ("audio.wav", audio_content, "audio/wav")
        }
        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)
        if response.status_code == 201:
            return "✅ Feedback submitted successfully. Thank you!"
        else:
            return f"⚠️ Submission failed: {response.status_code} – {response.text}"
    except Exception as e:
        return f"❌ Could not connect to the backend: {str(e)}"
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")
    transcribed_text = gr.Textbox(label="Transcribed Text")
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text])
    gr.Markdown("---\n## Feedback Form")
    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group *")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say"], label="Gender *")
    speak_level = gr.Slider(1, 10, step=1, label="How well do you speak this language? *")
    write_level = gr.Slider(1, 10, step=1, label="How well do you write the language? *")
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language? *")
    native_language = gr.Textbox(label="If you are not a native speaker, what is your native language?")
    # NEW: Education level
    education_level = gr.Dropdown(["Primary", "Secondary", "Tertiary", "None", "Prefer not to say"], label="What is your highest level of education? *")

    # NEW: Multilingual + gated text input (hidden until "Yes" is selected)
    multilingual = gr.Radio(["Yes", "No"], label="Are you multilingual (i.e., speak more than one language)? *")
    other_languages = gr.Textbox(label="What other languages do you speak?", visible=False)
    multilingual.change(fn=lambda x: gr.update(visible=x == "Yes"), inputs=multilingual, outputs=other_languages)
    # NEW: Regional accent + gated text input (hidden until "Yes" is selected)
    regional_accent = gr.Radio(["Yes", "No", "Unsure"], label="Did the speaker in the audio have a regional accent? *")
    accent_desc = gr.Textbox(label="If yes, please describe the accent or region.", visible=False)
    regional_accent.change(fn=lambda x: gr.update(visible=x == "Yes"), inputs=regional_accent, outputs=accent_desc)
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room (minimal noise)", "Noisy Background (e.g., street, cafe, market)", "Multiple Environments", "Unsure"], label="What was the type of recording environment for the speech you evaluated? *")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Laptop/Computer Microphone", "Dedicated Microphone (e.g., headset, studio mic)", "Unsure"], label="What type of recording device was used? *")
    domain = gr.Textbox(label="Please specify the domain/topic of the audio, if known (e.g., news broadcast, casual conversation, lecture, medical, parliamentary, religious).")
    accuracy = gr.Slider(1, 10, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    orthography = gr.Radio(["Yes, mostly correct", "No, major issues", "Partially (some correct, some incorrect)", "Not Applicable / Unsure"], label="Did the transcription correctly use the standard orthography?")
    orthography_issues = gr.Textbox(label="If you selected 'No' or 'Partially', please describe the issues.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved, please explain how.")
    errors = gr.CheckboxGroup([
        "Substitutions (wrong words used)",
        "Omissions (words missing)",
        "Insertions (extra words added)",
        "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
        "Diacritic/Tone/Special Character errors",
        "Code-switching errors (mixing languages incorrectly)",
        "Named Entity errors (names of people/places wrong)",
        "Punctuation errors",
        "No significant errors observed"
    ], label="Which types of errors were most prominent or impactful in the transcriptions? *")
    error_examples = gr.Textbox(label="(Optional) Can you provide 1–2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")
    improvement = gr.Textbox(label="How could this ASR model be improved? What features would be most helpful? *")
    usability = gr.Slider(1, 5, step=1, label="How easy was it to use the Hugging Face evaluation tool/interface? *")
    technical_issues_bool = gr.Radio(["Yes", "No"], label="Did you encounter any technical issues using the tool? *")
    technical_issues_desc = gr.Textbox(label="If yes, please describe the technical issues you encountered.")
    final_comments = gr.Textbox(label="Any other comments or suggestions regarding the evaluation process or ASR model?")
    email = gr.Textbox(label="Email")
    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)
    save_btn.click(
        fn=save_feedback,
        inputs=[
            audio_input, transcribed_text, lang, age_group, gender, speak_level, write_level,
            native, native_language, education_level, multilingual, other_languages,
            regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
            meaning, meaning_loss, errors, error_examples, performance, improvement,
            usability, technical_issues_bool, technical_issues_desc, final_comments, email
        ],
        outputs=[output_msg]
    )
# Launch
demo.launch()
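
# To try this locally (assumption: gradio, transformers, torch, and requests are
# installed), run this script with Python and open the URL Gradio prints
# (http://127.0.0.1:7860 by default).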