# ASR_AFRICA / app.py
# imports
import os

import gradio as gr
import requests
import torch
from transformers import pipeline
# Set your FastAPI backend endpoint
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"
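# The endpoint is assumed to accept multipart/form-data: the metadata fields as
# string form values plus one "audio_file" upload, returning HTTP 201 on success
# (see save_feedback below). A minimal FastAPI sketch of such an endpoint; all
# names here are assumptions, not the actual backend code:
#
#   from fastapi import FastAPI, File, Form, UploadFile
#
#   app = FastAPI()
#
#   @app.post("/submit-feedback", status_code=201)
#   async def submit_feedback(audio_file: UploadFile = File(...),
#                             transcription: str = Form(...)):
#       ...  # validate and store the audio plus the remaining form fields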
# Language-to-model map
model_map = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}
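# Optional sanity check (commented out so the app does not hit the Hub at import
# time): huggingface_hub, which transformers already depends on, can verify that
# a checkpoint id resolves before it is served.
#
#   from huggingface_hub import model_info
#   model_info(model_map["luganda"])  # raises if the repo id is wrong or private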
# Create local storage directory (used by the optional fallback sketch below)
os.makedirs("responses", exist_ok=True)
# Transcription function. Pipelines are cached so each checkpoint loads only
# once, and the device is picked automatically (GPU when available).
_pipelines = {}

def transcribe(audio, language):
    if language not in _pipelines:
        _pipelines[language] = pipeline("automatic-speech-recognition", model=model_map[language],
                                        device=0 if torch.cuda.is_available() else -1)
    text = _pipelines[language](audio)["text"]
    return text, audio
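# Example call outside the UI (assumes a local WAV file named sample.wav exists):
#   text, _ = transcribe("sample.wav", "swahili")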
# Save feedback by sending it to FastAPI backend
def save_feedback(audio_file, transcription, lang, age_group, gender, speak_level, write_level,
                  native, native_language, education_level, multilingual, other_languages,
                  regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance, improvement,
                  usability, technical_issues_bool, technical_issues_desc, final_comments, email):
    if audio_file is None:
        return "⚠️ Please transcribe an audio clip before submitting feedback."
    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
        metadata = {
            "transcription": transcription,
            "age_group": age_group,
            "gender": gender,
            "evaluated_language": lang,
            "speak_level": speak_level,
            "write_level": write_level,
            "native": native,
            "native_language": native_language,
            "education_level": education_level,
            "multilingual": multilingual,
            "other_languages": other_languages,
            "regional_accent": regional_accent,
            "accent_description": accent_desc,
            "environment": env,
            "device": device,
            "domain": domain,
            "accuracy": accuracy,
            "orthography": orthography,
            "orthography_issues": orthography_issues,
            "meaning": meaning,
            "meaning_loss": meaning_loss,
            "errors": ",".join(errors) if errors else "",
            "error_examples": error_examples,
            "performance": performance,
            "improvement": improvement,
            "usability": usability,
            "technical_issues": technical_issues_bool,
            "technical_issues_desc": technical_issues_desc,
            "final_comments": final_comments,
            "email": email,
        }
        files = {"audio_file": ("audio.wav", audio_content, "audio/wav")}
        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)
        if response.status_code == 201:
            return "✅ Feedback submitted successfully. Thank you!"
        return f"⚠️ Submission failed: {response.status_code} - {response.text}"
    except Exception as e:
        return f"❌ Could not submit feedback: {e}"
# Gradio UI
with gr.Blocks() as demo:
gr.Markdown("## African ASR + Feedback")
with gr.Row():
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
lang = gr.Dropdown(list(model_map.keys()), label="Select Language")
transcribed_text = gr.Textbox(label="Transcribed Text")
submit_btn = gr.Button("Transcribe")
submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])
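    # transcribe() returns the audio path as its second output so audio_input
    # still points at the evaluated file when the feedback form below is submitted.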
gr.Markdown("---\n## Feedback Form")
age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group *")
gender = gr.Dropdown(["Male", "Female", "Prefer not to say"], label="Gender *")
speak_level = gr.Slider(1, 10, step=1, label="How well do you speak this language? *")
write_level = gr.Slider(1, 10, step=1, label="How well do you write the language? *")
native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language? *")
native_language = gr.Textbox(label="If you are not a native speaker, what is your native language?")
    # ✅ NEW: Education level
    education_level = gr.Dropdown(["Primary", "Secondary", "Tertiary", "None", "Prefer not to say"], label="What is your highest level of education? *")
    # ✅ NEW: Multilingual + gated text input
    multilingual = gr.Radio(["Yes", "No"], label="Are you multilingual (i.e., speak more than one language)? *")
    other_languages = gr.Textbox(label="What other languages do you speak?")
    multilingual.change(fn=lambda x: gr.update(visible=x == "Yes"), inputs=multilingual, outputs=other_languages)
    # ✅ NEW: Regional accent + gated text input
    regional_accent = gr.Radio(["Yes", "No", "Unsure"], label="Did the speaker in the audio have a regional accent? *")
    accent_desc = gr.Textbox(label="If yes, please describe the accent or region.")
    regional_accent.change(fn=lambda x: gr.update(visible=x == "Yes"), inputs=regional_accent, outputs=accent_desc)
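    # The two .change() handlers above use gr.update(visible=...) to toggle the
    # follow-up textboxes, Gradio's standard pattern for conditional inputs.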
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room (minimal noise)", "Noisy Background (e.g., street, cafe, market)", "Multiple Environments", "Unsure"], label="What was the type of recording environment for the speech you evaluated? *")
    device = gr.Dropdown(["Mobile Phone", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone (e.g., headset, studio mic)", "Unsure"], label="What type of recording device was used? *")
    domain = gr.Textbox(label="What was the domain/topic of the audio (e.g., news broadcast, casual conversation, lecture, medical, parliamentary, religious)?")
    accuracy = gr.Slider(1, 10, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    orthography = gr.Radio(["Yes, mostly correct", "No, major issues", "Partially (some correct, some incorrect)", "Not Applicable / Unsure"], label="Did the transcription correctly use the standard orthography?")
    orthography_issues = gr.Textbox(label="If you selected 'No' or 'Partially', please describe the issues.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved, please explain how.")
    errors = gr.CheckboxGroup([
        "Substitutions (wrong words used)",
        "Omissions (words missing)",
        "Insertions (extra words added)",
        "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
        "Diacritic/Tone/Special Character errors",
        "Code-switching errors (mixing languages incorrectly)",
        "Named Entity errors (names of people/places wrong)",
        "Punctuation errors",
        "No significant errors observed"
    ], label="Which types of errors were most prominent or impactful in the transcriptions? *")
    error_examples = gr.Textbox(label="(Optional) Can you provide 1–2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")
    improvement = gr.Textbox(label="How could this ASR model be improved? What features would be most helpful? *")
    usability = gr.Slider(1, 5, step=1, label="How easy was it to use the Hugging Face evaluation tool/interface? *")
    technical_issues_bool = gr.Radio(["Yes", "No"], label="Did you encounter any technical issues using the tool? *")
    technical_issues_desc = gr.Textbox(label="If yes, please describe the technical issues you encountered.")
    final_comments = gr.Textbox(label="Any other comments or suggestions regarding the evaluation process or ASR model?")
    email = gr.Textbox(label="Email")
    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(label="Submission Status", interactive=False)
    save_btn.click(
        fn=save_feedback,
        inputs=[
            audio_input, transcribed_text, lang, age_group, gender, speak_level, write_level,
            native, native_language, education_level, multilingual, other_languages,
            regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
            meaning, meaning_loss, errors, error_examples, performance, improvement,
            usability, technical_issues_bool, technical_issues_desc, final_comments, email
        ],
        outputs=[output_msg]
    )
# Launch
demo.launch()
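# When run locally (python app.py), Gradio serves the UI on http://127.0.0.1:7860
# by default; on Hugging Face Spaces the same launch() call is picked up automatically.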