import gradio as gr
import whisper

# Load the small multilingual Whisper model (moved to GPU automatically if one is available).
model = whisper.load_model("small")
print(model.device)

def inference(audio):
    # Load the recording and pad/trim it to the 30-second window Whisper expects.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language; probs maps language codes to probabilities.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode with fp16 disabled so the demo also runs on CPU.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text

# Gradio 3.x layout API: gr.Box and the source= argument were removed in
# Gradio 4, where the equivalent is sources=["microphone"].
with gr.Blocks() as block:
    with gr.Group():
        with gr.Box():
            with gr.Row():
                audio = gr.Audio(
                    label="Input Audio",
                    source="microphone",
                    type="filepath",
                )
                btn = gr.Button("Transcribe")
        text = gr.Textbox(label="Transcription")
    # api_name exposes this event as a named endpoint on the app's HTTP API.
    btn.click(inference, inputs=[audio], outputs=[text], api_name="transcribe")

block.launch()
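
Because the click event is registered with api_name="transcribe", the running app also exposes a named endpoint that can be called programmatically. Below is a minimal client-side sketch using gradio_client, assuming the app is running locally on Gradio's default port 7860 and that sample.wav is a hypothetical stand-in for a real audio file:

from gradio_client import Client

# Connect to the locally running Blocks app (default host and port assumed).
client = Client("http://127.0.0.1:7860/")

# Call the endpoint registered via api_name="transcribe"; returns the transcript text.
transcript = client.predict("sample.wav", api_name="/transcribe")
print(transcript)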