import asyncio
import os
from uuid import uuid4

import edge_tts
from dotenv import load_dotenv
from groq import Groq
|
| |
|
# Load settings from a .env file before the API client is constructed.
# NOTE(review): presumably this supplies the Groq API key via the
# environment -- confirm which variable the deployment relies on.
load_dotenv()

# Module-level Groq client shared by the STT coroutine in this file.
client = Groq()
|
| |
|
| |
|
| |
|
| |
|
| |
|
async def STT(audio_file):
    """Transcribe an uploaded audio file with Groq's Whisper model.

    The upload is persisted under ``uploads/`` with a random ``.wav`` name
    and then sent to the ``whisper-large-v3-turbo`` transcription endpoint.

    The disk writes and the Groq request are blocking calls (the client call
    is not awaitable), so they are run in the event loop's default executor.
    Running them directly inside this coroutine would stall every other task
    on the loop for the duration of the upload and transcription.

    Args:
        audio_file: Object exposing an awaitable ``read()`` that returns the
            raw audio bytes (presumably a FastAPI/Starlette ``UploadFile`` --
            confirm against the caller).

    Returns:
        dict: ``{"text": ..., "segments": ..., "language": ...}`` taken from
        the verbose-JSON transcription response.
    """
    # Reading the upload is the only genuinely asynchronous step.
    audio_bytes = await audio_file.read()

    def _save_and_transcribe():
        """Write the audio to disk and run the blocking transcription."""
        os.makedirs("uploads", exist_ok=True)
        file_path = f"uploads/{uuid4().hex}.wav"

        with open(file_path, "wb") as f:
            f.write(audio_bytes)

        # Re-open from disk so the request is sent from a named file,
        # exactly as the original implementation did.
        with open(file_path, "rb") as f:
            return client.audio.transcriptions.create(
                file=f,
                model="whisper-large-v3-turbo",
                response_format="verbose_json",
                temperature=0.0,
            )

    # Off-load the blocking work so the event loop stays responsive.
    loop = asyncio.get_running_loop()
    transcription = await loop.run_in_executor(None, _save_and_transcribe)

    return {
        "text": transcription.text,
        "segments": transcription.segments,
        "language": transcription.language,
    }
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
async def TTS(text: str, voice: str = "en-US-AriaNeural") -> str:
    """Synthesize ``text`` to an MP3 file with edge-tts.

    Args:
        text: The text to be spoken.
        voice: Voice identifier passed to ``edge_tts.Communicate``.

    Returns:
        The relative path of the generated file, ``outputs/<random hex>.mp3``.
    """
    # Ensure the destination directory exists before anything is written.
    os.makedirs("outputs", exist_ok=True)

    # A random hex name keeps separate calls from overwriting each other.
    audio_path = f"outputs/{uuid4().hex}.mp3"

    await edge_tts.Communicate(text, voice).save(audio_path)
    return audio_path