Spaces:
Running
Running
feat: Handle markdown in model output and fix dependencies
Browse files- main.py +14 -2
- requirements.txt +10 -9
main.py
CHANGED
|
@@ -173,6 +173,16 @@ def split_audio_webrtcvad(audio_segment, min_silence_len):
|
|
| 173 |
return final_chunks
|
| 174 |
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
def validate_and_correct_segments(segments_from_api, chunk_duration_ms):
|
| 177 |
corrected_segments, last_corrected_end_ms = [], 0
|
| 178 |
for seg in segments_from_api:
|
|
@@ -234,7 +244,8 @@ def transcribe_chunk(chunk_audio, api_key, system_prompt, pydantic_schema, model
|
|
| 234 |
response_schema=list[pydantic_schema]
|
| 235 |
)
|
| 236 |
)
|
| 237 |
-
|
|
|
|
| 238 |
|
| 239 |
|
| 240 |
def transcribe_chunk_plain_text(chunk_audio, api_key, system_prompt, model_name, user_prompt):
|
|
@@ -260,7 +271,8 @@ def transcribe_chunk_plain_text(chunk_audio, api_key, system_prompt, model_name,
|
|
| 260 |
contents=contents,
|
| 261 |
config=types.GenerateContentConfig(system_instruction=system_prompt)
|
| 262 |
)
|
| 263 |
-
|
|
|
|
| 264 |
except Exception as e:
|
| 265 |
logging.error(f"Error in transcribe_chunk_plain_text: {e}")
|
| 266 |
return None, str(e)
|
|
|
|
| 173 |
return final_chunks
|
| 174 |
|
| 175 |
|
| 176 |
+
def _trim_markdown_fences(text: str) -> str:
|
| 177 |
+
"""Removes markdown code block fences from a string."""
|
| 178 |
+
text = text.strip()
|
| 179 |
+
if text.startswith("```") and text.endswith("```"):
|
| 180 |
+
text = text[3:-3].strip()
|
| 181 |
+
if text.startswith("json"):
|
| 182 |
+
text = text[4:].strip()
|
| 183 |
+
return text
|
| 184 |
+
|
| 185 |
+
|
| 186 |
def validate_and_correct_segments(segments_from_api, chunk_duration_ms):
|
| 187 |
corrected_segments, last_corrected_end_ms = [], 0
|
| 188 |
for seg in segments_from_api:
|
|
|
|
| 244 |
response_schema=list[pydantic_schema]
|
| 245 |
)
|
| 246 |
)
|
| 247 |
+
cleaned_text = _trim_markdown_fences(response.text)
|
| 248 |
+
return json.loads(cleaned_text), None
|
| 249 |
|
| 250 |
|
| 251 |
def transcribe_chunk_plain_text(chunk_audio, api_key, system_prompt, model_name, user_prompt):
|
|
|
|
| 271 |
contents=contents,
|
| 272 |
config=types.GenerateContentConfig(system_instruction=system_prompt)
|
| 273 |
)
|
| 274 |
+
cleaned_text = _trim_markdown_fences(response.text)
|
| 275 |
+
return cleaned_text, None
|
| 276 |
except Exception as e:
|
| 277 |
logging.error(f"Error in transcribe_chunk_plain_text: {e}")
|
| 278 |
return None, str(e)
|
requirements.txt
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
-
fastapi
|
| 2 |
-
uvicorn[standard]
|
| 3 |
-
python-multipart
|
| 4 |
-
google-genai
|
| 5 |
-
pydub
|
| 6 |
-
PyYAML
|
| 7 |
-
Jinja2
|
| 8 |
-
pydantic
|
| 9 |
-
webrtcvad
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
google-genai
|
| 5 |
+
pydub
|
| 6 |
+
PyYAML
|
| 7 |
+
Jinja2
|
| 8 |
+
pydantic
|
| 9 |
+
webrtcvad
|
| 10 |
+
setuptools
|