Fix audio saving with torchcodec dependency
Browse files
- Add torchcodec to requirements.txt for proper audio encoding
- Add soundfile as backup audio saving library
- Implement fallback audio saving methods:
  1. torchaudio.save (primary)
  2. soundfile.write (backup, tried when torchaudio.save raises)
  3. scipy.io.wavfile.write (last resort, tried only when soundfile cannot be imported)
- Improve error handling for audio saving failures
- Ensure robust audio output regardless of codec availability
- app.py +19 -2
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -291,8 +291,25 @@ class AudioFoleyModel:
|
|
| 291 |
output_filename = f"generated_audio_{timestamp}.wav"
|
| 292 |
permanent_path = f"/tmp/{output_filename}"
|
| 293 |
|
| 294 |
-
# Save audio file
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
# Verify file was created
|
| 298 |
if not os.path.exists(permanent_path):
|
|
|
|
| 291 |
output_filename = f"generated_audio_{timestamp}.wav"
|
| 292 |
permanent_path = f"/tmp/{output_filename}"
|
| 293 |
|
| 294 |
+
# Save audio file with fallback methods
|
| 295 |
+
try:
|
| 296 |
+
# Try with torchaudio first
|
| 297 |
+
torchaudio.save(permanent_path, audio, self.model.seq_cfg.sampling_rate)
|
| 298 |
+
except Exception as e:
|
| 299 |
+
log.warning(f"torchaudio.save failed: {e}, trying alternative method...")
|
| 300 |
+
try:
|
| 301 |
+
# Fallback: use soundfile if available
|
| 302 |
+
import soundfile as sf
|
| 303 |
+
sf.write(permanent_path, audio.numpy().T, self.model.seq_cfg.sampling_rate)
|
| 304 |
+
except ImportError:
|
| 305 |
+
try:
|
| 306 |
+
# Fallback: use scipy.io.wavfile
|
| 307 |
+
from scipy.io.wavfile import write
|
| 308 |
+
# Convert to int16 for wav format
|
| 309 |
+
audio_int16 = (audio * 32767).clamp(-32768, 32767).to(torch.int16)
|
| 310 |
+
write(permanent_path, self.model.seq_cfg.sampling_rate, audio_int16.numpy().T)
|
| 311 |
+
except Exception as e2:
|
| 312 |
+
return None, f"❌ 音频保存失败: {str(e2)}"
|
| 313 |
|
| 314 |
# Verify file was created
|
| 315 |
if not os.path.exists(permanent_path):
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
torch>=2.0.0
|
| 2 |
torchvision
|
| 3 |
torchaudio
|
|
|
|
| 4 |
gradio>=4.0.0
|
| 5 |
huggingface_hub>=0.26.0
|
| 6 |
numpy>=1.21.0,<2.1
|
|
@@ -11,6 +12,7 @@ tqdm>=4.66.1
|
|
| 11 |
einops>=0.6.0
|
| 12 |
requests
|
| 13 |
librosa>=0.8.1
|
|
|
|
| 14 |
av>=14.0.1
|
| 15 |
timm>=1.0.12
|
| 16 |
open_clip_torch>=2.29.0
|
|
|
|
| 1 |
torch>=2.0.0
|
| 2 |
torchvision
|
| 3 |
torchaudio
|
| 4 |
+
torchcodec
|
| 5 |
gradio>=4.0.0
|
| 6 |
huggingface_hub>=0.26.0
|
| 7 |
numpy>=1.21.0,<2.1
|
|
|
|
| 12 |
einops>=0.6.0
|
| 13 |
requests
|
| 14 |
librosa>=0.8.1
|
| 15 |
+
soundfile>=0.12.1
|
| 16 |
av>=14.0.1
|
| 17 |
timm>=1.0.12
|
| 18 |
open_clip_torch>=2.29.0
|