Spaces:

learnmlf
/

Acfoley

Sleeping

learnmlf committed on 5 days ago

Commit

d4263af

1 Parent(s): 2c1dff6

Fix audio saving with torchcodec dependency

- Add torchcodec to requirements.txt for proper audio encoding
- Add soundfile as backup audio saving library
- Implement fallback audio saving methods:
  1. torchaudio.save (primary)
  2. soundfile.write (backup)
  3. scipy.io.wavfile.write (last resort)
- Improve error handling for audio saving failures
- Ensure robust audio output regardless of codec availability

Files changed (2) hide show

app.py +19 -2
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -291,8 +291,25 @@ class AudioFoleyModel:
                 output_filename = f"generated_audio_{timestamp}.wav"
                 permanent_path = f"/tmp/{output_filename}"
-                # Save audio file
-                torchaudio.save(permanent_path, audio, self.model.seq_cfg.sampling_rate)
                 # Verify file was created
                 if not os.path.exists(permanent_path):

                 output_filename = f"generated_audio_{timestamp}.wav"
                 permanent_path = f"/tmp/{output_filename}"
+                # Save audio file with fallback methods
+                try:
+                    # Try with torchaudio first
+                    torchaudio.save(permanent_path, audio, self.model.seq_cfg.sampling_rate)
+                except Exception as e:
+                    log.warning(f"torchaudio.save failed: {e}, trying alternative method...")
+                    try:
+                        # Fallback: use soundfile if available
+                        import soundfile as sf
+                        sf.write(permanent_path, audio.numpy().T, self.model.seq_cfg.sampling_rate)
+                    except ImportError:
+                        try:
+                            # Fallback: use scipy.io.wavfile
+                            from scipy.io.wavfile import write
+                            # Convert to int16 for wav format
+                            audio_int16 = (audio * 32767).clamp(-32768, 32767).to(torch.int16)
+                            write(permanent_path, self.model.seq_cfg.sampling_rate, audio_int16.numpy().T)
+                        except Exception as e2:
+                            return None, f"❌ 音频保存失败: {str(e2)}"
                 # Verify file was created
                 if not os.path.exists(permanent_path):

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 torch>=2.0.0
 torchvision
 torchaudio
 gradio>=4.0.0
 huggingface_hub>=0.26.0
 numpy>=1.21.0,<2.1
@@ -11,6 +12,7 @@ tqdm>=4.66.1
 einops>=0.6.0
 requests
 librosa>=0.8.1
 av>=14.0.1
 timm>=1.0.12
 open_clip_torch>=2.29.0

 torch>=2.0.0
 torchvision
 torchaudio
+torchcodec
 gradio>=4.0.0
 huggingface_hub>=0.26.0
 numpy>=1.21.0,<2.1
 einops>=0.6.0
 requests
 librosa>=0.8.1
+soundfile>=0.12.1
 av>=14.0.1
 timm>=1.0.12
 open_clip_torch>=2.29.0