|
|
|
|
|
"""
|
|
|
Script de transcription audio avec Whisper
|
|
|
Traite tous les fichiers audio du dossier input et génère les transcriptions dans output/transcriptions
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
import whisper
|
|
|
from pathlib import Path
|
|
|
import time
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
# Optional: configure a portable runtime environment (project-local helper).
# Best-effort — the script still works when the module is absent.
try:
    from portable_env import setup_portable_env
    setup_portable_env()
except Exception:
    # portable_env may not ship with every deployment; ignore and continue.
    pass
|
|
|
|
|
|
# Candidate FFmpeg install locations on Windows. Whisper shells out to the
# ffmpeg binary for audio decoding, so it must be reachable on PATH.
ffmpeg_paths = [
    r"C:\FFmpeg\bin",
    r"C:\Program Files\FFmpeg\bin",
    r"C:\Users\victo\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin"
]

# Prepend each existing candidate that is not already present. We compare
# against the individual PATH entries (the previous substring check could
# wrongly skip a directory that is merely a prefix of another entry) and use
# os.pathsep instead of a hard-coded ';'.
for ffmpeg_path in ffmpeg_paths:
    path_entries = os.environ.get("PATH", "").split(os.pathsep)
    if os.path.exists(ffmpeg_path) and ffmpeg_path not in path_entries:
        os.environ["PATH"] = ffmpeg_path + os.pathsep + os.environ.get("PATH", "")
|
|
|
|
|
|
|
|
|
|
|
|
# Input folder scanned for audio files (overridable via BOB_INPUT_DIR).
INPUT_DIR = Path(os.environ.get("BOB_INPUT_DIR", Path(__file__).parent.parent / "input"))
# Destination folder for the generated .txt transcriptions (overridable via BOB_TRANSCRIPTIONS_DIR).
OUTPUT_DIR = Path(os.environ.get("BOB_TRANSCRIPTIONS_DIR", Path(__file__).parent.parent / "output" / "transcriptions"))
# Model selector: an OpenAI Whisper size (e.g. "small") or "faster-whisper:<size>".
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "small")
# File extensions (matched case-insensitively) accepted when scanning the input folder.
SUPPORTED_FORMATS = ['.mp3', '.wav', '.m4a', '.flac', '.ogg', '.mp4', '.avi', '.mov']
|
|
|
|
|
|
def load_whisper_model(model_name):
    """Load a transcription model and report which backend was chosen.

    ``model_name`` selects the backend:

    - ``'faster-whisper:<size>'`` (e.g. ``faster-whisper:small``) loads
      ``faster_whisper.WhisperModel`` on CPU with int8 compute;
    - any other value is forwarded to ``whisper.load_model``.

    Returns:
        tuple: ``(backend, model)`` with backend in ``{"openai", "faster"}``.

    Raises:
        RuntimeError: the faster-whisper backend was requested but the
            package could not be imported.
    """
    print(f"Chargement du modèle Whisper: {model_name}")

    wants_faster = isinstance(model_name, str) and model_name.startswith("faster-whisper")
    if not wants_faster:
        loaded = whisper.load_model(model_name)
        print("Modèle OpenAI Whisper prêt.")
        return ("openai", loaded)

    # Model size is whatever follows the first ':'; default to "small"
    # when no ':' is present at all.
    _head, sep, tail = model_name.partition(":")
    size = tail if sep else "small"
    try:
        from faster_whisper import WhisperModel
    except Exception as e:
        raise RuntimeError(f"faster-whisper non disponible: {e}")

    # int8 keeps memory usage low for CPU-only inference.
    engine = WhisperModel(size, device="cpu", compute_type="int8")
    print("Modèle faster-whisper prêt.")
    return ("faster", engine)
|
|
|
|
|
|
def get_audio_files(input_dir):
    """Collect the supported audio/video files directly inside *input_dir*.

    Args:
        input_dir (Path): directory to scan (non-recursive).

    Returns:
        list[Path]: unique matching files sorted by path; empty when the
        directory does not exist.
    """
    if not input_dir.exists():
        return []

    # Hoisted out of the loop: the previous version rebuilt the lowercased
    # extension list once per directory entry.
    allowed = {ext.lower() for ext in SUPPORTED_FORMATS}

    found = {
        entry
        for entry in input_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in allowed
    }
    return sorted(found)
|
|
|
|
|
|
def transcribe_file(model, audio_file, output_dir):
    """Transcribe one audio file and save the result as a text file.

    Args:
        model: either a ``(backend, engine)`` tuple as returned by
            ``load_whisper_model``, or a bare OpenAI Whisper model
            (legacy callers).
        audio_file (Path): source audio/video file.
        output_dir (Path): directory receiving ``<stem>_transcription.txt``.

    Returns:
        bool: True on success, False on any failure (details are printed).
    """
    print(f"Transcription de: {audio_file.name}")
    print(f"Chemin complet: {audio_file.absolute()}")
    print(f"Fichier existe: {audio_file.exists()}")
    print(f"Taille du fichier: {audio_file.stat().st_size if audio_file.exists() else 'N/A'} bytes")

    try:
        if not audio_file.exists():
            print(f"✗ Fichier introuvable: {audio_file}")
            return False

        audio_path = str(audio_file.absolute())
        print(f"Chemin utilisé pour Whisper: {audio_path}")

        t0 = time.time()

        # Accept both the (backend, engine) tuple and a bare model object;
        # a bare model is treated as the OpenAI backend.
        if isinstance(model, tuple) and model and model[0] in ("openai", "faster"):
            backend, engine = model
        else:
            backend, engine = ("openai", model)

        if backend == "openai":
            text = engine.transcribe(audio_path, language="fr", verbose=False)["text"]
        else:
            # faster-whisper yields segments lazily; stitch them together.
            segments, _info = engine.transcribe(audio_path, language="fr")
            text = " ".join(seg.text for seg in segments).strip()

        t1 = time.time()

        dest = output_dir / (audio_file.stem + "_transcription.txt")

        # Header lines first, then the transcription body.
        with open(dest, 'w', encoding='utf-8') as out:
            out.write(f"Fichier source: {audio_file.name}\n")
            out.write(f"Date de transcription: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}\n")
            out.write(f"Durée de traitement: {t1 - t0:.2f} secondes\n")
            out.write(f"Modèle utilisé: {WHISPER_MODEL}\n")
            out.write("-" * 50 + "\n\n")
            out.write(text)

        print(f"✓ Transcription sauvegardée: {dest.name}")
        print(f" Durée de traitement: {t1 - t0:.2f}s")

        return True

    except Exception as e:
        print(f"✗ Erreur lors de la transcription de {audio_file.name}: {e}")
        print(f"Type d'erreur: {type(e).__name__}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
|
def main():
    """Entry point: transcribe every audio file found in the input folder.

    Resolves the input/output folders (overridable via BOB_INPUT_DIR and
    BOB_TRANSCRIPTIONS_DIR), loads the configured Whisper model, processes
    the files one by one and prints a final summary.
    """
    banner = "=" * 60
    print(banner)
    print("TRANSCRIPTION AUTOMATIQUE DES BOB")
    print(banner)

    # Folders are resolved relative to this script, env vars take precedence.
    here = Path(__file__).parent.absolute()
    input_dir = Path(os.environ.get("BOB_INPUT_DIR", here.parent / "input"))
    output_dir = Path(os.environ.get("BOB_TRANSCRIPTIONS_DIR", here.parent / "output" / "transcriptions"))

    print(f"Dossier script: {here}")
    print(f"Dossier input: {input_dir}")
    print(f"Dossier output: {output_dir}")
    print()

    if not input_dir.exists():
        print(f"Erreur: Le dossier input n'existe pas: {input_dir}")
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    audio_files = get_audio_files(input_dir)
    if not audio_files:
        print(f"Aucun fichier audio trouvé dans {input_dir}")
        print(f"Formats supportés: {', '.join(SUPPORTED_FORMATS)}")
        return

    total = len(audio_files)
    print(f"Trouvé {total} fichier(s) audio à traiter:")
    for idx, path in enumerate(audio_files, 1):
        print(f" {idx}. {path.name}")

    print()

    try:
        model = load_whisper_model(WHISPER_MODEL)
    except Exception as e:
        print(f"Erreur lors du chargement du modèle: {e}")
        return

    print()

    succeeded = 0
    started = time.time()

    for idx, path in enumerate(audio_files, 1):
        print(f"[{idx}/{total}] ", end="")
        succeeded += 1 if transcribe_file(model, path, output_dir) else 0
        print()

    elapsed = time.time() - started

    print(banner)
    print("RÉSUMÉ")
    print(banner)
    print(f"Fichiers traités: {total}")
    print(f"Réussites: {succeeded}")
    print(f"Échecs: {total - succeeded}")
    print(f"Durée totale: {elapsed:.2f} secondes")
    print(f"Transcriptions sauvegardées dans: {output_dir}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main() |