|
|
|
|
|
""" |
|
|
BOB Processor - Interface graphique (Hugging Face version) |
|
|
Application GUI pour traiter automatiquement les fichiers audio BOB |
|
|
""" |
|
|
|
|
|
import sys |
|
|
import os |
|
|
import threading |
|
|
import time |
|
|
from pathlib import Path |
|
|
from PyQt5.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, |
|
|
QWidget, QPushButton, QLabel, QProgressBar, QTextEdit, |
|
|
QFileDialog, QMessageBox, QFrame, QGridLayout, QCheckBox, |
|
|
QComboBox, QGroupBox) |
|
|
from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer |
|
|
from PyQt5.QtGui import QFont, QPixmap, QIcon |
|
|
|
|
|
|
|
|
|
|
|
# Optional bootstrap: when a `portable_env` module is available, let it
# configure the environment before the processing modules are imported.
# This stays best-effort (start-up must never fail because of it), but a
# failure is no longer swallowed without any trace.
try:
    from portable_env import setup_portable_env

    setup_portable_env()
except ImportError:
    # The helper module is optional; its absence is not an error.
    pass
except Exception as exc:
    # Report the problem and keep going. print() is a silent no-op when no
    # standard stream is attached (e.g. windowed builds without a console).
    print(f"Avertissement: échec de setup_portable_env(): {exc}", file=sys.stderr)
|
|
|
|
|
from transcribe_audio import transcribe_file, load_whisper_model, get_audio_files |
|
|
from analyze_bob_hf import analyze_files_hf |
|
|
|
|
|
|
|
|
def _hide_windows_console(): |
|
|
if os.name == 'nt': |
|
|
try: |
|
|
import ctypes |
|
|
hwnd = ctypes.windll.kernel32.GetConsoleWindow() |
|
|
if hwnd: |
|
|
ctypes.windll.user32.ShowWindow(hwnd, 0) |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
class WorkerThread(QThread):
    """Background thread that transcribes audio files and then analyses them.

    The work is done in two phases: Whisper transcription of every audio file
    found in the input folder, followed by the Hugging Face analysis of the
    produced transcriptions.

    Signals:
        progress (int): overall completion percentage. Transcription drives
            the first 70 points, the analysis the remaining 30.
        log (str): one line of text for the execution journal.
        finished (str): final status message, emitted exactly once per run.
    """

    progress = pyqtSignal(int)
    log = pyqtSignal(str)
    # NOTE(review): this str-carrying signal replaces the attribute name of
    # QThread's own argument-less `finished` signal. It is kept because the
    # GUI connects to it under this name.
    finished = pyqtSignal(str)

    # Share of the progress bar allotted to each phase (percent).
    _TRANSCRIPTION_SHARE = 70
    _ANALYSIS_SHARE = 30

    def __init__(self, input_dir, output_dir, whisper_model, hf_model_name: str, fast_mode: bool):
        """Store the job parameters.

        Args:
            input_dir: folder containing the audio files.
            output_dir: folder receiving the transcriptions and the summary.
            whisper_model: Whisper model name handed to load_whisper_model().
            hf_model_name: Hugging Face model identifier used for the analysis.
            fast_mode: fast-mode flag; in this class it is only reported in
                the log.
        """
        super().__init__()
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.whisper_model = whisper_model
        self.hf_model_name = hf_model_name
        self.fast_mode = fast_mode
        self.is_cancelled = False

    def run(self):
        """Thread entry point: run the whole pipeline and always emit `finished`."""
        try:
            self._process()
        except Exception as e:
            self.log.emit(f"❌ Erreur critique: {e}")
            self.finished.emit(f"Erreur lors du traitement: {e}")

    def _process(self):
        """Run transcription then analysis and emit the final status message."""
        transcriptions_dir = self.output_dir / "transcriptions"
        transcriptions_dir.mkdir(parents=True, exist_ok=True)

        self.log.emit("🚀 Début du traitement BOB...")
        self.log.emit(f"📁 Dossier d'entrée: {self.input_dir}")
        self.log.emit(f"📁 Dossier de sortie: {self.output_dir}")

        audio_files = get_audio_files(self.input_dir)
        if not audio_files:
            self.log.emit("❌ Aucun fichier audio trouvé!")
            self.finished.emit("Aucun fichier audio trouvé dans le dossier sélectionné.")
            return

        self._transcribe_all(audio_files, transcriptions_dir)
        if self.is_cancelled:
            self.finished.emit("Traitement annulé par l'utilisateur.")
            return

        self.log.emit("🤖 Début de l'analyse avec Hugging Face...")
        self.log.emit(f"🤖 Modèle HF sélectionné: {self.hf_model_name} | Mode rapide: {'ON' if self.fast_mode else 'OFF'}")

        analysis_error = self._run_analysis(transcriptions_dir)
        if analysis_error is not None:
            # A failed analysis must not be reported as a success.
            self.finished.emit(f"Erreur lors de l'analyse: {analysis_error}")
            return
        if self.is_cancelled:
            # The cancel request arrived while the analysis was running.
            self.finished.emit("Traitement annulé par l'utilisateur.")
            return

        self.progress.emit(100)

        result_file = self.output_dir / "resume_bob.txt"
        if result_file.exists():
            self.log.emit("🎉 Traitement terminé avec succès!")
            self.finished.emit(f"Traitement terminé! Résultats sauvegardés dans:\n{result_file}")
        else:
            self.finished.emit("Traitement terminé mais fichier de résultat introuvable.")

    def _transcribe_all(self, audio_files, transcriptions_dir):
        """Transcribe each audio file, stopping early when a cancel is requested."""
        total_files = len(audio_files)
        self.log.emit(f"📊 {total_files} fichier(s) audio trouvé(s)")

        self.log.emit(f"🧠 Chargement du modèle Whisper ({self.whisper_model})...")
        model = load_whisper_model(self.whisper_model)
        self.log.emit("✅ Modèle Whisper chargé avec succès!")

        self.log.emit("🎵 Début de la transcription...")
        for index, audio_file in enumerate(audio_files, start=1):
            if self.is_cancelled:
                break

            self.log.emit(f"📝 [{index}/{total_files}] Transcription: {audio_file.name}")

            if transcribe_file(model, audio_file, transcriptions_dir):
                self.log.emit(f"✅ Transcrit: {audio_file.name}")
            else:
                self.log.emit(f"❌ Erreur: {audio_file.name}")

            self.progress.emit(int((index / total_files) * self._TRANSCRIPTION_SHARE))

    def _run_analysis(self, transcriptions_dir):
        """Run the Hugging Face analysis and forward its printed output to the log.

        Returns:
            The exception that aborted the analysis, or None when it completed.
        """
        import contextlib
        import io

        def prog(cur, total):
            # Nothing meaningful to report without a total (also avoids a
            # division by zero inside the analysis callback).
            if not total:
                return
            val = self._TRANSCRIPTION_SHARE + int((cur / total) * self._ANALYSIS_SHARE)
            # 100 is reserved for the end of the whole run.
            self.progress.emit(min(99, val))

        def cancelled():
            return self.is_cancelled

        def logger(*args, **kwargs):
            # Goes through print() so it is captured, in order, together with
            # anything the analysis prints while stdout is redirected.
            print(" ".join(str(a) for a in args))

        failure = None
        captured_output = io.StringIO()
        # redirect_stdout swaps sys.stdout for the whole process while active.
        with contextlib.redirect_stdout(captured_output):
            try:
                os.environ["HF_MODEL"] = self.hf_model_name
                analyze_files_hf(
                    transcriptions_dir=transcriptions_dir,
                    input_dir=self.input_dir,
                    output_file=self.output_dir / "resume_bob.txt",
                    log_fn=logger,
                    progress_fn=prog,
                    cancel_fn=cancelled,
                )
            except Exception as e:
                failure = e

        # Replay whatever was printed, including output preceding a failure.
        for line in captured_output.getvalue().split('\n'):
            if line.strip():
                self.log.emit(line)

        if failure is not None:
            self.log.emit(f"❌ Erreur lors de l'analyse: {failure}")
        return failure

    def cancel(self):
        """Request cancellation; the flag is polled between files and by the analysis callback."""
        self.is_cancelled = True
|
|
|
|
|
class BOBProcessorGUI(QMainWindow):
    """Main window: folder and model selection, start/cancel controls, progress and log."""

    # Folder paths of this many characters or more are shortened in the labels.
    _MAX_PATH_CHARS = 60

    # Prefix of the only worker status message that denotes a full success.
    _SUCCESS_PREFIX = "Traitement terminé!"

    # Style sheets shared by several widgets.
    _PATH_LABEL_STYLE = "padding: 8px; background-color: #ecf0f1; border-radius: 3px; font-size: 11pt;"
    _PICK_BUTTON_STYLE = "padding: 10px; background-color: #3498db; color: white; border-radius: 3px; font-size: 11pt; font-weight: bold;"
    _COMBO_STYLE = "font-size: 11pt; padding: 5px;"

    def __init__(self):
        """Create the window state, the elapsed-time timer and the widgets."""
        super().__init__()
        self.worker_thread = None
        self.input_dir = None
        self.output_dir = None

        # Seconds elapsed since the current run started; advanced by the timer.
        self.elapsed_seconds = 0
        self.timer = QTimer(self)
        self.timer.timeout.connect(self._tick_elapsed)
        self.init_ui()

    def init_ui(self):
        """Build the window content from top to bottom."""
        self.setWindowTitle("BOB Processor - Hugging Face Version")
        self.setGeometry(100, 100, 800, 600)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        self._build_header(layout)
        layout.addWidget(self._build_config_group())
        layout.addLayout(self._build_controls())
        self._build_status_widgets(layout)
        layout.addWidget(self._build_log_group())

        self.check_start_conditions()

    def _build_header(self, layout):
        """Add the title, the subtitle and a horizontal separator to `layout`."""
        title = QLabel("🎵 BOB 🤖 (Hugging Face)")
        title.setAlignment(Qt.AlignCenter)
        title.setFont(QFont("Arial", 18, QFont.Bold))
        title.setStyleSheet("color: #2c3e50; margin: 10px;")
        layout.addWidget(title)

        subtitle = QLabel("Transcription automatique et analyse intelligente des sujets")
        subtitle.setAlignment(Qt.AlignCenter)
        subtitle.setFont(QFont("Arial", 10))
        subtitle.setStyleSheet("color: #7f8c8d; margin-bottom: 20px;")
        layout.addWidget(subtitle)

        line = QFrame()
        line.setFrameShape(QFrame.HLine)
        line.setFrameShadow(QFrame.Sunken)
        layout.addWidget(line)

    def _build_config_group(self):
        """Create the configuration box (folders, Whisper model, HF model, hint)."""
        config_group = QGroupBox("⚙️ Configuration")
        config_group.setFont(QFont("Arial", 12, QFont.Bold))
        config_layout = QGridLayout(config_group)

        # Row 0: input folder.
        self.input_label = QLabel("📁 Dossier d'entrée (MP3): Aucun dossier sélectionné")
        self.input_label.setFont(QFont("Arial", 11))
        self.input_label.setStyleSheet(self._PATH_LABEL_STYLE)
        config_layout.addWidget(self.input_label, 0, 0, 1, 2)

        self.input_btn = QPushButton("Choisir dossier MP3")
        self.input_btn.setFont(QFont("Arial", 11, QFont.Bold))
        self.input_btn.clicked.connect(self.select_input_dir)
        self.input_btn.setStyleSheet(self._PICK_BUTTON_STYLE)
        config_layout.addWidget(self.input_btn, 0, 2)

        # Row 1: output folder.
        self.output_label = QLabel("📁 Dossier de sortie: Aucun dossier sélectionné")
        self.output_label.setFont(QFont("Arial", 11))
        self.output_label.setStyleSheet(self._PATH_LABEL_STYLE)
        config_layout.addWidget(self.output_label, 1, 0, 1, 2)

        self.output_btn = QPushButton("Choisir dossier de sortie")
        self.output_btn.setFont(QFont("Arial", 11, QFont.Bold))
        self.output_btn.clicked.connect(self.select_output_dir)
        self.output_btn.setStyleSheet(self._PICK_BUTTON_STYLE)
        config_layout.addWidget(self.output_btn, 1, 2)

        # Row 2: Whisper model. The first word of each entry is the model name.
        model_label = QLabel("🧠 Modèle Whisper:")
        model_label.setFont(QFont("Arial", 11, QFont.Bold))
        config_layout.addWidget(model_label, 2, 0)

        self.model_combo = QComboBox()
        self.model_combo.setFont(QFont("Arial", 11))
        self.model_combo.addItems([
            "medium (recommandé)",
            "small (plus rapide mais moins précis)",
            "large (plus précis mais plus lent)",
        ])
        self.model_combo.setCurrentText("medium (recommandé)")
        self.model_combo.setStyleSheet(self._COMBO_STYLE)
        config_layout.addWidget(self.model_combo, 2, 1)

        # Row 3: Hugging Face model used for the analysis.
        hf_label = QLabel("🤖 Modèle HF (analyse):")
        hf_label.setFont(QFont("Arial", 11, QFont.Bold))
        config_layout.addWidget(hf_label, 3, 0)

        self.hf_combo = QComboBox()
        self.hf_combo.setFont(QFont("Arial", 11))
        self.hf_combo.addItems([
            "meta-llama/Llama-3.2-1B-Instruct (meilleur compromis)",
            "microsoft/Phi-3-mini-4k-instruct (rapide)",
            "mistralai/Mistral-7B-Instruct-v0.3 (meilleure qualité)",
        ])
        self.hf_combo.setCurrentText("meta-llama/Llama-3.2-1B-Instruct (meilleur compromis)")
        self.hf_combo.setStyleSheet(self._COMBO_STYLE)
        config_layout.addWidget(self.hf_combo, 3, 1)

        # Row 4: file-naming hint spanning the three columns.
        hint = QLabel("ℹ️ Les fichiers MP3 doivent contenir le nom du journaliste pour extraire l’auteur (ex: ‘Marie Dupont.mp3’).")
        hint.setWordWrap(True)
        hint.setFont(QFont("Arial", 9))
        hint.setStyleSheet("color: #8c0000; padding-top: 6px;")
        config_layout.addWidget(hint, 4, 0, 1, 3)

        return config_group

    def _build_controls(self):
        """Create the start/cancel button row; both buttons start disabled."""
        controls_layout = QHBoxLayout()

        self.start_btn = QPushButton("▶️ DÉMARRER LE TRAITEMENT")
        self.start_btn.setFont(QFont("Arial", 12, QFont.Bold))
        self.start_btn.clicked.connect(self.start_processing)
        self.start_btn.setEnabled(False)
        self.start_btn.setStyleSheet("padding: 15px; background-color: #27ae60; color: white; font-weight: bold; border-radius: 5px; font-size: 12pt;")

        self.cancel_btn = QPushButton("⏹️ ANNULER")
        self.cancel_btn.setFont(QFont("Arial", 12, QFont.Bold))
        self.cancel_btn.clicked.connect(self.cancel_processing)
        self.cancel_btn.setEnabled(False)
        self.cancel_btn.setStyleSheet("padding: 15px; background-color: #e74c3c; color: white; font-weight: bold; border-radius: 5px; font-size: 12pt;")

        controls_layout.addWidget(self.start_btn)
        controls_layout.addWidget(self.cancel_btn)
        return controls_layout

    def _build_status_widgets(self, layout):
        """Add the progress bar and the elapsed-time label to `layout`."""
        self.progress_bar = QProgressBar()
        self.progress_bar.setStyleSheet("QProgressBar { border: 2px solid grey; border-radius: 5px; text-align: center; } QProgressBar::chunk { background-color: #3498db; }")
        layout.addWidget(self.progress_bar)

        self.elapsed_label = QLabel("⏱️ Temps écoulé: 00:00")
        self.elapsed_label.setAlignment(Qt.AlignCenter)
        self.elapsed_label.setFont(QFont("Arial", 10, QFont.Bold))
        self.elapsed_label.setStyleSheet("color: #2c3e50; margin-bottom: 10px;")
        layout.addWidget(self.elapsed_label)

    def _build_log_group(self):
        """Create the read-only execution journal."""
        log_group = QGroupBox("📋 Journal d'exécution")
        log_group.setFont(QFont("Arial", 12, QFont.Bold))
        log_layout = QVBoxLayout(log_group)

        self.log_text = QTextEdit()
        self.log_text.setReadOnly(True)
        self.log_text.setFont(QFont("Courier New", 10))
        self.log_text.setStyleSheet("background-color: #2c3e50; color: #ecf0f1; font-family: 'Courier New'; font-size: 10pt;")
        self.log_text.append("📝 Prêt à traiter vos fichiers BOB...")
        log_layout.addWidget(self.log_text)

        return log_group

    @classmethod
    def _shorten_path(cls, path):
        """Return `path` unchanged when short, otherwise '...' plus its tail."""
        if len(path) < cls._MAX_PATH_CHARS:
            return path
        # Keep the end of the path: it is the part that identifies the folder.
        return "..." + path[-(cls._MAX_PATH_CHARS - 3):]

    def select_input_dir(self):
        """Ask for the folder containing the audio files and remember it."""
        dir_path = QFileDialog.getExistingDirectory(self, "Sélectionner le dossier contenant les fichiers MP3")
        if dir_path:
            self.input_dir = dir_path
            self.input_label.setText(f"📁 Dossier d'entrée: {self._shorten_path(dir_path)}")
            self.check_start_conditions()

    def select_output_dir(self):
        """Ask for the output folder and remember it."""
        dir_path = QFileDialog.getExistingDirectory(self, "Sélectionner le dossier de sortie")
        if dir_path:
            self.output_dir = dir_path
            self.output_label.setText(f"📁 Dossier de sortie: {self._shorten_path(dir_path)}")
            self.check_start_conditions()

    def check_start_conditions(self):
        """Enable the start button only when both folders have been chosen."""
        can_start = bool(self.input_dir and self.output_dir)
        self.start_btn.setEnabled(can_start)

    @staticmethod
    def _resolve_hf_model(hf_choice):
        """Map a combo-box entry to (Hugging Face model id, fast-mode flag)."""
        if "Llama-3.2-1B" in hf_choice:
            return "meta-llama/Llama-3.2-1B-Instruct", True
        if "Phi-3" in hf_choice:
            return "microsoft/Phi-3-mini-4k-instruct", True
        return "mistralai/Mistral-7B-Instruct-v0.3", False

    def start_processing(self):
        """Start a processing run in a background WorkerThread."""
        if not self.input_dir or not self.output_dir:
            QMessageBox.warning(self, "Erreur", "Veuillez sélectionner les dossiers d'entrée et de sortie.")
            return

        # Combo entries look like "medium (recommandé)": keep the first word.
        whisper_model = self.model_combo.currentText().split()[0]
        hf_model_name, fast_mode = self._resolve_hf_model(self.hf_combo.currentText())

        # Publish the job settings through the process environment as well.
        output_root = Path(self.output_dir)
        os.environ["BOB_INPUT_DIR"] = str(self.input_dir)
        os.environ["BOB_TRANSCRIPTIONS_DIR"] = str(output_root / "transcriptions")
        os.environ["BOB_OUTPUT_FILE"] = str(output_root / "resume_bob.txt")
        os.environ["WHISPER_MODEL"] = whisper_model
        os.environ["HF_MODEL"] = hf_model_name

        self.worker_thread = WorkerThread(self.input_dir, self.output_dir, whisper_model, hf_model_name, fast_mode)
        self.worker_thread.progress.connect(self.update_progress)
        self.worker_thread.log.connect(self.add_log)
        self.worker_thread.finished.connect(self.processing_finished)

        # Switch the interface to its "running" state.
        self.start_btn.setEnabled(False)
        self.cancel_btn.setEnabled(True)
        self.progress_bar.setValue(0)
        self.log_text.clear()

        # Restart the elapsed-time display; the timer ticks once per second.
        self.elapsed_seconds = 0
        self._update_elapsed_label()
        self.timer.start(1000)

        self.worker_thread.start()

    def cancel_processing(self):
        """Ask the running worker, if any, to stop."""
        if self.worker_thread and self.worker_thread.isRunning():
            self.worker_thread.cancel()
            self.add_log("⏹️ Annulation en cours...")

    def update_progress(self, value):
        """Show `value` on the progress bar."""
        self.progress_bar.setValue(value)

    def add_log(self, message):
        """Append a timestamped line to the journal and scroll to the bottom."""
        timestamp = time.strftime("%H:%M:%S")
        self.log_text.append(f"[{timestamp}] {message}")

        scrollbar = self.log_text.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())

    def processing_finished(self, message):
        """Restore the idle state and show the worker's final status message.

        Only a message starting with the success prefix is presented as a
        success. Every other outcome (error, cancellation, no audio file,
        missing result file) is presented as a warning.
        """
        self.start_btn.setEnabled(True)
        self.cancel_btn.setEnabled(False)

        if self.timer.isActive():
            self.timer.stop()

        if message.startswith(self._SUCCESS_PREFIX):
            QMessageBox.information(self, "Succès!", message)
        else:
            QMessageBox.warning(self, "Traitement terminé", message)

    def _tick_elapsed(self):
        """Timer slot: count one more second and refresh the label."""
        self.elapsed_seconds += 1
        self._update_elapsed_label()

    def _update_elapsed_label(self):
        """Render the elapsed time as MM:SS."""
        m, s = divmod(self.elapsed_seconds, 60)
        self.elapsed_label.setText(f"⏱️ Temps écoulé: {m:02d}:{s:02d}")
|
|
|
|
|
def main():
    """Application entry point.

    Hides the console window (Windows only), creates the Qt application with
    the 'Fusion' style, shows the main window and exits the process with the
    event loop's return code.
    """
    _hide_windows_console()

    qt_app = QApplication(sys.argv)
    qt_app.setStyle('Fusion')

    main_window = BOBProcessorGUI()
    main_window.show()

    exit_code = qt_app.exec_()
    sys.exit(exit_code)
|
|
|
|
|
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|