"""Gradio app that classifies an audio clip as real or deepfake.

A Wav2Vec2 sequence-classification model is built from ``MODEL_ID``,
dynamically quantized (``torch.nn.Linear`` -> int8) and, when
``QUANTIZED_MODEL_PATH`` exists, populated from that state dict. The model is
then served through a Gradio interface that accepts an uploaded or recorded
clip.
"""

import logging
import os
from typing import Dict, Optional, Union

import gradio as gr
import librosa
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

logger = logging.getLogger(__name__)

# 1. CONFIGURATION
MODEL_ID = "facebook/wav2vec2-xls-r-300m"
QUANTIZED_MODEL_PATH = "quantized_model.pth"
# Audio is resampled to this rate on load, and the same value is passed to the
# feature extractor so the two can never disagree.
TARGET_SAMPLE_RATE = 16000

# 2. LOAD MODEL
print("Loading model architecture...")
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)

# Apply the quantization structure *before* loading weights: the saved state
# dict only matches once the Linear layers have been swapped for their
# quantized counterparts (this must mirror how the checkpoint was saved).
model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# Best effort: a missing checkpoint produces a warning instead of a crash so
# the UI still starts, but predictions are meaningless in that case.
if os.path.exists(QUANTIZED_MODEL_PATH):
    print("Loading quantized weights...")
    # SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source.
    state_dict = torch.load(QUANTIZED_MODEL_PATH, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
else:
    print(f"Warning: {QUANTIZED_MODEL_PATH} not found. Using random weights (Model will not work correctly).")

model.eval()


# 3. PREDICTION FUNCTION
def predict_audio(audio_path: Optional[str]) -> Union[Dict[str, float], str]:
    """Classify the audio file at *audio_path* as real or deepfake.

    Args:
        audio_path: Path to the uploaded or recorded clip, or ``None`` when the
            Gradio input has been cleared.

    Returns:
        A mapping ``{"Deepfake": p_fake, "Real": p_real}`` of softmax
        probabilities on success, or a plain message string when there is no
        audio to analyse or processing fails.
    """
    # Gradio passes None if the user clears the input.
    if audio_path is None:
        return "No Audio Provided"

    try:
        # librosa handles the filepath from either upload or microphone and
        # resamples to the requested rate.
        speech_array, _ = librosa.load(audio_path, sr=TARGET_SAMPLE_RATE)

        # A zero-length recording cannot be scored; report it clearly instead
        # of letting the model fail with an opaque error.
        if speech_array.size == 0:
            return "No Audio Provided"

        inputs = feature_extractor(
            speech_array,
            sampling_rate=TARGET_SAMPLE_RATE,
            return_tensors="pt",
            padding=True,
        )

        with torch.no_grad():
            logits = model(**inputs).logits

        probs = torch.nn.functional.softmax(logits, dim=-1)

        # Label 0 = Real, Label 1 = Deepfake
        return {
            "Deepfake": probs[0][1].item(),
            "Real": probs[0][0].item(),
        }
    except Exception as e:
        # UI boundary: keep the app alive and show the message to the user, but
        # keep the traceback in the server log so failures are diagnosable.
        logger.exception("Failed to process audio %r", audio_path)
        return f"Error processing audio: {e}"


# 4. CREATE INTERFACE
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(
        sources=["upload", "microphone"],  # accept file uploads and live recording
        type="filepath",  # keep as filepath so librosa can load it
        label="Upload or Record Audio",
    ),
    outputs=gr.Label(num_top_classes=2),
    title="Deepfake Audio Detection API",
    description="Upload an audio file or record your voice to check if it's real or fake.",
)

# Launched at module level, exactly as before, so the server also starts
# whenever this module is imported.
iface.launch()