Upload 8 files

- enhanced_casl_app.py +590 -0
- moderate_casl_app.py +7 -2
- moderate_casl_app_fixed.py +406 -0
- requirements.txt +15 -2
- simple_casl_app.py +1089 -112
- transcription_demo.py +826 -0
enhanced_casl_app.py
ADDED
@@ -0,0 +1,590 @@
import gradio as gr
import json
import os
import logging
import requests
import re
import numpy as np
import pandas as pd
from datetime import datetime
import time
from typing import Dict, List, Tuple, Optional
import tempfile

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Anthropic API key
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Try to import SpeechBrain and HuggingFace components
try:
    from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    import torch
    SPEECHBRAIN_AVAILABLE = True
    HUGGINGFACE_AVAILABLE = True
    logger.info("SpeechBrain and HuggingFace models available")
except ImportError as e:
    logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
    SPEECHBRAIN_AVAILABLE = False
    HUGGINGFACE_AVAILABLE = False

# Initialize models if available
asr_model = None
vad_model = None
sentiment_model = None
emotion_model = None

if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
    try:
        # Speech-to-text model
        asr_model = EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-crdnn-rnnlm-librispeech",
            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
        )

        # Voice Activity Detection
        vad_model = VAD.from_hparams(
            source="speechbrain/vad-crdnn-libriparty",
            savedir="pretrained_models/vad-crdnn-libriparty"
        )

        # Sentiment analysis
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            return_all_scores=True
        )

        # Emotion analysis
        emotion_model = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True
        )

        logger.info("All models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        SPEECHBRAIN_AVAILABLE = False
        HUGGINGFACE_AVAILABLE = False

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def transcribe_audio_with_metadata(audio_file):
    """Transcribe audio with timestamps, sentiment, and metadata"""
    if not audio_file:
        return None, "No audio file provided"

    if not SPEECHBRAIN_AVAILABLE:
        return None, "SpeechBrain not available - using demo transcription"

    try:
        # Get transcription with timestamps
        transcript = asr_model.transcribe_file(audio_file)

        # Split into sentences for analysis
        sentences = re.split(r'[.!?]+', transcript)
        sentences = [s.strip() for s in sentences if s.strip()]

        # Analyze each sentence
        rich_transcript = []
        current_time = 0

        for i, sentence in enumerate(sentences):
            # Estimate timestamp (rough approximation)
            timestamp = current_time + (i * 2)  # Assume ~2 seconds per sentence

            # Sentiment analysis
            sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
            sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}

            # Emotion analysis
            emotion_result = emotion_model(sentence)[0] if emotion_model else None
            emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}

            # Word count and complexity metrics
            words = sentence.split()
            word_count = len(words)
            avg_word_length = np.mean([len(word) for word in words]) if words else 0

            # Calculate speech rate (words per minute estimate)
            speech_rate = word_count * 30 / 60  # Rough estimate

            rich_transcript.append({
                'timestamp': timestamp,
                'sentence': sentence,
                'word_count': word_count,
                'avg_word_length': round(avg_word_length, 2),
                'speech_rate_wpm': round(speech_rate, 1),
                'sentiment': sentiment['label'],
                'sentiment_score': round(sentiment['score'], 3),
                'emotion': emotion['label'],
                'emotion_score': round(emotion['score'], 3)
            })

            current_time = timestamp

        return rich_transcript, "Transcription completed successfully"

    except Exception as e:
        logger.error(f"Error in transcription: {e}")
        return None, f"Transcription error: {str(e)}"

def format_rich_transcript(rich_transcript):
    """Format rich transcript for display"""
    if not rich_transcript:
        return "No transcript data available"

    formatted_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"

        line = f"[{timestamp_str}] *PAR: {entry['sentence']}"
        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"

        formatted_lines.append(line)

    return '\n'.join(formatted_lines)

def calculate_slp_metrics(rich_transcript):
    """Calculate comprehensive SLP metrics"""
    if not rich_transcript:
        return {}

    # Basic metrics
    total_sentences = len(rich_transcript)
    total_words = sum(entry['word_count'] for entry in rich_transcript)
    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0

    # Word-level analysis
    all_words = []
    for entry in rich_transcript:
        words = entry['sentence'].lower().split()
        all_words.extend(words)

    # Word frequency distribution
    word_freq = {}
    for word in all_words:
        word_clean = re.sub(r'[^\w\s]', '', word)
        if word_clean:
            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1

    # Vocabulary diversity (Type-Token Ratio)
    unique_words = len(set(all_words))
    ttr = unique_words / total_words if total_words > 0 else 0

    # Speech rate analysis
    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0

    # Sentiment analysis
    sentiment_counts = {}
    emotion_counts = {}
    for entry in rich_transcript:
        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1

    # Sentence complexity
    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0

    # Pause analysis (gaps between sentences)
    pauses = []
    for i in range(1, len(rich_transcript)):
        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
        pauses.append(pause)

    avg_pause_duration = np.mean(pauses) if pauses else 0

    return {
        'total_sentences': total_sentences,
        'total_words': total_words,
        'total_duration_seconds': total_duration,
        'unique_words': unique_words,
        'type_token_ratio': round(ttr, 3),
        'avg_sentence_length': round(avg_sentence_length, 1),
        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
        'avg_pause_duration': round(avg_pause_duration, 1),
        'sentiment_distribution': sentiment_counts,
        'emotion_distribution': emotion_counts,
        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0
    }

def generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
    """Generate comprehensive SLP analysis prompt"""

    # Format metrics for the prompt
    metrics_text = f"""
TRANSCRIPT METRICS:
- Total sentences: {metrics['total_sentences']}
- Total words: {metrics['total_words']}
- Duration: {metrics['total_duration_seconds']:.1f} seconds
- Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
- Average sentence length: {metrics['avg_sentence_length']} words
- Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
- Speech rate variability: {metrics['speech_rate_variability']} wpm
- Average pause duration: {metrics['avg_pause_duration']:.1f} seconds

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

MOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}
"""

    # Format rich transcript for analysis
    transcript_text = format_rich_transcript(rich_transcript)

    notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""

    prompt = f"""
You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich metadata.

PATIENT: {age}-year-old {gender}

{metrics_text}

TRANSCRIPT WITH METADATA:
{transcript_text}{notes_section}

Please provide a comprehensive analysis including:

1. SPEECH FLUENCY ANALYSIS:
- Speech rate patterns and variability
- Pause patterns and their significance
- Overall fluency assessment

2. LANGUAGE COMPLEXITY:
- Vocabulary diversity and word frequency patterns
- Sentence structure and complexity
- Language development level assessment

3. EMOTIONAL AND AFFECTIVE ANALYSIS:
- Sentiment patterns throughout the transcript
- Emotional expression and regulation
- Impact on communication effectiveness

4. SPEECH FACTORS:
- Word retrieval patterns
- Grammatical accuracy
- Repetitions and revisions

5. CLINICAL IMPLICATIONS:
- Specific intervention targets
- Strengths and areas for improvement
- Recommendations for therapy

6. COMPREHENSIVE SUMMARY:
- Overall communication profile
- Developmental appropriateness
- Prognosis and treatment priorities

Use the quantitative metrics and qualitative observations to support your analysis.
"""

    return prompt

def analyze_rich_transcript(rich_transcript, age, gender, slp_notes=""):
    """Analyze rich transcript with comprehensive metrics"""
    if not rich_transcript:
        return "No transcript data available for analysis."

    # Calculate SLP metrics
    metrics = calculate_slp_metrics(rich_transcript)

    # Generate analysis prompt
    prompt = generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)

    # Get analysis from Claude API
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_analysis(rich_transcript, metrics)

    return result

def generate_demo_analysis(rich_transcript, metrics):
    """Generate demo analysis when API is not available"""
    return f"""## Comprehensive SLP Analysis

### SPEECH FLUENCY ANALYSIS
**Speech Rate**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
- Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
- Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns

**Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
- {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances

### LANGUAGE COMPLEXITY
**Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity

**Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
- Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}

**Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}

### EMOTIONAL AND AFFECTIVE ANALYSIS
**Sentiment Distribution**: {metrics['sentiment_distribution']}
**Emotion Distribution**: {metrics['emotion_distribution']}

### CLINICAL IMPLICATIONS
Based on the quantitative analysis, this patient shows:
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
- {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
- {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns

### RECOMMENDATIONS
1. Focus on vocabulary expansion if TTR < 0.4
2. Address speech rate if outside normal range
3. Work on sentence complexity if below age expectations
4. Consider emotional regulation strategies based on sentiment patterns"""

def create_enhanced_interface():
    """Create the enhanced Gradio interface"""
    with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
        gr.Markdown("Advanced speech analysis with sentiment, timestamps, and comprehensive SLP metrics")

        with gr.Tabs():
            # Audio Upload & Transcription Tab
            with gr.Tab("🎤 Audio Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Audio Upload")

                        audio_input = gr.Audio(
                            type="filepath",
                            label="Upload Audio Recording"
                        )

                        transcribe_btn = gr.Button(
                            "🎤 Transcribe & Analyze",
                            variant="primary",
                            size="lg"
                        )

                        transcription_status = gr.Markdown("")

                    with gr.Column(scale=2):
                        gr.Markdown("### Rich Transcript")

                        rich_transcript_display = gr.Textbox(
                            label="Transcription with Timestamps & Sentiment",
                            lines=15,
                            max_lines=20
                        )

            # Analysis Tab
            with gr.Tab("📊 Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter additional clinical observations...",
                            lines=3
                        )

                        analyze_btn = gr.Button(
                            "🔍 Analyze Transcript",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column(scale=2):
                        gr.Markdown("### Comprehensive Analysis")

                        analysis_output = gr.Textbox(
                            label="SLP Analysis Report",
                            lines=25,
                            max_lines=30
                        )

            # Metrics Tab
            with gr.Tab("📈 Metrics Dashboard"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Quantitative Metrics")

                        metrics_display = gr.JSON(
                            label="SLP Metrics",
                            interactive=False
                        )

                    with gr.Column():
                        gr.Markdown("### Word Frequency")

                        word_freq_display = gr.Dataframe(
                            headers=["Word", "Frequency"],
                            label="Most Frequent Words",
                            interactive=False
                        )

        # Event handlers
        def on_transcribe(audio_file):
            """Handle audio transcription"""
            if not audio_file:
                return "", "Please upload an audio file first."

            rich_transcript, status = transcribe_audio_with_metadata(audio_file)

            if rich_transcript:
                formatted = format_rich_transcript(rich_transcript)
                return formatted, status
            else:
                return "", status

        def on_analyze(rich_transcript_text, age_val, gender_val, notes):
            """Handle analysis"""
            # Convert formatted text back to rich transcript structure
            # This is a simplified version - in practice you'd want to store the rich data
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return "Please transcribe audio first."

            # For demo purposes, create a simple rich transcript from the text
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    # Extract sentence from the line
                    sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
                    if sentence_match:
                        sentence = sentence_match.group(1).strip()
                        rich_transcript.append({
                            'timestamp': i * 2,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            return analyze_rich_transcript(rich_transcript, age_val, gender_val, notes)

        def update_metrics(rich_transcript_text):
            """Update metrics display"""
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return {}, []

            # Convert text back to rich transcript (simplified)
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
                    if sentence_match:
                        sentence = sentence_match.group(1).strip()
                        rich_transcript.append({
                            'timestamp': i * 2,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            metrics = calculate_slp_metrics(rich_transcript)

            # Create word frequency dataframe
            word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]

            return metrics, word_freq_data

        # Connect event handlers
        transcribe_btn.click(
            on_transcribe,
            inputs=[audio_input],
            outputs=[rich_transcript_display, transcription_status]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[rich_transcript_display, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        # Update metrics when transcript changes
        rich_transcript_display.change(
            update_metrics,
            inputs=[rich_transcript_display],
            outputs=[metrics_display, word_freq_display]
        )

    return app

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")

    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
        print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
    else:
        print("✅ Claude API configured")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain and HuggingFace models loaded")

    app = create_enhanced_interface()
    app.launch(show_api=False)
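Before the next file, a small usage sketch (not part of the commit) of the rich-transcript entry shape that the functions above pass around; the sentences and scores are invented.

# Hypothetical entries in the shape produced by transcribe_audio_with_metadata(),
# fed to the formatting and metrics helpers from enhanced_casl_app.py.
from enhanced_casl_app import calculate_slp_metrics, format_rich_transcript

sample = [
    {'timestamp': 0, 'sentence': "we went to the beach", 'word_count': 5,
     'avg_word_length': 3.2, 'speech_rate_wpm': 120.0,
     'sentiment': 'positive', 'sentiment_score': 0.91,
     'emotion': 'joy', 'emotion_score': 0.88},
    {'timestamp': 2, 'sentence': "I ate ice cream", 'word_count': 4,
     'avg_word_length': 3.0, 'speech_rate_wpm': 110.0,
     'sentiment': 'positive', 'sentiment_score': 0.85,
     'emotion': 'joy', 'emotion_score': 0.80},
]

print(format_rich_transcript(sample))
# [00:00] *PAR: we went to the beach [Words: 5, Rate: 120.0wpm] [Sentiment: positive (0.91)] [Emotion: joy (0.88)]
print(calculate_slp_metrics(sample)['type_token_ratio'])  # unique words / total words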
moderate_casl_app.py
CHANGED
@@ -155,14 +155,19 @@ def call_bedrock(prompt, max_tokens=4096):
         "messages": [
             {
                 "role": "user",
-                "content":
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ]
             }
         ],
         "temperature": 0.3,
         "top_p": 0.9
     })
 
-    modelId = 'anthropic.claude-3-sonnet-
+    modelId = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
     response = bedrock_client.invoke_model(
         body=body,
         modelId=modelId,
moderate_casl_app_fixed.py
ADDED
@@ -0,0 +1,406 @@
import gradio as gr
import json
import os
import logging
import requests
import re
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Anthropic API key - can be set as HuggingFace secret or environment variable
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Check if API key is available
if ANTHROPIC_API_KEY:
    logger.info("Claude API key found")
else:
    logger.warning("Claude API key not found - using demo mode")

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def process_file(file):
    """Process uploaded file"""
    if file is None:
        return "Please upload a file first."

    try:
        # Read file content
        with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        if not content.strip():
            return "File appears to be empty."

        return content
    except Exception as e:
        return f"Error reading file: {str(e)}"

def read_cha_file(file_path):
    """Read and parse a .cha transcript file"""
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        # Extract participant lines (starting with *PAR:)
        par_lines = []
        for line in content.splitlines():
            if line.startswith('*PAR:'):
                par_lines.append(line)

        # If no PAR lines found, just return the whole content
        if not par_lines:
            return content

        return '\n'.join(par_lines)

    except Exception as e:
        logger.error(f"Error reading CHA file: {str(e)}")
        return ""

def process_upload(file):
    """Process an uploaded file (text or CHA)"""
    if file is None:
        return ""

    file_path = file.name
    if file_path.endswith('.cha'):
        return read_cha_file(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            return f.read()

def generate_demo_response(prompt):
    """Generate a demo response when API is not available"""
    return """## Speech Factors Analysis

**Difficulty producing fluent speech**: 8 instances, moderate severity
- Examples: "today I would &-um like to talk about &-um a fun trip"
- "we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually"

**Word retrieval issues**: 6 instances, mild-moderate severity
- Examples: "what do you call those &-um &-um sprinkles! that's the word"
- "sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built"

**Grammatical errors**: 4 instances, moderate severity
- Examples: "after swimming we [//] I eat [: ate] [*] &-um ice cream"
- "we saw [/] saw fishies [: fish] [*] swimming in the water"

**Repetitions and revisions**: 5 instances, mild severity
- Examples: "we [/] we stayed for &-um three no [//] four days"
- "I want to go back to the beach [/] beach next year"

## Language Skills Assessment

**Lexical/Semantic Skills**:
- Vocabulary diversity appears age-appropriate with some word-finding difficulties
- Examples: "what do you call those &-um &-um sprinkles! that's the word"
- Shows good semantic understanding but retrieval challenges

**Syntactic Skills**:
- Basic sentence structure is intact with some grammatical inconsistencies
- Examples: "my brother he [//] he helped me dig a big hole"
- Verb tense errors noted: "forgetted" for "forgot", "eat" for "ate"

**Supralinguistic Skills**:
- Narrative organization is good with logical sequence
- Examples: "sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold"
- Shows creative thinking and topic maintenance

## Treatment Recommendations

1. **Word-finding strategies**: Implement semantic cuing techniques using the patient's experiences (beach, ice cream) as context
2. **Grammar practice**: Focus on verb tense consistency with structured exercises
3. **Fluency techniques**: Work on reducing fillers and improving speech flow
4. **Self-monitoring**: Help patient identify and correct grammatical errors
5. **Vocabulary expansion**: Build on existing semantic networks

## Clinical Summary

This child demonstrates a mild-to-moderate expressive language disorder with primary concerns in word retrieval and grammatical accuracy. Strengths include good narrative organization and topic maintenance. The pattern suggests intervention should focus on word-finding strategies and grammatical form practice while building on existing semantic knowledge."""

def analyze_transcript(transcript, age, gender, slp_notes=""):
    """Analyze a speech transcript using Claude"""
    if not transcript or len(transcript.strip()) < 50:
        return "Error: Please provide a longer transcript for analysis."

    # Add SLP notes to the prompt if provided
    notes_section = ""
    if slp_notes and slp_notes.strip():
        notes_section = f"""

SLP CLINICAL NOTES:
{slp_notes.strip()}
"""

    # Simplified analysis prompt
    prompt = f"""
You are a speech-language pathologist analyzing a transcript for CASL assessment.

Patient: {age}-year-old {gender}

TRANSCRIPT:
{transcript}{notes_section}

Please provide a comprehensive CASL analysis including:

1. SPEECH FACTORS (with counts and severity):
- Difficulty producing fluent speech
- Word retrieval issues
- Grammatical errors
- Repetitions and revisions

2. LANGUAGE SKILLS ASSESSMENT:
- Lexical/Semantic Skills (qualitative assessment)
- Syntactic Skills (qualitative assessment)
- Supralinguistic Skills (qualitative assessment)

3. TREATMENT RECOMMENDATIONS:
- List 3-5 specific intervention strategies

4. CLINICAL SUMMARY:
- Brief explanation of findings and prognosis

Use exact quotes from the transcript as evidence.
Focus on qualitative observations rather than standardized scores.
Be specific and provide concrete examples from the transcript.
{f"Consider the SLP clinical notes in your analysis." if slp_notes and slp_notes.strip() else ""}
"""

    # Get analysis from Claude API or demo
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_response(prompt)

    return result

def create_interface():
    """Create the Gradio interface"""
    with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
        gr.Markdown("Upload a speech transcript and get comprehensive CASL assessment results.")

        with gr.Tabs():
            # Analysis Tab
            with gr.Tab("📊 Analysis"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter any additional clinical observations, context, or notes...",
                            lines=3
                        )

                        gr.Markdown("### Transcript Input")

                        file_upload = gr.File(
                            label="Upload Transcript File",
                            file_types=[".txt", ".cha"]
                        )

                        transcript = gr.Textbox(
                            label="Or Paste Transcript Here",
                            placeholder="Enter transcript text or upload a file...",
                            lines=10
                        )

                        analyze_btn = gr.Button("🔍 Analyze Transcript", variant="primary")

                    with gr.Column():
                        gr.Markdown("### Analysis Results")

                        analysis_output = gr.Textbox(
                            label="CASL Analysis Report",
                            placeholder="Analysis results will appear here...",
                            lines=25,
                            max_lines=30
                        )

            # Sample Transcripts Tab
            with gr.Tab("📝 Sample Transcripts"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Sample Transcripts")

                        sample_choice = gr.Dropdown(
                            choices=[
                                "Beach Trip (Child)",
                                "School Day (Adolescent)",
                                "Adult Recovery"
                            ],
                            label="Select a sample transcript:",
                            value="Beach Trip (Child)"
                        )

                        load_sample_btn = gr.Button("Load Sample", variant="secondary")

                        sample_transcript = gr.Textbox(
                            label="Sample Transcript",
                            lines=15,
                            interactive=False
                        )

                        use_sample_btn = gr.Button("Use This Sample for Analysis", variant="primary")

                    with gr.Column():
                        gr.Markdown("### Sample Descriptions")

                        gr.Markdown("""
**Beach Trip (Child)**: 8-year-old child describing a family beach vacation
- Shows typical child language patterns
- Contains word-finding difficulties and grammatical errors
- Good narrative structure despite language challenges

**School Day (Adolescent)**: Teenager describing a school day
- More complex language but still some disfluencies
- Shows adolescent speech patterns
- Academic vocabulary and social language

**Adult Recovery**: Adult describing stroke recovery
- Post-stroke language patterns
- Word-finding difficulties
- Shows recovery progress
""")

        # Sample transcripts
        SAMPLE_TRANSCRIPTS = {
            "Beach Trip (Child)": """*PAR: today I would &-um like to talk about &-um a fun trip I took last &-um summer with my family.
*PAR: we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually.
*PAR: there was lots of &-um &-um swimming and &-um sun.
*PAR: we [/] we stayed for &-um three no [//] four days in a &-um hotel near the water [: ocean] [*].
*PAR: my favorite part was &-um building &-um castles with sand.
*PAR: sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built.
*PAR: my brother he [//] he helped me dig a big hole.
*PAR: we saw [/] saw fishies [: fish] [*] swimming in the water.
*PAR: sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold.
*PAR: maybe they have [/] have houses under the water.
*PAR: after swimming we [//] I eat [: ate] [*] &-um ice cream with &-um chocolate things on top.
*PAR: what do you call those &-um &-um sprinkles! that's the word.
*PAR: my mom said to &-um that I could have &-um two scoops next time.
*PAR: I want to go back to the beach [/] beach next year.""",

            "School Day (Adolescent)": """*PAR: yesterday was &-um kind of a weird day at school.
*PAR: I had this big test in math and I was like really nervous about it.
*PAR: when I got there [//] when I got to class the teacher said we could use calculators.
*PAR: I was like &-oh &-um that's good because I always mess up the &-um the calculations.
*PAR: there was this one problem about &-um what do you call it &-um geometry I think.
*PAR: I couldn't remember the formula for [//] I mean I knew it but I just couldn't think of it.
*PAR: so I raised my hand and asked the teacher and she was really nice about it.
*PAR: after the test me and my friends went to lunch and we talked about how we did.
*PAR: everyone was saying it was hard but I think I did okay.
*PAR: oh and then in English class we had to read our essays out loud.
*PAR: I hate doing that because I get really nervous and I start talking fast.
*PAR: but the teacher said mine was good which made me feel better.""",

            "Adult Recovery": """*PAR: I &-um I want to talk about &-uh my &-um recovery.
*PAR: it's been &-um [//] it's hard to &-um to find the words sometimes.
*PAR: before the &-um the stroke I was &-um working at the &-uh at the bank.
*PAR: now I have to &-um practice speaking every day with my therapist.
*PAR: my wife she [//] she helps me a lot at home.
*PAR: we do &-um exercises together like &-uh reading and &-um talking about pictures.
*PAR: sometimes I get frustrated because I know what I want to say but &-um the words don't come out right.
*PAR: but I'm getting better little by little.
*PAR: the doctor says I'm making good progress.
*PAR: I hope to go back to work someday but right now I'm focusing on &-um getting better."""
        }

        # Event handlers
        def load_sample_transcript(sample_name):
            """Load a sample transcript"""
            return SAMPLE_TRANSCRIPTS.get(sample_name, "")

        def use_sample_for_analysis(sample_text, age_val, gender_val, notes):
            """Use sample transcript for analysis"""
            if not sample_text:
                return "Please load a sample transcript first."
            return analyze_transcript(sample_text, age_val, gender_val, notes)

        def on_analyze(transcript_text, age_val, gender_val, notes):
            """Handle analysis"""
            if not transcript_text or len(transcript_text.strip()) < 50:
                return "Error: Please provide a longer transcript for analysis."
            return analyze_transcript(transcript_text, age_val, gender_val, notes)

        # Connect event handlers
        load_sample_btn.click(
            load_sample_transcript,
            inputs=[sample_choice],
            outputs=[sample_transcript]
        )

        use_sample_btn.click(
            use_sample_for_analysis,
            inputs=[sample_transcript, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[transcript, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        # File upload handler
        file_upload.upload(process_upload, file_upload, transcript)

    return app

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
        print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
    else:
        print("✅ Claude API configured")

    app = create_interface()
    app.launch(show_api=False)
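As an aside, a small self-contained illustration (not part of the upload) of the *PAR: filtering that read_cha_file() performs on CHAT-format transcripts; the sample file content is invented.

# Hypothetical .cha content; only the participant (*PAR:) tiers are kept,
# mirroring read_cha_file() in moderate_casl_app_fixed.py.
cha_text = """@Begin
*INV: tell me about your trip .
*PAR: we went to the &-um beach .
%mor: pro|we v|go&PAST prep|to det|the n|beach .
*PAR: I eat [: ate] [*] ice cream .
@End"""

par_lines = [line for line in cha_text.splitlines() if line.startswith('*PAR:')]
print('\n'.join(par_lines))
# *PAR: we went to the &-um beach .
# *PAR: I eat [: ate] [*] ice cream .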
requirements.txt
CHANGED
@@ -5,5 +5,18 @@ matplotlib>=3.3.0
 requests>=2.25.0
 reportlab>=3.6.0
 PyPDF2>=2.0.0
-speechrecognition>=3.8.
-pydub>=0.25.0
+speechrecognition>=3.8.0
+pydub>=0.25.0
+
+# Transcription and audio processing
+speechbrain>=0.5.15
+torch>=1.9.0
+transformers>=4.20.0
+moviepy>=1.0.3
+
+# Optional: Speaker diarization (requires HF token)
+# pyannote.audio>=2.1.0
+
+# Optional: Additional audio processing
+librosa>=0.9.0
+soundfile>=0.10.0
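The commented-out pyannote.audio line above is the optional speaker-diarization dependency; a rough sketch of how it would typically be wired in, with the pipeline name and token handling as assumptions rather than code from this repository:

import os

try:
    from pyannote.audio import Pipeline  # optional dependency from requirements.txt
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",        # assumed pretrained pipeline name
        use_auth_token=os.getenv("HF_TOKEN"),  # assumed HF token env var
    )
except ImportError:
    diarization_pipeline = None  # apps fall back to single-speaker transcripts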
simple_casl_app.py
CHANGED
|
@@ -3,6 +3,9 @@ import json
|
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
import requests
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Configure logging
|
| 8 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -11,52 +14,483 @@ logger = logging.getLogger(__name__)
|
|
| 11 |
# Anthropic API key - can be set as HuggingFace secret or environment variable
|
| 12 |
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Check if API key is available
|
| 15 |
if ANTHROPIC_API_KEY:
|
| 16 |
logger.info("Claude API key found")
|
| 17 |
else:
|
| 18 |
logger.warning("Claude API key not found - using demo mode")
|
| 19 |
|
| 20 |
-
def
|
| 21 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
if not ANTHROPIC_API_KEY:
|
| 23 |
return "β Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 24 |
|
| 25 |
try:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
"x-api-key": ANTHROPIC_API_KEY,
|
| 29 |
-
"anthropic-version": "2023-06-01"
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
data = {
|
| 33 |
-
"model": "claude-3-5-sonnet-20241022",
|
| 34 |
-
"max_tokens": 4096,
|
| 35 |
-
"messages": [
|
| 36 |
-
{
|
| 37 |
-
"role": "user",
|
| 38 |
-
"content": prompt
|
| 39 |
-
}
|
| 40 |
-
]
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
-
response = requests.post(
|
| 44 |
-
"https://api.anthropic.com/v1/messages",
|
| 45 |
-
headers=headers,
|
| 46 |
-
json=data,
|
| 47 |
-
timeout=60
|
| 48 |
-
)
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
except Exception as e:
|
| 58 |
logger.error(f"Error calling Claude API: {str(e)}")
|
| 59 |
return f"β Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def process_file(file):
|
| 62 |
"""Process uploaded file"""
|
|
@@ -75,15 +509,10 @@ def process_file(file):
|
|
| 75 |
except Exception as e:
|
| 76 |
return f"Error reading file: {str(e)}"
|
| 77 |
|
| 78 |
-
def
|
| 79 |
-
"""
|
| 80 |
-
if
|
| 81 |
-
return "Please
|
| 82 |
-
|
| 83 |
-
# Get transcript content
|
| 84 |
-
transcript = process_file(file)
|
| 85 |
-
if transcript.startswith("Error") or transcript.startswith("Please"):
|
| 86 |
-
return transcript
|
| 87 |
|
| 88 |
# Add SLP notes to the prompt if provided
|
| 89 |
notes_section = ""
|
|
@@ -94,45 +523,249 @@ def analyze_transcript(file, age, gender, slp_notes):
|
|
| 94 |
{slp_notes.strip()}
|
| 95 |
"""
|
| 96 |
|
| 97 |
-
#
|
| 98 |
prompt = f"""
|
| 99 |
-
You are a speech-language pathologist
|
| 100 |
-
|
| 101 |
Patient: {age}-year-old {gender}
|
| 102 |
|
| 103 |
TRANSCRIPT:
|
| 104 |
-
{
|
| 105 |
-
|
| 106 |
-
Please provide a CASL analysis including:
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
-
|
| 116 |
-
-
|
| 117 |
-
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
"""

    # Get analysis from Claude API
-    result = ...
    return result

def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
-    """Perform targeted analysis based on custom questions"""
    if not transcript or not transcript.strip():
        return "Please provide a transcript first."

@@ -148,9 +781,9 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
{slp_notes.strip()}
"""

-    # ...
    prompt = f"""
-You are a speech-language pathologist conducting a targeted analysis of a speech transcript.

Patient: {age}-year-old {gender}

@@ -160,27 +793,94 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
SPECIFIC QUESTION FOR ANALYSIS:
{custom_question.strip()}

-...

-...
-4. Offer practical insights for clinical practice
-5. Be concise but comprehensive

-...
"""

    # Get targeted analysis from Claude API
-    result = ...
    return result

# Create enhanced interface with tabs
with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
-    gr.Markdown("Upload a speech transcript and get instant CASL assessment results with targeted analysis options.")

    # Store transcript globally
    transcript_state = gr.State("")

@@ -190,12 +890,46 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    with gr.Tab("Basic Analysis"):
        with gr.Row():
            with gr.Column():
-                gr.Markdown("### ...

-                ...

                age = gr.Number(
                    label="Patient Age",

@@ -215,11 +949,6 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    placeholder="Enter any additional clinical observations, context, or notes...",
                    lines=3
                )
-
-                analyze_btn = gr.Button(
-                    "🔍 Analyze Transcript",
-                    variant="primary"
-                )

            with gr.Column():
                gr.Markdown("### Analysis Results")

@@ -230,6 +959,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Tab 2: Targeted Analysis
    with gr.Tab("🎯 Targeted Analysis"):

@@ -257,7 +988,9 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                        "What narrative organization skills are evident?",
                        "What specific intervention targets would you recommend?",
                        "How does this patient's language compare to typical development?",
-                        "What evidence suggests cognitive-linguistic strengths/weaknesses?"
                    ],
                    label="Question Templates (Optional)",
                    value="Select a template or write your own..."

@@ -283,6 +1016,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Tab 3: Quick Questions
    with gr.Tab("⚡ Quick Questions"):

@@ -307,7 +1042,19 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                        "Narrative structure",
                        "Vocabulary level",
                        "Sentence complexity",
-                        "Speech rate patterns"
                    ],
                    label="Select questions to analyze:",
                    value=[]

@@ -327,17 +1074,108 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Event handlers
-    def ...
-        """Handle ...
        result = analyze_transcript(file, age_val, gender_val, notes)
        transcript = process_file(file) if file else ""
-        ...

    def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
        """Handle targeted analysis"""
-        ...

    def on_question_template_change(template):
        """Handle question template selection"""

@@ -348,10 +1186,10 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
        """Handle quick analysis with multiple questions"""
        if not transcript or not transcript.strip():
-            return "Please provide a transcript first."

        if not questions:
-            return "Please select at least one question to analyze."

        # Add SLP notes to the prompt if provided
        notes_section = ""

@@ -362,40 +1200,150 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
{notes.strip()}
"""

-        # Create quick analysis prompt
        questions_text = "\n".join([f"- {q}" for q in questions])
        prompt = f"""
-You are a speech-language pathologist conducting a quick analysis of a speech transcript.

Patient: {age_val}-year-old {gender_val}

TRANSCRIPT:
{transcript}{notes_section}

-Please provide a ...
{questions_text}

-...

-...
"""

-        ...

    # Connect event handlers
-    ...
-    ...
        inputs=[file_upload, age, gender, slp_notes],
-        outputs=[output, transcript_input]
    )

    targeted_analyze_btn.click(
        on_targeted_analyze,
        inputs=[transcript_input, custom_question, age, gender, slp_notes],
-        outputs=[targeted_output]
    )

    question_templates.change(

@@ -407,11 +1355,15 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    quick_analyze_btn.click(
        on_quick_analyze,
        inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
-        outputs=[quick_output]
    )

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")

@@ -419,4 +1371,29 @@ if __name__ == "__main__":
    else:
        print("✅ Claude API configured")

    app.launch(show_api=False)

import os
import logging
import requests
+import re
+import tempfile
+import numpy as np

# Configure logging
logging.basicConfig(level=logging.INFO)

# Anthropic API key - can be set as HuggingFace secret or environment variable
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

+# Try to import transcription libraries
+try:
+    from speechbrain.pretrained import EncoderDecoderASR
+    import torch
+    SPEECHBRAIN_AVAILABLE = True
+    logger.info("SpeechBrain available for transcription")
+except ImportError as e:
+    logger.warning(f"SpeechBrain not available: {e}")
+    SPEECHBRAIN_AVAILABLE = False
+
+# Try to import video processing
+try:
+    import moviepy.editor as mp
+    MOVIEPY_AVAILABLE = True
+    logger.info("MoviePy available for video processing")
+except ImportError as e:
+    logger.warning(f"MoviePy not available: {e}")
+    MOVIEPY_AVAILABLE = False
+
+# Try to import speaker diarization
+try:
+    from pyannote.audio import Pipeline
+    from pyannote.audio.pipelines.utils.hook import ProgressHook
+    DIARIZATION_AVAILABLE = True
+    logger.info("Pyannote.audio available for speaker diarization")
+except ImportError as e:
+    logger.warning(f"Pyannote.audio not available: {e}")
+    DIARIZATION_AVAILABLE = False
+
+# Try to import sentiment and emotion analysis
+try:
+    from transformers import pipeline
+    SENTIMENT_AVAILABLE = True
+    logger.info("Transformers available for sentiment analysis")
+except ImportError as e:
+    logger.warning(f"Transformers not available: {e}")
+    SENTIMENT_AVAILABLE = False
+
+# Initialize models if available
+asr_model = None
+sentiment_model = None
+emotion_model = None
+diarization_pipeline = None
+
+if SPEECHBRAIN_AVAILABLE:
+    try:
+        asr_model = EncoderDecoderASR.from_hparams(
+            source="speechbrain/asr-crdnn-rnnlm-librispeech",
+            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
+        )
+        logger.info("ASR model loaded successfully")
+    except Exception as e:
+        logger.error(f"Error loading ASR model: {e}")
+        SPEECHBRAIN_AVAILABLE = False
+
+if SENTIMENT_AVAILABLE:
+    try:
+        sentiment_model = pipeline(
+            "sentiment-analysis",
+            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+            top_k=None
+        )
+        emotion_model = pipeline(
+            "text-classification",
+            model="j-hartmann/emotion-english-distilroberta-base",
+            top_k=None
+        )
+        logger.info("Sentiment and emotion models loaded")
+    except Exception as e:
+        logger.error(f"Error loading sentiment models: {e}")
+        SENTIMENT_AVAILABLE = False
+
+if DIARIZATION_AVAILABLE:
+    try:
+        HF_TOKEN = os.getenv("HF_TOKEN", "")
+        if HF_TOKEN:
+            diarization_pipeline = Pipeline.from_pretrained(
+                "pyannote/speaker-diarization@2.1",
+                use_auth_token=HF_TOKEN
+            )
+            logger.info("Speaker diarization pipeline loaded")
+        else:
+            logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
+    except Exception as e:
+        logger.error(f"Error loading diarization pipeline: {e}")
+
# Check if API key is available
if ANTHROPIC_API_KEY:
    logger.info("Claude API key found")
else:
    logger.warning("Claude API key not found - using demo mode")

+def validate_analysis_completeness(response_text):
+    """Validate that all 12 sections are present in the analysis"""
+    required_sections = [
+        "1. SPEECH FACTORS",
+        "2. LANGUAGE SKILLS ASSESSMENT",
+        "3. COMPLEX SENTENCE ANALYSIS",
+        "4. FIGURATIVE LANGUAGE ANALYSIS",
+        "5. PRAGMATIC LANGUAGE ASSESSMENT",
+        "6. VOCABULARY AND SEMANTIC ANALYSIS",
+        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
+        "8. COGNITIVE-LINGUISTIC FACTORS",
+        "9. FLUENCY AND RHYTHM ANALYSIS",
+        "10. QUANTITATIVE METRICS",
+        "11. CLINICAL IMPLICATIONS",
+        "12. PROGNOSIS AND SUMMARY"
+    ]
+
+    missing_sections = []
+    for section in required_sections:
+        if section not in response_text:
+            missing_sections.append(section)
+
+    if missing_sections:
+        print(f"\n⚠️ MISSING SECTIONS: {missing_sections}")
+        return False
+    else:
+        print(f"\n✅ ALL 12 SECTIONS PRESENT")
+        return True
+
+def call_claude_api_with_continuation(prompt, max_continuations=3):
+    """Call Claude API with continuation prompting to ensure complete responses"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
+        full_response = ""
+        continuation_count = 0

+        # Add continuation instruction to original prompt
+        initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
+
+        while continuation_count <= max_continuations:
+            if continuation_count == 0:
+                current_prompt = initial_prompt
+            else:
+                # For continuations, provide context about what was already covered
+                current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1} of {max_continuations}):\n\nIMPORTANT: Do not repeat what you've already written. Continue with the next section or complete any unfinished sections. If you're done, do not include <CONTINUE>. Provide the remaining analysis sections. Make sure to complete ALL 12 sections of the analysis."
+
+            headers = {
+                "Content-Type": "application/json",
+                "x-api-key": ANTHROPIC_API_KEY,
+                "anthropic-version": "2023-06-01"
+            }
+
+            data = {
+                "model": "claude-3-5-sonnet-20241022",
+                "max_tokens": 4096,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": current_prompt
+                    }
+                ]
+            }

+            response = requests.post(
+                "https://api.anthropic.com/v1/messages",
+                headers=headers,
+                json=data,
+                timeout=90
+            )
+
+            if response.status_code == 200:
+                response_json = response.json()
+                response_text = response_json['content'][0]['text']
+
+                # Log response for debugging
+                print(f"\n=== PART {continuation_count + 1} RESPONSE ===")
+                print(f"Length: {len(response_text)} characters")
+                print(f"Contains CONTINUE: {'<CONTINUE>' in response_text}")
+                print(f"First 200 chars: {response_text[:200]}...")
+                print(f"Last 200 chars: {response_text[-200:]}...")
+                print("=" * 50)
+
+                # Simple string combination - no complex processing
+                if continuation_count == 0:
+                    full_response = response_text
+                else:
+                    # Just add a newline and append the continuation
+                    full_response += "\n\n" + response_text
+
+                # Check if response indicates continuation is needed
+                needs_continuation = "<CONTINUE>" in response_text
+
+                print(f"Needs continuation: {needs_continuation}")
+                print(f"Continuation count: {continuation_count}/{max_continuations}")
+
+                # Continue if <CONTINUE> is present and we haven't reached max
+                if needs_continuation and continuation_count < max_continuations:
+                    # Remove the CONTINUE marker
+                    full_response = full_response.replace("<CONTINUE>", "")
+                    continuation_count += 1
+                    logger.info(f"Continuing analysis (attempt {continuation_count}/{max_continuations})")
+                    continue
+                else:
+                    # Clean up any remaining continuation markers
+                    full_response = full_response.replace("<CONTINUE>", "")
+                    break
+            else:
+                logger.error(f"Claude API error: {response.status_code} - {response.text}")
+                return f"❌ Claude API Error: {response.status_code}"
+
    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"
+
+    # Add completion indicator
+    if continuation_count > 0:
+        full_response += f"\n\n[Analysis completed in {continuation_count + 1} parts]"
+
+    # Log final response for debugging
+    print(f"\n=== FINAL COMPLETE RESPONSE ===")
+    print(f"Total length: {len(full_response)} characters")
+    print(f"Number of parts: {continuation_count + 1}")
+    print("=" * 50)
+
+    # Print the entire final response for debugging
+    print(f"\n=== ENTIRE FINAL RESPONSE ===")
+    print(full_response)
+    print("=" * 50)
+
+    return full_response
+
+def call_claude_api(prompt):
+    """Call Claude API directly (legacy function for backward compatibility)"""
+    return call_claude_api_with_continuation(prompt, max_continuations=0)
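The two helpers above are the seam between the new continuation logic and the rest of the app: callers pass one prompt and get back the stitched, multi-part response. A minimal usage sketch (not part of the diff; the prompt text is hypothetical) that also exercises the validator added earlier:

# Hypothetical smoke test for the continuation flow defined above.
report = call_claude_api_with_continuation("Run the full 12-section CASL analysis on: ...", max_continuations=3)
if not validate_analysis_completeness(report):
    logger.warning("Claude response is missing one or more of the 12 required sections")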
+
+def extract_audio_from_video(video_path):
+    """Extract audio from video file"""
+    if not MOVIEPY_AVAILABLE:
+        return None, "MoviePy not available for video processing"
+
+    try:
+        temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+        temp_audio_path = temp_audio.name
+        temp_audio.close()
+
+        video = mp.VideoFileClip(video_path)
+        audio = video.audio
+
+        if audio is None:
+            return None, "No audio track found in video file"
+
+        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
+        video.close()
+        audio.close()
+
+        return temp_audio_path, "Audio extracted successfully"
+
+    except Exception as e:
+        logger.error(f"Error extracting audio: {e}")
+        return None, f"Error extracting audio: {str(e)}"
+
+def perform_speaker_diarization(audio_path):
+    """Perform speaker diarization on audio file"""
+    if not DIARIZATION_AVAILABLE or not diarization_pipeline:
+        return None, "Speaker diarization not available"
+
+    try:
+        with ProgressHook() as hook:
+            diarization = diarization_pipeline(audio_path, hook=hook)
+
+        speaker_segments = []
+        for turn, _, speaker in diarization.itertracks(yield_label=True):
+            speaker_segments.append({
+                'start': turn.start,
+                'end': turn.end,
+                'speaker': speaker,
+                'duration': turn.end - turn.start
+            })
+
+        logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
+        return speaker_segments, "Diarization completed successfully"
+
+    except Exception as e:
+        logger.error(f"Error in diarization: {e}")
+        return None, f"Diarization error: {str(e)}"
+
+def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
+    """Transcribe audio with timestamps, sentiment, and metadata"""
+    if not audio_file:
+        return None, "No audio file provided"
+
+    if not SPEECHBRAIN_AVAILABLE:
+        return None, "SpeechBrain not available for transcription"
+
+    try:
+        # Check if it's a video file
+        file_extension = os.path.splitext(audio_file)[1].lower()
+        if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
+            processed_audio, status = extract_audio_from_video(audio_file)
+            if not processed_audio:
+                return None, status
+        else:
+            processed_audio = audio_file
+
+        # Perform speaker diarization if enabled
+        speaker_segments = None
+        diarization_status = ""
+        if enable_diarization:
+            speaker_segments, diarization_status = perform_speaker_diarization(processed_audio)
+
+        # Get transcription
+        transcript = asr_model.transcribe_file(processed_audio)
+
+        # Clean up temporary file if created
+        if processed_audio != audio_file and os.path.exists(processed_audio):
+            try:
+                os.unlink(processed_audio)
+            except:
+                pass
+
+        # Split into sentences and add metadata
+        sentences = re.split(r'[.!?]+', transcript)
+        sentences = [s.strip() for s in sentences if s.strip()]
+
+        rich_transcript = []
+        current_time = 0
+
+        for i, sentence in enumerate(sentences):
+            timestamp = current_time + (i * 2)
+
+            # Determine speaker
+            speaker = "UNKNOWN"
+            if speaker_segments:
+                for segment in speaker_segments:
+                    if segment['start'] <= timestamp <= segment['end']:
+                        speaker = segment['speaker']
+                        break
+
+            # Sentiment and emotion analysis
+            sentiment = {'label': 'neutral', 'score': 0.5}
+            emotion = {'label': 'neutral', 'score': 0.5}
+
+            if SENTIMENT_AVAILABLE:
+                try:
+                    sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
+                    sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else sentiment
+
+                    emotion_result = emotion_model(sentence)[0] if emotion_model else None
+                    emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else emotion
+                except:
+                    pass
+
+            # Word metrics
+            words = sentence.split()
+            word_count = len(words)
+            avg_word_length = np.mean([len(word) for word in words]) if words else 0
+            speech_rate = word_count * 30 / 60
+
+            rich_transcript.append({
+                'timestamp': timestamp,
+                'speaker': speaker,
+                'sentence': sentence,
+                'word_count': word_count,
+                'avg_word_length': round(avg_word_length, 2),
+                'speech_rate_wpm': round(speech_rate, 1),
+                'sentiment': sentiment['label'],
+                'sentiment_score': round(sentiment['score'], 3),
+                'emotion': emotion['label'],
+                'emotion_score': round(emotion['score'], 3)
+            })
+
+            current_time = timestamp
+
+        status_msg = f"Transcription completed successfully"
+        if diarization_status:
+            status_msg += f" {diarization_status}"
+
+        return rich_transcript, status_msg
+
+    except Exception as e:
+        logger.error(f"Error in transcription: {e}")
+        return None, f"Transcription error: {str(e)}"
+
+def format_rich_transcript(rich_transcript):
+    """Format rich transcript for display"""
+    if not rich_transcript:
+        return "No transcript data available"
+
+    formatted_lines = []
+    for entry in rich_transcript:
+        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
+
+        line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
+        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
+        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
+        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"
+
+        formatted_lines.append(line)
+
+    return '\n'.join(formatted_lines)
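Each formatted line packs the per-sentence metadata into bracketed fields. For reference, an illustrative output line under assumed values (speaker label, text, and scores are made up):

# Example of the line format produced by format_rich_transcript (values hypothetical):
# [00:04] *SPEAKER_00: we went to the park [Words: 5, Rate: 2.5wpm] [Sentiment: positive (0.871)] [Emotion: joy (0.912)]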
+
+def calculate_slp_metrics(rich_transcript):
+    """Calculate comprehensive SLP metrics"""
+    if not rich_transcript:
+        return {}
+
+    # Basic metrics
+    total_sentences = len(rich_transcript)
+    total_words = sum(entry['word_count'] for entry in rich_transcript)
+    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0
+
+    # Speaker analysis
+    speakers = {}
+    for entry in rich_transcript:
+        speaker = entry['speaker']
+        if speaker not in speakers:
+            speakers[speaker] = {
+                'sentences': 0,
+                'words': 0,
+                'sentiments': [],
+                'emotions': []
+            }
+        speakers[speaker]['sentences'] += 1
+        speakers[speaker]['words'] += entry['word_count']
+        speakers[speaker]['sentiments'].append(entry['sentiment'])
+        speakers[speaker]['emotions'].append(entry['emotion'])
+
+    # Word-level analysis
+    all_words = []
+    for entry in rich_transcript:
+        words = entry['sentence'].lower().split()
+        all_words.extend(words)
+
+    # Word frequency distribution
+    word_freq = {}
+    for word in all_words:
+        word_clean = re.sub(r'[^\w\s]', '', word)
+        if word_clean:
+            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1
+
+    # Vocabulary diversity (Type-Token Ratio)
+    unique_words = len(set(all_words))
+    ttr = unique_words / total_words if total_words > 0 else 0
+
+    # Speech rate analysis
+    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
+    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0
+
+    # Sentiment analysis
+    sentiment_counts = {}
+    emotion_counts = {}
+    for entry in rich_transcript:
+        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
+        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1
+
+    # Sentence complexity
+    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
+    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
+
+    # Pause analysis
+    pauses = []
+    for i in range(1, len(rich_transcript)):
+        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
+        pauses.append(pause)
+
+    avg_pause_duration = np.mean(pauses) if pauses else 0
+
+    return {
+        'total_sentences': total_sentences,
+        'total_words': total_words,
+        'total_duration_seconds': total_duration,
+        'unique_words': unique_words,
+        'type_token_ratio': round(ttr, 3),
+        'avg_sentence_length': round(avg_sentence_length, 1),
+        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
+        'avg_pause_duration': round(avg_pause_duration, 1),
+        'sentiment_distribution': sentiment_counts,
+        'emotion_distribution': emotion_counts,
+        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
+        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
+        'speakers': speakers,
+        'speaker_count': len(speakers)
+    }
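The Type-Token Ratio reported above is simply unique tokens divided by total tokens, so short samples inflate it. A tiny standalone worked example (not part of the diff):

# TTR on a toy utterance: 6 unique words out of 8 tokens.
tokens = "the dog chased the ball and the stick".lower().split()
ttr = len(set(tokens)) / len(tokens)
print(round(ttr, 3))  # 6 / 8 -> 0.75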

def process_file(file):
    """Process uploaded file"""

    except Exception as e:
        return f"Error reading file: {str(e)}"

+def analyze_transcript_content(transcript_content, age, gender, slp_notes):
+    """Analyze transcript content with comprehensive quantification and detailed citations"""
+    if not transcript_content or len(transcript_content.strip()) < 50:
+        return "Error: Please provide a longer transcript for analysis."

    # Add SLP notes to the prompt if provided
    notes_section = ""

{slp_notes.strip()}
"""

+    # Enhanced comprehensive analysis prompt with detailed quantification
    prompt = f"""
+You are a speech-language pathologist conducting a COMPREHENSIVE CASL assessment. Provide a SINGLE, DETAILED analysis that quantifies EVERY occurrence and cites specific examples.
+
Patient: {age}-year-old {gender}

TRANSCRIPT:
+{transcript_content}{notes_section}

+INSTRUCTIONS: Provide ONE comprehensive analysis covering ALL areas below. QUANTIFY EVERYTHING with exact counts and cite SPECIFIC examples from the transcript. Be thorough and detailed. COMPLETE ALL 12 SECTIONS.
+
+COMPREHENSIVE CASL ANALYSIS:
+
+1. SPEECH FACTORS (with EXACT counts and specific citations):
+
+A. Fluency Issues:
+- Count and cite EVERY filler word ("um", "uh", "like", "you know", etc.)
+- Count and cite EVERY false start/self-correction
+- Count and cite EVERY repetition of words/phrases
+- Count and cite EVERY revision/restart
+- Calculate percentage of disfluent speech
+
+B. Word Retrieval Issues:
+- Count and cite EVERY instance of circumlocution
+- Count and cite EVERY incomplete thought/abandoned utterance
+- Count and cite EVERY word-finding pause
+- Count and cite EVERY use of generic terms ("thing", "stuff", etc.)
+
+C. Grammatical Errors:
+- Count and cite EVERY grammatical error (verb tense, subject-verb agreement, etc.)
+- Count and cite EVERY syntactic error
+- Count and cite EVERY morphological error
+- Count and cite EVERY run-on sentence
+
+2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
+
+A. Lexical/Semantic Skills:
+- Count total unique words vs. total words (Type-Token Ratio)
+- List and categorize vocabulary by sophistication level
+- Identify semantic relationships demonstrated
+- Assess word retrieval strategies used
+- Evaluate semantic precision
+
+B. Syntactic Skills:
+- Count sentence types (simple, compound, complex, compound-complex)
+- Calculate average sentence length
+- Identify syntactic patterns and errors
+- Assess clause complexity and embedding
+
+C. Supralinguistic Skills:
+- Identify and cite examples of:
+  * Cause-effect relationships
+  * Inferences made
+  * Non-literal language use
+  * Problem-solving language
+  * Metalinguistic awareness
+
+3. COMPLEX SENTENCE ANALYSIS (with exact counts):
+
+A. Coordinating Conjunctions:
+- Count and cite EVERY use of: and, but, or, so, yet, for, nor
+- Analyze patterns of use
+- Assess age-appropriateness
+
+B. Subordinating Conjunctions:
+- Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who, whom, whose
+- Analyze clause complexity
+- Assess embedding depth
+
+C. Sentence Structure Analysis:
+- Count each sentence type with examples
+- Calculate complexity ratios
+- Assess developmental appropriateness
+
+4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
+
+A. Similes:
+- Count and cite EVERY simile (comparisons using "like" or "as")
+- Analyze creativity and appropriateness
+
+B. Metaphors:
+- Count and cite EVERY metaphor (direct comparisons)
+- Assess comprehension and use
+
+C. Idioms:
+- Count and cite EVERY idiom used
+- Assess comprehension and appropriate use
+
+D. Non-literal Language:
+- Count and cite EVERY instance of sarcasm, humor, irony
+- Assess comprehension level
+
+5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
+
+A. Turn-taking:
+- Analyze conversational flow
+- Count interruptions or overlaps
+- Assess reciprocity
+
+B. Topic Management:
+- Count topic shifts
+- Assess topic maintenance
+- Evaluate topic introduction
+
+C. Social Communication:
+- Assess register appropriateness
+- Evaluate politeness markers
+- Analyze social awareness
+
+6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
+
+A. Vocabulary Diversity:
+- Calculate Type-Token Ratio
+- List most frequent words
+- Assess vocabulary sophistication
+
+B. Semantic Relationships:
+- Count and cite examples of:
+  * Synonyms/antonyms
+  * Categories/hierarchies
+  * Part-whole relationships
+  * Cause-effect vocabulary
+
+7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
+
+A. Morphological Markers:
+- Count and cite use of:
+  * Plurals (-s, -es)
+  * Possessives
+  * Verb tenses
+  * Derivational morphemes
+
+B. Phonological Patterns:
+- Identify speech sound errors
+- Count phonological processes
+- Assess syllable structure
+
+8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
+
+A. Working Memory:
+- Assess sentence length complexity
+- Analyze information retention
+- Evaluate processing demands
+
+B. Processing Speed:
+- Analyze speech rate
+- Assess response time
+- Evaluate efficiency
+
+C. Executive Function:
+- Assess planning and organization
+- Evaluate self-monitoring
+- Analyze cognitive flexibility
+
+9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
+
+A. Speech Rate:
+- Calculate words per minute
+- Analyze rate variability
+- Assess naturalness
+
+B. Pause Patterns:
+- Count and analyze pauses
+- Assess pause function
+- Evaluate rhythm
+
+10. QUANTITATIVE METRICS:
+
+- Total words: [count]
+- Total sentences: [count]
+- Average sentence length: [calculation]
+- Type-Token Ratio: [calculation]
+- Disfluency rate: [percentage]
+- Error rate: [percentage]
+- Vocabulary diversity score: [calculation]
+
+11. CLINICAL IMPLICATIONS:
+
+A. Strengths:
+- List specific strengths with evidence
+- Identify areas of competence
+
+B. Areas of Need:
+- Prioritize intervention targets
+- Provide specific examples
+
+C. Treatment Recommendations:
+- List 5-7 specific intervention strategies
+- Include intensity and frequency recommendations
+- Address all identified areas of need
+
+12. PROGNOSIS AND SUMMARY:
+
+- Overall communication profile
+- Developmental appropriateness
+- Impact on academic/social functioning
+- Expected progress with intervention
+
+FORMAT REQUIREMENTS:
+- Use bullet points for organization
+- Include exact counts for everything
+- Cite specific quotes from transcript
+- Use clear headings and subheadings
+- Provide percentages and ratios where applicable
+- Be comprehensive but organized
+- Focus on clinical relevance
+- COMPLETE ALL 12 SECTIONS
+
+SECTION CHECKLIST - COMPLETE ALL:
+□ 1. SPEECH FACTORS (A, B, C)
+□ 2. LANGUAGE SKILLS ASSESSMENT (A, B, C)
+□ 3. COMPLEX SENTENCE ANALYSIS (A, B, C)
+□ 4. FIGURATIVE LANGUAGE ANALYSIS (A, B, C, D)
+□ 5. PRAGMATIC LANGUAGE ASSESSMENT (A, B, C)
+□ 6. VOCABULARY AND SEMANTIC ANALYSIS (A, B)
+□ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (A, B)
+□ 8. COGNITIVE-LINGUISTIC FACTORS (A, B, C)
+□ 9. FLUENCY AND RHYTHM ANALYSIS (A, B)
+□ 10. QUANTITATIVE METRICS
+□ 11. CLINICAL IMPLICATIONS (A, B, C)
+□ 12. PROGNOSIS AND SUMMARY
+
+CRITICAL: If you cannot complete all 12 sections in one response, end with <CONTINUE> and continue with the remaining sections. Do not skip any sections. Use the checklist to ensure all sections are completed.
"""

    # Get analysis from Claude API
+    result = call_claude_api_with_continuation(prompt, max_continuations=5)
    return result

+def analyze_transcript(file, age, gender, slp_notes):
+    """Analyze transcript from file upload"""
+    if file is None:
+        return "Please upload a transcript file first."
+
+    # Get transcript content
+    transcript = process_file(file)
+    if transcript.startswith("Error") or transcript.startswith("Please"):
+        return transcript
+
+    return analyze_transcript_content(transcript, age, gender, slp_notes)
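Taken together, analyze_transcript_content drives the prompt above while analyze_transcript is just the file-upload wrapper around it. A quick, self-contained way to exercise the text path (the sample transcript is invented and only needs to clear the 50-character minimum):

# Hypothetical smoke test for the direct-text entry point defined above.
sample = "C: um I went to the park and uh we we played on the slide. E: what happened next?"
print(analyze_transcript_content(sample, 8, "male", ""))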
+
def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
+    """Perform targeted analysis based on custom questions with comprehensive detail"""
    if not transcript or not transcript.strip():
        return "Please provide a transcript first."

{slp_notes.strip()}
"""

+    # Enhanced targeted analysis prompt with comprehensive detail
    prompt = f"""
+You are a speech-language pathologist conducting a DETAILED targeted analysis of a speech transcript.

Patient: {age}-year-old {gender}

SPECIFIC QUESTION FOR ANALYSIS:
{custom_question.strip()}

+INSTRUCTIONS: Provide a COMPREHENSIVE, DETAILED analysis that directly addresses this specific question. Include:
+- EXACT counts and quantification
+- SPECIFIC citations from the transcript
+- DETAILED examples for every observation
+- PERCENTAGES and ratios where applicable
+- CLINICAL significance of findings
+- AGE-APPROPRIATE assessment
+
+ANALYSIS REQUIREMENTS:

+1. QUANTIFICATION:
+- Count every relevant occurrence
+- Calculate percentages and ratios
+- Provide specific numbers for all observations

+2. EVIDENCE:
+- Cite exact quotes from the transcript
+- Provide line-by-line examples
+- Include specific timestamps or context
+
+3. DETAILED EXAMPLES:
+- Give multiple examples for each pattern
+- Show variations in the pattern
+- Demonstrate the range of severity
+
+4. CLINICAL ASSESSMENT:
+- Assess severity level
+- Compare to age expectations
+- Identify clinical significance
+- Suggest intervention implications
+
+5. COMPREHENSIVE COVERAGE:
+- Address all aspects of the question
+- Consider related language areas
+- Include both strengths and weaknesses
+- Provide developmental context
+
+ANALYSIS STRUCTURE:
+
+A. DIRECT ANSWER TO QUESTION:
+- Provide a clear, direct answer
+- Include quantification and severity assessment
+
+B. DETAILED EVIDENCE:
+- List every relevant example with exact quotes
+- Provide counts and percentages
+- Show patterns and variations
+
+C. PATTERN ANALYSIS:
+- Identify underlying patterns
+- Analyze frequency and consistency
+- Assess variability across the transcript
+
+D. DEVELOPMENTAL ASSESSMENT:
+- Compare to age-appropriate expectations
+- Identify developmental level
+- Assess progress and challenges
+
+E. CLINICAL IMPLICATIONS:
+- Impact on communication
+- Effect on academic/social functioning
+- Priority for intervention
+
+F. INTERVENTION CONSIDERATIONS:
+- Specific strategies to address the issue
+- Intensity and frequency recommendations
+- Expected outcomes and timeline
+
+FORMAT REQUIREMENTS:
+- Use clear headings and subheadings
+- Include bullet points for organization
+- Provide exact counts and percentages
+- Cite specific quotes with context
+- Be thorough and comprehensive
+- Focus on clinical relevance and utility
+
+Remember: This should be a DETAILED, COMPREHENSIVE analysis that thoroughly addresses the specific question with quantification, evidence, and clinical implications.
"""

    # Get targeted analysis from Claude API
+    result = call_claude_api_with_continuation(prompt, max_continuations=3)
    return result

# Create enhanced interface with tabs
with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
+    gr.Markdown("Upload a speech transcript, paste text, or transcribe audio/video and get instant CASL assessment results with targeted analysis options.")

    # Store transcript globally
    transcript_state = gr.State("")

    with gr.Tab("Basic Analysis"):
        with gr.Row():
            with gr.Column():
+                gr.Markdown("### Input Options")

+                with gr.Tabs():
+                    with gr.Tab("📁 File Upload"):
+                        file_upload = gr.File(
+                            label="Upload Transcript File",
+                            file_types=[".txt", ".cha"]
+                        )
+
+                        analyze_file_btn = gr.Button(
+                            "🔍 Analyze File",
+                            variant="primary"
+                        )
+
+                    with gr.Tab("📝 Text Input"):
+                        text_input = gr.Textbox(
+                            label="Paste Transcript Here",
+                            placeholder="Paste your transcript text here...",
+                            lines=10
+                        )
+
+                        analyze_text_btn = gr.Button(
+                            "🔍 Analyze Text",
+                            variant="primary"
+                        )
+
+                    with gr.Tab("🎤 Audio/Video Transcription"):
+                        audio_input = gr.File(
+                            label="Upload Audio/Video File",
+                            file_types=["audio", "video"]
+                        )
+
+                        transcribe_btn = gr.Button(
+                            "🎤 Transcribe & Analyze",
+                            variant="primary"
+                        )
+
+                        transcription_status = gr.Markdown("")
+
+                gr.Markdown("### Patient Information")

                age = gr.Number(
                    label="Patient Age",

                    placeholder="Enter any additional clinical observations, context, or notes...",
                    lines=3
                )

            with gr.Column():
                gr.Markdown("### Analysis Results")

                    lines=25,
                    max_lines=30
                )
+
+                analysis_progress = gr.Markdown("")

    # Tab 2: Targeted Analysis
    with gr.Tab("🎯 Targeted Analysis"):

                        "What narrative organization skills are evident?",
                        "What specific intervention targets would you recommend?",
                        "How does this patient's language compare to typical development?",
+                        "What evidence suggests cognitive-linguistic strengths/weaknesses?",
+                        "Analyze the use of conjunctions and complex sentences",
+                        "Identify and analyze figurative language use"
                    ],
                    label="Question Templates (Optional)",
                    value="Select a template or write your own..."

                    lines=25,
                    max_lines=30
                )
+
+                targeted_progress = gr.Markdown("")

    # Tab 3: Quick Questions
    with gr.Tab("⚡ Quick Questions"):

                        "Narrative structure",
                        "Vocabulary level",
                        "Sentence complexity",
+                        "Speech rate patterns",
+                        "Complex sentence analysis",
+                        "Figurative language use",
+                        "Morphological markers",
+                        "Phonological patterns",
+                        "Turn-taking skills",
+                        "Topic maintenance",
+                        "Social communication",
+                        "Cognitive-linguistic factors",
+                        "Working memory demands",
+                        "Executive function skills",
+                        "Metalinguistic awareness",
+                        "Academic language use"
                    ],
                    label="Select questions to analyze:",
                    value=[]

                    lines=25,
                    max_lines=30
                )
+
+                quick_progress = gr.Markdown("")
+
+    # Tab 4: Advanced Transcription
+    with gr.Tab("🎤 Advanced Transcription"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Audio/Video Upload")
+                gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
+
+                transcription_file_input = gr.File(
+                    label="Upload Audio or Video File",
+                    file_types=["audio", "video"]
+                )
+
+                enable_diarization = gr.Checkbox(
+                    label="Enable Speaker Diarization",
+                    value=True,
+                    info="Identify different speakers in the audio"
+                )
+
+                transcribe_advanced_btn = gr.Button(
+                    "🎤 Transcribe with Metadata",
+                    variant="primary",
+                    size="lg"
+                )
+
+                transcription_status = gr.Markdown("")
+
+            with gr.Column(scale=2):
+                gr.Markdown("### Rich Transcript with Metadata")
+
+                rich_transcript_display = gr.Textbox(
+                    label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
+                    lines=15,
+                    max_lines=20
+                )
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Speech Metrics")
+
+                transcription_metrics_display = gr.Textbox(
+                    label="SLP Metrics",
+                    lines=10,
+                    max_lines=15
+                )
+
+            with gr.Column():
+                gr.Markdown("### Word Frequency")
+
+                transcription_word_freq_display = gr.Dataframe(
+                    headers=["Word", "Frequency"],
+                    label="Most Frequent Words",
+                    interactive=False
+                )
# Event handlers
|
| 1135 |
+
def on_analyze_file(file, age_val, gender_val, notes):
|
| 1136 |
+
"""Handle file analysis"""
|
| 1137 |
result = analyze_transcript(file, age_val, gender_val, notes)
|
| 1138 |
transcript = process_file(file) if file else ""
|
| 1139 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1140 |
+
return result, transcript, progress_msg
|
| 1141 |
+
|
| 1142 |
+
def on_analyze_text(text, age_val, gender_val, notes):
|
| 1143 |
+
"""Handle text analysis"""
|
| 1144 |
+
result = analyze_transcript_content(text, age_val, gender_val, notes)
|
| 1145 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1146 |
+
return result, text, progress_msg
|
| 1147 |
+
|
| 1148 |
+
def on_transcribe_and_analyze(audio_file, age_val, gender_val, notes):
|
| 1149 |
+
"""Handle transcription and analysis"""
|
| 1150 |
+
if not audio_file:
|
| 1151 |
+
return "Please upload an audio/video file first.", "", "No file provided"
|
| 1152 |
+
|
| 1153 |
+
transcript, status = transcribe_audio(audio_file.name)
|
| 1154 |
+
if transcript:
|
| 1155 |
+
result = analyze_transcript_content(transcript, age_val, gender_val, notes)
|
| 1156 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1157 |
+
return result, transcript, status
|
| 1158 |
+
else:
|
| 1159 |
+
return f"Transcription failed: {status}", "", status
|
| 1160 |
+
|
| 1161 |
+
def on_transcribe_advanced(audio_file, enable_diarization):
|
| 1162 |
+
"""Handle advanced transcription"""
|
| 1163 |
+
if not audio_file:
|
| 1164 |
+
return "Please upload an audio/video file first.", "", "No file provided"
|
| 1165 |
+
|
| 1166 |
+
transcript, status = transcribe_audio_with_metadata(audio_file.name, enable_diarization)
|
| 1167 |
+
if transcript:
|
| 1168 |
+
metrics = calculate_slp_metrics(transcript)
|
| 1169 |
+
word_freq_data = metrics.get('word_frequency', {})
|
| 1170 |
+
return transcript, status, metrics, word_freq_data
|
| 1171 |
+
else:
|
| 1172 |
+
return f"Transcription failed: {status}", "", {}, {}
|
| 1173 |
|
| 1174 |
def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
|
| 1175 |
"""Handle targeted analysis"""
|
| 1176 |
+
result = targeted_analysis(transcript, question, age_val, gender_val, notes)
|
| 1177 |
+
progress_msg = "β
Targeted analysis completed" if "[Analysis completed in" in result else "π Targeted analysis in progress..."
|
| 1178 |
+
return result, progress_msg
|
| 1179 |
|
| 1180 |
def on_question_template_change(template):
|
| 1181 |
"""Handle question template selection"""
|
|
|
|
| 1186 |
def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
|
| 1187 |
"""Handle quick analysis with multiple questions"""
|
| 1188 |
if not transcript or not transcript.strip():
|
| 1189 |
+
return "Please provide a transcript first.", "β No transcript provided"
|
| 1190 |
|
| 1191 |
if not questions:
|
| 1192 |
+
return "Please select at least one question to analyze.", "β No questions selected"
|
| 1193 |
|
| 1194 |
# Add SLP notes to the prompt if provided
|
| 1195 |
notes_section = ""
|
|
|
|
| 1200 |
{notes.strip()}
|
| 1201 |
"""
|

        # Create enhanced quick analysis prompt with comprehensive SLP analysis
        questions_text = "\n".join([f"- {q}" for q in questions])
        prompt = f"""
You are a speech-language pathologist conducting a COMPREHENSIVE quick analysis of a speech transcript.

Patient: {age_val}-year-old {gender_val}

TRANSCRIPT:
{transcript}{notes_section}

Please provide a DETAILED analysis addressing these specific areas:
{questions_text}

ANALYSIS REQUIREMENTS:

For each selected area, provide:
1. EXACT COUNTS and quantification
2. SPECIFIC EXAMPLES with exact quotes from transcript
3. PERCENTAGES and ratios where applicable
4. SEVERITY assessment
5. AGE-APPROPRIATE evaluation
6. CLINICAL significance
7. INTERVENTION considerations

DETAILED ANALYSIS GUIDELINES:

For SYNTAX and COMPLEX SENTENCE analysis:
- Count and cite EVERY coordinating conjunction (and, but, or, so, yet, for, nor)
- Count and cite EVERY subordinating conjunction (because, although, while, since, if, when, where, that, which, who, whom, whose)
- Identify and count each sentence type (simple, compound, complex, compound-complex)
- Calculate complexity ratios and percentages
- Assess embedding depth and clause complexity
- Provide specific examples for each pattern

For FIGURATIVE LANGUAGE analysis:
- Count and cite EVERY simile (comparisons using "like" or "as")
- Count and cite EVERY metaphor (direct comparisons without "like" or "as")
- Count and cite EVERY idiom and non-literal expression
- Assess creativity and age-appropriate use
- Provide specific examples with context

For PRAGMATIC and SOCIAL COMMUNICATION:
- Count and analyze turn-taking patterns
- Assess topic maintenance and shifting abilities
- Evaluate social appropriateness and register use
- Count interruptions or conversational breakdowns
- Analyze non-literal language comprehension
- Provide specific examples of pragmatic behaviors

For VOCABULARY and SEMANTIC analysis:
- Calculate Type-Token Ratio
- Count and categorize vocabulary by sophistication level
- Analyze word retrieval strategies and circumlocution
- Assess semantic precision and relationships
- Count academic vs. everyday vocabulary use
- Provide specific examples of vocabulary patterns

For MORPHOLOGICAL and PHONOLOGICAL analysis:
- Count and cite EVERY morphological marker (plurals, possessives, verb tenses)
- Count and cite EVERY derivational morpheme (prefixes, suffixes)
- Identify and count phonological patterns and errors
- Assess syllable structure and stress patterns
- Provide specific examples of morphological use

For COGNITIVE-LINGUISTIC factors:
- Assess working memory demands in language production
- Analyze processing speed and efficiency
- Count and evaluate attention and focus patterns
- Assess executive function skills and self-monitoring
- Provide specific examples of cognitive-linguistic patterns

For FLUENCY and SPEECH RATE:
- Count and cite EVERY disfluency (fillers, repetitions, revisions)
- Calculate speech rate and variability
- Analyze pause patterns and their function
- Assess overall speech naturalness
- Provide specific examples of fluency patterns

For GRAMMAR and LANGUAGE ERRORS:
- Count and cite EVERY grammatical error
- Count and cite EVERY syntactic error
- Count and cite EVERY morphological error
- Calculate error rates and percentages
- Provide specific examples of error patterns

For WORD-FINDING and RETRIEVAL:
- Count and cite EVERY instance of circumlocution
- Count and cite EVERY incomplete thought
- Count and cite EVERY word-finding pause
- Analyze word retrieval strategies used
- Provide specific examples of retrieval patterns

For NARRATIVE and DISCOURSE:
- Assess narrative organization and coherence
- Count topic shifts and maintenance
- Analyze discourse markers and transitions
- Evaluate story structure and completeness
- Provide specific examples of narrative patterns

FORMAT REQUIREMENTS:
- Use clear headings for each area analyzed
- Include bullet points for organization
- Provide exact counts and percentages
- Cite specific quotes from transcript
- Include severity assessments
- Provide clinical implications
- Be comprehensive but focused on selected areas

Remember: This should be a DETAILED analysis that thoroughly addresses each selected area with quantification, evidence, and clinical relevance.
"""
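
        # Illustrative note (not part of the original handler): questions_text above is
        # built from the hypothetical checkbox selections passed in as `questions`, e.g.
        #   >>> "\n".join([f"- {q}" for q in ["Analyze syntax", "Count disfluencies"]])
        #   '- Analyze syntax\n- Count disfluencies'
        # so each selected question becomes one bullet in the "specific areas" list of the prompt.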

        result = call_claude_api_with_continuation(prompt, max_continuations=2)
        progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
        return result, progress_msg

    # Connect event handlers
    analyze_file_btn.click(
        on_analyze_file,
        inputs=[file_upload, age, gender, slp_notes],
        outputs=[output, transcript_input, analysis_progress]
    )

    analyze_text_btn.click(
        on_analyze_text,
        inputs=[text_input, age, gender, slp_notes],
        outputs=[output, transcript_input, analysis_progress]
    )

    transcribe_btn.click(
        on_transcribe_and_analyze,
        inputs=[audio_input, age, gender, slp_notes],
        outputs=[output, transcript_input, transcription_status]
    )

    transcribe_advanced_btn.click(
        on_transcribe_advanced,
        inputs=[transcription_file_input, enable_diarization],
        outputs=[rich_transcript_display, transcription_status, transcription_metrics_display, transcription_word_freq_display]
    )

    targeted_analyze_btn.click(
        on_targeted_analyze,
        inputs=[transcript_input, custom_question, age, gender, slp_notes],
        outputs=[targeted_output, targeted_progress]
    )

    question_templates.change(

    quick_analyze_btn.click(
        on_quick_analyze,
        inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
        outputs=[quick_output, quick_progress]
    )

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    print("📊 Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis, Advanced Transcription")
    print("🎤 Transcription: Audio/Video support with speaker diarization, sentiment, and emotion analysis")
    print("🔍 Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")

    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
    else:
        print("✅ Claude API configured")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - transcription will be disabled")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain available for transcription")

    if not MOVIEPY_AVAILABLE:
        print("⚠️ MoviePy not available - video processing will be limited")
        print("   Install with: pip install moviepy")
    else:
        print("✅ MoviePy available for video processing")

    if not DIARIZATION_AVAILABLE:
        print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
        print("   Install with: pip install pyannote.audio")
        print("   Note: Requires HuggingFace token for model access")
    else:
        print("✅ Pyannote.audio available for speaker diarization")

    if not SENTIMENT_AVAILABLE:
        print("⚠️ Transformers not available - sentiment/emotion analysis will be disabled")
        print("   Install with: pip install transformers torch")
    else:
        print("✅ Transformers available for sentiment and emotion analysis")

    app.launch(show_api=False)
transcription_demo.py
ADDED
@@ -0,0 +1,826 @@
import gradio as gr
import json
import os
import logging
import re
import numpy as np
import pandas as pd
from datetime import datetime
import time
import tempfile
from typing import Dict, List, Tuple, Optional
import requests

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Claude API key (the uploaded file references ANTHROPIC_API_KEY later but never
# defined it, so it is read from the environment here to avoid a NameError)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Try to import video processing libraries
try:
    import moviepy.editor as mp
    MOVIEPY_AVAILABLE = True
    logger.info("MoviePy available for video processing")
except ImportError as e:
    logger.warning(f"MoviePy not available: {e}")
    MOVIEPY_AVAILABLE = False

# Try to import speaker diarization
try:
    from pyannote.audio import Pipeline
    from pyannote.audio.pipelines.utils.hook import ProgressHook
    DIARIZATION_AVAILABLE = True
    logger.info("Pyannote.audio available for speaker diarization")
except ImportError as e:
    logger.warning(f"Pyannote.audio not available: {e}")
    DIARIZATION_AVAILABLE = False

# Try to import SpeechBrain and HuggingFace components
try:
    from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    import torch
    SPEECHBRAIN_AVAILABLE = True
    HUGGINGFACE_AVAILABLE = True
    logger.info("SpeechBrain and HuggingFace models available")
except ImportError as e:
    logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
    SPEECHBRAIN_AVAILABLE = False
    HUGGINGFACE_AVAILABLE = False

# Initialize models if available
asr_model = None
vad_model = None
sentiment_model = None
emotion_model = None
diarization_pipeline = None

if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
    try:
        # Speech-to-text model
        asr_model = EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-crdnn-rnnlm-librispeech",
            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
        )

        # Voice Activity Detection
        vad_model = VAD.from_hparams(
            source="speechbrain/vad-crdnn-libriparty",
            savedir="pretrained_models/vad-crdnn-libriparty"
        )

        # Sentiment analysis
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            top_k=None
        )

        # Emotion analysis
        emotion_model = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=None
        )

        logger.info("All models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        SPEECHBRAIN_AVAILABLE = False
        HUGGINGFACE_AVAILABLE = False

# Initialize diarization pipeline
if DIARIZATION_AVAILABLE:
    try:
        # Note: You'll need to get a HuggingFace token and accept the model terms
        # at https://huggingface.co/pyannote/speaker-diarization
        HF_TOKEN = os.getenv("HF_TOKEN", "")
        if HF_TOKEN:
            diarization_pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization@2.1",
                use_auth_token=HF_TOKEN
            )
            logger.info("Speaker diarization pipeline loaded")
        else:
            logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
    except Exception as e:
        logger.error(f"Error loading diarization pipeline: {e}")
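
# Minimal sketch of enabling diarization outside the app (assumes a valid HuggingFace
# token with access to the gated pyannote model; "hf_xxx" and "sample.wav" are
# placeholders, not real values):
#   os.environ["HF_TOKEN"] = "hf_xxx"      # or export HF_TOKEN before launching
#   pipeline = Pipeline.from_pretrained(
#       "pyannote/speaker-diarization@2.1",
#       use_auth_token=os.environ["HF_TOKEN"],
#   )
#   diarization = pipeline("sample.wav")    # yields speaker turns with start/end times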

def extract_audio_from_video(video_path):
    """Extract audio from video file (MP4, etc.)"""
    if not MOVIEPY_AVAILABLE:
        return None, "MoviePy not available for video processing"

    try:
        # Create temporary file for audio
        temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_audio_path = temp_audio.name
        temp_audio.close()

        # Load video and extract audio
        video = mp.VideoFileClip(video_path)
        audio = video.audio

        if audio is None:
            return None, "No audio track found in video file"

        # Export audio to temporary WAV file
        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)

        # Close video to free memory
        video.close()
        audio.close()

        logger.info(f"Audio extracted from video: {temp_audio_path}")
        return temp_audio_path, "Audio extracted successfully"

    except Exception as e:
        logger.error(f"Error extracting audio from video: {e}")
        return None, f"Error extracting audio: {str(e)}"

def perform_speaker_diarization(audio_path):
    """Perform speaker diarization on audio file"""
    if not DIARIZATION_AVAILABLE or not diarization_pipeline:
        return None, "Speaker diarization not available"

    try:
        # Perform diarization
        with ProgressHook() as hook:
            diarization = diarization_pipeline(audio_path, hook=hook)

        # Extract speaker segments
        speaker_segments = []
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            speaker_segments.append({
                'start': turn.start,
                'end': turn.end,
                'speaker': speaker,
                'duration': turn.end - turn.start
            })

        logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
        return speaker_segments, "Diarization completed successfully"

    except Exception as e:
        logger.error(f"Error in diarization: {e}")
        return None, f"Diarization error: {str(e)}"

def process_audio_file(file_path):
    """Process audio file, extracting from video if needed"""
    if not file_path:
        return None, "No file provided"

    file_extension = os.path.splitext(file_path)[1].lower()

    # If it's a video file, extract audio first
    if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
        logger.info(f"Processing video file: {file_path}")
        audio_path, status = extract_audio_from_video(file_path)
        if audio_path:
            return audio_path, f"Video processed: {status}"
        else:
            return None, status

    # If it's already an audio file, use it directly
    elif file_extension in ['.wav', '.mp3', '.m4a', '.flac', '.ogg']:
        logger.info(f"Processing audio file: {file_path}")
        return file_path, "Audio file ready for transcription"

    else:
        return None, f"Unsupported file format: {file_extension}"
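
# Usage sketch for process_audio_file (file names and the temp path are hypothetical
# examples, shown only to illustrate the three return branches above):
#   >>> process_audio_file("session.mp4")   # video: audio extracted to a temp WAV first
#   ('/tmp/tmpabc123.wav', 'Video processed: Audio extracted successfully')
#   >>> process_audio_file("session.wav")   # audio: used directly
#   ('session.wav', 'Audio file ready for transcription')
#   >>> process_audio_file("notes.txt")
#   (None, 'Unsupported file format: .txt')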

def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
    """Transcribe audio with timestamps, sentiment, and metadata"""
    if not audio_file:
        return None, "No audio file provided"

    if not SPEECHBRAIN_AVAILABLE:
        return None, "SpeechBrain not available - using demo transcription"

    try:
        # Process the file (extract audio if it's a video)
        processed_audio_path, process_status = process_audio_file(audio_file)

        if not processed_audio_path:
            return None, process_status

        # Perform speaker diarization if enabled
        speaker_segments = None
        diarization_status = ""
        if enable_diarization:
            speaker_segments, diarization_status = perform_speaker_diarization(processed_audio_path)

        # Get transcription with timestamps
        transcript = asr_model.transcribe_file(processed_audio_path)

        # Clean up temporary audio file if it was created from video
        if processed_audio_path != audio_file and os.path.exists(processed_audio_path):
            try:
                os.unlink(processed_audio_path)
                logger.info("Temporary audio file cleaned up")
            except Exception as e:
                logger.warning(f"Could not clean up temporary file: {e}")

        # Split into sentences for analysis
        sentences = re.split(r'[.!?]+', transcript)
        sentences = [s.strip() for s in sentences if s.strip()]

        # Analyze each sentence
        rich_transcript = []
        current_time = 0

        for i, sentence in enumerate(sentences):
            # Estimate timestamp (rough approximation)
            timestamp = current_time + (i * 2)  # Assume ~2 seconds per sentence

            # Determine speaker for this timestamp
            speaker = "UNKNOWN"
            if speaker_segments:
                for segment in speaker_segments:
                    if segment['start'] <= timestamp <= segment['end']:
                        speaker = segment['speaker']
                        break

            # Sentiment analysis
            sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
            sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}

            # Emotion analysis
            emotion_result = emotion_model(sentence)[0] if emotion_model else None
            emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}

            # Word count and complexity metrics
            words = sentence.split()
            word_count = len(words)
            avg_word_length = np.mean([len(word) for word in words]) if words else 0

            # Calculate speech rate (words per minute estimate)
            speech_rate = word_count * 30 / 60  # Rough estimate

            rich_transcript.append({
                'timestamp': timestamp,
                'speaker': speaker,
                'sentence': sentence,
                'word_count': word_count,
                'avg_word_length': round(avg_word_length, 2),
                'speech_rate_wpm': round(speech_rate, 1),
                'sentiment': sentiment['label'],
                'sentiment_score': round(sentiment['score'], 3),
                'emotion': emotion['label'],
                'emotion_score': round(emotion['score'], 3)
            })

            current_time = timestamp

        status_msg = f"Transcription completed successfully. {process_status}"
        if diarization_status:
            status_msg += f" {diarization_status}"

        return rich_transcript, status_msg

    except Exception as e:
        logger.error(f"Error in transcription: {e}")
        return None, f"Transcription error: {str(e)}"
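
# Shape of one rich_transcript entry produced above (the values are illustrative,
# not taken from a real recording):
#   {
#       'timestamp': 4,                  # estimated seconds from the start
#       'speaker': 'SPEAKER_00',         # from diarization, else 'UNKNOWN'
#       'sentence': 'I WENT TO THE PARK',
#       'word_count': 5,
#       'avg_word_length': 2.8,
#       'speech_rate_wpm': 2.5,          # word_count * 30 / 60, a rough estimate
#       'sentiment': 'neutral', 'sentiment_score': 0.5,
#       'emotion': 'joy', 'emotion_score': 0.91,
#   }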

def format_rich_transcript(rich_transcript):
    """Format rich transcript for display"""
    if not rich_transcript:
        return "No transcript data available"

    formatted_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"

        line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"

        formatted_lines.append(line)

    return '\n'.join(formatted_lines)
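
# With the illustrative entry sketched earlier, format_rich_transcript emits one line like:
#   [00:04] *SPEAKER_00: I WENT TO THE PARK [Words: 5, Rate: 2.5wpm] [Sentiment: neutral (0.5)] [Emotion: joy (0.91)]
# This exact layout is what on_analyze in the interface later parses back with regular expressions.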

def calculate_slp_metrics(rich_transcript):
    """Calculate comprehensive SLP metrics"""
    if not rich_transcript:
        return {}

    # Basic metrics
    total_sentences = len(rich_transcript)
    total_words = sum(entry['word_count'] for entry in rich_transcript)
    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0

    # Speaker analysis
    speakers = {}
    for entry in rich_transcript:
        speaker = entry['speaker']
        if speaker not in speakers:
            speakers[speaker] = {
                'sentences': 0,
                'words': 0,
                'sentiments': [],
                'emotions': []
            }
        speakers[speaker]['sentences'] += 1
        speakers[speaker]['words'] += entry['word_count']
        speakers[speaker]['sentiments'].append(entry['sentiment'])
        speakers[speaker]['emotions'].append(entry['emotion'])

    # Word-level analysis
    all_words = []
    for entry in rich_transcript:
        words = entry['sentence'].lower().split()
        all_words.extend(words)

    # Word frequency distribution
    word_freq = {}
    for word in all_words:
        word_clean = re.sub(r'[^\w\s]', '', word)
        if word_clean:
            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1

    # Vocabulary diversity (Type-Token Ratio)
    unique_words = len(set(all_words))
    ttr = unique_words / total_words if total_words > 0 else 0

    # Speech rate analysis
    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0

    # Sentiment analysis
    sentiment_counts = {}
    emotion_counts = {}
    for entry in rich_transcript:
        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1

    # Sentence complexity
    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0

    # Pause analysis (gaps between sentences)
    pauses = []
    for i in range(1, len(rich_transcript)):
        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
        pauses.append(pause)

    avg_pause_duration = np.mean(pauses) if pauses else 0

    return {
        'total_sentences': total_sentences,
        'total_words': total_words,
        'total_duration_seconds': total_duration,
        'unique_words': unique_words,
        'type_token_ratio': round(ttr, 3),
        'avg_sentence_length': round(avg_sentence_length, 1),
        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
        'avg_pause_duration': round(avg_pause_duration, 1),
        'sentiment_distribution': sentiment_counts,
        'emotion_distribution': emotion_counts,
        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
        'speakers': speakers,
        'speaker_count': len(speakers)
    }
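
# Worked example of the Type-Token Ratio computed above (toy data, not a real
# transcript): for the 8 tokens "the dog saw the cat and the bird" there are 6
# unique words, so TTR = 6 / 8 = 0.75. Values closer to 1.0 indicate more diverse
# vocabulary; values near 0 indicate heavy repetition of the same words.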

def generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
    """Generate comprehensive analysis prompt using rich transcript data"""

    # Format rich transcript with timestamps and metadata
    transcript_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
        transcript_lines.append(f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}")

    transcript_text = '\n'.join(transcript_lines)

    # Format metrics for analysis
    metrics_text = f"""
TRANSCRIPT METRICS:
• Total sentences: {metrics['total_sentences']}
• Total words: {metrics['total_words']}
• Duration: {metrics['total_duration_seconds']:.1f} seconds
• Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
• Average sentence length: {metrics['avg_sentence_length']} words
• Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
• Speech rate variability: {metrics['speech_rate_variability']} wpm
• Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
• Number of speakers: {metrics['speaker_count']}

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

SPEAKER ANALYSIS:"""

    for speaker, data in metrics['speakers'].items():
        metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"

    metrics_text += f"\n\nMOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}"

    notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""

    prompt = f"""
You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich temporal and affective metadata.

PATIENT: {age}-year-old {gender}

{metrics_text}

TRANSCRIPT WITH TIMESTAMPS AND METADATA:
{transcript_text}{notes_section}

Please provide a comprehensive analysis including:

1. TEMPORAL SPEECH PATTERNS:
- Analyze speech rate changes over time using timestamps
- Identify patterns in pause duration and frequency
- Assess temporal consistency in speech production
- Note any significant changes in speech patterns throughout the session

2. AFFECTIVE AND EMOTIONAL ANALYSIS:
- Analyze sentiment patterns throughout the transcript using timestamp data
- Identify emotional shifts and their potential causes
- Assess emotional regulation and expression
- Note any correlations between emotional state and speech characteristics

3. SPEAKER-SPECIFIC ANALYSIS (if multiple speakers):
- Compare speech patterns between speakers
- Analyze turn-taking patterns and timing
- Assess interaction dynamics
- Note speaker-specific emotional and sentiment patterns

4. SPEECH FLUENCY AND RATE ANALYSIS:
- Analyze speech rate variability using the provided metrics
- Identify periods of fluent vs. dysfluent speech
- Assess the impact of emotional state on speech rate
- Note any temporal patterns in speech rate changes

5. LANGUAGE COMPLEXITY ASSESSMENT:
- Analyze vocabulary diversity using Type-Token Ratio
- Assess sentence complexity and variety
- Identify patterns in word frequency and usage
- Note any temporal changes in language complexity

6. COMPLEX SENTENCE ANALYSIS:
- Count and analyze use of coordinating conjunctions (and, but, or, so, yet, for, nor)
- Count and analyze use of subordinating conjunctions (because, although, while, since, if, when, where, that, which, who, whom, whose)
- Identify compound, complex, and compound-complex sentences
- Assess sentence variety and complexity level for age

7. FIGURATIVE LANGUAGE ANALYSIS:
- Identify and count similes (comparisons using "like" or "as")
- Identify and count metaphors (direct comparisons without "like" or "as")
- Identify and count idioms (common expressions with non-literal meanings)
- Assess figurative language comprehension and use for age

8. CLINICAL IMPLICATIONS:
- Specific intervention targets based on temporal patterns
- Recommendations for emotional regulation if needed
- Suggestions for improving speech rate consistency
- Strategies for enhancing language complexity
- Age-appropriate development recommendations

9. COMPREHENSIVE SUMMARY:
- Overall communication profile with temporal considerations
- Assessment of emotional and affective communication
- Developmental appropriateness considering age
- Prognosis and treatment priorities

Use the temporal data, sentiment scores, and emotional labels to provide insights that would not be possible with a simple transcript. Reference specific timestamps and emotional states when making observations.
"""

    return prompt

def analyze_rich_transcript_with_llm(rich_transcript, age, gender, slp_notes=""):
    """Analyze rich transcript using LLM with comprehensive metadata"""
    if not rich_transcript:
        return "No transcript data available for analysis."

    # Calculate SLP metrics
    metrics = calculate_slp_metrics(rich_transcript)

    # Generate comprehensive analysis prompt
    prompt = generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)

    # Get analysis from Claude API
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_analysis(rich_transcript, metrics)

    return result
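
# End-to-end usage sketch (the file name is a placeholder; a real, non-demo report
# additionally requires the optional dependencies above and an ANTHROPIC_API_KEY):
#   rich, status = transcribe_audio_with_metadata("session.wav", enable_diarization=True)
#   report = analyze_rich_transcript_with_llm(rich, age=8, gender="male", slp_notes="")
#   print(report)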

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def generate_demo_analysis(rich_transcript, metrics):
    """Generate demo analysis when API is not available"""
    return f"""## Comprehensive SLP Analysis with Temporal and Affective Data

### TEMPORAL SPEECH PATTERNS
**Speech Rate Analysis**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
- Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
- Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns

**Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
- {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances

### AFFECTIVE AND EMOTIONAL ANALYSIS
**Sentiment Distribution**: {metrics['sentiment_distribution']}
**Emotion Distribution**: {metrics['emotion_distribution']}

The emotional patterns suggest {'positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'neutral' if 'neutral' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['neutral'] > 2 else 'mixed'} emotional expression throughout the session.

### LANGUAGE COMPLEXITY
**Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity

**Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
- Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}

**Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}

### SPEAKER ANALYSIS
**Number of Speakers**: {metrics['speaker_count']}
{chr(10).join([f"• {speaker}: {data['sentences']} sentences, {data['words']} words" for speaker, data in metrics['speakers'].items()])}

### CLINICAL IMPLICATIONS
Based on the temporal and affective analysis, this patient shows:
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
- {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
- {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns
- {'Positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'Neutral'} emotional expression

### RECOMMENDATIONS
1. Focus on vocabulary expansion if TTR < 0.4
2. Address speech rate if outside normal range
3. Work on sentence complexity if below age expectations
4. Consider emotional regulation strategies based on sentiment patterns
5. Monitor temporal patterns in speech rate and fluency"""

def create_transcription_interface():
    """Create the transcription-focused Gradio interface"""
    with gr.Blocks(title="Advanced Transcription Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🎤 Advanced Transcription Tool")
        gr.Markdown("Transcribe audio/video with speaker diarization, timestamps, sentiment analysis, and comprehensive LLM analysis")

        with gr.Tabs():
            # Audio/Video Upload & Transcription Tab
            with gr.Tab("🎤 Audio/Video Transcription"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### File Upload")
                        gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")

                        file_input = gr.File(
                            label="Upload Audio or Video File",
                            file_types=["audio", "video"]
                        )

                        enable_diarization = gr.Checkbox(
                            label="Enable Speaker Diarization",
                            value=True,
                            info="Identify different speakers in the audio"
                        )

                        transcribe_btn = gr.Button(
                            "🎤 Transcribe File",
                            variant="primary",
                            size="lg"
                        )

                        transcription_status = gr.Markdown("")

                    with gr.Column(scale=2):
                        gr.Markdown("### Rich Transcript with Metadata")

                        rich_transcript_display = gr.Textbox(
                            label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
                            lines=15,
                            max_lines=20
                        )

            # Analysis Tab
            with gr.Tab("🔍 LLM Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter additional clinical observations...",
                            lines=3
                        )

                        analyze_btn = gr.Button(
                            "🔍 Analyze with LLM",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column(scale=2):
                        gr.Markdown("### Comprehensive LLM Analysis")

                        analysis_output = gr.Textbox(
                            label="LLM Analysis Report",
                            lines=25,
                            max_lines=30
                        )

            # Metrics Tab
            with gr.Tab("📊 Speech Metrics"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Quantitative Speech Metrics")

                        metrics_display = gr.Textbox(
                            label="SLP Metrics",
                            lines=15,
                            max_lines=20
                        )

                    with gr.Column():
                        gr.Markdown("### Word Frequency Analysis")

                        word_freq_display = gr.Dataframe(
                            headers=["Word", "Frequency"],
                            label="Most Frequent Words",
                            interactive=False
                        )

            # Raw Data Tab
            with gr.Tab("📋 Raw Data"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### JSON Data")

                        json_display = gr.Textbox(
                            label="Raw JSON Data",
                            lines=20,
                            max_lines=25
                        )

        # Event handlers
        def on_transcribe(file, diarization_enabled):
            """Handle file transcription"""
            if not file:
                return "", "", "", "Please upload a file first."

            rich_transcript, status = transcribe_audio_with_metadata(file.name, diarization_enabled)

            if rich_transcript:
                formatted = format_rich_transcript(rich_transcript)
                metrics = calculate_slp_metrics(rich_transcript)

                # Format metrics for display
                metrics_text = f"""SPEECH METRICS:
• Total sentences: {metrics['total_sentences']}
• Total words: {metrics['total_words']}
• Duration: {metrics['total_duration_seconds']:.1f} seconds
• Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
• Average sentence length: {metrics['avg_sentence_length']} words
• Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
• Speech rate variability: {metrics['speech_rate_variability']} wpm
• Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
• Number of speakers: {metrics['speaker_count']}

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

SPEAKER ANALYSIS:"""

                for speaker, data in metrics['speakers'].items():
                    metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"

                # Create word frequency dataframe
                word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]

                # JSON data
                json_data = json.dumps(rich_transcript, indent=2)

                return formatted, metrics_text, word_freq_data, status
            else:
                return "", "", [], status

        def on_analyze(rich_transcript_text, age_val, gender_val, notes):
            """Handle LLM analysis"""
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return "Please transcribe audio first."

            # Convert formatted text back to rich transcript structure
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    # Extract data from the formatted line
                    timestamp_match = re.search(r'\[(\d{2}:\d{2})\]', line)
                    speaker_match = re.search(r'\*(\w+):', line)
                    sentence_match = re.search(r'\*\w+:\s*(.+?)(?=\s*\[|$)', line)

                    if timestamp_match and speaker_match and sentence_match:
                        timestamp_str = timestamp_match.group(1)
                        minutes, seconds = map(int, timestamp_str.split(':'))
                        timestamp = minutes * 60 + seconds

                        speaker = speaker_match.group(1)
                        sentence = sentence_match.group(1).strip()

                        rich_transcript.append({
                            'timestamp': timestamp,
                            'speaker': speaker,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            return analyze_rich_transcript_with_llm(rich_transcript, age_val, gender_val, notes)
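
        # Round-trip sketch for the parsing above, using the hypothetical line produced
        # by format_rich_transcript earlier: for
        #   line = "[00:04] *SPEAKER_00: I WENT TO THE PARK [Words: 5, Rate: 2.5wpm] ..."
        # the three regexes recover ('00:04', 'SPEAKER_00', 'I WENT TO THE PARK'), and the
        # timestamp becomes 0 * 60 + 4 = 4 seconds. Sentiment and emotion scores are not
        # re-parsed here, so the reconstruction falls back to neutral defaults.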

        # Connect event handlers
        transcribe_btn.click(
            on_transcribe,
            inputs=[file_input, enable_diarization],
            outputs=[rich_transcript_display, metrics_display, word_freq_display, transcription_status]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[rich_transcript_display, age, gender, slp_notes],
            outputs=[analysis_output]
        )

    return app

if __name__ == "__main__":
    print("🚀 Starting Advanced Transcription Tool...")

    if not MOVIEPY_AVAILABLE:
        print("⚠️ MoviePy not available - video processing will be limited")
        print("   Install with: pip install moviepy")
    else:
        print("✅ MoviePy available for video processing")

    if not DIARIZATION_AVAILABLE:
        print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
        print("   Install with: pip install pyannote.audio")
    else:
        print("✅ Pyannote.audio available for speaker diarization")
        if not os.getenv("HF_TOKEN"):
            print("⚠️ HF_TOKEN not set - set it to enable speaker diarization")
            print("   Get token from: https://huggingface.co/settings/tokens")
            print("   Accept model terms at: https://huggingface.co/pyannote/speaker-diarization")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain and HuggingFace models loaded")

    app = create_transcription_interface()
    app.launch(show_api=False)