SreekarB committed · verified
Commit d60565b · 1 parent: d76b08e

Upload 8 files
enhanced_casl_app.py ADDED
@@ -0,0 +1,590 @@
+ import gradio as gr
+ import json
+ import os
+ import logging
+ import requests
+ import re
+ import numpy as np
+ import pandas as pd
+ from datetime import datetime
+ import time
+ from typing import Dict, List, Tuple, Optional
+ import tempfile
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Anthropic API key
+ ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+
+ # Try to import SpeechBrain and HuggingFace components
+ try:
+     from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
+     from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+     import torch
+     SPEECHBRAIN_AVAILABLE = True
+     HUGGINGFACE_AVAILABLE = True
+     logger.info("SpeechBrain and HuggingFace models available")
+ except ImportError as e:
+     logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
+     SPEECHBRAIN_AVAILABLE = False
+     HUGGINGFACE_AVAILABLE = False
+
+ # Initialize models if available
+ asr_model = None
+ vad_model = None
+ sentiment_model = None
+ emotion_model = None
+
+ if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
+     try:
+         # Speech-to-text model
+         asr_model = EncoderDecoderASR.from_hparams(
+             source="speechbrain/asr-crdnn-rnnlm-librispeech",
+             savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
+         )
+
+         # Voice Activity Detection
+         vad_model = VAD.from_hparams(
+             source="speechbrain/vad-crdnn-libriparty",
+             savedir="pretrained_models/vad-crdnn-libriparty"
+         )
+
+         # Sentiment analysis
+         sentiment_model = pipeline(
+             "sentiment-analysis",
+             model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+             return_all_scores=True
+         )
+
+         # Emotion analysis
+         emotion_model = pipeline(
+             "text-classification",
+             model="j-hartmann/emotion-english-distilroberta-base",
+             return_all_scores=True
+         )
+
+         logger.info("All models loaded successfully")
+     except Exception as e:
+         logger.error(f"Error loading models: {e}")
+         SPEECHBRAIN_AVAILABLE = False
+         HUGGINGFACE_AVAILABLE = False
+
+ def call_claude_api(prompt):
+     """Call Claude API directly"""
+     if not ANTHROPIC_API_KEY:
+         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
+
+     try:
+         headers = {
+             "Content-Type": "application/json",
+             "x-api-key": ANTHROPIC_API_KEY,
+             "anthropic-version": "2023-06-01"
+         }
+
+         data = {
+             "model": "claude-3-5-sonnet-20241022",
+             "max_tokens": 4096,
+             "messages": [
+                 {
+                     "role": "user",
+                     "content": prompt
+                 }
+             ]
+         }
+
+         response = requests.post(
+             "https://api.anthropic.com/v1/messages",
+             headers=headers,
+             json=data,
+             timeout=60
+         )
+
+         if response.status_code == 200:
+             response_json = response.json()
+             return response_json['content'][0]['text']
+         else:
+             logger.error(f"Claude API error: {response.status_code} - {response.text}")
+             return f"❌ Claude API Error: {response.status_code}"
+
+     except Exception as e:
+         logger.error(f"Error calling Claude API: {str(e)}")
+         return f"❌ Error: {str(e)}"
+
+ def transcribe_audio_with_metadata(audio_file):
+     """Transcribe audio with timestamps, sentiment, and metadata"""
+     if not audio_file:
+         return None, "No audio file provided"
+
+     if not SPEECHBRAIN_AVAILABLE:
+         return None, "SpeechBrain not available - using demo transcription"
+
+     try:
+         # Get transcription with timestamps
+         transcript = asr_model.transcribe_file(audio_file)
+
+         # Split into sentences for analysis
+         sentences = re.split(r'[.!?]+', transcript)
+         sentences = [s.strip() for s in sentences if s.strip()]
+
+         # Analyze each sentence
+         rich_transcript = []
+         current_time = 0
+
+         for i, sentence in enumerate(sentences):
+             # Estimate timestamp (rough approximation)
+             timestamp = current_time + (i * 2)  # Assume ~2 seconds per sentence
+
+             # Sentiment analysis
+             sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
+             sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}
+
+             # Emotion analysis
+             emotion_result = emotion_model(sentence)[0] if emotion_model else None
+             emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}
+
+             # Word count and complexity metrics
+             words = sentence.split()
+             word_count = len(words)
+             avg_word_length = np.mean([len(word) for word in words]) if words else 0
+
+             # Calculate speech rate (words per minute estimate)
+             speech_rate = word_count * 30 / 60  # Rough estimate
+
+             rich_transcript.append({
+                 'timestamp': timestamp,
+                 'sentence': sentence,
+                 'word_count': word_count,
+                 'avg_word_length': round(avg_word_length, 2),
+                 'speech_rate_wpm': round(speech_rate, 1),
+                 'sentiment': sentiment['label'],
+                 'sentiment_score': round(sentiment['score'], 3),
+                 'emotion': emotion['label'],
+                 'emotion_score': round(emotion['score'], 3)
+             })
+
+             current_time = timestamp
+
+         return rich_transcript, "Transcription completed successfully"
+
+     except Exception as e:
+         logger.error(f"Error in transcription: {e}")
+         return None, f"Transcription error: {str(e)}"
+
+ def format_rich_transcript(rich_transcript):
+     """Format rich transcript for display"""
+     if not rich_transcript:
+         return "No transcript data available"
+
+     formatted_lines = []
+     for entry in rich_transcript:
+         timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
+
+         line = f"[{timestamp_str}] *PAR: {entry['sentence']}"
+         line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
+         line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
+         line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"
+
+         formatted_lines.append(line)
+
+     return '\n'.join(formatted_lines)
+
+ def calculate_slp_metrics(rich_transcript):
+     """Calculate comprehensive SLP metrics"""
+     if not rich_transcript:
+         return {}
+
+     # Basic metrics
+     total_sentences = len(rich_transcript)
+     total_words = sum(entry['word_count'] for entry in rich_transcript)
+     total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0
+
+     # Word-level analysis
+     all_words = []
+     for entry in rich_transcript:
+         words = entry['sentence'].lower().split()
+         all_words.extend(words)
+
+     # Word frequency distribution
+     word_freq = {}
+     for word in all_words:
+         word_clean = re.sub(r'[^\w\s]', '', word)
+         if word_clean:
+             word_freq[word_clean] = word_freq.get(word_clean, 0) + 1
+
+     # Vocabulary diversity (Type-Token Ratio)
+     unique_words = len(set(all_words))
+     ttr = unique_words / total_words if total_words > 0 else 0
+
+     # Speech rate analysis
+     speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
+     avg_speech_rate = np.mean(speech_rates) if speech_rates else 0
+
+     # Sentiment analysis
+     sentiment_counts = {}
+     emotion_counts = {}
+     for entry in rich_transcript:
+         sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
+         emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1
+
+     # Sentence complexity
+     sentence_lengths = [entry['word_count'] for entry in rich_transcript]
+     avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
+
+     # Pause analysis (gaps between sentences)
+     pauses = []
+     for i in range(1, len(rich_transcript)):
+         pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
+         pauses.append(pause)
+
+     avg_pause_duration = np.mean(pauses) if pauses else 0
+
+     return {
+         'total_sentences': total_sentences,
+         'total_words': total_words,
+         'total_duration_seconds': total_duration,
+         'unique_words': unique_words,
+         'type_token_ratio': round(ttr, 3),
+         'avg_sentence_length': round(avg_sentence_length, 1),
+         'avg_speech_rate_wpm': round(avg_speech_rate, 1),
+         'avg_pause_duration': round(avg_pause_duration, 1),
+         'sentiment_distribution': sentiment_counts,
+         'emotion_distribution': emotion_counts,
+         'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
+         'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0
+     }
+
+ def generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
+     """Generate comprehensive SLP analysis prompt"""
+
+     # Format metrics for the prompt
+     metrics_text = f"""
+ TRANSCRIPT METRICS:
+ - Total sentences: {metrics['total_sentences']}
+ - Total words: {metrics['total_words']}
+ - Duration: {metrics['total_duration_seconds']:.1f} seconds
+ - Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
+ - Average sentence length: {metrics['avg_sentence_length']} words
+ - Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
+ - Speech rate variability: {metrics['speech_rate_variability']} wpm
+ - Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
+
+ SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
+ EMOTION DISTRIBUTION: {metrics['emotion_distribution']}
+
+ MOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}
+ """
+
+     # Format rich transcript for analysis
+     transcript_text = format_rich_transcript(rich_transcript)
+
+     notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""
+
+     prompt = f"""
+ You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich metadata.
+
+ PATIENT: {age}-year-old {gender}
+
+ {metrics_text}
+
+ TRANSCRIPT WITH METADATA:
+ {transcript_text}{notes_section}
+
+ Please provide a comprehensive analysis including:
+
+ 1. SPEECH FLUENCY ANALYSIS:
+ - Speech rate patterns and variability
+ - Pause patterns and their significance
+ - Overall fluency assessment
+
+ 2. LANGUAGE COMPLEXITY:
+ - Vocabulary diversity and word frequency patterns
+ - Sentence structure and complexity
+ - Language development level assessment
+
+ 3. EMOTIONAL AND AFFECTIVE ANALYSIS:
+ - Sentiment patterns throughout the transcript
+ - Emotional expression and regulation
+ - Impact on communication effectiveness
+
+ 4. SPEECH FACTORS:
+ - Word retrieval patterns
+ - Grammatical accuracy
+ - Repetitions and revisions
+
+ 5. CLINICAL IMPLICATIONS:
+ - Specific intervention targets
+ - Strengths and areas for improvement
+ - Recommendations for therapy
+
+ 6. COMPREHENSIVE SUMMARY:
+ - Overall communication profile
+ - Developmental appropriateness
+ - Prognosis and treatment priorities
+
+ Use the quantitative metrics and qualitative observations to support your analysis.
+ """
+
+     return prompt
+
+ def analyze_rich_transcript(rich_transcript, age, gender, slp_notes=""):
+     """Analyze rich transcript with comprehensive metrics"""
+     if not rich_transcript:
+         return "No transcript data available for analysis."
+
+     # Calculate SLP metrics
+     metrics = calculate_slp_metrics(rich_transcript)
+
+     # Generate analysis prompt
+     prompt = generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)
+
+     # Get analysis from Claude API
+     if ANTHROPIC_API_KEY:
+         result = call_claude_api(prompt)
+     else:
+         result = generate_demo_analysis(rich_transcript, metrics)
+
+     return result
+
+ def generate_demo_analysis(rich_transcript, metrics):
+     """Generate demo analysis when API is not available"""
+     return f"""## Comprehensive SLP Analysis
+
+ ### SPEECH FLUENCY ANALYSIS
+ **Speech Rate**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
+ - Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
+ - Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns
+
+ **Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
+ - {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances
+
+ ### LANGUAGE COMPLEXITY
+ **Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
+ - {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity
+
+ **Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
+ - Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}
+
+ **Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}
+
+ ### EMOTIONAL AND AFFECTIVE ANALYSIS
+ **Sentiment Distribution**: {metrics['sentiment_distribution']}
+ **Emotion Distribution**: {metrics['emotion_distribution']}
+
+ ### CLINICAL IMPLICATIONS
+ Based on the quantitative analysis, this patient shows:
+ - {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
+ - {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
+ - {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns
+
+ ### RECOMMENDATIONS
+ 1. Focus on vocabulary expansion if TTR < 0.4
+ 2. Address speech rate if outside normal range
+ 3. Work on sentence complexity if below age expectations
+ 4. Consider emotional regulation strategies based on sentiment patterns"""
+
+ def create_enhanced_interface():
+     """Create the enhanced Gradio interface"""
+     with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
+         gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
+         gr.Markdown("Advanced speech analysis with sentiment, timestamps, and comprehensive SLP metrics")
+
+         with gr.Tabs():
+             # Audio Upload & Transcription Tab
+             with gr.Tab("🎤 Audio Analysis"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### Audio Upload")
+
+                         audio_input = gr.Audio(
+                             type="filepath",
+                             label="Upload Audio Recording"
+                         )
+
+                         transcribe_btn = gr.Button(
+                             "🎤 Transcribe & Analyze",
+                             variant="primary",
+                             size="lg"
+                         )
+
+                         transcription_status = gr.Markdown("")
+
+                     with gr.Column(scale=2):
+                         gr.Markdown("### Rich Transcript")
+
+                         rich_transcript_display = gr.Textbox(
+                             label="Transcription with Timestamps & Sentiment",
+                             lines=15,
+                             max_lines=20
+                         )
+
+             # Analysis Tab
+             with gr.Tab("📊 Analysis"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### Patient Information")
+
+                         with gr.Row():
+                             age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
+                             gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")
+
+                         slp_notes = gr.Textbox(
+                             label="SLP Clinical Notes (Optional)",
+                             placeholder="Enter additional clinical observations...",
+                             lines=3
+                         )
+
+                         analyze_btn = gr.Button(
+                             "🔍 Analyze Transcript",
+                             variant="primary",
+                             size="lg"
+                         )
+
+                     with gr.Column(scale=2):
+                         gr.Markdown("### Comprehensive Analysis")
+
+                         analysis_output = gr.Textbox(
+                             label="SLP Analysis Report",
+                             lines=25,
+                             max_lines=30
+                         )
+
+             # Metrics Tab
+             with gr.Tab("📈 Metrics Dashboard"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("### Quantitative Metrics")
+
+                         metrics_display = gr.JSON(
+                             label="SLP Metrics"
+                         )
+
+                     with gr.Column():
+                         gr.Markdown("### Word Frequency")
+
+                         word_freq_display = gr.Dataframe(
+                             headers=["Word", "Frequency"],
+                             label="Most Frequent Words",
+                             interactive=False
+                         )
+
+         # Event handlers
+         def on_transcribe(audio_file):
+             """Handle audio transcription"""
+             if not audio_file:
+                 return "", "Please upload an audio file first."
+
+             rich_transcript, status = transcribe_audio_with_metadata(audio_file)
+
+             if rich_transcript:
+                 formatted = format_rich_transcript(rich_transcript)
+                 return formatted, status
+             else:
+                 return "", status
+
+         def on_analyze(rich_transcript_text, age_val, gender_val, notes):
+             """Handle analysis"""
+             # Convert formatted text back to rich transcript structure
+             # This is a simplified version - in practice you'd want to store the rich data
+             if not rich_transcript_text or rich_transcript_text == "No transcript data available":
+                 return "Please transcribe audio first."
+
+             # For demo purposes, create a simple rich transcript from the text
+             lines = rich_transcript_text.split('\n')
+             rich_transcript = []
+
+             for i, line in enumerate(lines):
+                 if line.strip():
+                     # Extract sentence from the line
+                     sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
+                     if sentence_match:
+                         sentence = sentence_match.group(1).strip()
+                         rich_transcript.append({
+                             'timestamp': i * 2,
+                             'sentence': sentence,
+                             'word_count': len(sentence.split()),
+                             'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
+                             'speech_rate_wpm': 120.0,
+                             'sentiment': 'neutral',
+                             'sentiment_score': 0.5,
+                             'emotion': 'neutral',
+                             'emotion_score': 0.5
+                         })
+
+             return analyze_rich_transcript(rich_transcript, age_val, gender_val, notes)
+
+         def update_metrics(rich_transcript_text):
+             """Update metrics display"""
+             if not rich_transcript_text or rich_transcript_text == "No transcript data available":
+                 return {}, []
+
+             # Convert text back to rich transcript (simplified)
+             lines = rich_transcript_text.split('\n')
+             rich_transcript = []
+
+             for i, line in enumerate(lines):
+                 if line.strip():
+                     sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
+                     if sentence_match:
+                         sentence = sentence_match.group(1).strip()
+                         rich_transcript.append({
+                             'timestamp': i * 2,
+                             'sentence': sentence,
+                             'word_count': len(sentence.split()),
+                             'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
+                             'speech_rate_wpm': 120.0,
+                             'sentiment': 'neutral',
+                             'sentiment_score': 0.5,
+                             'emotion': 'neutral',
+                             'emotion_score': 0.5
+                         })
+
+             metrics = calculate_slp_metrics(rich_transcript)
+             if not metrics:  # no *PAR: lines parsed; avoid a KeyError below
+                 return {}, []
+
+             # Create word frequency dataframe
+             word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]
+
+             return metrics, word_freq_data
+
+         # Connect event handlers
+         transcribe_btn.click(
+             on_transcribe,
+             inputs=[audio_input],
+             outputs=[rich_transcript_display, transcription_status]
+         )
+
+         analyze_btn.click(
+             on_analyze,
+             inputs=[rich_transcript_display, age, gender, slp_notes],
+             outputs=[analysis_output]
+         )
+
+         # Update metrics when transcript changes
+         rich_transcript_display.change(
+             update_metrics,
+             inputs=[rich_transcript_display],
+             outputs=[metrics_display, word_freq_display]
+         )
+
+     return app
+
+ if __name__ == "__main__":
+     print("🚀 Starting Enhanced CASL Analysis Tool...")
+
+     if not ANTHROPIC_API_KEY:
+         print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
+         print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
+         print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
+     else:
+         print("✅ Claude API configured")
+
+     if not SPEECHBRAIN_AVAILABLE:
+         print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
+         print("   Install with: pip install speechbrain transformers torch")
+     else:
+         print("✅ SpeechBrain and HuggingFace models loaded")
+
+     app = create_enhanced_interface()
+     app.launch(show_api=False)
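
The metrics layer above is plain Python plus numpy, so it can be smoke-tested without the audio stack or an API key. A minimal sketch, assuming `enhanced_casl_app.py` is importable from the working directory (importing it also runs the module-level model setup if SpeechBrain is installed); the two sample entries are hypothetical, mirroring the dict shape built in `transcribe_audio_with_metadata`:

```python
# Hypothetical smoke test for the metrics helpers; the entries below are
# made up, but use the exact keys produced by transcribe_audio_with_metadata.
from enhanced_casl_app import calculate_slp_metrics, format_rich_transcript

rich_transcript = [
    {'timestamp': 0, 'sentence': 'we went to the beach', 'word_count': 5,
     'avg_word_length': 3.2, 'speech_rate_wpm': 120.0,
     'sentiment': 'positive', 'sentiment_score': 0.91,
     'emotion': 'joy', 'emotion_score': 0.84},
    {'timestamp': 2, 'sentence': 'I built a big sand castle', 'word_count': 6,
     'avg_word_length': 3.33, 'speech_rate_wpm': 118.0,
     'sentiment': 'positive', 'sentiment_score': 0.88,
     'emotion': 'joy', 'emotion_score': 0.79},
]

metrics = calculate_slp_metrics(rich_transcript)
print(metrics['type_token_ratio'])      # unique words / total words
print(metrics['avg_sentence_length'])   # mean words per sentence
print(format_rich_transcript(rich_transcript))
```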
moderate_casl_app.py CHANGED
@@ -155,14 +155,19 @@ def call_bedrock(prompt, max_tokens=4096):
         "messages": [
             {
                 "role": "user",
-                "content": prompt
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ]
             }
         ],
         "temperature": 0.3,
         "top_p": 0.9
     })
 
-    modelId = 'anthropic.claude-3-sonnet-20240229-v1:0'
+    modelId = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
     response = bedrock_client.invoke_model(
         body=body,
         modelId=modelId,
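
The two changes travel together: the Claude 3.5 models on Bedrock use the Messages format, where `content` is a list of typed blocks rather than a bare string. For orientation, a sketch of the complete request body this hunk sits inside; the `anthropic_version` and `max_tokens` fields are assumptions based on the standard Bedrock Messages payload, since the hunk only shows the middle of the `json.dumps(...)` call:

```python
import json

prompt = "Analyze this transcript."  # placeholder input

# Assumed full shape of the Bedrock Anthropic Messages payload; only the
# "content" block and the model ID are confirmed by the diff itself.
body = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 4096,
    "messages": [
        {
            "role": "user",
            # A list of typed blocks, so text and image inputs can be mixed.
            "content": [
                {"type": "text", "text": prompt}
            ]
        }
    ],
    "temperature": 0.3,
    "top_p": 0.9,
})
```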
moderate_casl_app_fixed.py ADDED
@@ -0,0 +1,406 @@
+ import gradio as gr
+ import json
+ import os
+ import logging
+ import requests
+ import re
+ from datetime import datetime
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Anthropic API key - can be set as HuggingFace secret or environment variable
+ ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+
+ # Check if API key is available
+ if ANTHROPIC_API_KEY:
+     logger.info("Claude API key found")
+ else:
+     logger.warning("Claude API key not found - using demo mode")
+
+ def call_claude_api(prompt):
+     """Call Claude API directly"""
+     if not ANTHROPIC_API_KEY:
+         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
+
+     try:
+         headers = {
+             "Content-Type": "application/json",
+             "x-api-key": ANTHROPIC_API_KEY,
+             "anthropic-version": "2023-06-01"
+         }
+
+         data = {
+             "model": "claude-3-5-sonnet-20241022",
+             "max_tokens": 4096,
+             "messages": [
+                 {
+                     "role": "user",
+                     "content": prompt
+                 }
+             ]
+         }
+
+         response = requests.post(
+             "https://api.anthropic.com/v1/messages",
+             headers=headers,
+             json=data,
+             timeout=60
+         )
+
+         if response.status_code == 200:
+             response_json = response.json()
+             return response_json['content'][0]['text']
+         else:
+             logger.error(f"Claude API error: {response.status_code} - {response.text}")
+             return f"❌ Claude API Error: {response.status_code}"
+
+     except Exception as e:
+         logger.error(f"Error calling Claude API: {str(e)}")
+         return f"❌ Error: {str(e)}"
+
+ def process_file(file):
+     """Process uploaded file"""
+     if file is None:
+         return "Please upload a file first."
+
+     try:
+         # Read file content
+         with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+             content = f.read()
+
+         if not content.strip():
+             return "File appears to be empty."
+
+         return content
+     except Exception as e:
+         return f"Error reading file: {str(e)}"
+
+ def read_cha_file(file_path):
+     """Read and parse a .cha transcript file"""
+     try:
+         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+             content = f.read()
+
+         # Extract participant lines (starting with *PAR:)
+         par_lines = []
+         for line in content.splitlines():
+             if line.startswith('*PAR:'):
+                 par_lines.append(line)
+
+         # If no PAR lines found, just return the whole content
+         if not par_lines:
+             return content
+
+         return '\n'.join(par_lines)
+
+     except Exception as e:
+         logger.error(f"Error reading CHA file: {str(e)}")
+         return ""
+
+ def process_upload(file):
+     """Process an uploaded file (text or CHA)"""
+     if file is None:
+         return ""
+
+     file_path = file.name
+     if file_path.endswith('.cha'):
+         return read_cha_file(file_path)
+     else:
+         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+             return f.read()
+
+ def generate_demo_response(prompt):
+     """Generate a demo response when API is not available"""
+     return """## Speech Factors Analysis
+
+ **Difficulty producing fluent speech**: 8 instances, moderate severity
+ - Examples: "today I would &-um like to talk about &-um a fun trip"
+ - "we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually"
+
+ **Word retrieval issues**: 6 instances, mild-moderate severity
+ - Examples: "what do you call those &-um &-um sprinkles! that's the word"
+ - "sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built"
+
+ **Grammatical errors**: 4 instances, moderate severity
+ - Examples: "after swimming we [//] I eat [: ate] [*] &-um ice cream"
+ - "we saw [/] saw fishies [: fish] [*] swimming in the water"
+
+ **Repetitions and revisions**: 5 instances, mild severity
+ - Examples: "we [/] we stayed for &-um three no [//] four days"
+ - "I want to go back to the beach [/] beach next year"
+
+ ## Language Skills Assessment
+
+ **Lexical/Semantic Skills**:
+ - Vocabulary diversity appears age-appropriate with some word-finding difficulties
+ - Examples: "what do you call those &-um &-um sprinkles! that's the word"
+ - Shows good semantic understanding but retrieval challenges
+
+ **Syntactic Skills**:
+ - Basic sentence structure is intact with some grammatical inconsistencies
+ - Examples: "my brother he [//] he helped me dig a big hole"
+ - Verb tense errors noted: "forgetted" for "forgot", "eat" for "ate"
+
+ **Supralinguistic Skills**:
+ - Narrative organization is good with logical sequence
+ - Examples: "sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold"
+ - Shows creative thinking and topic maintenance
+
+ ## Treatment Recommendations
+
+ 1. **Word-finding strategies**: Implement semantic cuing techniques using the patient's experiences (beach, ice cream) as context
+ 2. **Grammar practice**: Focus on verb tense consistency with structured exercises
+ 3. **Fluency techniques**: Work on reducing fillers and improving speech flow
+ 4. **Self-monitoring**: Help patient identify and correct grammatical errors
+ 5. **Vocabulary expansion**: Build on existing semantic networks
+
+ ## Clinical Summary
+
+ This child demonstrates a mild-to-moderate expressive language disorder with primary concerns in word retrieval and grammatical accuracy. Strengths include good narrative organization and topic maintenance. The pattern suggests intervention should focus on word-finding strategies and grammatical form practice while building on existing semantic knowledge."""
+
+ def analyze_transcript(transcript, age, gender, slp_notes=""):
+     """Analyze a speech transcript using Claude"""
+     if not transcript or len(transcript.strip()) < 50:
+         return "Error: Please provide a longer transcript for analysis."
+
+     # Add SLP notes to the prompt if provided
+     notes_section = ""
+     if slp_notes and slp_notes.strip():
+         notes_section = f"""
+
+ SLP CLINICAL NOTES:
+ {slp_notes.strip()}
+ """
+
+     # Simplified analysis prompt
+     prompt = f"""
+ You are a speech-language pathologist analyzing a transcript for CASL assessment.
+
+ Patient: {age}-year-old {gender}
+
+ TRANSCRIPT:
+ {transcript}{notes_section}
+
+ Please provide a comprehensive CASL analysis including:
+
+ 1. SPEECH FACTORS (with counts and severity):
+ - Difficulty producing fluent speech
+ - Word retrieval issues
+ - Grammatical errors
+ - Repetitions and revisions
+
+ 2. LANGUAGE SKILLS ASSESSMENT:
+ - Lexical/Semantic Skills (qualitative assessment)
+ - Syntactic Skills (qualitative assessment)
+ - Supralinguistic Skills (qualitative assessment)
+
+ 3. TREATMENT RECOMMENDATIONS:
+ - List 3-5 specific intervention strategies
+
+ 4. CLINICAL SUMMARY:
+ - Brief explanation of findings and prognosis
+
+ Use exact quotes from the transcript as evidence.
+ Focus on qualitative observations rather than standardized scores.
+ Be specific and provide concrete examples from the transcript.
+ {f"Consider the SLP clinical notes in your analysis." if slp_notes and slp_notes.strip() else ""}
+ """
+
+     # Get analysis from Claude API or demo
+     if ANTHROPIC_API_KEY:
+         result = call_claude_api(prompt)
+     else:
+         result = generate_demo_response(prompt)
+
+     return result
+
+ def create_interface():
+     """Create the Gradio interface"""
+     with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
+         gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
+         gr.Markdown("Upload a speech transcript and get comprehensive CASL assessment results.")
+
+         with gr.Tabs():
+             # Analysis Tab
+             with gr.Tab("📊 Analysis"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("### Patient Information")
+
+                         with gr.Row():
+                             age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
+                             gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")
+
+                         slp_notes = gr.Textbox(
+                             label="SLP Clinical Notes (Optional)",
+                             placeholder="Enter any additional clinical observations, context, or notes...",
+                             lines=3
+                         )
+
+                         gr.Markdown("### Transcript Input")
+
+                         file_upload = gr.File(
+                             label="Upload Transcript File",
+                             file_types=[".txt", ".cha"]
+                         )
+
+                         transcript = gr.Textbox(
+                             label="Or Paste Transcript Here",
+                             placeholder="Enter transcript text or upload a file...",
+                             lines=10
+                         )
+
+                         analyze_btn = gr.Button("🔍 Analyze Transcript", variant="primary")
+
+                     with gr.Column():
+                         gr.Markdown("### Analysis Results")
+
+                         analysis_output = gr.Textbox(
+                             label="CASL Analysis Report",
+                             placeholder="Analysis results will appear here...",
+                             lines=25,
+                             max_lines=30
+                         )
+
+             # Sample Transcripts Tab
+             with gr.Tab("📝 Sample Transcripts"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("### Sample Transcripts")
+
+                         sample_choice = gr.Dropdown(
+                             choices=[
+                                 "Beach Trip (Child)",
+                                 "School Day (Adolescent)",
+                                 "Adult Recovery"
+                             ],
+                             label="Select a sample transcript:",
+                             value="Beach Trip (Child)"
+                         )
+
+                         load_sample_btn = gr.Button("Load Sample", variant="secondary")
+
+                         sample_transcript = gr.Textbox(
+                             label="Sample Transcript",
+                             lines=15,
+                             interactive=False
+                         )
+
+                         use_sample_btn = gr.Button("Use This Sample for Analysis", variant="primary")
+
+                     with gr.Column():
+                         gr.Markdown("### Sample Descriptions")
+
+                         gr.Markdown("""
+ **Beach Trip (Child)**: 8-year-old child describing a family beach vacation
+ - Shows typical child language patterns
+ - Contains word-finding difficulties and grammatical errors
+ - Good narrative structure despite language challenges
+
+ **School Day (Adolescent)**: Teenager describing a school day
+ - More complex language but still some disfluencies
+ - Shows adolescent speech patterns
+ - Academic vocabulary and social language
+
+ **Adult Recovery**: Adult describing stroke recovery
+ - Post-stroke language patterns
+ - Word-finding difficulties
+ - Shows recovery progress
+ """)
+
+         # Sample transcripts
+         SAMPLE_TRANSCRIPTS = {
+             "Beach Trip (Child)": """*PAR: today I would &-um like to talk about &-um a fun trip I took last &-um summer with my family.
+ *PAR: we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually.
+ *PAR: there was lots of &-um &-um swimming and &-um sun.
+ *PAR: we [/] we stayed for &-um three no [//] four days in a &-um hotel near the water [: ocean] [*].
+ *PAR: my favorite part was &-um building &-um castles with sand.
+ *PAR: sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built.
+ *PAR: my brother he [//] he helped me dig a big hole.
+ *PAR: we saw [/] saw fishies [: fish] [*] swimming in the water.
+ *PAR: sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold.
+ *PAR: maybe they have [/] have houses under the water.
+ *PAR: after swimming we [//] I eat [: ate] [*] &-um ice cream with &-um chocolate things on top.
+ *PAR: what do you call those &-um &-um sprinkles! that's the word.
+ *PAR: my mom said to &-um that I could have &-um two scoops next time.
+ *PAR: I want to go back to the beach [/] beach next year.""",
+
+             "School Day (Adolescent)": """*PAR: yesterday was &-um kind of a weird day at school.
+ *PAR: I had this big test in math and I was like really nervous about it.
+ *PAR: when I got there [//] when I got to class the teacher said we could use calculators.
+ *PAR: I was like &-oh &-um that's good because I always mess up the &-um the calculations.
+ *PAR: there was this one problem about &-um what do you call it &-um geometry I think.
+ *PAR: I couldn't remember the formula for [//] I mean I knew it but I just couldn't think of it.
+ *PAR: so I raised my hand and asked the teacher and she was really nice about it.
+ *PAR: after the test me and my friends went to lunch and we talked about how we did.
+ *PAR: everyone was saying it was hard but I think I did okay.
+ *PAR: oh and then in English class we had to read our essays out loud.
+ *PAR: I hate doing that because I get really nervous and I start talking fast.
+ *PAR: but the teacher said mine was good which made me feel better.""",
+
+             "Adult Recovery": """*PAR: I &-um I want to talk about &-uh my &-um recovery.
+ *PAR: it's been &-um [//] it's hard to &-um to find the words sometimes.
+ *PAR: before the &-um the stroke I was &-um working at the &-uh at the bank.
+ *PAR: now I have to &-um practice speaking every day with my therapist.
+ *PAR: my wife she [//] she helps me a lot at home.
+ *PAR: we do &-um exercises together like &-uh reading and &-um talking about pictures.
+ *PAR: sometimes I get frustrated because I know what I want to say but &-um the words don't come out right.
+ *PAR: but I'm getting better little by little.
+ *PAR: the doctor says I'm making good progress.
+ *PAR: I hope to go back to work someday but right now I'm focusing on &-um getting better."""
+         }
+
+         # Event handlers
+         def load_sample_transcript(sample_name):
+             """Load a sample transcript"""
+             return SAMPLE_TRANSCRIPTS.get(sample_name, "")
+
+         def use_sample_for_analysis(sample_text, age_val, gender_val, notes):
+             """Use sample transcript for analysis"""
+             if not sample_text:
+                 return "Please load a sample transcript first."
+             return analyze_transcript(sample_text, age_val, gender_val, notes)
+
+         def on_analyze(transcript_text, age_val, gender_val, notes):
+             """Handle analysis"""
+             if not transcript_text or len(transcript_text.strip()) < 50:
+                 return "Error: Please provide a longer transcript for analysis."
+             return analyze_transcript(transcript_text, age_val, gender_val, notes)
+
+         # Connect event handlers
+         load_sample_btn.click(
+             load_sample_transcript,
+             inputs=[sample_choice],
+             outputs=[sample_transcript]
+         )
+
+         use_sample_btn.click(
+             use_sample_for_analysis,
+             inputs=[sample_transcript, age, gender, slp_notes],
+             outputs=[analysis_output]
+         )
+
+         analyze_btn.click(
+             on_analyze,
+             inputs=[transcript, age, gender, slp_notes],
+             outputs=[analysis_output]
+         )
+
+         # File upload handler
+         file_upload.upload(process_upload, file_upload, transcript)
+
+     return app
+
+ if __name__ == "__main__":
+     print("🚀 Starting Enhanced CASL Analysis Tool...")
+     if not ANTHROPIC_API_KEY:
+         print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
+         print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
+         print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
+     else:
+         print("✅ Claude API configured")
+
+     app = create_interface()
+     app.launch(show_api=False)
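
`read_cha_file` keys on the CHAT `*PAR:` speaker tier and falls back to the raw text when none is found. The same filter can be sanity-checked in memory (a sketch; the fragment reuses lines from the bundled Beach Trip sample, with CHAT headers added for realism):

```python
# Sketch: the *PAR: tier filter from read_cha_file(), applied to an
# in-memory CHAT fragment instead of a file path.
sample_cha = """@Begin
@Participants: PAR Participant, INV Investigator
*INV: tell me about your summer.
*PAR: we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually.
*PAR: my favorite part was &-um building &-um castles with sand.
@End"""

par_lines = [line for line in sample_cha.splitlines() if line.startswith('*PAR:')]
print('\n'.join(par_lines))  # only the participant tier survives
```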
requirements.txt CHANGED
@@ -5,5 +5,18 @@ matplotlib>=3.3.0
 requests>=2.25.0
 reportlab>=3.6.0
 PyPDF2>=2.0.0
-speechrecognition>=3.8.1
-pydub>=0.25.0
+speechrecognition>=3.8.0
+pydub>=0.25.0
+
+# Transcription and audio processing
+speechbrain>=0.5.15
+torch>=1.9.0
+transformers>=4.20.0
+moviepy>=1.0.3
+
+# Optional: Speaker diarization (requires HF token)
+# pyannote.audio>=2.1.0
+
+# Optional: Additional audio processing
+librosa>=0.9.0
+soundfile>=0.10.0
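
Several of the new entries are optional at runtime: the apps wrap each import in try/except and fall back to demo behavior. A quick probe of which ones actually resolved in the current environment can save a debugging round-trip (a sketch; the package list is taken from the requirements above):

```python
import importlib.util

def available(name: str) -> bool:
    try:
        return importlib.util.find_spec(name) is not None
    except ModuleNotFoundError:  # parent package absent (e.g. pyannote)
        return False

# Mirrors the try/except gating used by the apps in this commit.
for pkg in ["speechbrain", "torch", "transformers", "moviepy",
            "pyannote.audio", "librosa", "soundfile"]:
    print(f"{pkg}: {'ok' if available(pkg) else 'missing'}")
```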
simple_casl_app.py CHANGED
@@ -3,6 +3,9 @@ import json
3
  import os
4
  import logging
5
  import requests
 
 
 
6
 
7
  # Configure logging
8
  logging.basicConfig(level=logging.INFO)
@@ -11,52 +14,483 @@ logger = logging.getLogger(__name__)
11
  # Anthropic API key - can be set as HuggingFace secret or environment variable
12
  ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Check if API key is available
15
  if ANTHROPIC_API_KEY:
16
  logger.info("Claude API key found")
17
  else:
18
  logger.warning("Claude API key not found - using demo mode")
19
 
20
- def call_claude_api(prompt):
21
- """Call Claude API directly"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  if not ANTHROPIC_API_KEY:
23
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
24
 
25
  try:
26
- headers = {
27
- "Content-Type": "application/json",
28
- "x-api-key": ANTHROPIC_API_KEY,
29
- "anthropic-version": "2023-06-01"
30
- }
31
-
32
- data = {
33
- "model": "claude-3-5-sonnet-20241022",
34
- "max_tokens": 4096,
35
- "messages": [
36
- {
37
- "role": "user",
38
- "content": prompt
39
- }
40
- ]
41
- }
42
-
43
- response = requests.post(
44
- "https://api.anthropic.com/v1/messages",
45
- headers=headers,
46
- json=data,
47
- timeout=60
48
- )
49
 
50
- if response.status_code == 200:
51
- response_json = response.json()
52
- return response_json['content'][0]['text']
53
- else:
54
- logger.error(f"Claude API error: {response.status_code} - {response.text}")
55
- return f"❌ Claude API Error: {response.status_code}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  except Exception as e:
58
  logger.error(f"Error calling Claude API: {str(e)}")
59
  return f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def process_file(file):
62
  """Process uploaded file"""
@@ -75,15 +509,10 @@ def process_file(file):
75
  except Exception as e:
76
  return f"Error reading file: {str(e)}"
77
 
78
- def analyze_transcript(file, age, gender, slp_notes):
79
- """Simple CASL analysis"""
80
- if file is None:
81
- return "Please upload a transcript file first."
82
-
83
- # Get transcript content
84
- transcript = process_file(file)
85
- if transcript.startswith("Error") or transcript.startswith("Please"):
86
- return transcript
87
 
88
  # Add SLP notes to the prompt if provided
89
  notes_section = ""
@@ -94,45 +523,249 @@ def analyze_transcript(file, age, gender, slp_notes):
94
  {slp_notes.strip()}
95
  """
96
 
97
- # Simple analysis prompt - removing CASL-2 scores as requested
98
  prompt = f"""
99
- You are a speech-language pathologist analyzing a transcript for CASL assessment.
100
-
101
  Patient: {age}-year-old {gender}
102
 
103
  TRANSCRIPT:
104
- {transcript}{notes_section}
105
-
106
- Please provide a CASL analysis including:
107
 
108
- 1. SPEECH FACTORS (with counts and severity):
109
- - Difficulty producing fluent speech
110
- - Word retrieval issues
111
- - Grammatical errors
112
- - Repetitions and revisions
113
-
114
- 2. LANGUAGE SKILLS ASSESSMENT:
115
- - Lexical/Semantic Skills (qualitative assessment)
116
- - Syntactic Skills (qualitative assessment)
117
- - Supralinguistic Skills (qualitative assessment)
118
-
119
- 3. TREATMENT RECOMMENDATIONS:
120
- - List 3-5 specific intervention strategies
121
-
122
- 4. CLINICAL SUMMARY:
123
- - Brief explanation of findings and prognosis
124
-
125
- Use exact quotes from the transcript as evidence.
126
- Focus on qualitative observations rather than standardized scores.
127
- {f"Consider the SLP clinical notes in your analysis." if slp_notes and slp_notes.strip() else ""}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  """
129
 
130
  # Get analysis from Claude API
131
- result = call_claude_api(prompt)
132
  return result
133
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
135
- """Perform targeted analysis based on custom questions"""
136
  if not transcript or not transcript.strip():
137
  return "Please provide a transcript first."
138
 
@@ -148,9 +781,9 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
148
  {slp_notes.strip()}
149
  """
150
 
151
- # Targeted analysis prompt
152
  prompt = f"""
153
- You are a speech-language pathologist conducting a targeted analysis of a speech transcript.
154
 
155
  Patient: {age}-year-old {gender}
156
 
@@ -160,27 +793,94 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
160
  SPECIFIC QUESTION FOR ANALYSIS:
161
  {custom_question.strip()}
162
 
163
- Please provide a detailed, evidence-based analysis that directly addresses this specific question.
 
 
 
 
 
 
 
 
164
 
165
- Your response should:
166
- 1. Directly answer the question asked
167
- 2. Provide specific examples from the transcript as evidence
168
- 3. Include relevant clinical observations
169
- 4. Offer practical insights for clinical practice
170
- 5. Be concise but comprehensive
171
 
172
- Use exact quotes from the transcript to support your analysis.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  """
174
 
175
  # Get targeted analysis from Claude API
176
- result = call_claude_api(prompt)
177
  return result
178
 
179
  # Create enhanced interface with tabs
180
  with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
181
 
182
  gr.Markdown("# πŸ—£οΈ Enhanced CASL Analysis Tool")
183
- gr.Markdown("Upload a speech transcript and get instant CASL assessment results with targeted analysis options.")
184
 
185
  # Store transcript globally
186
  transcript_state = gr.State("")
@@ -190,12 +890,46 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
190
  with gr.Tab("οΏ½οΏ½οΏ½οΏ½ Basic Analysis"):
191
  with gr.Row():
192
  with gr.Column():
193
- gr.Markdown("### Upload & Settings")
194
 
195
- file_upload = gr.File(
196
- label="Upload Transcript File",
197
- file_types=[".txt", ".cha"]
198
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  age = gr.Number(
201
  label="Patient Age",
@@ -215,11 +949,6 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
215
  placeholder="Enter any additional clinical observations, context, or notes...",
216
  lines=3
217
  )
218
-
219
- analyze_btn = gr.Button(
220
- "πŸ” Analyze Transcript",
221
- variant="primary"
222
- )
223
 
224
  with gr.Column():
225
  gr.Markdown("### Analysis Results")
@@ -230,6 +959,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
230
  lines=25,
231
  max_lines=30
232
  )
 
 
233
 
234
  # Tab 2: Targeted Analysis
235
  with gr.Tab("🎯 Targeted Analysis"):
@@ -257,7 +988,9 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
257
  "What narrative organization skills are evident?",
258
  "What specific intervention targets would you recommend?",
259
  "How does this patient's language compare to typical development?",
260
- "What evidence suggests cognitive-linguistic strengths/weaknesses?"
 
 
261
  ],
262
  label="Question Templates (Optional)",
263
  value="Select a template or write your own..."
@@ -283,6 +1016,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
283
  lines=25,
284
  max_lines=30
285
  )
 
 
286
 
287
  # Tab 3: Quick Questions
288
  with gr.Tab("⚑ Quick Questions"):
@@ -307,7 +1042,19 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
307
  "Narrative structure",
308
  "Vocabulary level",
309
  "Sentence complexity",
310
- "Speech rate patterns"
 
 
 
 
 
 
 
 
 
 
 
 
311
  ],
312
  label="Select questions to analyze:",
313
  value=[]
@@ -327,17 +1074,108 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
327
  lines=25,
328
  max_lines=30
329
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
  # Event handlers
332
- def on_analyze(file, age_val, gender_val, notes):
333
- """Handle basic analysis and store transcript"""
334
  result = analyze_transcript(file, age_val, gender_val, notes)
335
  transcript = process_file(file) if file else ""
336
- return result, transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
  def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
339
  """Handle targeted analysis"""
340
- return targeted_analysis(transcript, question, age_val, gender_val, notes)
 
 
341
 
342
  def on_question_template_change(template):
343
  """Handle question template selection"""
@@ -348,10 +1186,10 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
348
  def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
349
  """Handle quick analysis with multiple questions"""
350
  if not transcript or not transcript.strip():
351
- return "Please provide a transcript first."
352
 
353
  if not questions:
354
- return "Please select at least one question to analyze."
355
 
356
  # Add SLP notes to the prompt if provided
357
  notes_section = ""
@@ -362,40 +1200,150 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
362
  {notes.strip()}
363
  """
364
 
365
- # Create quick analysis prompt
366
  questions_text = "\n".join([f"- {q}" for q in questions])
367
  prompt = f"""
368
- You are a speech-language pathologist conducting a quick analysis of a speech transcript.
369
 
370
  Patient: {age_val}-year-old {gender_val}
371
 
372
  TRANSCRIPT:
373
  {transcript}{notes_section}
374
 
375
- Please provide a brief analysis addressing these specific areas:
376
  {questions_text}
377
 
378
- For each area, provide:
379
- 1. Brief observations
380
- 2. Specific examples from the transcript
381
- 3. Clinical significance
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- Keep each section concise but informative.
384
  """
385
 
386
- return call_claude_api(prompt)
 
 
387
 
388
  # Connect event handlers
389
- analyze_btn.click(
390
- on_analyze,
391
  inputs=[file_upload, age, gender, slp_notes],
392
- outputs=[output, transcript_input]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  )
394
 
395
  targeted_analyze_btn.click(
396
  on_targeted_analyze,
397
  inputs=[transcript_input, custom_question, age, gender, slp_notes],
398
- outputs=[targeted_output]
399
  )
400
 
401
  question_templates.change(
@@ -407,11 +1355,15 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
407
  quick_analyze_btn.click(
408
  on_quick_analyze,
409
  inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
410
- outputs=[quick_output]
411
  )
412
 
413
  if __name__ == "__main__":
414
  print("πŸš€ Starting Enhanced CASL Analysis Tool...")
 
 
 
 
415
  if not ANTHROPIC_API_KEY:
416
  print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
417
  print(" For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
@@ -419,4 +1371,29 @@ if __name__ == "__main__":
419
  else:
420
  print("βœ… Claude API configured")
421
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  app.launch(show_api=False)
 
3
  import os
4
  import logging
5
  import requests
6
+ import re
7
+ import tempfile
8
+ import numpy as np
9
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
 
14
  # Anthropic API key - can be set as HuggingFace secret or environment variable
15
  ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
16
 
17
+ # Try to import transcription libraries
18
+ try:
19
+ from speechbrain.pretrained import EncoderDecoderASR
20
+ import torch
21
+ SPEECHBRAIN_AVAILABLE = True
22
+ logger.info("SpeechBrain available for transcription")
23
+ except ImportError as e:
24
+ logger.warning(f"SpeechBrain not available: {e}")
25
+ SPEECHBRAIN_AVAILABLE = False
26
+
27
+ # Try to import video processing
28
+ try:
29
+ import moviepy.editor as mp
30
+ MOVIEPY_AVAILABLE = True
31
+ logger.info("MoviePy available for video processing")
32
+ except ImportError as e:
33
+ logger.warning(f"MoviePy not available: {e}")
34
+ MOVIEPY_AVAILABLE = False
35
+
36
+ # Try to import speaker diarization
37
+ try:
38
+ from pyannote.audio import Pipeline
39
+ from pyannote.audio.pipelines.utils.hook import ProgressHook
40
+ DIARIZATION_AVAILABLE = True
41
+ logger.info("Pyannote.audio available for speaker diarization")
42
+ except ImportError as e:
43
+ logger.warning(f"Pyannote.audio not available: {e}")
44
+ DIARIZATION_AVAILABLE = False
45
+
46
+ # Try to import sentiment and emotion analysis
47
+ try:
48
+ from transformers import pipeline
49
+ SENTIMENT_AVAILABLE = True
50
+ logger.info("Transformers available for sentiment analysis")
51
+ except ImportError as e:
52
+ logger.warning(f"Transformers not available: {e}")
53
+ SENTIMENT_AVAILABLE = False
54
+
55
+ # Initialize models if available
56
+ asr_model = None
57
+ sentiment_model = None
58
+ emotion_model = None
59
+ diarization_pipeline = None
60
+
61
+ if SPEECHBRAIN_AVAILABLE:
62
+ try:
63
+ asr_model = EncoderDecoderASR.from_hparams(
64
+ source="speechbrain/asr-crdnn-rnnlm-librispeech",
65
+ savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
66
+ )
67
+ logger.info("ASR model loaded successfully")
68
+ except Exception as e:
69
+ logger.error(f"Error loading ASR model: {e}")
70
+ SPEECHBRAIN_AVAILABLE = False
71
+
72
+ if SENTIMENT_AVAILABLE:
73
+ try:
74
+ sentiment_model = pipeline(
75
+ "sentiment-analysis",
76
+ model="cardiffnlp/twitter-roberta-base-sentiment-latest",
77
+ top_k=None
78
+ )
79
+ emotion_model = pipeline(
80
+ "text-classification",
81
+ model="j-hartmann/emotion-english-distilroberta-base",
82
+ top_k=None
83
+ )
84
+ logger.info("Sentiment and emotion models loaded")
85
+ except Exception as e:
86
+ logger.error(f"Error loading sentiment models: {e}")
87
+ SENTIMENT_AVAILABLE = False
88
+
89
+ if DIARIZATION_AVAILABLE:
90
+ try:
91
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
92
+ if HF_TOKEN:
93
+ diarization_pipeline = Pipeline.from_pretrained(
94
+ "pyannote/speaker-diarization@2.1",
95
+ use_auth_token=HF_TOKEN
96
+ )
97
+ logger.info("Speaker diarization pipeline loaded")
98
+ else:
99
+ logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
100
+ except Exception as e:
101
+ logger.error(f"Error loading diarization pipeline: {e}")
102
+
103
  # Check if API key is available
104
  if ANTHROPIC_API_KEY:
105
  logger.info("Claude API key found")
106
  else:
107
  logger.warning("Claude API key not found - using demo mode")
108
 
109
+ def validate_analysis_completeness(response_text):
110
+ """Validate that all 12 sections are present in the analysis"""
111
+ required_sections = [
112
+ "1. SPEECH FACTORS",
113
+ "2. LANGUAGE SKILLS ASSESSMENT",
114
+ "3. COMPLEX SENTENCE ANALYSIS",
115
+ "4. FIGURATIVE LANGUAGE ANALYSIS",
116
+ "5. PRAGMATIC LANGUAGE ASSESSMENT",
117
+ "6. VOCABULARY AND SEMANTIC ANALYSIS",
118
+ "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
119
+ "8. COGNITIVE-LINGUISTIC FACTORS",
120
+ "9. FLUENCY AND RHYTHM ANALYSIS",
121
+ "10. QUANTITATIVE METRICS",
122
+ "11. CLINICAL IMPLICATIONS",
123
+ "12. PROGNOSIS AND SUMMARY"
124
+ ]
125
+
126
+ missing_sections = []
127
+ for section in required_sections:
128
+ if section not in response_text:
129
+ missing_sections.append(section)
130
+
131
+ if missing_sections:
132
+ print(f"\n⚠️ MISSING SECTIONS: {missing_sections}")
133
+ return False
134
+ else:
135
+ print(f"\nβœ… ALL 12 SECTIONS PRESENT")
136
+ return True
137
+
138
+ def call_claude_api_with_continuation(prompt, max_continuations=3):
139
+ """Call Claude API with continuation prompting to ensure complete responses"""
140
  if not ANTHROPIC_API_KEY:
141
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
142
 
143
  try:
144
+ full_response = ""
145
+ continuation_count = 0
 
146
 
147
+ # Add continuation instruction to original prompt
148
+ initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
149
+
150
+ while continuation_count <= max_continuations:
151
+ if continuation_count == 0:
152
+ current_prompt = initial_prompt
153
+ else:
154
+ # For continuations, provide context about what was already covered
155
+ current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1} of {max_continuations}):\n\nIMPORTANT: Do not repeat what you've already written. Continue with the next section or complete any unfinished sections. If you're done, do not include <CONTINUE>. Provide the remaining analysis sections. Make sure to complete ALL 12 sections of the analysis."
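+ # Note: the Messages API is stateless and each continuation re-sends only the
+ # original user prompt; the model never sees its earlier output, so the
+ # instruction above is the only guard against repeated sections.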
156
+
157
+ headers = {
158
+ "Content-Type": "application/json",
159
+ "x-api-key": ANTHROPIC_API_KEY,
160
+ "anthropic-version": "2023-06-01"
161
+ }
162
+
163
+ data = {
164
+ "model": "claude-3-5-sonnet-20241022",
165
+ "max_tokens": 4096,
166
+ "messages": [
167
+ {
168
+ "role": "user",
169
+ "content": current_prompt
170
+ }
171
+ ]
172
+ }
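+ # max_tokens caps each part at 4096 tokens, which is why a full 12-section
+ # analysis may need several <CONTINUE> rounds.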
173
 
174
+ response = requests.post(
175
+ "https://api.anthropic.com/v1/messages",
176
+ headers=headers,
177
+ json=data,
178
+ timeout=90
179
+ )
180
+
181
+ if response.status_code == 200:
182
+ response_json = response.json()
183
+ response_text = response_json['content'][0]['text']
184
+
185
+ # Log response for debugging
186
+ print(f"\n=== PART {continuation_count + 1} RESPONSE ===")
187
+ print(f"Length: {len(response_text)} characters")
188
+ print(f"Contains CONTINUE: {'<CONTINUE>' in response_text}")
189
+ print(f"First 200 chars: {response_text[:200]}...")
190
+ print(f"Last 200 chars: {response_text[-200:]}...")
191
+ print("=" * 50)
192
+
193
+ # Simple string combination - no complex processing
194
+ if continuation_count == 0:
195
+ full_response = response_text
196
+ else:
197
+ # Just add a newline and append the continuation
198
+ full_response += "\n\n" + response_text
199
+
200
+ # Check if response indicates continuation is needed
201
+ needs_continuation = "<CONTINUE>" in response_text
202
+
203
+ print(f"Needs continuation: {needs_continuation}")
204
+ print(f"Continuation count: {continuation_count}/{max_continuations}")
205
+
206
+ # Continue if <CONTINUE> is present and we haven't reached max
207
+ if needs_continuation and continuation_count < max_continuations:
208
+ # Remove the CONTINUE marker
209
+ full_response = full_response.replace("<CONTINUE>", "")
210
+ continuation_count += 1
211
+ logger.info(f"Continuing analysis (attempt {continuation_count}/{max_continuations})")
212
+ continue
213
+ else:
214
+ # Clean up any remaining continuation markers
215
+ full_response = full_response.replace("<CONTINUE>", "")
216
+ break
217
+ else:
218
+ logger.error(f"Claude API error: {response.status_code} - {response.text}")
219
+ return f"❌ Claude API Error: {response.status_code}"
220
+
221
  except Exception as e:
222
  logger.error(f"Error calling Claude API: {str(e)}")
223
  return f"❌ Error: {str(e)}"
224
+
225
+ # Add completion indicator
226
+ if continuation_count > 0:
227
+ full_response += f"\n\n[Analysis completed in {continuation_count + 1} parts]"
228
+
229
+ # Log final response for debugging
230
+ print(f"\n=== FINAL COMPLETE RESPONSE ===")
231
+ print(f"Total length: {len(full_response)} characters")
232
+ print(f"Number of parts: {continuation_count + 1}")
233
+ print("=" * 50)
234
+
235
+ # Print the entire final response for debugging
236
+ print(f"\n=== ENTIRE FINAL RESPONSE ===")
237
+ print(full_response)
238
+ print("=" * 50)
239
+
240
+ return full_response
241
+
242
+ def call_claude_api(prompt):
243
+ """Call Claude API directly (legacy function for backward compatibility)"""
244
+ return call_claude_api_with_continuation(prompt, max_continuations=0)
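+ # max_continuations=0 makes a single API call, reproducing the old behavior.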
245
+
246
+ def extract_audio_from_video(video_path):
247
+ """Extract audio from video file"""
248
+ if not MOVIEPY_AVAILABLE:
249
+ return None, "MoviePy not available for video processing"
250
+
251
+ try:
252
+ temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
253
+ temp_audio_path = temp_audio.name
254
+ temp_audio.close()
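+ # Close the handle before MoviePy writes to the path; an open
+ # NamedTemporaryFile cannot be reopened by another writer on Windows.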
255
+
256
+ video = mp.VideoFileClip(video_path)
257
+ audio = video.audio
258
+
259
+ if audio is None:
260
+ return None, "No audio track found in video file"
261
+
262
+ audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
263
+ video.close()
264
+ audio.close()
265
+
266
+ return temp_audio_path, "Audio extracted successfully"
267
+
268
+ except Exception as e:
269
+ logger.error(f"Error extracting audio: {e}")
270
+ return None, f"Error extracting audio: {str(e)}"
271
+
272
+ def perform_speaker_diarization(audio_path):
273
+ """Perform speaker diarization on audio file"""
274
+ if not DIARIZATION_AVAILABLE or not diarization_pipeline:
275
+ return None, "Speaker diarization not available"
276
+
277
+ try:
278
+ with ProgressHook() as hook:
279
+ diarization = diarization_pipeline(audio_path, hook=hook)
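+ # The pipeline returns a pyannote Annotation; itertracks(yield_label=True)
+ # yields (segment, track_id, speaker_label) tuples.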
280
+
281
+ speaker_segments = []
282
+ for turn, _, speaker in diarization.itertracks(yield_label=True):
283
+ speaker_segments.append({
284
+ 'start': turn.start,
285
+ 'end': turn.end,
286
+ 'speaker': speaker,
287
+ 'duration': turn.end - turn.start
288
+ })
289
+
290
+ logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
291
+ return speaker_segments, "Diarization completed successfully"
292
+
293
+ except Exception as e:
294
+ logger.error(f"Error in diarization: {e}")
295
+ return None, f"Diarization error: {str(e)}"
296
+
297
+ def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
298
+ """Transcribe audio with timestamps, sentiment, and metadata"""
299
+ if not audio_file:
300
+ return None, "No audio file provided"
301
+
302
+ if not SPEECHBRAIN_AVAILABLE:
303
+ return None, "SpeechBrain not available for transcription"
304
+
305
+ try:
306
+ # Check if it's a video file
307
+ file_extension = os.path.splitext(audio_file)[1].lower()
308
+ if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
309
+ processed_audio, status = extract_audio_from_video(audio_file)
310
+ if not processed_audio:
311
+ return None, status
312
+ else:
313
+ processed_audio = audio_file
314
+
315
+ # Perform speaker diarization if enabled
316
+ speaker_segments = None
317
+ diarization_status = ""
318
+ if enable_diarization:
319
+ speaker_segments, diarization_status = perform_speaker_diarization(processed_audio)
320
+
321
+ # Get transcription
322
+ transcript = asr_model.transcribe_file(processed_audio)
323
+
324
+ # Clean up temporary file if created
325
+ if processed_audio != audio_file and os.path.exists(processed_audio):
326
+ try:
327
+ os.unlink(processed_audio)
328
+ except OSError:  # best-effort cleanup of the temporary audio file
329
+ pass
330
+
331
+ # Split into sentences and add metadata
332
+ sentences = re.split(r'[.!?]+', transcript)
333
+ sentences = [s.strip() for s in sentences if s.strip()]
334
+
335
+ rich_transcript = []
336
+ current_time = 0
337
+
338
+ for i, sentence in enumerate(sentences):
339
+ timestamp = current_time + (i * 2)
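+ # Rough heuristic: assume ~2 seconds per sentence, since this ASR
+ # interface returns plain text without word-level timestamps.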
340
+
341
+ # Determine speaker
342
+ speaker = "UNKNOWN"
343
+ if speaker_segments:
344
+ for segment in speaker_segments:
345
+ if segment['start'] <= timestamp <= segment['end']:
346
+ speaker = segment['speaker']
347
+ break
348
+
349
+ # Sentiment and emotion analysis
350
+ sentiment = {'label': 'neutral', 'score': 0.5}
351
+ emotion = {'label': 'neutral', 'score': 0.5}
352
+
353
+ if SENTIMENT_AVAILABLE:
354
+ try:
355
+ sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
356
+ sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else sentiment
357
+
358
+ emotion_result = emotion_model(sentence)[0] if emotion_model else None
359
+ emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else emotion
360
+ except Exception:  # fall back to neutral defaults if model inference fails
361
+ pass
362
+
363
+ # Word metrics
364
+ words = sentence.split()
365
+ word_count = len(words)
366
+ avg_word_length = np.mean([len(word) for word in words]) if words else 0
367
+ speech_rate = word_count * 30  # ~2 s per sentence assumed above, so wpm = word_count / 2 * 60
368
+
369
+ rich_transcript.append({
370
+ 'timestamp': timestamp,
371
+ 'speaker': speaker,
372
+ 'sentence': sentence,
373
+ 'word_count': word_count,
374
+ 'avg_word_length': round(avg_word_length, 2),
375
+ 'speech_rate_wpm': round(speech_rate, 1),
376
+ 'sentiment': sentiment['label'],
377
+ 'sentiment_score': round(sentiment['score'], 3),
378
+ 'emotion': emotion['label'],
379
+ 'emotion_score': round(emotion['score'], 3)
380
+ })
381
+
382
+ current_time = timestamp
383
+
384
+ status_msg = f"Transcription completed successfully"
385
+ if diarization_status:
386
+ status_msg += f" {diarization_status}"
387
+
388
+ return rich_transcript, status_msg
389
+
390
+ except Exception as e:
391
+ logger.error(f"Error in transcription: {e}")
392
+ return None, f"Transcription error: {str(e)}"
393
+
394
+ def format_rich_transcript(rich_transcript):
395
+ """Format rich transcript for display"""
396
+ if not rich_transcript:
397
+ return "No transcript data available"
398
+
399
+ formatted_lines = []
400
+ for entry in rich_transcript:
401
+ timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
402
+
403
+ line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
404
+ line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
405
+ line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
406
+ line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"
407
+
408
+ formatted_lines.append(line)
409
+
410
+ return '\n'.join(formatted_lines)
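+ # Illustrative output line (values invented for the example):
+ # [00:12] *SPEAKER_00: I went to the store [Words: 6, Rate: 180.0wpm] [Sentiment: positive (0.871)] [Emotion: joy (0.654)]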
411
+
412
+ def calculate_slp_metrics(rich_transcript):
413
+ """Calculate comprehensive SLP metrics"""
414
+ if not rich_transcript:
415
+ return {}
416
+
417
+ # Basic metrics
418
+ total_sentences = len(rich_transcript)
419
+ total_words = sum(entry['word_count'] for entry in rich_transcript)
420
+ total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0
421
+
422
+ # Speaker analysis
423
+ speakers = {}
424
+ for entry in rich_transcript:
425
+ speaker = entry['speaker']
426
+ if speaker not in speakers:
427
+ speakers[speaker] = {
428
+ 'sentences': 0,
429
+ 'words': 0,
430
+ 'sentiments': [],
431
+ 'emotions': []
432
+ }
433
+ speakers[speaker]['sentences'] += 1
434
+ speakers[speaker]['words'] += entry['word_count']
435
+ speakers[speaker]['sentiments'].append(entry['sentiment'])
436
+ speakers[speaker]['emotions'].append(entry['emotion'])
437
+
438
+ # Word-level analysis
439
+ all_words = []
440
+ for entry in rich_transcript:
441
+ words = entry['sentence'].lower().split()
442
+ all_words.extend(words)
443
+
444
+ # Word frequency distribution
445
+ word_freq = {}
446
+ for word in all_words:
447
+ word_clean = re.sub(r'[^\w\s]', '', word)
448
+ if word_clean:
449
+ word_freq[word_clean] = word_freq.get(word_clean, 0) + 1
450
+
451
+ # Vocabulary diversity (Type-Token Ratio)
452
+ unique_words = len(set(all_words))
453
+ ttr = unique_words / total_words if total_words > 0 else 0
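+ # TTR ranges from 0 to 1; higher means more diverse vocabulary. It is
+ # length-sensitive, so compare only across samples of similar size.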
454
+
455
+ # Speech rate analysis
456
+ speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
457
+ avg_speech_rate = np.mean(speech_rates) if speech_rates else 0
458
+
459
+ # Sentiment analysis
460
+ sentiment_counts = {}
461
+ emotion_counts = {}
462
+ for entry in rich_transcript:
463
+ sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
464
+ emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1
465
+
466
+ # Sentence complexity
467
+ sentence_lengths = [entry['word_count'] for entry in rich_transcript]
468
+ avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
469
+
470
+ # Pause analysis
471
+ pauses = []
472
+ for i in range(1, len(rich_transcript)):
473
+ pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
474
+ pauses.append(pause)
475
+
476
+ avg_pause_duration = np.mean(pauses) if pauses else 0
477
+
478
+ return {
479
+ 'total_sentences': total_sentences,
480
+ 'total_words': total_words,
481
+ 'total_duration_seconds': total_duration,
482
+ 'unique_words': unique_words,
483
+ 'type_token_ratio': round(ttr, 3),
484
+ 'avg_sentence_length': round(avg_sentence_length, 1),
485
+ 'avg_speech_rate_wpm': round(avg_speech_rate, 1),
486
+ 'avg_pause_duration': round(avg_pause_duration, 1),
487
+ 'sentiment_distribution': sentiment_counts,
488
+ 'emotion_distribution': emotion_counts,
489
+ 'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
490
+ 'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
491
+ 'speakers': speakers,
492
+ 'speaker_count': len(speakers)
493
+ }
494
 
495
  def process_file(file):
496
  """Process uploaded file"""
 
509
  except Exception as e:
510
  return f"Error reading file: {str(e)}"
511
 
512
+ def analyze_transcript_content(transcript_content, age, gender, slp_notes):
513
+ """Analyze transcript content with comprehensive quantification and detailed citations"""
514
+ if not transcript_content or len(transcript_content.strip()) < 50:
515
+ return "Error: Please provide a longer transcript for analysis."
516
 
517
  # Add SLP notes to the prompt if provided
518
  notes_section = ""
 
523
  {slp_notes.strip()}
524
  """
525
 
526
+ # Enhanced comprehensive analysis prompt with detailed quantification
527
  prompt = f"""
528
+ You are a speech-language pathologist conducting a COMPREHENSIVE CASL assessment. Provide a SINGLE, DETAILED analysis that quantifies EVERY occurrence and cites specific examples.
529
+
530
  Patient: {age}-year-old {gender}
531
 
532
  TRANSCRIPT:
533
+ {transcript_content}{notes_section}
534
 
535
+ INSTRUCTIONS: Provide ONE comprehensive analysis covering ALL areas below. QUANTIFY EVERYTHING with exact counts and cite SPECIFIC examples from the transcript. Be thorough and detailed. COMPLETE ALL 12 SECTIONS.
536
+
537
+ COMPREHENSIVE CASL ANALYSIS:
538
+
539
+ 1. SPEECH FACTORS (with EXACT counts and specific citations):
540
+
541
+ A. Fluency Issues:
542
+ - Count and cite EVERY filler word ("um", "uh", "like", "you know", etc.)
543
+ - Count and cite EVERY false start/self-correction
544
+ - Count and cite EVERY repetition of words/phrases
545
+ - Count and cite EVERY revision/restart
546
+ - Calculate percentage of disfluent speech
547
+
548
+ B. Word Retrieval Issues:
549
+ - Count and cite EVERY instance of circumlocution
550
+ - Count and cite EVERY incomplete thought/abandoned utterance
551
+ - Count and cite EVERY word-finding pause
552
+ - Count and cite EVERY use of generic terms ("thing", "stuff", etc.)
553
+
554
+ C. Grammatical Errors:
555
+ - Count and cite EVERY grammatical error (verb tense, subject-verb agreement, etc.)
556
+ - Count and cite EVERY syntactic error
557
+ - Count and cite EVERY morphological error
558
+ - Count and cite EVERY run-on sentence
559
+
560
+ 2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
561
+
562
+ A. Lexical/Semantic Skills:
563
+ - Count total unique words vs. total words (Type-Token Ratio)
564
+ - List and categorize vocabulary by sophistication level
565
+ - Identify semantic relationships demonstrated
566
+ - Assess word retrieval strategies used
567
+ - Evaluate semantic precision
568
+
569
+ B. Syntactic Skills:
570
+ - Count sentence types (simple, compound, complex, compound-complex)
571
+ - Calculate average sentence length
572
+ - Identify syntactic patterns and errors
573
+ - Assess clause complexity and embedding
574
+
575
+ C. Supralinguistic Skills:
576
+ - Identify and cite examples of:
577
+ * Cause-effect relationships
578
+ * Inferences made
579
+ * Non-literal language use
580
+ * Problem-solving language
581
+ * Metalinguistic awareness
582
+
583
+ 3. COMPLEX SENTENCE ANALYSIS (with exact counts):
584
+
585
+ A. Coordinating Conjunctions:
586
+ - Count and cite EVERY use of: and, but, or, so, yet, for, nor
587
+ - Analyze patterns of use
588
+ - Assess age-appropriateness
589
+
590
+ B. Subordinating Conjunctions:
591
+ - Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who, whom, whose
592
+ - Analyze clause complexity
593
+ - Assess embedding depth
594
+
595
+ C. Sentence Structure Analysis:
596
+ - Count each sentence type with examples
597
+ - Calculate complexity ratios
598
+ - Assess developmental appropriateness
599
+
600
+ 4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
601
+
602
+ A. Similes:
603
+ - Count and cite EVERY simile (comparisons using "like" or "as")
604
+ - Analyze creativity and appropriateness
605
+
606
+ B. Metaphors:
607
+ - Count and cite EVERY metaphor (direct comparisons)
608
+ - Assess comprehension and use
609
+
610
+ C. Idioms:
611
+ - Count and cite EVERY idiom used
612
+ - Assess comprehension and appropriate use
613
+
614
+ D. Non-literal Language:
615
+ - Count and cite EVERY instance of sarcasm, humor, irony
616
+ - Assess comprehension level
617
+
618
+ 5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
619
+
620
+ A. Turn-taking:
621
+ - Analyze conversational flow
622
+ - Count interruptions or overlaps
623
+ - Assess reciprocity
624
+
625
+ B. Topic Management:
626
+ - Count topic shifts
627
+ - Assess topic maintenance
628
+ - Evaluate topic introduction
629
+
630
+ C. Social Communication:
631
+ - Assess register appropriateness
632
+ - Evaluate politeness markers
633
+ - Analyze social awareness
634
+
635
+ 6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
636
+
637
+ A. Vocabulary Diversity:
638
+ - Calculate Type-Token Ratio
639
+ - List most frequent words
640
+ - Assess vocabulary sophistication
641
+
642
+ B. Semantic Relationships:
643
+ - Count and cite examples of:
644
+ * Synonyms/antonyms
645
+ * Categories/hierarchies
646
+ * Part-whole relationships
647
+ * Cause-effect vocabulary
648
+
649
+ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
650
+
651
+ A. Morphological Markers:
652
+ - Count and cite use of:
653
+ * Plurals (-s, -es)
654
+ * Possessives
655
+ * Verb tenses
656
+ * Derivational morphemes
657
+
658
+ B. Phonological Patterns:
659
+ - Identify speech sound errors
660
+ - Count phonological processes
661
+ - Assess syllable structure
662
+
663
+ 8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
664
+
665
+ A. Working Memory:
666
+ - Assess sentence length complexity
667
+ - Analyze information retention
668
+ - Evaluate processing demands
669
+
670
+ B. Processing Speed:
671
+ - Analyze speech rate
672
+ - Assess response time
673
+ - Evaluate efficiency
674
+
675
+ C. Executive Function:
676
+ - Assess planning and organization
677
+ - Evaluate self-monitoring
678
+ - Analyze cognitive flexibility
679
+
680
+ 9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
681
+
682
+ A. Speech Rate:
683
+ - Calculate words per minute
684
+ - Analyze rate variability
685
+ - Assess naturalness
686
+
687
+ B. Pause Patterns:
688
+ - Count and analyze pauses
689
+ - Assess pause function
690
+ - Evaluate rhythm
691
+
692
+ 10. QUANTITATIVE METRICS:
693
+
694
+ - Total words: [count]
695
+ - Total sentences: [count]
696
+ - Average sentence length: [calculation]
697
+ - Type-Token Ratio: [calculation]
698
+ - Disfluency rate: [percentage]
699
+ - Error rate: [percentage]
700
+ - Vocabulary diversity score: [calculation]
701
+
702
+ 11. CLINICAL IMPLICATIONS:
703
+
704
+ A. Strengths:
705
+ - List specific strengths with evidence
706
+ - Identify areas of competence
707
+
708
+ B. Areas of Need:
709
+ - Prioritize intervention targets
710
+ - Provide specific examples
711
+
712
+ C. Treatment Recommendations:
713
+ - List 5-7 specific intervention strategies
714
+ - Include intensity and frequency recommendations
715
+ - Address all identified areas of need
716
+
717
+ 12. PROGNOSIS AND SUMMARY:
718
+
719
+ - Overall communication profile
720
+ - Developmental appropriateness
721
+ - Impact on academic/social functioning
722
+ - Expected progress with intervention
723
+
724
+ FORMAT REQUIREMENTS:
725
+ - Use bullet points for organization
726
+ - Include exact counts for everything
727
+ - Cite specific quotes from transcript
728
+ - Use clear headings and subheadings
729
+ - Provide percentages and ratios where applicable
730
+ - Be comprehensive but organized
731
+ - Focus on clinical relevance
732
+ - COMPLETE ALL 12 SECTIONS
733
+
734
+ SECTION CHECKLIST - COMPLETE ALL:
735
+ β–‘ 1. SPEECH FACTORS (A, B, C)
736
+ β–‘ 2. LANGUAGE SKILLS ASSESSMENT (A, B, C)
737
+ β–‘ 3. COMPLEX SENTENCE ANALYSIS (A, B, C)
738
+ β–‘ 4. FIGURATIVE LANGUAGE ANALYSIS (A, B, C, D)
739
+ β–‘ 5. PRAGMATIC LANGUAGE ASSESSMENT (A, B, C)
740
+ β–‘ 6. VOCABULARY AND SEMANTIC ANALYSIS (A, B)
741
+ β–‘ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (A, B)
742
+ β–‘ 8. COGNITIVE-LINGUISTIC FACTORS (A, B, C)
743
+ β–‘ 9. FLUENCY AND RHYTHM ANALYSIS (A, B)
744
+ β–‘ 10. QUANTITATIVE METRICS
745
+ β–‘ 11. CLINICAL IMPLICATIONS (A, B, C)
746
+ β–‘ 12. PROGNOSIS AND SUMMARY
747
+
748
+ CRITICAL: If you cannot complete all 12 sections in one response, end with <CONTINUE> and continue with the remaining sections. Do not skip any sections. Use the checklist to ensure all sections are completed.
749
  """
750
 
751
  # Get analysis from Claude API
752
+ result = call_claude_api_with_continuation(prompt, max_continuations=5)
753
  return result
754
 
755
+ def analyze_transcript(file, age, gender, slp_notes):
756
+ """Analyze transcript from file upload"""
757
+ if file is None:
758
+ return "Please upload a transcript file first."
759
+
760
+ # Get transcript content
761
+ transcript = process_file(file)
762
+ if transcript.startswith("Error") or transcript.startswith("Please"):
763
+ return transcript
764
+
765
+ return analyze_transcript_content(transcript, age, gender, slp_notes)
766
+
767
  def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
768
+ """Perform targeted analysis based on custom questions with comprehensive detail"""
769
  if not transcript or not transcript.strip():
770
  return "Please provide a transcript first."
771
 
 
781
  {slp_notes.strip()}
782
  """
783
 
784
+ # Enhanced targeted analysis prompt with comprehensive detail
785
  prompt = f"""
786
+ You are a speech-language pathologist conducting a DETAILED targeted analysis of a speech transcript.
787
 
788
  Patient: {age}-year-old {gender}
789
 
 
793
  SPECIFIC QUESTION FOR ANALYSIS:
794
  {custom_question.strip()}
795
 
796
+ INSTRUCTIONS: Provide a COMPREHENSIVE, DETAILED analysis that directly addresses this specific question. Include:
797
+ - EXACT counts and quantification
798
+ - SPECIFIC citations from the transcript
799
+ - DETAILED examples for every observation
800
+ - PERCENTAGES and ratios where applicable
801
+ - CLINICAL significance of findings
802
+ - AGE-APPROPRIATE assessment
803
+
804
+ ANALYSIS REQUIREMENTS:
805
 
806
+ 1. QUANTIFICATION:
807
+ - Count every relevant occurrence
808
+ - Calculate percentages and ratios
809
+ - Provide specific numbers for all observations
810
 
811
+ 2. EVIDENCE:
812
+ - Cite exact quotes from the transcript
813
+ - Provide line-by-line examples
814
+ - Include specific timestamps or context
815
+
816
+ 3. DETAILED EXAMPLES:
817
+ - Give multiple examples for each pattern
818
+ - Show variations in the pattern
819
+ - Demonstrate the range of severity
820
+
821
+ 4. CLINICAL ASSESSMENT:
822
+ - Assess severity level
823
+ - Compare to age expectations
824
+ - Identify clinical significance
825
+ - Suggest intervention implications
826
+
827
+ 5. COMPREHENSIVE COVERAGE:
828
+ - Address all aspects of the question
829
+ - Consider related language areas
830
+ - Include both strengths and weaknesses
831
+ - Provide developmental context
832
+
833
+ ANALYSIS STRUCTURE:
834
+
835
+ A. DIRECT ANSWER TO QUESTION:
836
+ - Provide a clear, direct answer
837
+ - Include quantification and severity assessment
838
+
839
+ B. DETAILED EVIDENCE:
840
+ - List every relevant example with exact quotes
841
+ - Provide counts and percentages
842
+ - Show patterns and variations
843
+
844
+ C. PATTERN ANALYSIS:
845
+ - Identify underlying patterns
846
+ - Analyze frequency and consistency
847
+ - Assess variability across the transcript
848
+
849
+ D. DEVELOPMENTAL ASSESSMENT:
850
+ - Compare to age-appropriate expectations
851
+ - Identify developmental level
852
+ - Assess progress and challenges
853
+
854
+ E. CLINICAL IMPLICATIONS:
855
+ - Impact on communication
856
+ - Effect on academic/social functioning
857
+ - Priority for intervention
858
+
859
+ F. INTERVENTION CONSIDERATIONS:
860
+ - Specific strategies to address the issue
861
+ - Intensity and frequency recommendations
862
+ - Expected outcomes and timeline
863
+
864
+ FORMAT REQUIREMENTS:
865
+ - Use clear headings and subheadings
866
+ - Include bullet points for organization
867
+ - Provide exact counts and percentages
868
+ - Cite specific quotes with context
869
+ - Be thorough and comprehensive
870
+ - Focus on clinical relevance and utility
871
+
872
+ Remember: This should be a DETAILED, COMPREHENSIVE analysis that thoroughly addresses the specific question with quantification, evidence, and clinical implications.
873
  """
874
 
875
  # Get targeted analysis from Claude API
876
+ result = call_claude_api_with_continuation(prompt, max_continuations=3)
877
  return result
878
 
879
  # Create enhanced interface with tabs
880
  with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
881
 
882
  gr.Markdown("# πŸ—£οΈ Enhanced CASL Analysis Tool")
883
+ gr.Markdown("Upload a speech transcript, paste text, or transcribe audio/video and get instant CASL assessment results with targeted analysis options.")
884
 
885
  # Store transcript globally
886
  transcript_state = gr.State("")
 
890
  with gr.Tab("οΏ½οΏ½οΏ½οΏ½ Basic Analysis"):
891
  with gr.Row():
892
  with gr.Column():
893
+ gr.Markdown("### Input Options")
894
 
895
+ with gr.Tabs():
896
+ with gr.Tab("πŸ“ File Upload"):
897
+ file_upload = gr.File(
898
+ label="Upload Transcript File",
899
+ file_types=[".txt", ".cha"]
900
+ )
901
+
902
+ analyze_file_btn = gr.Button(
903
+ "πŸ” Analyze File",
904
+ variant="primary"
905
+ )
906
+
907
+ with gr.Tab("πŸ“ Text Input"):
908
+ text_input = gr.Textbox(
909
+ label="Paste Transcript Here",
910
+ placeholder="Paste your transcript text here...",
911
+ lines=10
912
+ )
913
+
914
+ analyze_text_btn = gr.Button(
915
+ "πŸ” Analyze Text",
916
+ variant="primary"
917
+ )
918
+
919
+ with gr.Tab("🎀 Audio/Video Transcription"):
920
+ audio_input = gr.File(
921
+ label="Upload Audio/Video File",
922
+ file_types=["audio", "video"]
923
+ )
924
+
925
+ transcribe_btn = gr.Button(
926
+ "🎀 Transcribe & Analyze",
927
+ variant="primary"
928
+ )
929
+
930
+ transcription_status = gr.Markdown("")
931
+
932
+ gr.Markdown("### Patient Information")
933
 
934
  age = gr.Number(
935
  label="Patient Age",
 
949
  placeholder="Enter any additional clinical observations, context, or notes...",
950
  lines=3
951
  )
 
952
 
953
  with gr.Column():
954
  gr.Markdown("### Analysis Results")
 
959
  lines=25,
960
  max_lines=30
961
  )
962
+
963
+ analysis_progress = gr.Markdown("")
964
 
965
  # Tab 2: Targeted Analysis
966
  with gr.Tab("🎯 Targeted Analysis"):
 
988
  "What narrative organization skills are evident?",
989
  "What specific intervention targets would you recommend?",
990
  "How does this patient's language compare to typical development?",
991
+ "What evidence suggests cognitive-linguistic strengths/weaknesses?",
992
+ "Analyze the use of conjunctions and complex sentences",
993
+ "Identify and analyze figurative language use"
994
  ],
995
  label="Question Templates (Optional)",
996
  value="Select a template or write your own..."
 
1016
  lines=25,
1017
  max_lines=30
1018
  )
1019
+
1020
+ targeted_progress = gr.Markdown("")
1021
 
1022
  # Tab 3: Quick Questions
1023
  with gr.Tab("⚑ Quick Questions"):
 
1042
  "Narrative structure",
1043
  "Vocabulary level",
1044
  "Sentence complexity",
1045
+ "Speech rate patterns",
1046
+ "Complex sentence analysis",
1047
+ "Figurative language use",
1048
+ "Morphological markers",
1049
+ "Phonological patterns",
1050
+ "Turn-taking skills",
1051
+ "Topic maintenance",
1052
+ "Social communication",
1053
+ "Cognitive-linguistic factors",
1054
+ "Working memory demands",
1055
+ "Executive function skills",
1056
+ "Metalinguistic awareness",
1057
+ "Academic language use"
1058
  ],
1059
  label="Select questions to analyze:",
1060
  value=[]
 
1074
  lines=25,
1075
  max_lines=30
1076
  )
1077
+
1078
+ quick_progress = gr.Markdown("")
1079
+
1080
+ # Tab 4: Advanced Transcription
1081
+ with gr.Tab("🎀 Advanced Transcription"):
1082
+ with gr.Row():
1083
+ with gr.Column(scale=1):
1084
+ gr.Markdown("### Audio/Video Upload")
1085
+ gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
1086
+
1087
+ transcription_file_input = gr.File(
1088
+ label="Upload Audio or Video File",
1089
+ file_types=["audio", "video"]
1090
+ )
1091
+
1092
+ enable_diarization = gr.Checkbox(
1093
+ label="Enable Speaker Diarization",
1094
+ value=True,
1095
+ info="Identify different speakers in the audio"
1096
+ )
1097
+
1098
+ transcribe_advanced_btn = gr.Button(
1099
+ "🎀 Transcribe with Metadata",
1100
+ variant="primary",
1101
+ size="lg"
1102
+ )
1103
+
1104
+ advanced_transcription_status = gr.Markdown("")  # distinct name so it does not shadow the Basic tab's transcription_status
1105
+
1106
+ with gr.Column(scale=2):
1107
+ gr.Markdown("### Rich Transcript with Metadata")
1108
+
1109
+ rich_transcript_display = gr.Textbox(
1110
+ label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
1111
+ lines=15,
1112
+ max_lines=20
1113
+ )
1114
+
1115
+ with gr.Row():
1116
+ with gr.Column():
1117
+ gr.Markdown("### Speech Metrics")
1118
+
1119
+ transcription_metrics_display = gr.Textbox(
1120
+ label="SLP Metrics",
1121
+ lines=10,
1122
+ max_lines=15
1123
+ )
1124
+
1125
+ with gr.Column():
1126
+ gr.Markdown("### Word Frequency")
1127
+
1128
+ transcription_word_freq_display = gr.Dataframe(
1129
+ headers=["Word", "Frequency"],
1130
+ label="Most Frequent Words",
1131
+ interactive=False
1132
+ )
1133
 
1134
  # Event handlers
1135
+ def on_analyze_file(file, age_val, gender_val, notes):
1136
+ """Handle file analysis"""
1137
  result = analyze_transcript(file, age_val, gender_val, notes)
1138
  transcript = process_file(file) if file else ""
1139
+ progress_msg = "βœ… Analysis completed" if "[Analysis completed in" in result else "πŸ”„ Analysis in progress..."
1140
+ return result, transcript, progress_msg
1141
+
1142
+ def on_analyze_text(text, age_val, gender_val, notes):
1143
+ """Handle text analysis"""
1144
+ result = analyze_transcript_content(text, age_val, gender_val, notes)
1145
+ progress_msg = "βœ… Analysis completed" if "[Analysis completed in" in result else "πŸ”„ Analysis in progress..."
1146
+ return result, text, progress_msg
1147
+
1148
+ def on_transcribe_and_analyze(audio_file, age_val, gender_val, notes):
1149
+ """Handle transcription and analysis"""
1150
+ if not audio_file:
1151
+ return "Please upload an audio/video file first.", "", "No file provided"
1152
+
1153
+ transcript, status = transcribe_audio(audio_file.name)
1154
+ if transcript:
1155
+ result = analyze_transcript_content(transcript, age_val, gender_val, notes)
1156
+ progress_msg = "βœ… Analysis completed" if "[Analysis completed in" in result else "πŸ”„ Analysis in progress..."
1157
+ return result, transcript, status
1158
+ else:
1159
+ return f"Transcription failed: {status}", "", status
1160
+
1161
+ def on_transcribe_advanced(audio_file, enable_diarization):
1162
+ """Handle advanced transcription"""
1163
+ if not audio_file:
1164
+ return "Please upload an audio/video file first.", "", "No file provided"
1165
+
1166
+ transcript, status = transcribe_audio_with_metadata(audio_file.name, enable_diarization)
1167
+ if transcript:
1168
+ metrics = calculate_slp_metrics(transcript)
1169
+ word_freq_data = metrics.get('word_frequency', {})
1170
+ return transcript, status, metrics, word_freq_data
1171
+ else:
1172
+ return f"Transcription failed: {status}", "", {}, {}
1173
 
1174
  def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
1175
  """Handle targeted analysis"""
1176
+ result = targeted_analysis(transcript, question, age_val, gender_val, notes)
1177
+ progress_msg = "βœ… Targeted analysis completed" if "[Analysis completed in" in result else "πŸ”„ Targeted analysis in progress..."
1178
+ return result, progress_msg
1179
 
1180
  def on_question_template_change(template):
1181
  """Handle question template selection"""
 
1186
  def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
1187
  """Handle quick analysis with multiple questions"""
1188
  if not transcript or not transcript.strip():
1189
+ return "Please provide a transcript first.", "❌ No transcript provided"
1190
 
1191
  if not questions:
1192
+ return "Please select at least one question to analyze.", "❌ No questions selected"
1193
 
1194
  # Add SLP notes to the prompt if provided
1195
  notes_section = ""
 
1200
  {notes.strip()}
1201
  """
1202
 
1203
+ # Create enhanced quick analysis prompt with comprehensive SLP analysis
1204
  questions_text = "\n".join([f"- {q}" for q in questions])
1205
  prompt = f"""
1206
+ You are a speech-language pathologist conducting a COMPREHENSIVE quick analysis of a speech transcript.
1207
 
1208
  Patient: {age_val}-year-old {gender_val}
1209
 
1210
  TRANSCRIPT:
1211
  {transcript}{notes_section}
1212
 
1213
+ Please provide a DETAILED analysis addressing these specific areas:
1214
  {questions_text}
1215
 
1216
+ ANALYSIS REQUIREMENTS:
1217
+
1218
+ For each selected area, provide:
1219
+ 1. EXACT COUNTS and quantification
1220
+ 2. SPECIFIC EXAMPLES with exact quotes from transcript
1221
+ 3. PERCENTAGES and ratios where applicable
1222
+ 4. SEVERITY assessment
1223
+ 5. AGE-APPROPRIATE evaluation
1224
+ 6. CLINICAL significance
1225
+ 7. INTERVENTION considerations
1226
+
1227
+ DETAILED ANALYSIS GUIDELINES:
1228
+
1229
+ For SYNTAX and COMPLEX SENTENCE analysis:
1230
+ - Count and cite EVERY coordinating conjunction (and, but, or, so, yet, for, nor)
1231
+ - Count and cite EVERY subordinating conjunction (because, although, while, since, if, when, where, that, which, who, whom, whose)
1232
+ - Identify and count each sentence type (simple, compound, complex, compound-complex)
1233
+ - Calculate complexity ratios and percentages
1234
+ - Assess embedding depth and clause complexity
1235
+ - Provide specific examples for each pattern
1236
+
1237
+ For FIGURATIVE LANGUAGE analysis:
1238
+ - Count and cite EVERY simile (comparisons using "like" or "as")
1239
+ - Count and cite EVERY metaphor (direct comparisons without "like" or "as")
1240
+ - Count and cite EVERY idiom and non-literal expression
1241
+ - Assess creativity and age-appropriate use
1242
+ - Provide specific examples with context
1243
+
1244
+ For PRAGMATIC and SOCIAL COMMUNICATION:
1245
+ - Count and analyze turn-taking patterns
1246
+ - Assess topic maintenance and shifting abilities
1247
+ - Evaluate social appropriateness and register use
1248
+ - Count interruptions or conversational breakdowns
1249
+ - Analyze non-literal language comprehension
1250
+ - Provide specific examples of pragmatic behaviors
1251
+
1252
+ For VOCABULARY and SEMANTIC analysis:
1253
+ - Calculate Type-Token Ratio
1254
+ - Count and categorize vocabulary by sophistication level
1255
+ - Analyze word retrieval strategies and circumlocution
1256
+ - Assess semantic precision and relationships
1257
+ - Count academic vs. everyday vocabulary use
1258
+ - Provide specific examples of vocabulary patterns
1259
+
1260
+ For MORPHOLOGICAL and PHONOLOGICAL analysis:
1261
+ - Count and cite EVERY morphological marker (plurals, possessives, verb tenses)
1262
+ - Count and cite EVERY derivational morpheme (prefixes, suffixes)
1263
+ - Identify and count phonological patterns and errors
1264
+ - Assess syllable structure and stress patterns
1265
+ - Provide specific examples of morphological use
1266
+
1267
+ For COGNITIVE-LINGUISTIC factors:
1268
+ - Assess working memory demands in language production
1269
+ - Analyze processing speed and efficiency
1270
+ - Count and evaluate attention and focus patterns
1271
+ - Assess executive function skills and self-monitoring
1272
+ - Provide specific examples of cognitive-linguistic patterns
1273
+
1274
+ For FLUENCY and SPEECH RATE:
1275
+ - Count and cite EVERY disfluency (fillers, repetitions, revisions)
1276
+ - Calculate speech rate and variability
1277
+ - Analyze pause patterns and their function
1278
+ - Assess overall speech naturalness
1279
+ - Provide specific examples of fluency patterns
1280
+
1281
+ For GRAMMAR and LANGUAGE ERRORS:
1282
+ - Count and cite EVERY grammatical error
1283
+ - Count and cite EVERY syntactic error
1284
+ - Count and cite EVERY morphological error
1285
+ - Calculate error rates and percentages
1286
+ - Provide specific examples of error patterns
1287
+
1288
+ For WORD-FINDING and RETRIEVAL:
1289
+ - Count and cite EVERY instance of circumlocution
1290
+ - Count and cite EVERY incomplete thought
1291
+ - Count and cite EVERY word-finding pause
1292
+ - Analyze word retrieval strategies used
1293
+ - Provide specific examples of retrieval patterns
1294
+
1295
+ For NARRATIVE and DISCOURSE:
1296
+ - Assess narrative organization and coherence
1297
+ - Count topic shifts and maintenance
1298
+ - Analyze discourse markers and transitions
1299
+ - Evaluate story structure and completeness
1300
+ - Provide specific examples of narrative patterns
1301
+
1302
+ FORMAT REQUIREMENTS:
1303
+ - Use clear headings for each area analyzed
1304
+ - Include bullet points for organization
1305
+ - Provide exact counts and percentages
1306
+ - Cite specific quotes from transcript
1307
+ - Include severity assessments
1308
+ - Provide clinical implications
1309
+ - Be comprehensive but focused on selected areas
1310
 
1311
+ Remember: This should be a DETAILED analysis that thoroughly addresses each selected area with quantification, evidence, and clinical relevance.
1312
  """
1313
 
1314
+ result = call_claude_api_with_continuation(prompt, max_continuations=2)
1315
+ progress_msg = "βœ… Quick analysis completed" if "[Analysis completed in" in result else "πŸ”„ Quick analysis in progress..."
1316
+ return result, progress_msg
1317
 
1318
  # Connect event handlers
1319
+ analyze_file_btn.click(
1320
+ on_analyze_file,
1321
  inputs=[file_upload, age, gender, slp_notes],
1322
+ outputs=[output, transcript_input, analysis_progress]
1323
+ )
1324
+
1325
+ analyze_text_btn.click(
1326
+ on_analyze_text,
1327
+ inputs=[text_input, age, gender, slp_notes],
1328
+ outputs=[output, transcript_input, analysis_progress]
1329
+ )
1330
+
1331
+ transcribe_btn.click(
1332
+ on_transcribe_and_analyze,
1333
+ inputs=[audio_input, age, gender, slp_notes],
1334
+ outputs=[output, transcript_input, transcription_status]
1335
+ )
1336
+
1337
+ transcribe_advanced_btn.click(
1338
+ on_transcribe_advanced,
1339
+ inputs=[transcription_file_input, enable_diarization],
1340
+ outputs=[rich_transcript_display, transcription_status, transcription_metrics_display, transcription_word_freq_display]
1341
  )
1342
 
1343
  targeted_analyze_btn.click(
1344
  on_targeted_analyze,
1345
  inputs=[transcript_input, custom_question, age, gender, slp_notes],
1346
+ outputs=[targeted_output, targeted_progress]
1347
  )
1348
 
1349
  question_templates.change(
 
1355
  quick_analyze_btn.click(
1356
  on_quick_analyze,
1357
  inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
1358
+ outputs=[quick_output, quick_progress]
1359
  )
1360
 
1361
  if __name__ == "__main__":
1362
  print("πŸš€ Starting Enhanced CASL Analysis Tool...")
1363
+ print("πŸ“Š Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis, Advanced Transcription")
1364
+ print("🎀 Transcription: Audio/Video support with speaker diarization, sentiment, and emotion analysis")
1365
+ print("πŸ“ˆ Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")
1366
+
1367
  if not ANTHROPIC_API_KEY:
1368
  print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
1369
  print(" For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
 
1371
  else:
1372
  print("βœ… Claude API configured")
1373
 
1374
+ if not SPEECHBRAIN_AVAILABLE:
1375
+ print("⚠️ SpeechBrain not available - transcription will be disabled")
1376
+ print(" Install with: pip install speechbrain transformers torch")
1377
+ else:
1378
+ print("βœ… SpeechBrain available for transcription")
1379
+
1380
+ if not MOVIEPY_AVAILABLE:
1381
+ print("⚠️ MoviePy not available - video processing will be limited")
1382
+ print(" Install with: pip install moviepy")
1383
+ else:
1384
+ print("βœ… MoviePy available for video processing")
1385
+
1386
+ if not DIARIZATION_AVAILABLE:
1387
+ print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
1388
+ print(" Install with: pip install pyannote.audio")
1389
+ print(" Note: Requires HuggingFace token for model access")
1390
+ else:
1391
+ print("βœ… Pyannote.audio available for speaker diarization")
1392
+
1393
+ if not SENTIMENT_AVAILABLE:
1394
+ print("⚠️ Transformers not available - sentiment/emotion analysis will be disabled")
1395
+ print(" Install with: pip install transformers torch")
1396
+ else:
1397
+ print("βœ… Transformers available for sentiment and emotion analysis")
1398
+
1399
  app.launch(show_api=False)
transcription_demo.py ADDED
@@ -0,0 +1,826 @@
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import logging
5
+ import re
6
+ import numpy as np
7
+ import pandas as pd
8
+ from datetime import datetime
9
+ import time
10
+ import tempfile
11
+ from typing import Dict, List, Tuple, Optional
12
+ import requests
13
+
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Try to import video processing libraries
19
+ try:
20
+ import moviepy.editor as mp
21
+ MOVIEPY_AVAILABLE = True
22
+ logger.info("MoviePy available for video processing")
23
+ except ImportError as e:
24
+ logger.warning(f"MoviePy not available: {e}")
25
+ MOVIEPY_AVAILABLE = False
26
+
27
+ # Try to import speaker diarization
28
+ try:
29
+ from pyannote.audio import Pipeline
30
+ from pyannote.audio.pipelines.utils.hook import ProgressHook
31
+ DIARIZATION_AVAILABLE = True
32
+ logger.info("Pyannote.audio available for speaker diarization")
33
+ except ImportError as e:
34
+ logger.warning(f"Pyannote.audio not available: {e}")
35
+ DIARIZATION_AVAILABLE = False
36
+
37
+ # Try to import SpeechBrain and HuggingFace components
38
+ try:
39
+ from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
40
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
41
+ import torch
42
+ SPEECHBRAIN_AVAILABLE = True
43
+ HUGGINGFACE_AVAILABLE = True
44
+ logger.info("SpeechBrain and HuggingFace models available")
45
+ except ImportError as e:
46
+ logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
47
+ SPEECHBRAIN_AVAILABLE = False
48
+ HUGGINGFACE_AVAILABLE = False
49
+
50
+ # Initialize models if available
51
+ asr_model = None
52
+ vad_model = None
53
+ sentiment_model = None
54
+ emotion_model = None
55
+ diarization_pipeline = None
56
+
57
+ if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
58
+ try:
59
+ # Speech-to-text model
60
+ asr_model = EncoderDecoderASR.from_hparams(
61
+ source="speechbrain/asr-crdnn-rnnlm-librispeech",
62
+ savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
63
+ )
64
+
65
+ # Voice Activity Detection
66
+ vad_model = VAD.from_hparams(
67
+ source="speechbrain/vad-crdnn-libriparty",
68
+ savedir="pretrained_models/vad-crdnn-libriparty"
69
+ )
70
+
71
+ # Sentiment analysis
72
+ sentiment_model = pipeline(
73
+ "sentiment-analysis",
74
+ model="cardiffnlp/twitter-roberta-base-sentiment-latest",
75
+ top_k=None
76
+ )
77
+
78
+ # Emotion analysis
79
+ emotion_model = pipeline(
80
+ "text-classification",
81
+ model="j-hartmann/emotion-english-distilroberta-base",
82
+ top_k=None
83
+ )
84
+
85
+ logger.info("All models loaded successfully")
86
+ except Exception as e:
87
+ logger.error(f"Error loading models: {e}")
88
+ SPEECHBRAIN_AVAILABLE = False
89
+ HUGGINGFACE_AVAILABLE = False
90
+
91
+ # Initialize diarization pipeline
92
+ if DIARIZATION_AVAILABLE:
93
+ try:
94
+ # Note: You'll need to get a HuggingFace token and accept the model terms
95
+ # at https://huggingface.co/pyannote/speaker-diarization
96
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
97
+ if HF_TOKEN:
98
+ diarization_pipeline = Pipeline.from_pretrained(
99
+ "pyannote/speaker-diarization@2.1",
100
+ use_auth_token=HF_TOKEN
101
+ )
102
+ logger.info("Speaker diarization pipeline loaded")
103
+ else:
104
+ logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
105
+ except Exception as e:
106
+ logger.error(f"Error loading diarization pipeline: {e}")
107
+
108
+ def extract_audio_from_video(video_path):
109
+ """Extract audio from video file (MP4, etc.)"""
110
+ if not MOVIEPY_AVAILABLE:
111
+ return None, "MoviePy not available for video processing"
112
+
113
+ try:
114
+ # Create temporary file for audio
115
+ temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
116
+ temp_audio_path = temp_audio.name
117
+ temp_audio.close()
118
+
119
+ # Load video and extract audio
120
+ video = mp.VideoFileClip(video_path)
121
+ audio = video.audio
122
+
123
+ if audio is None:
124
+ return None, "No audio track found in video file"
125
+
126
+ # Export audio to temporary WAV file
127
+ audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
128
+
129
+ # Close video to free memory
130
+ video.close()
131
+ audio.close()
132
+
133
+ logger.info(f"Audio extracted from video: {temp_audio_path}")
134
+ return temp_audio_path, "Audio extracted successfully"
135
+
136
+ except Exception as e:
137
+ logger.error(f"Error extracting audio from video: {e}")
138
+ return None, f"Error extracting audio: {str(e)}"
139
+
140
+ def perform_speaker_diarization(audio_path):
141
+ """Perform speaker diarization on audio file"""
142
+ if not DIARIZATION_AVAILABLE or not diarization_pipeline:
143
+ return None, "Speaker diarization not available"
144
+
145
+ try:
146
+ # Perform diarization
147
+ with ProgressHook() as hook:
148
+ diarization = diarization_pipeline(audio_path, hook=hook)
149
+
150
+ # Extract speaker segments
151
+ speaker_segments = []
152
+ for turn, _, speaker in diarization.itertracks(yield_label=True):
153
+ speaker_segments.append({
154
+ 'start': turn.start,
155
+ 'end': turn.end,
156
+ 'speaker': speaker,
157
+ 'duration': turn.end - turn.start
158
+ })
159
+
160
+ logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
161
+ return speaker_segments, "Diarization completed successfully"
162
+
163
+ except Exception as e:
164
+ logger.error(f"Error in diarization: {e}")
165
+ return None, f"Diarization error: {str(e)}"
166
+
167
+ def process_audio_file(file_path):
168
+ """Process audio file, extracting from video if needed"""
169
+ if not file_path:
170
+ return None, "No file provided"
171
+
172
+ file_extension = os.path.splitext(file_path)[1].lower()
173
+
174
+ # If it's a video file, extract audio first
175
+ if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
176
+ logger.info(f"Processing video file: {file_path}")
177
+ audio_path, status = extract_audio_from_video(file_path)
178
+ if audio_path:
179
+ return audio_path, f"Video processed: {status}"
180
+ else:
181
+ return None, status
182
+
183
+ # If it's already an audio file, use it directly
184
+ elif file_extension in ['.wav', '.mp3', '.m4a', '.flac', '.ogg']:
185
+ logger.info(f"Processing audio file: {file_path}")
186
+ return file_path, "Audio file ready for transcription"
187
+
188
+ else:
189
+ return None, f"Unsupported file format: {file_extension}"
190
+
191
+ def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
192
+ """Transcribe audio with timestamps, sentiment, and metadata"""
193
+ if not audio_file:
194
+ return None, "No audio file provided"
195
+
196
+ if not SPEECHBRAIN_AVAILABLE:
197
+ return None, "SpeechBrain not available - using demo transcription"
198
+
199
+ try:
200
+ # Process the file (extract audio if it's a video)
201
+ processed_audio_path, process_status = process_audio_file(audio_file)
202
+
203
+ if not processed_audio_path:
204
+ return None, process_status
205
+
206
+ # Perform speaker diarization if enabled
207
+ speaker_segments = None
208
+ diarization_status = ""
209
+ if enable_diarization:
210
+ speaker_segments, diarization_status = perform_speaker_diarization(processed_audio_path)
211
+
212
+ # Get transcription with timestamps
213
+ transcript = asr_model.transcribe_file(processed_audio_path)
214
+
215
+ # Clean up temporary audio file if it was created from video
216
+ if processed_audio_path != audio_file and os.path.exists(processed_audio_path):
217
+ try:
218
+ os.unlink(processed_audio_path)
219
+ logger.info("Temporary audio file cleaned up")
220
+ except Exception as e:
221
+ logger.warning(f"Could not clean up temporary file: {e}")
222
+
223
+ # Split into sentences for analysis
224
+ sentences = re.split(r'[.!?]+', transcript)
225
+ sentences = [s.strip() for s in sentences if s.strip()]
226
+
227
+ # Analyze each sentence
228
+ rich_transcript = []
229
+ current_time = 0
230
+
231
+ for i, sentence in enumerate(sentences):
232
+ # Estimate timestamp (rough approximation)
233
+ timestamp = current_time + (i * 2) # Assume ~2 seconds per sentence
234
+
235
+ # Determine speaker for this timestamp
236
+ speaker = "UNKNOWN"
237
+ if speaker_segments:
238
+ for segment in speaker_segments:
239
+ if segment['start'] <= timestamp <= segment['end']:
240
+ speaker = segment['speaker']
241
+ break
242
+
243
+ # Sentiment analysis
244
+ sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
245
+ sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}
246
+
247
+ # Emotion analysis
248
+ emotion_result = emotion_model(sentence)[0] if emotion_model else None
249
+ emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}
250
+
251
+ # Word count and complexity metrics
252
+ words = sentence.split()
253
+ word_count = len(words)
254
+ avg_word_length = np.mean([len(word) for word in words]) if words else 0
255
+
256
+ # Calculate speech rate (words per minute estimate)
257
+ speech_rate = word_count * 30  # rough estimate: ~2 s per sentence, so wpm = word_count / 2 * 60
258
+
259
+ rich_transcript.append({
260
+ 'timestamp': timestamp,
261
+ 'speaker': speaker,
262
+ 'sentence': sentence,
263
+ 'word_count': word_count,
264
+ 'avg_word_length': round(avg_word_length, 2),
265
+ 'speech_rate_wpm': round(speech_rate, 1),
266
+ 'sentiment': sentiment['label'],
267
+ 'sentiment_score': round(sentiment['score'], 3),
268
+ 'emotion': emotion['label'],
269
+ 'emotion_score': round(emotion['score'], 3)
270
+ })
271
+
272
+ current_time = timestamp
273
+
274
+ status_msg = f"Transcription completed successfully. {process_status}"
275
+ if diarization_status:
276
+ status_msg += f" {diarization_status}"
277
+
278
+ return rich_transcript, status_msg
279
+
280
+ except Exception as e:
281
+ logger.error(f"Error in transcription: {e}")
282
+ return None, f"Transcription error: {str(e)}"
283
+
284
+ def format_rich_transcript(rich_transcript):
285
+ """Format rich transcript for display"""
286
+ if not rich_transcript:
287
+ return "No transcript data available"
288
+
289
+ formatted_lines = []
290
+ for entry in rich_transcript:
291
+ timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
292
+
293
+ line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
294
+ line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
295
+ line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
296
+ line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"
297
+
298
+ formatted_lines.append(line)
299
+
300
+ return '\n'.join(formatted_lines)
301
+
+ def calculate_slp_metrics(rich_transcript):
+     """Calculate comprehensive SLP metrics"""
+     if not rich_transcript:
+         return {}
+
+     # Basic metrics
+     total_sentences = len(rich_transcript)
+     total_words = sum(entry['word_count'] for entry in rich_transcript)
+     total_duration = rich_transcript[-1]['timestamp']
+
+     # Speaker analysis
+     speakers = {}
+     for entry in rich_transcript:
+         speaker = entry['speaker']
+         if speaker not in speakers:
+             speakers[speaker] = {
+                 'sentences': 0,
+                 'words': 0,
+                 'sentiments': [],
+                 'emotions': []
+             }
+         speakers[speaker]['sentences'] += 1
+         speakers[speaker]['words'] += entry['word_count']
+         speakers[speaker]['sentiments'].append(entry['sentiment'])
+         speakers[speaker]['emotions'].append(entry['emotion'])
+
+     # Word-level analysis
+     all_words = []
+     for entry in rich_transcript:
+         words = entry['sentence'].lower().split()
+         all_words.extend(words)
+
+     # Word frequency distribution (punctuation stripped)
+     word_freq = {}
+     for word in all_words:
+         word_clean = re.sub(r'[^\w\s]', '', word)
+         if word_clean:
+             word_freq[word_clean] = word_freq.get(word_clean, 0) + 1
+
+     # Vocabulary diversity (Type-Token Ratio)
+     unique_words = len(set(all_words))
+     ttr = unique_words / total_words if total_words > 0 else 0
+
+     # Speech rate analysis
+     speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
+     avg_speech_rate = np.mean(speech_rates) if speech_rates else 0
+
+     # Sentiment and emotion distributions
+     sentiment_counts = {}
+     emotion_counts = {}
+     for entry in rich_transcript:
+         sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
+         emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1
+
+     # Sentence complexity
+     sentence_lengths = [entry['word_count'] for entry in rich_transcript]
+     avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
+
+     # Pause analysis (gaps between consecutive sentence timestamps)
+     pauses = []
+     for i in range(1, len(rich_transcript)):
+         pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
+         pauses.append(pause)
+
+     avg_pause_duration = np.mean(pauses) if pauses else 0
+
+     return {
+         'total_sentences': total_sentences,
+         'total_words': total_words,
+         'total_duration_seconds': total_duration,
+         'unique_words': unique_words,
+         'type_token_ratio': round(ttr, 3),
+         'avg_sentence_length': round(avg_sentence_length, 1),
+         'avg_speech_rate_wpm': round(avg_speech_rate, 1),
+         'avg_pause_duration': round(avg_pause_duration, 1),
+         'sentiment_distribution': sentiment_counts,
+         'emotion_distribution': emotion_counts,
+         'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
+         'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
+         'speakers': speakers,
+         'speaker_count': len(speakers)
+     }
+
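+ # Minimal usage sketch for the metrics above (all values hypothetical):
+ #   metrics = calculate_slp_metrics(rich_transcript)
+ #   metrics['type_token_ratio']      -> e.g. 25 unique / 40 total = 0.625
+ #   metrics['avg_speech_rate_wpm']   -> e.g. 132.0
+ #   metrics['speakers']['Speaker1']  -> {'sentences': 12, 'words': 96, ...}
+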
+ def generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
+     """Generate comprehensive analysis prompt using rich transcript data"""
+
+     # Format rich transcript with timestamps and metadata
+     transcript_lines = []
+     for entry in rich_transcript:
+         timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
+         transcript_lines.append(f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}")
+
+     transcript_text = '\n'.join(transcript_lines)
+
+     # Format metrics for analysis
+     metrics_text = f"""
+ TRANSCRIPT METRICS:
+ • Total sentences: {metrics['total_sentences']}
+ • Total words: {metrics['total_words']}
+ • Duration: {metrics['total_duration_seconds']:.1f} seconds
+ • Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
+ • Average sentence length: {metrics['avg_sentence_length']} words
+ • Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
+ • Speech rate variability: {metrics['speech_rate_variability']} wpm
+ • Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
+ • Number of speakers: {metrics['speaker_count']}
+
+ SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
+ EMOTION DISTRIBUTION: {metrics['emotion_distribution']}
+
+ SPEAKER ANALYSIS:"""
+
+     for speaker, data in metrics['speakers'].items():
+         metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"
+
+     metrics_text += f"\n\nMOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}"
+
+     notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""
+
+     prompt = f"""
+ You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich temporal and affective metadata.
+
+ PATIENT: {age}-year-old {gender}
+
+ {metrics_text}
+
+ TRANSCRIPT WITH TIMESTAMPS AND METADATA:
+ {transcript_text}{notes_section}
+
+ Please provide a comprehensive analysis including:
+
+ 1. TEMPORAL SPEECH PATTERNS:
+    - Analyze speech rate changes over time using timestamps
+    - Identify patterns in pause duration and frequency
+    - Assess temporal consistency in speech production
+    - Note any significant changes in speech patterns throughout the session
+
+ 2. AFFECTIVE AND EMOTIONAL ANALYSIS:
+    - Analyze sentiment patterns throughout the transcript using timestamp data
+    - Identify emotional shifts and their potential causes
+    - Assess emotional regulation and expression
+    - Note any correlations between emotional state and speech characteristics
+
+ 3. SPEAKER-SPECIFIC ANALYSIS (if multiple speakers):
+    - Compare speech patterns between speakers
+    - Analyze turn-taking patterns and timing
+    - Assess interaction dynamics
+    - Note speaker-specific emotional and sentiment patterns
+
+ 4. SPEECH FLUENCY AND RATE ANALYSIS:
+    - Analyze speech rate variability using the provided metrics
+    - Identify periods of fluent vs. dysfluent speech
+    - Assess the impact of emotional state on speech rate
+    - Note any temporal patterns in speech rate changes
+
+ 5. LANGUAGE COMPLEXITY ASSESSMENT:
+    - Analyze vocabulary diversity using Type-Token Ratio
+    - Assess sentence complexity and variety
+    - Identify patterns in word frequency and usage
+    - Note any temporal changes in language complexity
+
+ 6. COMPLEX SENTENCE ANALYSIS:
+    - Count and analyze use of coordinating conjunctions (and, but, or, so, yet, for, nor)
+    - Count and analyze use of subordinating conjunctions (because, although, while, since, if, when, where, that, which, who, whom, whose)
+    - Identify compound, complex, and compound-complex sentences
+    - Assess sentence variety and complexity level for age
+
+ 7. FIGURATIVE LANGUAGE ANALYSIS:
+    - Identify and count similes (comparisons using "like" or "as")
+    - Identify and count metaphors (direct comparisons without "like" or "as")
+    - Identify and count idioms (common expressions with non-literal meanings)
+    - Assess figurative language comprehension and use for age
+
+ 8. CLINICAL IMPLICATIONS:
+    - Specific intervention targets based on temporal patterns
+    - Recommendations for emotional regulation if needed
+    - Suggestions for improving speech rate consistency
+    - Strategies for enhancing language complexity
+    - Age-appropriate development recommendations
+
+ 9. COMPREHENSIVE SUMMARY:
+    - Overall communication profile with temporal considerations
+    - Assessment of emotional and affective communication
+    - Developmental appropriateness considering age
+    - Prognosis and treatment priorities
+
+ Use the temporal data, sentiment scores, and emotional labels to provide insights that would not be possible with a simple transcript. Reference specific timestamps and emotional states when making observations.
+ """
+
+     return prompt
+
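+ # For orientation, the assembled prompt begins roughly like this
+ # (illustrative, with hypothetical metric values):
+ #   You are a speech-language pathologist conducting a comprehensive analysis...
+ #   PATIENT: 8-year-old male
+ #   TRANSCRIPT METRICS:
+ #   • Total sentences: 42
+ #   ...
+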
+ def analyze_rich_transcript_with_llm(rich_transcript, age, gender, slp_notes=""):
+     """Analyze rich transcript using LLM with comprehensive metadata"""
+     if not rich_transcript:
+         return "No transcript data available for analysis."
+
+     # Calculate SLP metrics
+     metrics = calculate_slp_metrics(rich_transcript)
+
+     # Generate comprehensive analysis prompt
+     prompt = generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)
+
+     # Get analysis from Claude API, or fall back to the demo analysis
+     if ANTHROPIC_API_KEY:
+         result = call_claude_api(prompt)
+     else:
+         result = generate_demo_analysis(rich_transcript, metrics)
+
+     return result
+
+ def call_claude_api(prompt):
+     """Call Claude API directly"""
+     if not ANTHROPIC_API_KEY:
+         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
+
+     try:
+         headers = {
+             "Content-Type": "application/json",
+             "x-api-key": ANTHROPIC_API_KEY,
+             "anthropic-version": "2023-06-01"
+         }
+
+         data = {
+             "model": "claude-3-5-sonnet-20241022",
+             "max_tokens": 4096,
+             "messages": [
+                 {
+                     "role": "user",
+                     "content": prompt
+                 }
+             ]
+         }
+
+         response = requests.post(
+             "https://api.anthropic.com/v1/messages",
+             headers=headers,
+             json=data,
+             timeout=60
+         )
+
+         if response.status_code == 200:
+             response_json = response.json()
+             return response_json['content'][0]['text']
+         else:
+             logger.error(f"Claude API error: {response.status_code} - {response.text}")
+             return f"❌ Claude API Error: {response.status_code}"
+
+     except Exception as e:
+         logger.error(f"Error calling Claude API: {str(e)}")
+         return f"❌ Error: {str(e)}"
+
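+ # Usage sketch: call_claude_api(generate_comprehensive_analysis_prompt(
+ #     rich_transcript, metrics, age, gender)) returns the analysis text on
+ # success, or an "❌ ..." error string that is surfaced directly in the UI.
+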
+ def generate_demo_analysis(rich_transcript, metrics):
+     """Generate demo analysis when API is not available"""
+     return f"""## Comprehensive SLP Analysis with Temporal and Affective Data
+
+ ### TEMPORAL SPEECH PATTERNS
+ **Speech Rate Analysis**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
+ - Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
+ - Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns
+
+ **Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
+ - {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances
+
+ ### AFFECTIVE AND EMOTIONAL ANALYSIS
+ **Sentiment Distribution**: {metrics['sentiment_distribution']}
+ **Emotion Distribution**: {metrics['emotion_distribution']}
+
+ The emotional patterns suggest {'positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'neutral' if 'neutral' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['neutral'] > 2 else 'mixed'} emotional expression throughout the session.
+
+ ### LANGUAGE COMPLEXITY
+ **Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
+ - {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity
+
+ **Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
+ - Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}
+
+ **Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}
+
+ ### SPEAKER ANALYSIS
+ **Number of Speakers**: {metrics['speaker_count']}
+ {chr(10).join([f"• {speaker}: {data['sentences']} sentences, {data['words']} words" for speaker, data in metrics['speakers'].items()])}
+
+ ### CLINICAL IMPLICATIONS
+ Based on the temporal and affective analysis, this patient shows:
+ - {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
+ - {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
+ - {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns
+ - {'Positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'Neutral'} emotional expression
+
+ ### RECOMMENDATIONS
+ 1. Focus on vocabulary expansion if TTR < 0.4
+ 2. Address speech rate if outside normal range
+ 3. Work on sentence complexity if below age expectations
+ 4. Consider emotional regulation strategies based on sentiment patterns
+ 5. Monitor temporal patterns in speech rate and fluency"""
+
+ def create_transcription_interface():
+     """Create the transcription-focused Gradio interface"""
+     with gr.Blocks(title="Advanced Transcription Tool", theme=gr.themes.Soft()) as app:
+         gr.Markdown("# 🎤 Advanced Transcription Tool")
+         gr.Markdown("Transcribe audio/video with speaker diarization, timestamps, sentiment analysis, and comprehensive LLM analysis")
+
+         with gr.Tabs():
+             # Audio/Video Upload & Transcription Tab
+             with gr.Tab("🎤 Audio/Video Transcription"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### File Upload")
+                         gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
+
+                         file_input = gr.File(
+                             label="Upload Audio or Video File",
+                             file_types=["audio", "video"]
+                         )
+
+                         enable_diarization = gr.Checkbox(
+                             label="Enable Speaker Diarization",
+                             value=True,
+                             info="Identify different speakers in the audio"
+                         )
+
+                         transcribe_btn = gr.Button(
+                             "🎤 Transcribe File",
+                             variant="primary",
+                             size="lg"
+                         )
+
+                         transcription_status = gr.Markdown("")
+
+                     with gr.Column(scale=2):
+                         gr.Markdown("### Rich Transcript with Metadata")
+
+                         rich_transcript_display = gr.Textbox(
+                             label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
+                             lines=15,
+                             max_lines=20
+                         )
+
+             # Analysis Tab
+             with gr.Tab("📊 LLM Analysis"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### Patient Information")
+
+                         with gr.Row():
+                             age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
+                             gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")
+
+                         slp_notes = gr.Textbox(
+                             label="SLP Clinical Notes (Optional)",
+                             placeholder="Enter additional clinical observations...",
+                             lines=3
+                         )
+
+                         analyze_btn = gr.Button(
+                             "🔍 Analyze with LLM",
+                             variant="primary",
+                             size="lg"
+                         )
+
+                     with gr.Column(scale=2):
+                         gr.Markdown("### Comprehensive LLM Analysis")
+
+                         analysis_output = gr.Textbox(
+                             label="LLM Analysis Report",
+                             lines=25,
+                             max_lines=30
+                         )
+
+             # Metrics Tab
+             with gr.Tab("📈 Speech Metrics"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("### Quantitative Speech Metrics")
+
+                         metrics_display = gr.Textbox(
+                             label="SLP Metrics",
+                             lines=15,
+                             max_lines=20
+                         )
+
+                     with gr.Column():
+                         gr.Markdown("### Word Frequency Analysis")
+
+                         word_freq_display = gr.Dataframe(
+                             headers=["Word", "Frequency"],
+                             label="Most Frequent Words",
+                             interactive=False
+                         )
+
+             # Raw Data Tab
+             with gr.Tab("📊 Raw Data"):
+                 with gr.Row():
+                     with gr.Column():
+                         gr.Markdown("### JSON Data")
+
+                         json_display = gr.Textbox(
+                             label="Raw JSON Data",
+                             lines=20,
+                             max_lines=25
+                         )
+
+         # Event handlers
+         def on_transcribe(file, diarization_enabled):
+             """Handle file transcription"""
+             if not file:
+                 return "", "", [], "", "Please upload a file first."
+
+             rich_transcript, status = transcribe_audio_with_metadata(file.name, diarization_enabled)
+
+             if rich_transcript:
+                 formatted = format_rich_transcript(rich_transcript)
+                 metrics = calculate_slp_metrics(rich_transcript)
+
+                 # Format metrics for display
+                 metrics_text = f"""SPEECH METRICS:
+ • Total sentences: {metrics['total_sentences']}
+ • Total words: {metrics['total_words']}
+ • Duration: {metrics['total_duration_seconds']:.1f} seconds
+ • Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
+ • Average sentence length: {metrics['avg_sentence_length']} words
+ • Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
+ • Speech rate variability: {metrics['speech_rate_variability']} wpm
+ • Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
+ • Number of speakers: {metrics['speaker_count']}
+
+ SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
+ EMOTION DISTRIBUTION: {metrics['emotion_distribution']}
+
+ SPEAKER ANALYSIS:"""
+
+                 for speaker, data in metrics['speakers'].items():
+                     metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"
+
+                 # Create word frequency dataframe (top 20 entries)
+                 word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]
+
+                 # Raw JSON for the Raw Data tab
+                 json_data = json.dumps(rich_transcript, indent=2)
+
+                 return formatted, metrics_text, word_freq_data, json_data, status
+             else:
+                 return "", "", [], "", status
+
+         def on_analyze(rich_transcript_text, age_val, gender_val, notes):
+             """Handle LLM analysis"""
+             if not rich_transcript_text or rich_transcript_text == "No transcript data available":
+                 return "Please transcribe audio first."
+
+             # Re-parse the formatted display text back into a rich-transcript
+             # structure. Per-sentence rate, sentiment, and emotion cannot be
+             # recovered from the display string, so neutral placeholders are
+             # substituted below.
+             lines = rich_transcript_text.split('\n')
+             rich_transcript = []
+
+             for line in lines:
+                 if line.strip():
+                     # Extract data from the formatted line
+                     timestamp_match = re.search(r'\[(\d{2}:\d{2})\]', line)
+                     speaker_match = re.search(r'\*(\w+):', line)
+                     sentence_match = re.search(r'\*\w+:\s*(.+?)(?=\s*\[|$)', line)
+
+                     if timestamp_match and speaker_match and sentence_match:
+                         timestamp_str = timestamp_match.group(1)
+                         minutes, seconds = map(int, timestamp_str.split(':'))
+                         timestamp = minutes * 60 + seconds
+
+                         speaker = speaker_match.group(1)
+                         sentence = sentence_match.group(1).strip()
+
+                         rich_transcript.append({
+                             'timestamp': timestamp,
+                             'speaker': speaker,
+                             'sentence': sentence,
+                             'word_count': len(sentence.split()),
+                             'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
+                             'speech_rate_wpm': 120.0,  # placeholder
+                             'sentiment': 'neutral',    # placeholder
+                             'sentiment_score': 0.5,
+                             'emotion': 'neutral',      # placeholder
+                             'emotion_score': 0.5
+                         })
+
+             return analyze_rich_transcript_with_llm(rich_transcript, age_val, gender_val, notes)
+
+         # Connect event handlers
+         transcribe_btn.click(
+             on_transcribe,
+             inputs=[file_input, enable_diarization],
+             outputs=[rich_transcript_display, metrics_display, word_freq_display, json_display, transcription_status]
+         )
+
+         analyze_btn.click(
+             on_analyze,
+             inputs=[rich_transcript_display, age, gender, slp_notes],
+             outputs=[analysis_output]
+         )
+
+     return app
+
+ if __name__ == "__main__":
+     print("🚀 Starting Advanced Transcription Tool...")
+
+     if not MOVIEPY_AVAILABLE:
+         print("⚠️ MoviePy not available - video processing will be limited")
+         print("   Install with: pip install moviepy")
+     else:
+         print("✅ MoviePy available for video processing")
+
+     if not DIARIZATION_AVAILABLE:
+         print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
+         print("   Install with: pip install pyannote.audio")
+     else:
+         print("✅ Pyannote.audio available for speaker diarization")
+         if not os.getenv("HF_TOKEN"):
+             print("⚠️ HF_TOKEN not set - set it to enable speaker diarization")
+             print("   Get token from: https://huggingface.co/settings/tokens")
+             print("   Accept model terms at: https://huggingface.co/pyannote/speaker-diarization")
+
+     if not SPEECHBRAIN_AVAILABLE:
+         print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
+         print("   Install with: pip install speechbrain transformers torch")
+     else:
+         print("✅ SpeechBrain and HuggingFace models loaded")
+
+     app = create_transcription_interface()
+     app.launch(show_api=False)