Upload 8 files

- enhanced_casl_app.py +590 -0
- moderate_casl_app.py +7 -2
- moderate_casl_app_fixed.py +406 -0
- requirements.txt +15 -2
- simple_casl_app.py +1089 -112
- transcription_demo.py +826 -0
enhanced_casl_app.py
ADDED
@@ -0,0 +1,590 @@
import gradio as gr
import json
import os
import logging
import requests
import re
import numpy as np
import pandas as pd
from datetime import datetime
import time
from typing import Dict, List, Tuple, Optional
import tempfile

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Anthropic API key
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Try to import SpeechBrain and HuggingFace components
try:
    from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    import torch
    SPEECHBRAIN_AVAILABLE = True
    HUGGINGFACE_AVAILABLE = True
    logger.info("SpeechBrain and HuggingFace models available")
except ImportError as e:
    logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
    SPEECHBRAIN_AVAILABLE = False
    HUGGINGFACE_AVAILABLE = False

# Initialize models if available
asr_model = None
vad_model = None
sentiment_model = None
emotion_model = None

if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
    try:
        # Speech-to-text model
        asr_model = EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-crdnn-rnnlm-librispeech",
            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
        )

        # Voice Activity Detection
        vad_model = VAD.from_hparams(
            source="speechbrain/vad-crdnn-libriparty",
            savedir="pretrained_models/vad-crdnn-libriparty"
        )

        # Sentiment analysis
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            return_all_scores=True
        )

        # Emotion analysis
        emotion_model = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True
        )

        logger.info("All models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        SPEECHBRAIN_AVAILABLE = False
        HUGGINGFACE_AVAILABLE = False

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def transcribe_audio_with_metadata(audio_file):
    """Transcribe audio with timestamps, sentiment, and metadata"""
    if not audio_file:
        return None, "No audio file provided"

    if not SPEECHBRAIN_AVAILABLE:
        return None, "SpeechBrain not available - using demo transcription"

    try:
        # Get transcription with timestamps
        transcript = asr_model.transcribe_file(audio_file)

        # Split into sentences for analysis
        sentences = re.split(r'[.!?]+', transcript)
        sentences = [s.strip() for s in sentences if s.strip()]

        # Analyze each sentence
        rich_transcript = []
        current_time = 0

        for i, sentence in enumerate(sentences):
            # Estimate timestamp (rough approximation)
            timestamp = current_time + (i * 2)  # Assume ~2 seconds per sentence

            # Sentiment analysis
            sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
            sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}

            # Emotion analysis
            emotion_result = emotion_model(sentence)[0] if emotion_model else None
            emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}

            # Word count and complexity metrics
            words = sentence.split()
            word_count = len(words)
            avg_word_length = np.mean([len(word) for word in words]) if words else 0

            # Calculate speech rate (words per minute estimate)
            speech_rate = word_count * 30 / 60  # Rough estimate

            rich_transcript.append({
                'timestamp': timestamp,
                'sentence': sentence,
                'word_count': word_count,
                'avg_word_length': round(avg_word_length, 2),
                'speech_rate_wpm': round(speech_rate, 1),
                'sentiment': sentiment['label'],
                'sentiment_score': round(sentiment['score'], 3),
                'emotion': emotion['label'],
                'emotion_score': round(emotion['score'], 3)
            })

            current_time = timestamp

        return rich_transcript, "Transcription completed successfully"

    except Exception as e:
        logger.error(f"Error in transcription: {e}")
        return None, f"Transcription error: {str(e)}"

def format_rich_transcript(rich_transcript):
    """Format rich transcript for display"""
    if not rich_transcript:
        return "No transcript data available"

    formatted_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"

        line = f"[{timestamp_str}] *PAR: {entry['sentence']}"
        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"

        formatted_lines.append(line)

    return '\n'.join(formatted_lines)

def calculate_slp_metrics(rich_transcript):
    """Calculate comprehensive SLP metrics"""
    if not rich_transcript:
        return {}

    # Basic metrics
    total_sentences = len(rich_transcript)
    total_words = sum(entry['word_count'] for entry in rich_transcript)
    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0

    # Word-level analysis
    all_words = []
    for entry in rich_transcript:
        words = entry['sentence'].lower().split()
        all_words.extend(words)

    # Word frequency distribution
    word_freq = {}
    for word in all_words:
        word_clean = re.sub(r'[^\w\s]', '', word)
        if word_clean:
            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1

    # Vocabulary diversity (Type-Token Ratio)
    unique_words = len(set(all_words))
    ttr = unique_words / total_words if total_words > 0 else 0

    # Speech rate analysis
    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0

    # Sentiment analysis
    sentiment_counts = {}
    emotion_counts = {}
    for entry in rich_transcript:
        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1

    # Sentence complexity
    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0

    # Pause analysis (gaps between sentences)
    pauses = []
    for i in range(1, len(rich_transcript)):
        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
        pauses.append(pause)

    avg_pause_duration = np.mean(pauses) if pauses else 0

    return {
        'total_sentences': total_sentences,
        'total_words': total_words,
        'total_duration_seconds': total_duration,
        'unique_words': unique_words,
        'type_token_ratio': round(ttr, 3),
        'avg_sentence_length': round(avg_sentence_length, 1),
        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
        'avg_pause_duration': round(avg_pause_duration, 1),
        'sentiment_distribution': sentiment_counts,
        'emotion_distribution': emotion_counts,
        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0
    }

def generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
    """Generate comprehensive SLP analysis prompt"""

    # Format metrics for the prompt
    metrics_text = f"""
TRANSCRIPT METRICS:
- Total sentences: {metrics['total_sentences']}
- Total words: {metrics['total_words']}
- Duration: {metrics['total_duration_seconds']:.1f} seconds
- Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
- Average sentence length: {metrics['avg_sentence_length']} words
- Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
- Speech rate variability: {metrics['speech_rate_variability']} wpm
- Average pause duration: {metrics['avg_pause_duration']:.1f} seconds

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

MOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}
"""

    # Format rich transcript for analysis
    transcript_text = format_rich_transcript(rich_transcript)

    notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""

    prompt = f"""
You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich metadata.

PATIENT: {age}-year-old {gender}

{metrics_text}

TRANSCRIPT WITH METADATA:
{transcript_text}{notes_section}

Please provide a comprehensive analysis including:

1. SPEECH FLUENCY ANALYSIS:
- Speech rate patterns and variability
- Pause patterns and their significance
- Overall fluency assessment

2. LANGUAGE COMPLEXITY:
- Vocabulary diversity and word frequency patterns
- Sentence structure and complexity
- Language development level assessment

3. EMOTIONAL AND AFFECTIVE ANALYSIS:
- Sentiment patterns throughout the transcript
- Emotional expression and regulation
- Impact on communication effectiveness

4. SPEECH FACTORS:
- Word retrieval patterns
- Grammatical accuracy
- Repetitions and revisions

5. CLINICAL IMPLICATIONS:
- Specific intervention targets
- Strengths and areas for improvement
- Recommendations for therapy

6. COMPREHENSIVE SUMMARY:
- Overall communication profile
- Developmental appropriateness
- Prognosis and treatment priorities

Use the quantitative metrics and qualitative observations to support your analysis.
"""

    return prompt

def analyze_rich_transcript(rich_transcript, age, gender, slp_notes=""):
    """Analyze rich transcript with comprehensive metrics"""
    if not rich_transcript:
        return "No transcript data available for analysis."

    # Calculate SLP metrics
    metrics = calculate_slp_metrics(rich_transcript)

    # Generate analysis prompt
    prompt = generate_slp_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)

    # Get analysis from Claude API
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_analysis(rich_transcript, metrics)

    return result

def generate_demo_analysis(rich_transcript, metrics):
    """Generate demo analysis when API is not available"""
    return f"""## Comprehensive SLP Analysis

### SPEECH FLUENCY ANALYSIS
**Speech Rate**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
- Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
- Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns

**Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
- {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances

### LANGUAGE COMPLEXITY
**Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity

**Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
- Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}

**Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}

### EMOTIONAL AND AFFECTIVE ANALYSIS
**Sentiment Distribution**: {metrics['sentiment_distribution']}
**Emotion Distribution**: {metrics['emotion_distribution']}

### CLINICAL IMPLICATIONS
Based on the quantitative analysis, this patient shows:
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
- {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
- {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns

### RECOMMENDATIONS
1. Focus on vocabulary expansion if TTR < 0.4
2. Address speech rate if outside normal range
3. Work on sentence complexity if below age expectations
4. Consider emotional regulation strategies based on sentiment patterns"""

def create_enhanced_interface():
    """Create the enhanced Gradio interface"""
    with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
        gr.Markdown("Advanced speech analysis with sentiment, timestamps, and comprehensive SLP metrics")

        with gr.Tabs():
            # Audio Upload & Transcription Tab
            with gr.Tab("🎤 Audio Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Audio Upload")

                        audio_input = gr.Audio(
                            type="filepath",
                            label="Upload Audio Recording"
                        )

                        transcribe_btn = gr.Button(
                            "🎤 Transcribe & Analyze",
                            variant="primary",
                            size="lg"
                        )

                        transcription_status = gr.Markdown("")

                    with gr.Column(scale=2):
                        gr.Markdown("### Rich Transcript")

                        rich_transcript_display = gr.Textbox(
                            label="Transcription with Timestamps & Sentiment",
                            lines=15,
                            max_lines=20
                        )

            # Analysis Tab
            with gr.Tab("📊 Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter additional clinical observations...",
                            lines=3
                        )

                        analyze_btn = gr.Button(
                            "🔍 Analyze Transcript",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column(scale=2):
                        gr.Markdown("### Comprehensive Analysis")

                        analysis_output = gr.Textbox(
                            label="SLP Analysis Report",
                            lines=25,
                            max_lines=30
                        )

            # Metrics Tab
            with gr.Tab("📈 Metrics Dashboard"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Quantitative Metrics")

                        metrics_display = gr.JSON(
                            label="SLP Metrics",
                            interactive=False
                        )

                    with gr.Column():
                        gr.Markdown("### Word Frequency")

                        word_freq_display = gr.Dataframe(
                            headers=["Word", "Frequency"],
                            label="Most Frequent Words",
                            interactive=False
                        )

        # Event handlers
        def on_transcribe(audio_file):
            """Handle audio transcription"""
            if not audio_file:
                return "", "Please upload an audio file first."

            rich_transcript, status = transcribe_audio_with_metadata(audio_file)

            if rich_transcript:
                formatted = format_rich_transcript(rich_transcript)
                return formatted, status
            else:
                return "", status

        def on_analyze(rich_transcript_text, age_val, gender_val, notes):
            """Handle analysis"""
            # Convert formatted text back to rich transcript structure
            # This is a simplified version - in practice you'd want to store the rich data
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return "Please transcribe audio first."

            # For demo purposes, create a simple rich transcript from the text
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    # Extract sentence from the line
                    sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
                    if sentence_match:
                        sentence = sentence_match.group(1).strip()
                        rich_transcript.append({
                            'timestamp': i * 2,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            return analyze_rich_transcript(rich_transcript, age_val, gender_val, notes)

        def update_metrics(rich_transcript_text):
            """Update metrics display"""
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return {}, []

            # Convert text back to rich transcript (simplified)
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    sentence_match = re.search(r'\*PAR: (.+?)(?=\s*\[|$)', line)
                    if sentence_match:
                        sentence = sentence_match.group(1).strip()
                        rich_transcript.append({
                            'timestamp': i * 2,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            metrics = calculate_slp_metrics(rich_transcript)

            # Create word frequency dataframe
            word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]

            return metrics, word_freq_data

        # Connect event handlers
        transcribe_btn.click(
            on_transcribe,
            inputs=[audio_input],
            outputs=[rich_transcript_display, transcription_status]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[rich_transcript_display, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        # Update metrics when transcript changes
        rich_transcript_display.change(
            update_metrics,
            inputs=[rich_transcript_display],
            outputs=[metrics_display, word_freq_display]
        )

    return app

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")

    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
        print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
    else:
        print("✅ Claude API configured")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain and HuggingFace models loaded")

    app = create_enhanced_interface()
    app.launch(show_api=False)
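Before the next file, a small usage sketch (not part of the commit) of the rich-transcript entry shape that the functions above pass around; the sentences and scores are invented.

# Hypothetical entries in the shape produced by transcribe_audio_with_metadata(),
# fed to the formatting and metrics helpers from enhanced_casl_app.py.
from enhanced_casl_app import calculate_slp_metrics, format_rich_transcript

sample = [
    {'timestamp': 0, 'sentence': "we went to the beach", 'word_count': 5,
     'avg_word_length': 3.2, 'speech_rate_wpm': 120.0,
     'sentiment': 'positive', 'sentiment_score': 0.91,
     'emotion': 'joy', 'emotion_score': 0.88},
    {'timestamp': 2, 'sentence': "I ate ice cream", 'word_count': 4,
     'avg_word_length': 3.0, 'speech_rate_wpm': 110.0,
     'sentiment': 'positive', 'sentiment_score': 0.85,
     'emotion': 'joy', 'emotion_score': 0.80},
]

print(format_rich_transcript(sample))
# [00:00] *PAR: we went to the beach [Words: 5, Rate: 120.0wpm] [Sentiment: positive (0.91)] [Emotion: joy (0.88)]
print(calculate_slp_metrics(sample)['type_token_ratio'])  # unique words / total words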
moderate_casl_app.py
CHANGED
@@ -155,14 +155,19 @@ def call_bedrock(prompt, max_tokens=4096):
         "messages": [
             {
                 "role": "user",
-                "content":
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ]
             }
         ],
         "temperature": 0.3,
         "top_p": 0.9
     })
 
-    modelId = 'anthropic.claude-3-sonnet-
+    modelId = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
     response = bedrock_client.invoke_model(
         body=body,
         modelId=modelId,
moderate_casl_app_fixed.py
ADDED
@@ -0,0 +1,406 @@
import gradio as gr
import json
import os
import logging
import requests
import re
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Anthropic API key - can be set as HuggingFace secret or environment variable
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Check if API key is available
if ANTHROPIC_API_KEY:
    logger.info("Claude API key found")
else:
    logger.warning("Claude API key not found - using demo mode")

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def process_file(file):
    """Process uploaded file"""
    if file is None:
        return "Please upload a file first."

    try:
        # Read file content
        with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        if not content.strip():
            return "File appears to be empty."

        return content
    except Exception as e:
        return f"Error reading file: {str(e)}"

def read_cha_file(file_path):
    """Read and parse a .cha transcript file"""
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        # Extract participant lines (starting with *PAR:)
        par_lines = []
        for line in content.splitlines():
            if line.startswith('*PAR:'):
                par_lines.append(line)

        # If no PAR lines found, just return the whole content
        if not par_lines:
            return content

        return '\n'.join(par_lines)

    except Exception as e:
        logger.error(f"Error reading CHA file: {str(e)}")
        return ""

def process_upload(file):
    """Process an uploaded file (text or CHA)"""
    if file is None:
        return ""

    file_path = file.name
    if file_path.endswith('.cha'):
        return read_cha_file(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            return f.read()

def generate_demo_response(prompt):
    """Generate a demo response when API is not available"""
    return """## Speech Factors Analysis

**Difficulty producing fluent speech**: 8 instances, moderate severity
- Examples: "today I would &-um like to talk about &-um a fun trip"
- "we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually"

**Word retrieval issues**: 6 instances, mild-moderate severity
- Examples: "what do you call those &-um &-um sprinkles! that's the word"
- "sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built"

**Grammatical errors**: 4 instances, moderate severity
- Examples: "after swimming we [//] I eat [: ate] [*] &-um ice cream"
- "we saw [/] saw fishies [: fish] [*] swimming in the water"

**Repetitions and revisions**: 5 instances, mild severity
- Examples: "we [/] we stayed for &-um three no [//] four days"
- "I want to go back to the beach [/] beach next year"

## Language Skills Assessment

**Lexical/Semantic Skills**:
- Vocabulary diversity appears age-appropriate with some word-finding difficulties
- Examples: "what do you call those &-um &-um sprinkles! that's the word"
- Shows good semantic understanding but retrieval challenges

**Syntactic Skills**:
- Basic sentence structure is intact with some grammatical inconsistencies
- Examples: "my brother he [//] he helped me dig a big hole"
- Verb tense errors noted: "forgetted" for "forgot", "eat" for "ate"

**Supralinguistic Skills**:
- Narrative organization is good with logical sequence
- Examples: "sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold"
- Shows creative thinking and topic maintenance

## Treatment Recommendations

1. **Word-finding strategies**: Implement semantic cuing techniques using the patient's experiences (beach, ice cream) as context
2. **Grammar practice**: Focus on verb tense consistency with structured exercises
3. **Fluency techniques**: Work on reducing fillers and improving speech flow
4. **Self-monitoring**: Help patient identify and correct grammatical errors
5. **Vocabulary expansion**: Build on existing semantic networks

## Clinical Summary

This child demonstrates a mild-to-moderate expressive language disorder with primary concerns in word retrieval and grammatical accuracy. Strengths include good narrative organization and topic maintenance. The pattern suggests intervention should focus on word-finding strategies and grammatical form practice while building on existing semantic knowledge."""

def analyze_transcript(transcript, age, gender, slp_notes=""):
    """Analyze a speech transcript using Claude"""
    if not transcript or len(transcript.strip()) < 50:
        return "Error: Please provide a longer transcript for analysis."

    # Add SLP notes to the prompt if provided
    notes_section = ""
    if slp_notes and slp_notes.strip():
        notes_section = f"""

SLP CLINICAL NOTES:
{slp_notes.strip()}
"""

    # Simplified analysis prompt
    prompt = f"""
You are a speech-language pathologist analyzing a transcript for CASL assessment.

Patient: {age}-year-old {gender}

TRANSCRIPT:
{transcript}{notes_section}

Please provide a comprehensive CASL analysis including:

1. SPEECH FACTORS (with counts and severity):
- Difficulty producing fluent speech
- Word retrieval issues
- Grammatical errors
- Repetitions and revisions

2. LANGUAGE SKILLS ASSESSMENT:
- Lexical/Semantic Skills (qualitative assessment)
- Syntactic Skills (qualitative assessment)
- Supralinguistic Skills (qualitative assessment)

3. TREATMENT RECOMMENDATIONS:
- List 3-5 specific intervention strategies

4. CLINICAL SUMMARY:
- Brief explanation of findings and prognosis

Use exact quotes from the transcript as evidence.
Focus on qualitative observations rather than standardized scores.
Be specific and provide concrete examples from the transcript.
{f"Consider the SLP clinical notes in your analysis." if slp_notes and slp_notes.strip() else ""}
"""

    # Get analysis from Claude API or demo
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_response(prompt)

    return result

def create_interface():
    """Create the Gradio interface"""
    with gr.Blocks(title="Enhanced CASL Analysis Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
        gr.Markdown("Upload a speech transcript and get comprehensive CASL assessment results.")

        with gr.Tabs():
            # Analysis Tab
            with gr.Tab("📊 Analysis"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter any additional clinical observations, context, or notes...",
                            lines=3
                        )

                        gr.Markdown("### Transcript Input")

                        file_upload = gr.File(
                            label="Upload Transcript File",
                            file_types=[".txt", ".cha"]
                        )

                        transcript = gr.Textbox(
                            label="Or Paste Transcript Here",
                            placeholder="Enter transcript text or upload a file...",
                            lines=10
                        )

                        analyze_btn = gr.Button("🔍 Analyze Transcript", variant="primary")

                    with gr.Column():
                        gr.Markdown("### Analysis Results")

                        analysis_output = gr.Textbox(
                            label="CASL Analysis Report",
                            placeholder="Analysis results will appear here...",
                            lines=25,
                            max_lines=30
                        )

            # Sample Transcripts Tab
            with gr.Tab("📝 Sample Transcripts"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Sample Transcripts")

                        sample_choice = gr.Dropdown(
                            choices=[
                                "Beach Trip (Child)",
                                "School Day (Adolescent)",
                                "Adult Recovery"
                            ],
                            label="Select a sample transcript:",
                            value="Beach Trip (Child)"
                        )

                        load_sample_btn = gr.Button("Load Sample", variant="secondary")

                        sample_transcript = gr.Textbox(
                            label="Sample Transcript",
                            lines=15,
                            interactive=False
                        )

                        use_sample_btn = gr.Button("Use This Sample for Analysis", variant="primary")

                    with gr.Column():
                        gr.Markdown("### Sample Descriptions")

                        gr.Markdown("""
**Beach Trip (Child)**: 8-year-old child describing a family beach vacation
- Shows typical child language patterns
- Contains word-finding difficulties and grammatical errors
- Good narrative structure despite language challenges

**School Day (Adolescent)**: Teenager describing a school day
- More complex language but still some disfluencies
- Shows adolescent speech patterns
- Academic vocabulary and social language

**Adult Recovery**: Adult describing stroke recovery
- Post-stroke language patterns
- Word-finding difficulties
- Shows recovery progress
""")

        # Sample transcripts
        SAMPLE_TRANSCRIPTS = {
            "Beach Trip (Child)": """*PAR: today I would &-um like to talk about &-um a fun trip I took last &-um summer with my family.
*PAR: we went to the &-um &-um beach [//] no to the mountains [//] I mean the beach actually.
*PAR: there was lots of &-um &-um swimming and &-um sun.
*PAR: we [/] we stayed for &-um three no [//] four days in a &-um hotel near the water [: ocean] [*].
*PAR: my favorite part was &-um building &-um castles with sand.
*PAR: sometimes I forget [//] forgetted [: forgot] [*] what they call those things we built.
*PAR: my brother he [//] he helped me dig a big hole.
*PAR: we saw [/] saw fishies [: fish] [*] swimming in the water.
*PAR: sometimes I wonder [/] wonder where fishies [: fish] [*] go when it's cold.
*PAR: maybe they have [/] have houses under the water.
*PAR: after swimming we [//] I eat [: ate] [*] &-um ice cream with &-um chocolate things on top.
*PAR: what do you call those &-um &-um sprinkles! that's the word.
*PAR: my mom said to &-um that I could have &-um two scoops next time.
*PAR: I want to go back to the beach [/] beach next year.""",

            "School Day (Adolescent)": """*PAR: yesterday was &-um kind of a weird day at school.
*PAR: I had this big test in math and I was like really nervous about it.
*PAR: when I got there [//] when I got to class the teacher said we could use calculators.
*PAR: I was like &-oh &-um that's good because I always mess up the &-um the calculations.
*PAR: there was this one problem about &-um what do you call it &-um geometry I think.
*PAR: I couldn't remember the formula for [//] I mean I knew it but I just couldn't think of it.
*PAR: so I raised my hand and asked the teacher and she was really nice about it.
*PAR: after the test me and my friends went to lunch and we talked about how we did.
*PAR: everyone was saying it was hard but I think I did okay.
*PAR: oh and then in English class we had to read our essays out loud.
*PAR: I hate doing that because I get really nervous and I start talking fast.
*PAR: but the teacher said mine was good which made me feel better.""",

            "Adult Recovery": """*PAR: I &-um I want to talk about &-uh my &-um recovery.
*PAR: it's been &-um [//] it's hard to &-um to find the words sometimes.
*PAR: before the &-um the stroke I was &-um working at the &-uh at the bank.
*PAR: now I have to &-um practice speaking every day with my therapist.
*PAR: my wife she [//] she helps me a lot at home.
*PAR: we do &-um exercises together like &-uh reading and &-um talking about pictures.
*PAR: sometimes I get frustrated because I know what I want to say but &-um the words don't come out right.
*PAR: but I'm getting better little by little.
*PAR: the doctor says I'm making good progress.
*PAR: I hope to go back to work someday but right now I'm focusing on &-um getting better."""
        }

        # Event handlers
        def load_sample_transcript(sample_name):
            """Load a sample transcript"""
            return SAMPLE_TRANSCRIPTS.get(sample_name, "")

        def use_sample_for_analysis(sample_text, age_val, gender_val, notes):
            """Use sample transcript for analysis"""
            if not sample_text:
                return "Please load a sample transcript first."
            return analyze_transcript(sample_text, age_val, gender_val, notes)

        def on_analyze(transcript_text, age_val, gender_val, notes):
            """Handle analysis"""
            if not transcript_text or len(transcript_text.strip()) < 50:
                return "Error: Please provide a longer transcript for analysis."
            return analyze_transcript(transcript_text, age_val, gender_val, notes)

        # Connect event handlers
        load_sample_btn.click(
            load_sample_transcript,
            inputs=[sample_choice],
            outputs=[sample_transcript]
        )

        use_sample_btn.click(
            use_sample_for_analysis,
            inputs=[sample_transcript, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[transcript, age, gender, slp_notes],
            outputs=[analysis_output]
        )

        # File upload handler
        file_upload.upload(process_upload, file_upload, transcript)

    return app

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show demo response")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
        print("   For local use: export ANTHROPIC_API_KEY='your-key-here'")
    else:
        print("✅ Claude API configured")

    app = create_interface()
    app.launch(show_api=False)
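As an aside, a small self-contained illustration (not part of the upload) of the *PAR: filtering that read_cha_file() performs on CHAT-format transcripts; the sample file content is invented.

# Hypothetical .cha content; only the participant (*PAR:) tiers are kept,
# mirroring read_cha_file() in moderate_casl_app_fixed.py.
cha_text = """@Begin
*INV: tell me about your trip .
*PAR: we went to the &-um beach .
%mor: pro|we v|go&PAST prep|to det|the n|beach .
*PAR: I eat [: ate] [*] ice cream .
@End"""

par_lines = [line for line in cha_text.splitlines() if line.startswith('*PAR:')]
print('\n'.join(par_lines))
# *PAR: we went to the &-um beach .
# *PAR: I eat [: ate] [*] ice cream .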
requirements.txt
CHANGED
@@ -5,5 +5,18 @@ matplotlib>=3.3.0
 requests>=2.25.0
 reportlab>=3.6.0
 PyPDF2>=2.0.0
-speechrecognition>=3.8.
-pydub>=0.25.0
+speechrecognition>=3.8.0
+pydub>=0.25.0
+
+# Transcription and audio processing
+speechbrain>=0.5.15
+torch>=1.9.0
+transformers>=4.20.0
+moviepy>=1.0.3
+
+# Optional: Speaker diarization (requires HF token)
+# pyannote.audio>=2.1.0
+
+# Optional: Additional audio processing
+librosa>=0.9.0
+soundfile>=0.10.0
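The commented-out pyannote.audio line above is the optional speaker-diarization dependency; a rough sketch of how it would typically be wired in, with the pipeline name and token handling as assumptions rather than code from this repository:

import os

try:
    from pyannote.audio import Pipeline  # optional dependency from requirements.txt
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",        # assumed pretrained pipeline name
        use_auth_token=os.getenv("HF_TOKEN"),  # assumed HF token env var
    )
except ImportError:
    diarization_pipeline = None  # apps fall back to single-speaker transcripts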
simple_casl_app.py
CHANGED
|
@@ -3,6 +3,9 @@ import json
|
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
import requests
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Configure logging
|
| 8 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -11,52 +14,483 @@ logger = logging.getLogger(__name__)
|
|
| 11 |
# Anthropic API key - can be set as HuggingFace secret or environment variable
|
| 12 |
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Check if API key is available
|
| 15 |
if ANTHROPIC_API_KEY:
|
| 16 |
logger.info("Claude API key found")
|
| 17 |
else:
|
| 18 |
logger.warning("Claude API key not found - using demo mode")
|
| 19 |
|
| 20 |
-
def
|
| 21 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
if not ANTHROPIC_API_KEY:
|
| 23 |
return "β Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 24 |
|
| 25 |
try:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
"x-api-key": ANTHROPIC_API_KEY,
|
| 29 |
-
"anthropic-version": "2023-06-01"
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
data = {
|
| 33 |
-
"model": "claude-3-5-sonnet-20241022",
|
| 34 |
-
"max_tokens": 4096,
|
| 35 |
-
"messages": [
|
| 36 |
-
{
|
| 37 |
-
"role": "user",
|
| 38 |
-
"content": prompt
|
| 39 |
-
}
|
| 40 |
-
]
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
-
response = requests.post(
|
| 44 |
-
"https://api.anthropic.com/v1/messages",
|
| 45 |
-
headers=headers,
|
| 46 |
-
json=data,
|
| 47 |
-
timeout=60
|
| 48 |
-
)
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
except Exception as e:
|
| 58 |
logger.error(f"Error calling Claude API: {str(e)}")
|
| 59 |
return f"β Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def process_file(file):
|
| 62 |
"""Process uploaded file"""
|
|
@@ -75,15 +509,10 @@ def process_file(file):
|
|
| 75 |
except Exception as e:
|
| 76 |
return f"Error reading file: {str(e)}"
|
| 77 |
|
| 78 |
-
def
|
| 79 |
-
"""
|
| 80 |
-
if
|
| 81 |
-
return "Please
|
| 82 |
-
|
| 83 |
-
# Get transcript content
|
| 84 |
-
transcript = process_file(file)
|
| 85 |
-
if transcript.startswith("Error") or transcript.startswith("Please"):
|
| 86 |
-
return transcript
|
| 87 |
|
| 88 |
# Add SLP notes to the prompt if provided
|
| 89 |
notes_section = ""
|
|
@@ -94,45 +523,249 @@ def analyze_transcript(file, age, gender, slp_notes):
|
|
| 94 |
{slp_notes.strip()}
|
| 95 |
"""
|
| 96 |
|
| 97 |
-
#
|
| 98 |
prompt = f"""
|
| 99 |
-
You are a speech-language pathologist
|
| 100 |
-
|
| 101 |
Patient: {age}-year-old {gender}
|
| 102 |
|
| 103 |
TRANSCRIPT:
|
| 104 |
-
{
|
| 105 |
-
|
| 106 |
-
Please provide a CASL analysis including:
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
-
|
| 116 |
-
-
|
| 117 |
-
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
"""

    # Get analysis from Claude API
-    result = ...
    return result

def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
-    """Perform targeted analysis based on custom questions"""
    if not transcript or not transcript.strip():
        return "Please provide a transcript first."

@@ -148,9 +781,9 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
{slp_notes.strip()}
"""

-    # ...
    prompt = f"""
-You are a speech-language pathologist conducting a targeted analysis of a speech transcript.

Patient: {age}-year-old {gender}

@@ -160,27 +793,94 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
SPECIFIC QUESTION FOR ANALYSIS:
{custom_question.strip()}

-...

-...
-4. Offer practical insights for clinical practice
-5. Be concise but comprehensive

-...
"""

    # Get targeted analysis from Claude API
-    result = ...
    return result

# Create enhanced interface with tabs
with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
-    gr.Markdown("Upload a speech transcript and get instant CASL assessment results with targeted analysis options.")

    # Store transcript globally
    transcript_state = gr.State("")

@@ -190,12 +890,46 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    with gr.Tab("Basic Analysis"):
        with gr.Row():
            with gr.Column():
-                gr.Markdown("### ...

-                ...

                age = gr.Number(
                    label="Patient Age",

@@ -215,11 +949,6 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    placeholder="Enter any additional clinical observations, context, or notes...",
                    lines=3
                )
-
-                analyze_btn = gr.Button(
-                    "🔍 Analyze Transcript",
-                    variant="primary"
-                )

            with gr.Column():
                gr.Markdown("### Analysis Results")

@@ -230,6 +959,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Tab 2: Targeted Analysis
    with gr.Tab("🎯 Targeted Analysis"):

@@ -257,7 +988,9 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                        "What narrative organization skills are evident?",
                        "What specific intervention targets would you recommend?",
                        "How does this patient's language compare to typical development?",
-                        "What evidence suggests cognitive-linguistic strengths/weaknesses?"
                    ],
                    label="Question Templates (Optional)",
                    value="Select a template or write your own..."

@@ -283,6 +1016,8 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Tab 3: Quick Questions
    with gr.Tab("⚡ Quick Questions"):

@@ -307,7 +1042,19 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                        "Narrative structure",
                        "Vocabulary level",
                        "Sentence complexity",
-                        "Speech rate patterns"
                    ],
                    label="Select questions to analyze:",
                    value=[]

@@ -327,17 +1074,108 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
                    lines=25,
                    max_lines=30
                )

    # Event handlers
-    def ...
-        """Handle ...
        result = analyze_transcript(file, age_val, gender_val, notes)
        transcript = process_file(file) if file else ""
-        ...

    def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
        """Handle targeted analysis"""
-        ...

    def on_question_template_change(template):
        """Handle question template selection"""

@@ -348,10 +1186,10 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
        """Handle quick analysis with multiple questions"""
        if not transcript or not transcript.strip():
-            return "Please provide a transcript first."

        if not questions:
-            return "Please select at least one question to analyze."

        # Add SLP notes to the prompt if provided
        notes_section = ""

@@ -362,40 +1200,150 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
{notes.strip()}
"""

-        # Create quick analysis prompt
        questions_text = "\n".join([f"- {q}" for q in questions])
        prompt = f"""
-You are a speech-language pathologist conducting a quick analysis of a speech transcript.

Patient: {age_val}-year-old {gender_val}

TRANSCRIPT:
{transcript}{notes_section}

-Please provide a ...
{questions_text}

-...

-...
"""

-        ...

    # Connect event handlers
-    ...
-    ...
        inputs=[file_upload, age, gender, slp_notes],
-        outputs=[output, transcript_input]
    )

    targeted_analyze_btn.click(
        on_targeted_analyze,
        inputs=[transcript_input, custom_question, age, gender, slp_notes],
-        outputs=[targeted_output]
    )

    question_templates.change(

@@ -407,11 +1355,15 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
    quick_analyze_btn.click(
        on_quick_analyze,
        inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
-        outputs=[quick_output]
    )

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")

@@ -419,4 +1371,29 @@ if __name__ == "__main__":
    else:
        print("✅ Claude API configured")

    app.launch(show_api=False)

import os
import logging
import requests
+import re
+import tempfile
+import numpy as np

# Configure logging
logging.basicConfig(level=logging.INFO)

# Anthropic API key - can be set as HuggingFace secret or environment variable
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

+# Try to import transcription libraries
+try:
+    from speechbrain.pretrained import EncoderDecoderASR
+    import torch
+    SPEECHBRAIN_AVAILABLE = True
+    logger.info("SpeechBrain available for transcription")
+except ImportError as e:
+    logger.warning(f"SpeechBrain not available: {e}")
+    SPEECHBRAIN_AVAILABLE = False
+
+# Try to import video processing
+try:
+    import moviepy.editor as mp
+    MOVIEPY_AVAILABLE = True
+    logger.info("MoviePy available for video processing")
+except ImportError as e:
+    logger.warning(f"MoviePy not available: {e}")
+    MOVIEPY_AVAILABLE = False
+
+# Try to import speaker diarization
+try:
+    from pyannote.audio import Pipeline
+    from pyannote.audio.pipelines.utils.hook import ProgressHook
+    DIARIZATION_AVAILABLE = True
+    logger.info("Pyannote.audio available for speaker diarization")
+except ImportError as e:
+    logger.warning(f"Pyannote.audio not available: {e}")
+    DIARIZATION_AVAILABLE = False
+
+# Try to import sentiment and emotion analysis
+try:
+    from transformers import pipeline
+    SENTIMENT_AVAILABLE = True
+    logger.info("Transformers available for sentiment analysis")
+except ImportError as e:
+    logger.warning(f"Transformers not available: {e}")
+    SENTIMENT_AVAILABLE = False
+
+# Initialize models if available
+asr_model = None
+sentiment_model = None
+emotion_model = None
+diarization_pipeline = None
+
+if SPEECHBRAIN_AVAILABLE:
+    try:
+        asr_model = EncoderDecoderASR.from_hparams(
+            source="speechbrain/asr-crdnn-rnnlm-librispeech",
+            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
+        )
+        logger.info("ASR model loaded successfully")
+    except Exception as e:
+        logger.error(f"Error loading ASR model: {e}")
+        SPEECHBRAIN_AVAILABLE = False
+
+if SENTIMENT_AVAILABLE:
+    try:
+        sentiment_model = pipeline(
+            "sentiment-analysis",
+            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+            top_k=None
+        )
+        emotion_model = pipeline(
+            "text-classification",
+            model="j-hartmann/emotion-english-distilroberta-base",
+            top_k=None
+        )
+        logger.info("Sentiment and emotion models loaded")
+    except Exception as e:
+        logger.error(f"Error loading sentiment models: {e}")
+        SENTIMENT_AVAILABLE = False
+
+if DIARIZATION_AVAILABLE:
+    try:
+        HF_TOKEN = os.getenv("HF_TOKEN", "")
+        if HF_TOKEN:
+            diarization_pipeline = Pipeline.from_pretrained(
+                "pyannote/speaker-diarization@2.1",
+                use_auth_token=HF_TOKEN
+            )
+            logger.info("Speaker diarization pipeline loaded")
+        else:
+            logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
+    except Exception as e:
+        logger.error(f"Error loading diarization pipeline: {e}")
+
# Check if API key is available
if ANTHROPIC_API_KEY:
    logger.info("Claude API key found")
else:
    logger.warning("Claude API key not found - using demo mode")

+def validate_analysis_completeness(response_text):
+    """Validate that all 12 sections are present in the analysis"""
+    required_sections = [
+        "1. SPEECH FACTORS",
+        "2. LANGUAGE SKILLS ASSESSMENT",
+        "3. COMPLEX SENTENCE ANALYSIS",
+        "4. FIGURATIVE LANGUAGE ANALYSIS",
+        "5. PRAGMATIC LANGUAGE ASSESSMENT",
+        "6. VOCABULARY AND SEMANTIC ANALYSIS",
+        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
+        "8. COGNITIVE-LINGUISTIC FACTORS",
+        "9. FLUENCY AND RHYTHM ANALYSIS",
+        "10. QUANTITATIVE METRICS",
+        "11. CLINICAL IMPLICATIONS",
+        "12. PROGNOSIS AND SUMMARY"
+    ]
+
+    missing_sections = []
+    for section in required_sections:
+        if section not in response_text:
+            missing_sections.append(section)
+
+    if missing_sections:
+        print(f"\n⚠️ MISSING SECTIONS: {missing_sections}")
+        return False
+    else:
+        print(f"\n✅ ALL 12 SECTIONS PRESENT")
+        return True
+
+def call_claude_api_with_continuation(prompt, max_continuations=3):
+    """Call Claude API with continuation prompting to ensure complete responses"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
+        full_response = ""
+        continuation_count = 0

+        # Add continuation instruction to original prompt
+        initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
+
+        while continuation_count <= max_continuations:
+            if continuation_count == 0:
+                current_prompt = initial_prompt
+            else:
+                # For continuations, provide context about what was already covered
+                current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1} of {max_continuations}):\n\nIMPORTANT: Do not repeat what you've already written. Continue with the next section or complete any unfinished sections. If you're done, do not include <CONTINUE>. Provide the remaining analysis sections. Make sure to complete ALL 12 sections of the analysis."
+
+            headers = {
+                "Content-Type": "application/json",
+                "x-api-key": ANTHROPIC_API_KEY,
+                "anthropic-version": "2023-06-01"
+            }
+
+            data = {
+                "model": "claude-3-5-sonnet-20241022",
+                "max_tokens": 4096,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": current_prompt
+                    }
+                ]
+            }

+            response = requests.post(
+                "https://api.anthropic.com/v1/messages",
+                headers=headers,
+                json=data,
+                timeout=90
+            )
+
+            if response.status_code == 200:
+                response_json = response.json()
+                response_text = response_json['content'][0]['text']
+
+                # Log response for debugging
+                print(f"\n=== PART {continuation_count + 1} RESPONSE ===")
+                print(f"Length: {len(response_text)} characters")
+                print(f"Contains CONTINUE: {'<CONTINUE>' in response_text}")
+                print(f"First 200 chars: {response_text[:200]}...")
+                print(f"Last 200 chars: {response_text[-200:]}...")
+                print("=" * 50)
+
+                # Simple string combination - no complex processing
+                if continuation_count == 0:
+                    full_response = response_text
+                else:
+                    # Just add a newline and append the continuation
+                    full_response += "\n\n" + response_text
+
+                # Check if response indicates continuation is needed
+                needs_continuation = "<CONTINUE>" in response_text
+
+                print(f"Needs continuation: {needs_continuation}")
+                print(f"Continuation count: {continuation_count}/{max_continuations}")
+
+                # Continue if <CONTINUE> is present and we haven't reached max
+                if needs_continuation and continuation_count < max_continuations:
+                    # Remove the CONTINUE marker
+                    full_response = full_response.replace("<CONTINUE>", "")
+                    continuation_count += 1
+                    logger.info(f"Continuing analysis (attempt {continuation_count}/{max_continuations})")
+                    continue
+                else:
+                    # Clean up any remaining continuation markers
+                    full_response = full_response.replace("<CONTINUE>", "")
+                    break
+            else:
+                logger.error(f"Claude API error: {response.status_code} - {response.text}")
+                return f"❌ Claude API Error: {response.status_code}"
+
    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"
+
+    # Add completion indicator
+    if continuation_count > 0:
+        full_response += f"\n\n[Analysis completed in {continuation_count + 1} parts]"
+
+    # Log final response for debugging
+    print(f"\n=== FINAL COMPLETE RESPONSE ===")
+    print(f"Total length: {len(full_response)} characters")
+    print(f"Number of parts: {continuation_count + 1}")
+    print("=" * 50)
+
+    # Print the entire final response for debugging
+    print(f"\n=== ENTIRE FINAL RESPONSE ===")
+    print(full_response)
+    print("=" * 50)
+
+    return full_response
+
+def call_claude_api(prompt):
+    """Call Claude API directly (legacy function for backward compatibility)"""
+    return call_claude_api_with_continuation(prompt, max_continuations=0)
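The two helpers above are the seam between the new continuation logic and the rest of the app: callers pass one prompt and get back the stitched, multi-part response. A minimal usage sketch (not part of the diff; the prompt text is hypothetical) that also exercises the validator added earlier:

# Hypothetical smoke test for the continuation flow defined above.
report = call_claude_api_with_continuation("Run the full 12-section CASL analysis on: ...", max_continuations=3)
if not validate_analysis_completeness(report):
    logger.warning("Claude response is missing one or more of the 12 required sections")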
+
+def extract_audio_from_video(video_path):
+    """Extract audio from video file"""
+    if not MOVIEPY_AVAILABLE:
+        return None, "MoviePy not available for video processing"
+
+    try:
+        temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+        temp_audio_path = temp_audio.name
+        temp_audio.close()
+
+        video = mp.VideoFileClip(video_path)
+        audio = video.audio
+
+        if audio is None:
+            return None, "No audio track found in video file"
+
+        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
+        video.close()
+        audio.close()
+
+        return temp_audio_path, "Audio extracted successfully"
+
+    except Exception as e:
+        logger.error(f"Error extracting audio: {e}")
+        return None, f"Error extracting audio: {str(e)}"
+
+def perform_speaker_diarization(audio_path):
+    """Perform speaker diarization on audio file"""
+    if not DIARIZATION_AVAILABLE or not diarization_pipeline:
+        return None, "Speaker diarization not available"
+
+    try:
+        with ProgressHook() as hook:
+            diarization = diarization_pipeline(audio_path, hook=hook)
+
+        speaker_segments = []
+        for turn, _, speaker in diarization.itertracks(yield_label=True):
+            speaker_segments.append({
+                'start': turn.start,
+                'end': turn.end,
+                'speaker': speaker,
+                'duration': turn.end - turn.start
+            })
+
+        logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
+        return speaker_segments, "Diarization completed successfully"
+
+    except Exception as e:
+        logger.error(f"Error in diarization: {e}")
+        return None, f"Diarization error: {str(e)}"
+
+def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
+    """Transcribe audio with timestamps, sentiment, and metadata"""
+    if not audio_file:
+        return None, "No audio file provided"
+
+    if not SPEECHBRAIN_AVAILABLE:
+        return None, "SpeechBrain not available for transcription"
+
+    try:
+        # Check if it's a video file
+        file_extension = os.path.splitext(audio_file)[1].lower()
+        if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
+            processed_audio, status = extract_audio_from_video(audio_file)
+            if not processed_audio:
+                return None, status
+        else:
+            processed_audio = audio_file
+
+        # Perform speaker diarization if enabled
+        speaker_segments = None
+        diarization_status = ""
+        if enable_diarization:
+            speaker_segments, diarization_status = perform_speaker_diarization(processed_audio)
+
+        # Get transcription
+        transcript = asr_model.transcribe_file(processed_audio)
+
+        # Clean up temporary file if created
+        if processed_audio != audio_file and os.path.exists(processed_audio):
+            try:
+                os.unlink(processed_audio)
+            except:
+                pass
+
+        # Split into sentences and add metadata
+        sentences = re.split(r'[.!?]+', transcript)
+        sentences = [s.strip() for s in sentences if s.strip()]
+
+        rich_transcript = []
+        current_time = 0
+
+        for i, sentence in enumerate(sentences):
+            timestamp = current_time + (i * 2)
+
+            # Determine speaker
+            speaker = "UNKNOWN"
+            if speaker_segments:
+                for segment in speaker_segments:
+                    if segment['start'] <= timestamp <= segment['end']:
+                        speaker = segment['speaker']
+                        break
+
+            # Sentiment and emotion analysis
+            sentiment = {'label': 'neutral', 'score': 0.5}
+            emotion = {'label': 'neutral', 'score': 0.5}
+
+            if SENTIMENT_AVAILABLE:
+                try:
+                    sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
+                    sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else sentiment
+
+                    emotion_result = emotion_model(sentence)[0] if emotion_model else None
+                    emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else emotion
+                except:
+                    pass
+
+            # Word metrics
+            words = sentence.split()
+            word_count = len(words)
+            avg_word_length = np.mean([len(word) for word in words]) if words else 0
+            speech_rate = word_count * 30 / 60
+
+            rich_transcript.append({
+                'timestamp': timestamp,
+                'speaker': speaker,
+                'sentence': sentence,
+                'word_count': word_count,
+                'avg_word_length': round(avg_word_length, 2),
+                'speech_rate_wpm': round(speech_rate, 1),
+                'sentiment': sentiment['label'],
+                'sentiment_score': round(sentiment['score'], 3),
+                'emotion': emotion['label'],
+                'emotion_score': round(emotion['score'], 3)
+            })
+
+            current_time = timestamp
+
+        status_msg = f"Transcription completed successfully"
+        if diarization_status:
+            status_msg += f" {diarization_status}"
+
+        return rich_transcript, status_msg
+
+    except Exception as e:
+        logger.error(f"Error in transcription: {e}")
+        return None, f"Transcription error: {str(e)}"
+
+def format_rich_transcript(rich_transcript):
+    """Format rich transcript for display"""
+    if not rich_transcript:
+        return "No transcript data available"
+
+    formatted_lines = []
+    for entry in rich_transcript:
+        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
+
+        line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
+        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
+        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
+        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"
+
+        formatted_lines.append(line)
+
+    return '\n'.join(formatted_lines)
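Each formatted line packs the per-sentence metadata into bracketed fields. For reference, an illustrative output line under assumed values (speaker label, text, and scores are made up):

# Example of the line format produced by format_rich_transcript (values hypothetical):
# [00:04] *SPEAKER_00: we went to the park [Words: 5, Rate: 2.5wpm] [Sentiment: positive (0.871)] [Emotion: joy (0.912)]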
+
+def calculate_slp_metrics(rich_transcript):
+    """Calculate comprehensive SLP metrics"""
+    if not rich_transcript:
+        return {}
+
+    # Basic metrics
+    total_sentences = len(rich_transcript)
+    total_words = sum(entry['word_count'] for entry in rich_transcript)
+    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0
+
+    # Speaker analysis
+    speakers = {}
+    for entry in rich_transcript:
+        speaker = entry['speaker']
+        if speaker not in speakers:
+            speakers[speaker] = {
+                'sentences': 0,
+                'words': 0,
+                'sentiments': [],
+                'emotions': []
+            }
+        speakers[speaker]['sentences'] += 1
+        speakers[speaker]['words'] += entry['word_count']
+        speakers[speaker]['sentiments'].append(entry['sentiment'])
+        speakers[speaker]['emotions'].append(entry['emotion'])
+
+    # Word-level analysis
+    all_words = []
+    for entry in rich_transcript:
+        words = entry['sentence'].lower().split()
+        all_words.extend(words)
+
+    # Word frequency distribution
+    word_freq = {}
+    for word in all_words:
+        word_clean = re.sub(r'[^\w\s]', '', word)
+        if word_clean:
+            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1
+
+    # Vocabulary diversity (Type-Token Ratio)
+    unique_words = len(set(all_words))
+    ttr = unique_words / total_words if total_words > 0 else 0
+
+    # Speech rate analysis
+    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
+    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0
+
+    # Sentiment analysis
+    sentiment_counts = {}
+    emotion_counts = {}
+    for entry in rich_transcript:
+        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
+        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1
+
+    # Sentence complexity
+    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
+    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
+
+    # Pause analysis
+    pauses = []
+    for i in range(1, len(rich_transcript)):
+        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
+        pauses.append(pause)
+
+    avg_pause_duration = np.mean(pauses) if pauses else 0
+
+    return {
+        'total_sentences': total_sentences,
+        'total_words': total_words,
+        'total_duration_seconds': total_duration,
+        'unique_words': unique_words,
+        'type_token_ratio': round(ttr, 3),
+        'avg_sentence_length': round(avg_sentence_length, 1),
+        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
+        'avg_pause_duration': round(avg_pause_duration, 1),
+        'sentiment_distribution': sentiment_counts,
+        'emotion_distribution': emotion_counts,
+        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
+        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
+        'speakers': speakers,
+        'speaker_count': len(speakers)
+    }
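The Type-Token Ratio reported above is simply unique tokens divided by total tokens, so short samples inflate it. A tiny standalone worked example (not part of the diff):

# TTR on a toy utterance: 6 unique words out of 8 tokens.
tokens = "the dog chased the ball and the stick".lower().split()
ttr = len(set(tokens)) / len(tokens)
print(round(ttr, 3))  # 6 / 8 -> 0.75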

def process_file(file):
    """Process uploaded file"""

    except Exception as e:
        return f"Error reading file: {str(e)}"

+def analyze_transcript_content(transcript_content, age, gender, slp_notes):
+    """Analyze transcript content with comprehensive quantification and detailed citations"""
+    if not transcript_content or len(transcript_content.strip()) < 50:
+        return "Error: Please provide a longer transcript for analysis."

    # Add SLP notes to the prompt if provided
    notes_section = ""

{slp_notes.strip()}
"""

+    # Enhanced comprehensive analysis prompt with detailed quantification
    prompt = f"""
+You are a speech-language pathologist conducting a COMPREHENSIVE CASL assessment. Provide a SINGLE, DETAILED analysis that quantifies EVERY occurrence and cites specific examples.
+
Patient: {age}-year-old {gender}

TRANSCRIPT:
+{transcript_content}{notes_section}

+INSTRUCTIONS: Provide ONE comprehensive analysis covering ALL areas below. QUANTIFY EVERYTHING with exact counts and cite SPECIFIC examples from the transcript. Be thorough and detailed. COMPLETE ALL 12 SECTIONS.
+
+COMPREHENSIVE CASL ANALYSIS:
+
+1. SPEECH FACTORS (with EXACT counts and specific citations):
+
+A. Fluency Issues:
+- Count and cite EVERY filler word ("um", "uh", "like", "you know", etc.)
+- Count and cite EVERY false start/self-correction
+- Count and cite EVERY repetition of words/phrases
+- Count and cite EVERY revision/restart
+- Calculate percentage of disfluent speech
+
+B. Word Retrieval Issues:
+- Count and cite EVERY instance of circumlocution
+- Count and cite EVERY incomplete thought/abandoned utterance
+- Count and cite EVERY word-finding pause
+- Count and cite EVERY use of generic terms ("thing", "stuff", etc.)
+
+C. Grammatical Errors:
+- Count and cite EVERY grammatical error (verb tense, subject-verb agreement, etc.)
+- Count and cite EVERY syntactic error
+- Count and cite EVERY morphological error
+- Count and cite EVERY run-on sentence
+
+2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
+
+A. Lexical/Semantic Skills:
+- Count total unique words vs. total words (Type-Token Ratio)
+- List and categorize vocabulary by sophistication level
+- Identify semantic relationships demonstrated
+- Assess word retrieval strategies used
+- Evaluate semantic precision
+
+B. Syntactic Skills:
+- Count sentence types (simple, compound, complex, compound-complex)
+- Calculate average sentence length
+- Identify syntactic patterns and errors
+- Assess clause complexity and embedding
+
+C. Supralinguistic Skills:
+- Identify and cite examples of:
+  * Cause-effect relationships
+  * Inferences made
+  * Non-literal language use
+  * Problem-solving language
+  * Metalinguistic awareness
+
+3. COMPLEX SENTENCE ANALYSIS (with exact counts):
+
+A. Coordinating Conjunctions:
+- Count and cite EVERY use of: and, but, or, so, yet, for, nor
+- Analyze patterns of use
+- Assess age-appropriateness
+
+B. Subordinating Conjunctions:
+- Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who, whom, whose
+- Analyze clause complexity
+- Assess embedding depth
+
+C. Sentence Structure Analysis:
+- Count each sentence type with examples
+- Calculate complexity ratios
+- Assess developmental appropriateness
+
+4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
+
+A. Similes:
+- Count and cite EVERY simile (comparisons using "like" or "as")
+- Analyze creativity and appropriateness
+
+B. Metaphors:
+- Count and cite EVERY metaphor (direct comparisons)
+- Assess comprehension and use
+
+C. Idioms:
+- Count and cite EVERY idiom used
+- Assess comprehension and appropriate use
+
+D. Non-literal Language:
+- Count and cite EVERY instance of sarcasm, humor, irony
+- Assess comprehension level
+
+5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
+
+A. Turn-taking:
+- Analyze conversational flow
+- Count interruptions or overlaps
+- Assess reciprocity
+
+B. Topic Management:
+- Count topic shifts
+- Assess topic maintenance
+- Evaluate topic introduction
+
+C. Social Communication:
+- Assess register appropriateness
+- Evaluate politeness markers
+- Analyze social awareness
+
+6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
+
+A. Vocabulary Diversity:
+- Calculate Type-Token Ratio
+- List most frequent words
+- Assess vocabulary sophistication
+
+B. Semantic Relationships:
+- Count and cite examples of:
+  * Synonyms/antonyms
+  * Categories/hierarchies
+  * Part-whole relationships
+  * Cause-effect vocabulary
+
+7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
+
+A. Morphological Markers:
+- Count and cite use of:
+  * Plurals (-s, -es)
+  * Possessives
+  * Verb tenses
+  * Derivational morphemes
+
+B. Phonological Patterns:
+- Identify speech sound errors
+- Count phonological processes
+- Assess syllable structure
+
+8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
+
+A. Working Memory:
+- Assess sentence length complexity
+- Analyze information retention
+- Evaluate processing demands
+
+B. Processing Speed:
+- Analyze speech rate
+- Assess response time
+- Evaluate efficiency
+
+C. Executive Function:
+- Assess planning and organization
+- Evaluate self-monitoring
+- Analyze cognitive flexibility
+
+9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
+
+A. Speech Rate:
+- Calculate words per minute
+- Analyze rate variability
+- Assess naturalness
+
+B. Pause Patterns:
+- Count and analyze pauses
+- Assess pause function
+- Evaluate rhythm
+
+10. QUANTITATIVE METRICS:
+
+- Total words: [count]
+- Total sentences: [count]
+- Average sentence length: [calculation]
+- Type-Token Ratio: [calculation]
+- Disfluency rate: [percentage]
+- Error rate: [percentage]
+- Vocabulary diversity score: [calculation]
+
+11. CLINICAL IMPLICATIONS:
+
+A. Strengths:
+- List specific strengths with evidence
+- Identify areas of competence
+
+B. Areas of Need:
+- Prioritize intervention targets
+- Provide specific examples
+
+C. Treatment Recommendations:
+- List 5-7 specific intervention strategies
+- Include intensity and frequency recommendations
+- Address all identified areas of need
+
+12. PROGNOSIS AND SUMMARY:
+
+- Overall communication profile
+- Developmental appropriateness
+- Impact on academic/social functioning
+- Expected progress with intervention
+
+FORMAT REQUIREMENTS:
+- Use bullet points for organization
+- Include exact counts for everything
+- Cite specific quotes from transcript
+- Use clear headings and subheadings
+- Provide percentages and ratios where applicable
+- Be comprehensive but organized
+- Focus on clinical relevance
+- COMPLETE ALL 12 SECTIONS
+
+SECTION CHECKLIST - COMPLETE ALL:
+□ 1. SPEECH FACTORS (A, B, C)
+□ 2. LANGUAGE SKILLS ASSESSMENT (A, B, C)
+□ 3. COMPLEX SENTENCE ANALYSIS (A, B, C)
+□ 4. FIGURATIVE LANGUAGE ANALYSIS (A, B, C, D)
+□ 5. PRAGMATIC LANGUAGE ASSESSMENT (A, B, C)
+□ 6. VOCABULARY AND SEMANTIC ANALYSIS (A, B)
+□ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (A, B)
+□ 8. COGNITIVE-LINGUISTIC FACTORS (A, B, C)
+□ 9. FLUENCY AND RHYTHM ANALYSIS (A, B)
+□ 10. QUANTITATIVE METRICS
+□ 11. CLINICAL IMPLICATIONS (A, B, C)
+□ 12. PROGNOSIS AND SUMMARY
+
+CRITICAL: If you cannot complete all 12 sections in one response, end with <CONTINUE> and continue with the remaining sections. Do not skip any sections. Use the checklist to ensure all sections are completed.
"""

    # Get analysis from Claude API
+    result = call_claude_api_with_continuation(prompt, max_continuations=5)
    return result

+def analyze_transcript(file, age, gender, slp_notes):
+    """Analyze transcript from file upload"""
+    if file is None:
+        return "Please upload a transcript file first."
+
+    # Get transcript content
+    transcript = process_file(file)
+    if transcript.startswith("Error") or transcript.startswith("Please"):
+        return transcript
+
+    return analyze_transcript_content(transcript, age, gender, slp_notes)
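Taken together, analyze_transcript_content drives the prompt above while analyze_transcript is just the file-upload wrapper around it. A quick, self-contained way to exercise the text path (the sample transcript is invented and only needs to clear the 50-character minimum):

# Hypothetical smoke test for the direct-text entry point defined above.
sample = "C: um I went to the park and uh we we played on the slide. E: what happened next?"
print(analyze_transcript_content(sample, 8, "male", ""))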
+
def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
+    """Perform targeted analysis based on custom questions with comprehensive detail"""
    if not transcript or not transcript.strip():
        return "Please provide a transcript first."

{slp_notes.strip()}
"""

+    # Enhanced targeted analysis prompt with comprehensive detail
    prompt = f"""
+You are a speech-language pathologist conducting a DETAILED targeted analysis of a speech transcript.

Patient: {age}-year-old {gender}

SPECIFIC QUESTION FOR ANALYSIS:
{custom_question.strip()}

+INSTRUCTIONS: Provide a COMPREHENSIVE, DETAILED analysis that directly addresses this specific question. Include:
+- EXACT counts and quantification
+- SPECIFIC citations from the transcript
+- DETAILED examples for every observation
+- PERCENTAGES and ratios where applicable
+- CLINICAL significance of findings
+- AGE-APPROPRIATE assessment
+
+ANALYSIS REQUIREMENTS:

+1. QUANTIFICATION:
+- Count every relevant occurrence
+- Calculate percentages and ratios
+- Provide specific numbers for all observations

+2. EVIDENCE:
+- Cite exact quotes from the transcript
+- Provide line-by-line examples
+- Include specific timestamps or context
+
+3. DETAILED EXAMPLES:
+- Give multiple examples for each pattern
+- Show variations in the pattern
+- Demonstrate the range of severity
+
+4. CLINICAL ASSESSMENT:
+- Assess severity level
+- Compare to age expectations
+- Identify clinical significance
+- Suggest intervention implications
+
+5. COMPREHENSIVE COVERAGE:
+- Address all aspects of the question
+- Consider related language areas
+- Include both strengths and weaknesses
+- Provide developmental context
+
+ANALYSIS STRUCTURE:
+
+A. DIRECT ANSWER TO QUESTION:
+- Provide a clear, direct answer
+- Include quantification and severity assessment
+
+B. DETAILED EVIDENCE:
+- List every relevant example with exact quotes
+- Provide counts and percentages
+- Show patterns and variations
+
+C. PATTERN ANALYSIS:
+- Identify underlying patterns
+- Analyze frequency and consistency
+- Assess variability across the transcript
+
+D. DEVELOPMENTAL ASSESSMENT:
+- Compare to age-appropriate expectations
+- Identify developmental level
+- Assess progress and challenges
+
+E. CLINICAL IMPLICATIONS:
+- Impact on communication
+- Effect on academic/social functioning
+- Priority for intervention
+
+F. INTERVENTION CONSIDERATIONS:
+- Specific strategies to address the issue
+- Intensity and frequency recommendations
+- Expected outcomes and timeline
+
+FORMAT REQUIREMENTS:
+- Use clear headings and subheadings
+- Include bullet points for organization
+- Provide exact counts and percentages
+- Cite specific quotes with context
+- Be thorough and comprehensive
+- Focus on clinical relevance and utility
+
+Remember: This should be a DETAILED, COMPREHENSIVE analysis that thoroughly addresses the specific question with quantification, evidence, and clinical implications.
"""

    # Get targeted analysis from Claude API
+    result = call_claude_api_with_continuation(prompt, max_continuations=3)
    return result

# Create enhanced interface with tabs
with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🗣️ Enhanced CASL Analysis Tool")
+    gr.Markdown("Upload a speech transcript, paste text, or transcribe audio/video and get instant CASL assessment results with targeted analysis options.")

    # Store transcript globally
    transcript_state = gr.State("")

    with gr.Tab("Basic Analysis"):
        with gr.Row():
            with gr.Column():
+                gr.Markdown("### Input Options")

+                with gr.Tabs():
+                    with gr.Tab("📁 File Upload"):
+                        file_upload = gr.File(
+                            label="Upload Transcript File",
+                            file_types=[".txt", ".cha"]
+                        )
+
+                        analyze_file_btn = gr.Button(
+                            "🔍 Analyze File",
+                            variant="primary"
+                        )
+
+                    with gr.Tab("📝 Text Input"):
+                        text_input = gr.Textbox(
+                            label="Paste Transcript Here",
+                            placeholder="Paste your transcript text here...",
+                            lines=10
+                        )
+
+                        analyze_text_btn = gr.Button(
+                            "🔍 Analyze Text",
+                            variant="primary"
+                        )
+
+                    with gr.Tab("🎤 Audio/Video Transcription"):
+                        audio_input = gr.File(
+                            label="Upload Audio/Video File",
+                            file_types=["audio", "video"]
+                        )
+
+                        transcribe_btn = gr.Button(
+                            "🎤 Transcribe & Analyze",
+                            variant="primary"
+                        )
+
+                        transcription_status = gr.Markdown("")
+
+                gr.Markdown("### Patient Information")

                age = gr.Number(
                    label="Patient Age",

                    placeholder="Enter any additional clinical observations, context, or notes...",
                    lines=3
                )

            with gr.Column():
                gr.Markdown("### Analysis Results")

                    lines=25,
                    max_lines=30
                )
+
+                analysis_progress = gr.Markdown("")

    # Tab 2: Targeted Analysis
    with gr.Tab("🎯 Targeted Analysis"):

                        "What narrative organization skills are evident?",
                        "What specific intervention targets would you recommend?",
                        "How does this patient's language compare to typical development?",
+                        "What evidence suggests cognitive-linguistic strengths/weaknesses?",
+                        "Analyze the use of conjunctions and complex sentences",
+                        "Identify and analyze figurative language use"
                    ],
                    label="Question Templates (Optional)",
                    value="Select a template or write your own..."

                    lines=25,
                    max_lines=30
                )
+
+                targeted_progress = gr.Markdown("")

    # Tab 3: Quick Questions
    with gr.Tab("⚡ Quick Questions"):

                        "Narrative structure",
                        "Vocabulary level",
                        "Sentence complexity",
+                        "Speech rate patterns",
+                        "Complex sentence analysis",
+                        "Figurative language use",
+                        "Morphological markers",
+                        "Phonological patterns",
+                        "Turn-taking skills",
+                        "Topic maintenance",
+                        "Social communication",
+                        "Cognitive-linguistic factors",
+                        "Working memory demands",
+                        "Executive function skills",
+                        "Metalinguistic awareness",
+                        "Academic language use"
                    ],
                    label="Select questions to analyze:",
                    value=[]

                    lines=25,
                    max_lines=30
                )
+
+                quick_progress = gr.Markdown("")
+
+    # Tab 4: Advanced Transcription
+    with gr.Tab("🎤 Advanced Transcription"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Audio/Video Upload")
+                gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
+
+                transcription_file_input = gr.File(
+                    label="Upload Audio or Video File",
+                    file_types=["audio", "video"]
+                )
+
+                enable_diarization = gr.Checkbox(
+                    label="Enable Speaker Diarization",
+                    value=True,
+                    info="Identify different speakers in the audio"
+                )
+
+                transcribe_advanced_btn = gr.Button(
+                    "🎤 Transcribe with Metadata",
+                    variant="primary",
+                    size="lg"
+                )
+
+                transcription_status = gr.Markdown("")
+
+            with gr.Column(scale=2):
+                gr.Markdown("### Rich Transcript with Metadata")
+
+                rich_transcript_display = gr.Textbox(
+                    label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
+                    lines=15,
+                    max_lines=20
+                )
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Speech Metrics")
+
+                transcription_metrics_display = gr.Textbox(
+                    label="SLP Metrics",
+                    lines=10,
+                    max_lines=15
+                )
+
+            with gr.Column():
+                gr.Markdown("### Word Frequency")
+
+                transcription_word_freq_display = gr.Dataframe(
+                    headers=["Word", "Frequency"],
+                    label="Most Frequent Words",
+                    interactive=False
+                )
# Event handlers
|
| 1135 |
+
def on_analyze_file(file, age_val, gender_val, notes):
|
| 1136 |
+
"""Handle file analysis"""
|
| 1137 |
result = analyze_transcript(file, age_val, gender_val, notes)
|
| 1138 |
transcript = process_file(file) if file else ""
|
| 1139 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1140 |
+
return result, transcript, progress_msg
|
| 1141 |
+
|
| 1142 |
+
def on_analyze_text(text, age_val, gender_val, notes):
|
| 1143 |
+
"""Handle text analysis"""
|
| 1144 |
+
result = analyze_transcript_content(text, age_val, gender_val, notes)
|
| 1145 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1146 |
+
return result, text, progress_msg
|
| 1147 |
+
|
| 1148 |
+
def on_transcribe_and_analyze(audio_file, age_val, gender_val, notes):
|
| 1149 |
+
"""Handle transcription and analysis"""
|
| 1150 |
+
if not audio_file:
|
| 1151 |
+
return "Please upload an audio/video file first.", "", "No file provided"
|
| 1152 |
+
|
| 1153 |
+
transcript, status = transcribe_audio(audio_file.name)
|
| 1154 |
+
if transcript:
|
| 1155 |
+
result = analyze_transcript_content(transcript, age_val, gender_val, notes)
|
| 1156 |
+
progress_msg = "β
Analysis completed" if "[Analysis completed in" in result else "π Analysis in progress..."
|
| 1157 |
+
return result, transcript, status
|
| 1158 |
+
else:
|
| 1159 |
+
return f"Transcription failed: {status}", "", status
|
| 1160 |
+
|
| 1161 |
+
def on_transcribe_advanced(audio_file, enable_diarization):
|
| 1162 |
+
"""Handle advanced transcription"""
|
| 1163 |
+
if not audio_file:
|
| 1164 |
+
return "Please upload an audio/video file first.", "", "No file provided"
|
| 1165 |
+
|
| 1166 |
+
transcript, status = transcribe_audio_with_metadata(audio_file.name, enable_diarization)
|
| 1167 |
+
if transcript:
|
| 1168 |
+
metrics = calculate_slp_metrics(transcript)
|
| 1169 |
+
word_freq_data = metrics.get('word_frequency', {})
|
| 1170 |
+
return transcript, status, metrics, word_freq_data
|
| 1171 |
+
else:
|
| 1172 |
+
return f"Transcription failed: {status}", "", {}, {}
|
| 1173 |
|
| 1174 |
def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
|
| 1175 |
"""Handle targeted analysis"""
|
| 1176 |
+
result = targeted_analysis(transcript, question, age_val, gender_val, notes)
|
| 1177 |
+
progress_msg = "β
Targeted analysis completed" if "[Analysis completed in" in result else "π Targeted analysis in progress..."
|
| 1178 |
+
return result, progress_msg
|
| 1179 |
|
| 1180 |
def on_question_template_change(template):
|
| 1181 |
"""Handle question template selection"""
|
|
|
|
| 1186 |
def on_quick_analyze(transcript, questions, age_val, gender_val, notes):
|
| 1187 |
"""Handle quick analysis with multiple questions"""
|
| 1188 |
if not transcript or not transcript.strip():
|
| 1189 |
+
return "Please provide a transcript first.", "β No transcript provided"
|
| 1190 |
|
| 1191 |
if not questions:
|
| 1192 |
+
return "Please select at least one question to analyze.", "β No questions selected"
|
| 1193 |
|
| 1194 |
# Add SLP notes to the prompt if provided
|
| 1195 |
notes_section = ""
|
|
|
|
| 1200 |
{notes.strip()}
|
| 1201 |
"""
|

        # Create enhanced quick analysis prompt with comprehensive SLP analysis
        questions_text = "\n".join([f"- {q}" for q in questions])
        prompt = f"""
You are a speech-language pathologist conducting a COMPREHENSIVE quick analysis of a speech transcript.

Patient: {age_val}-year-old {gender_val}

TRANSCRIPT:
{transcript}{notes_section}

Please provide a DETAILED analysis addressing these specific areas:
{questions_text}

ANALYSIS REQUIREMENTS:

For each selected area, provide:
1. EXACT COUNTS and quantification
2. SPECIFIC EXAMPLES with exact quotes from transcript
3. PERCENTAGES and ratios where applicable
4. SEVERITY assessment
5. AGE-APPROPRIATE evaluation
6. CLINICAL significance
7. INTERVENTION considerations

DETAILED ANALYSIS GUIDELINES:

For SYNTAX and COMPLEX SENTENCE analysis:
- Count and cite EVERY coordinating conjunction (and, but, or, so, yet, for, nor)
- Count and cite EVERY subordinating conjunction (because, although, while, since, if, when, where, that, which, who, whom, whose)
- Identify and count each sentence type (simple, compound, complex, compound-complex)
- Calculate complexity ratios and percentages
- Assess embedding depth and clause complexity
- Provide specific examples for each pattern

For FIGURATIVE LANGUAGE analysis:
- Count and cite EVERY simile (comparisons using "like" or "as")
- Count and cite EVERY metaphor (direct comparisons without "like" or "as")
- Count and cite EVERY idiom and non-literal expression
- Assess creativity and age-appropriate use
- Provide specific examples with context

For PRAGMATIC and SOCIAL COMMUNICATION:
- Count and analyze turn-taking patterns
- Assess topic maintenance and shifting abilities
- Evaluate social appropriateness and register use
- Count interruptions or conversational breakdowns
- Analyze non-literal language comprehension
- Provide specific examples of pragmatic behaviors

For VOCABULARY and SEMANTIC analysis:
- Calculate Type-Token Ratio
- Count and categorize vocabulary by sophistication level
- Analyze word retrieval strategies and circumlocution
- Assess semantic precision and relationships
- Count academic vs. everyday vocabulary use
- Provide specific examples of vocabulary patterns

For MORPHOLOGICAL and PHONOLOGICAL analysis:
- Count and cite EVERY morphological marker (plurals, possessives, verb tenses)
- Count and cite EVERY derivational morpheme (prefixes, suffixes)
- Identify and count phonological patterns and errors
- Assess syllable structure and stress patterns
- Provide specific examples of morphological use

For COGNITIVE-LINGUISTIC factors:
- Assess working memory demands in language production
- Analyze processing speed and efficiency
- Count and evaluate attention and focus patterns
- Assess executive function skills and self-monitoring
- Provide specific examples of cognitive-linguistic patterns

For FLUENCY and SPEECH RATE:
- Count and cite EVERY disfluency (fillers, repetitions, revisions)
- Calculate speech rate and variability
- Analyze pause patterns and their function
- Assess overall speech naturalness
- Provide specific examples of fluency patterns

For GRAMMAR and LANGUAGE ERRORS:
- Count and cite EVERY grammatical error
- Count and cite EVERY syntactic error
- Count and cite EVERY morphological error
- Calculate error rates and percentages
- Provide specific examples of error patterns

For WORD-FINDING and RETRIEVAL:
- Count and cite EVERY instance of circumlocution
- Count and cite EVERY incomplete thought
- Count and cite EVERY word-finding pause
- Analyze word retrieval strategies used
- Provide specific examples of retrieval patterns

For NARRATIVE and DISCOURSE:
- Assess narrative organization and coherence
- Count topic shifts and maintenance
- Analyze discourse markers and transitions
- Evaluate story structure and completeness
- Provide specific examples of narrative patterns

FORMAT REQUIREMENTS:
- Use clear headings for each area analyzed
- Include bullet points for organization
- Provide exact counts and percentages
- Cite specific quotes from transcript
- Include severity assessments
- Provide clinical implications
- Be comprehensive but focused on selected areas

Remember: This should be a DETAILED analysis that thoroughly addresses each selected area with quantification, evidence, and clinical relevance.
"""
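
        # Illustrative note (not part of the original handler): questions_text above is
        # built from the hypothetical checkbox selections passed in as `questions`, e.g.
        #   >>> "\n".join([f"- {q}" for q in ["Analyze syntax", "Count disfluencies"]])
        #   '- Analyze syntax\n- Count disfluencies'
        # so each selected question becomes one bullet in the "specific areas" list of the prompt.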

        result = call_claude_api_with_continuation(prompt, max_continuations=2)
        progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
        return result, progress_msg

    # Connect event handlers
    analyze_file_btn.click(
        on_analyze_file,
        inputs=[file_upload, age, gender, slp_notes],
        outputs=[output, transcript_input, analysis_progress]
    )

    analyze_text_btn.click(
        on_analyze_text,
        inputs=[text_input, age, gender, slp_notes],
        outputs=[output, transcript_input, analysis_progress]
    )

    transcribe_btn.click(
        on_transcribe_and_analyze,
        inputs=[audio_input, age, gender, slp_notes],
        outputs=[output, transcript_input, transcription_status]
    )

    transcribe_advanced_btn.click(
        on_transcribe_advanced,
        inputs=[transcription_file_input, enable_diarization],
        outputs=[rich_transcript_display, transcription_status, transcription_metrics_display, transcription_word_freq_display]
    )

    targeted_analyze_btn.click(
        on_targeted_analyze,
        inputs=[transcript_input, custom_question, age, gender, slp_notes],
        outputs=[targeted_output, targeted_progress]
    )

    question_templates.change(

    quick_analyze_btn.click(
        on_quick_analyze,
        inputs=[quick_transcript, quick_questions, age, gender, slp_notes],
        outputs=[quick_output, quick_progress]
    )

if __name__ == "__main__":
    print("🚀 Starting Enhanced CASL Analysis Tool...")
    print("📊 Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis, Advanced Transcription")
    print("🎤 Transcription: Audio/Video support with speaker diarization, sentiment, and emotion analysis")
    print("🔍 Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")

    if not ANTHROPIC_API_KEY:
        print("⚠️ ANTHROPIC_API_KEY not configured - analysis will show error message")
        print("   For HuggingFace Spaces: Add ANTHROPIC_API_KEY as a secret in your space settings")
    else:
        print("✅ Claude API configured")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - transcription will be disabled")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain available for transcription")

    if not MOVIEPY_AVAILABLE:
        print("⚠️ MoviePy not available - video processing will be limited")
        print("   Install with: pip install moviepy")
    else:
        print("✅ MoviePy available for video processing")

    if not DIARIZATION_AVAILABLE:
        print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
        print("   Install with: pip install pyannote.audio")
        print("   Note: Requires HuggingFace token for model access")
    else:
        print("✅ Pyannote.audio available for speaker diarization")

    if not SENTIMENT_AVAILABLE:
        print("⚠️ Transformers not available - sentiment/emotion analysis will be disabled")
        print("   Install with: pip install transformers torch")
    else:
        print("✅ Transformers available for sentiment and emotion analysis")

    app.launch(show_api=False)
transcription_demo.py
ADDED
@@ -0,0 +1,826 @@
import gradio as gr
import json
import os
import logging
import re
import numpy as np
import pandas as pd
from datetime import datetime
import time
import tempfile
from typing import Dict, List, Tuple, Optional
import requests

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Claude API key (the uploaded file references ANTHROPIC_API_KEY later but never
# defined it, so it is read from the environment here to avoid a NameError)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Try to import video processing libraries
try:
    import moviepy.editor as mp
    MOVIEPY_AVAILABLE = True
    logger.info("MoviePy available for video processing")
except ImportError as e:
    logger.warning(f"MoviePy not available: {e}")
    MOVIEPY_AVAILABLE = False

# Try to import speaker diarization
try:
    from pyannote.audio import Pipeline
    from pyannote.audio.pipelines.utils.hook import ProgressHook
    DIARIZATION_AVAILABLE = True
    logger.info("Pyannote.audio available for speaker diarization")
except ImportError as e:
    logger.warning(f"Pyannote.audio not available: {e}")
    DIARIZATION_AVAILABLE = False

# Try to import SpeechBrain and HuggingFace components
try:
    from speechbrain.pretrained import EncoderDecoderASR, VAD, EncoderClassifier
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    import torch
    SPEECHBRAIN_AVAILABLE = True
    HUGGINGFACE_AVAILABLE = True
    logger.info("SpeechBrain and HuggingFace models available")
except ImportError as e:
    logger.warning(f"SpeechBrain/HuggingFace not available: {e}")
    SPEECHBRAIN_AVAILABLE = False
    HUGGINGFACE_AVAILABLE = False

# Initialize models if available
asr_model = None
vad_model = None
sentiment_model = None
emotion_model = None
diarization_pipeline = None

if SPEECHBRAIN_AVAILABLE and HUGGINGFACE_AVAILABLE:
    try:
        # Speech-to-text model
        asr_model = EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-crdnn-rnnlm-librispeech",
            savedir="pretrained_models/asr-crdnn-rnnlm-librispeech"
        )

        # Voice Activity Detection
        vad_model = VAD.from_hparams(
            source="speechbrain/vad-crdnn-libriparty",
            savedir="pretrained_models/vad-crdnn-libriparty"
        )

        # Sentiment analysis
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            top_k=None
        )

        # Emotion analysis
        emotion_model = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=None
        )

        logger.info("All models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        SPEECHBRAIN_AVAILABLE = False
        HUGGINGFACE_AVAILABLE = False

# Initialize diarization pipeline
if DIARIZATION_AVAILABLE:
    try:
        # Note: You'll need to get a HuggingFace token and accept the model terms
        # at https://huggingface.co/pyannote/speaker-diarization
        HF_TOKEN = os.getenv("HF_TOKEN", "")
        if HF_TOKEN:
            diarization_pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization@2.1",
                use_auth_token=HF_TOKEN
            )
            logger.info("Speaker diarization pipeline loaded")
        else:
            logger.warning("HF_TOKEN not set - speaker diarization will be disabled")
    except Exception as e:
        logger.error(f"Error loading diarization pipeline: {e}")
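
# Minimal sketch of enabling diarization outside the app (assumes a valid HuggingFace
# token with access to the gated pyannote model; "hf_xxx" and "sample.wav" are
# placeholders, not real values):
#   os.environ["HF_TOKEN"] = "hf_xxx"      # or export HF_TOKEN before launching
#   pipeline = Pipeline.from_pretrained(
#       "pyannote/speaker-diarization@2.1",
#       use_auth_token=os.environ["HF_TOKEN"],
#   )
#   diarization = pipeline("sample.wav")    # yields speaker turns with start/end times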

def extract_audio_from_video(video_path):
    """Extract audio from video file (MP4, etc.)"""
    if not MOVIEPY_AVAILABLE:
        return None, "MoviePy not available for video processing"

    try:
        # Create temporary file for audio
        temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_audio_path = temp_audio.name
        temp_audio.close()

        # Load video and extract audio
        video = mp.VideoFileClip(video_path)
        audio = video.audio

        if audio is None:
            return None, "No audio track found in video file"

        # Export audio to temporary WAV file
        audio.write_audiofile(temp_audio_path, verbose=False, logger=None)

        # Close video to free memory
        video.close()
        audio.close()

        logger.info(f"Audio extracted from video: {temp_audio_path}")
        return temp_audio_path, "Audio extracted successfully"

    except Exception as e:
        logger.error(f"Error extracting audio from video: {e}")
        return None, f"Error extracting audio: {str(e)}"

def perform_speaker_diarization(audio_path):
    """Perform speaker diarization on audio file"""
    if not DIARIZATION_AVAILABLE or not diarization_pipeline:
        return None, "Speaker diarization not available"

    try:
        # Perform diarization
        with ProgressHook() as hook:
            diarization = diarization_pipeline(audio_path, hook=hook)

        # Extract speaker segments
        speaker_segments = []
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            speaker_segments.append({
                'start': turn.start,
                'end': turn.end,
                'speaker': speaker,
                'duration': turn.end - turn.start
            })

        logger.info(f"Diarization completed: {len(speaker_segments)} segments found")
        return speaker_segments, "Diarization completed successfully"

    except Exception as e:
        logger.error(f"Error in diarization: {e}")
        return None, f"Diarization error: {str(e)}"

def process_audio_file(file_path):
    """Process audio file, extracting from video if needed"""
    if not file_path:
        return None, "No file provided"

    file_extension = os.path.splitext(file_path)[1].lower()

    # If it's a video file, extract audio first
    if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
        logger.info(f"Processing video file: {file_path}")
        audio_path, status = extract_audio_from_video(file_path)
        if audio_path:
            return audio_path, f"Video processed: {status}"
        else:
            return None, status

    # If it's already an audio file, use it directly
    elif file_extension in ['.wav', '.mp3', '.m4a', '.flac', '.ogg']:
        logger.info(f"Processing audio file: {file_path}")
        return file_path, "Audio file ready for transcription"

    else:
        return None, f"Unsupported file format: {file_extension}"
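
# Usage sketch for process_audio_file (file names and the temp path are hypothetical
# examples, shown only to illustrate the three return branches above):
#   >>> process_audio_file("session.mp4")   # video: audio extracted to a temp WAV first
#   ('/tmp/tmpabc123.wav', 'Video processed: Audio extracted successfully')
#   >>> process_audio_file("session.wav")   # audio: used directly
#   ('session.wav', 'Audio file ready for transcription')
#   >>> process_audio_file("notes.txt")
#   (None, 'Unsupported file format: .txt')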

def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
    """Transcribe audio with timestamps, sentiment, and metadata"""
    if not audio_file:
        return None, "No audio file provided"

    if not SPEECHBRAIN_AVAILABLE:
        return None, "SpeechBrain not available - using demo transcription"

    try:
        # Process the file (extract audio if it's a video)
        processed_audio_path, process_status = process_audio_file(audio_file)

        if not processed_audio_path:
            return None, process_status

        # Perform speaker diarization if enabled
        speaker_segments = None
        diarization_status = ""
        if enable_diarization:
            speaker_segments, diarization_status = perform_speaker_diarization(processed_audio_path)

        # Get transcription with timestamps
        transcript = asr_model.transcribe_file(processed_audio_path)

        # Clean up temporary audio file if it was created from video
        if processed_audio_path != audio_file and os.path.exists(processed_audio_path):
            try:
                os.unlink(processed_audio_path)
                logger.info("Temporary audio file cleaned up")
            except Exception as e:
                logger.warning(f"Could not clean up temporary file: {e}")

        # Split into sentences for analysis
        sentences = re.split(r'[.!?]+', transcript)
        sentences = [s.strip() for s in sentences if s.strip()]

        # Analyze each sentence
        rich_transcript = []
        current_time = 0

        for i, sentence in enumerate(sentences):
            # Estimate timestamp (rough approximation)
            timestamp = current_time + (i * 2)  # Assume ~2 seconds per sentence

            # Determine speaker for this timestamp
            speaker = "UNKNOWN"
            if speaker_segments:
                for segment in speaker_segments:
                    if segment['start'] <= timestamp <= segment['end']:
                        speaker = segment['speaker']
                        break

            # Sentiment analysis
            sentiment_result = sentiment_model(sentence)[0] if sentiment_model else None
            sentiment = max(sentiment_result, key=lambda x: x['score']) if sentiment_result else {'label': 'neutral', 'score': 0.5}

            # Emotion analysis
            emotion_result = emotion_model(sentence)[0] if emotion_model else None
            emotion = max(emotion_result, key=lambda x: x['score']) if emotion_result else {'label': 'neutral', 'score': 0.5}

            # Word count and complexity metrics
            words = sentence.split()
            word_count = len(words)
            avg_word_length = np.mean([len(word) for word in words]) if words else 0

            # Calculate speech rate (words per minute estimate)
            speech_rate = word_count * 30 / 60  # Rough estimate

            rich_transcript.append({
                'timestamp': timestamp,
                'speaker': speaker,
                'sentence': sentence,
                'word_count': word_count,
                'avg_word_length': round(avg_word_length, 2),
                'speech_rate_wpm': round(speech_rate, 1),
                'sentiment': sentiment['label'],
                'sentiment_score': round(sentiment['score'], 3),
                'emotion': emotion['label'],
                'emotion_score': round(emotion['score'], 3)
            })

            current_time = timestamp

        status_msg = f"Transcription completed successfully. {process_status}"
        if diarization_status:
            status_msg += f" {diarization_status}"

        return rich_transcript, status_msg

    except Exception as e:
        logger.error(f"Error in transcription: {e}")
        return None, f"Transcription error: {str(e)}"
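
# Shape of one rich_transcript entry produced above (the values are illustrative,
# not taken from a real recording):
#   {
#       'timestamp': 4,                  # estimated seconds from the start
#       'speaker': 'SPEAKER_00',         # from diarization, else 'UNKNOWN'
#       'sentence': 'I WENT TO THE PARK',
#       'word_count': 5,
#       'avg_word_length': 2.8,
#       'speech_rate_wpm': 2.5,          # word_count * 30 / 60, a rough estimate
#       'sentiment': 'neutral', 'sentiment_score': 0.5,
#       'emotion': 'joy', 'emotion_score': 0.91,
#   }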

def format_rich_transcript(rich_transcript):
    """Format rich transcript for display"""
    if not rich_transcript:
        return "No transcript data available"

    formatted_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"

        line = f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}"
        line += f" [Words: {entry['word_count']}, Rate: {entry['speech_rate_wpm']}wpm]"
        line += f" [Sentiment: {entry['sentiment']} ({entry['sentiment_score']})]"
        line += f" [Emotion: {entry['emotion']} ({entry['emotion_score']})]"

        formatted_lines.append(line)

    return '\n'.join(formatted_lines)
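
# With the illustrative entry sketched earlier, format_rich_transcript emits one line like:
#   [00:04] *SPEAKER_00: I WENT TO THE PARK [Words: 5, Rate: 2.5wpm] [Sentiment: neutral (0.5)] [Emotion: joy (0.91)]
# This exact layout is what on_analyze in the interface later parses back with regular expressions.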

def calculate_slp_metrics(rich_transcript):
    """Calculate comprehensive SLP metrics"""
    if not rich_transcript:
        return {}

    # Basic metrics
    total_sentences = len(rich_transcript)
    total_words = sum(entry['word_count'] for entry in rich_transcript)
    total_duration = rich_transcript[-1]['timestamp'] if rich_transcript else 0

    # Speaker analysis
    speakers = {}
    for entry in rich_transcript:
        speaker = entry['speaker']
        if speaker not in speakers:
            speakers[speaker] = {
                'sentences': 0,
                'words': 0,
                'sentiments': [],
                'emotions': []
            }
        speakers[speaker]['sentences'] += 1
        speakers[speaker]['words'] += entry['word_count']
        speakers[speaker]['sentiments'].append(entry['sentiment'])
        speakers[speaker]['emotions'].append(entry['emotion'])

    # Word-level analysis
    all_words = []
    for entry in rich_transcript:
        words = entry['sentence'].lower().split()
        all_words.extend(words)

    # Word frequency distribution
    word_freq = {}
    for word in all_words:
        word_clean = re.sub(r'[^\w\s]', '', word)
        if word_clean:
            word_freq[word_clean] = word_freq.get(word_clean, 0) + 1

    # Vocabulary diversity (Type-Token Ratio)
    unique_words = len(set(all_words))
    ttr = unique_words / total_words if total_words > 0 else 0

    # Speech rate analysis
    speech_rates = [entry['speech_rate_wpm'] for entry in rich_transcript]
    avg_speech_rate = np.mean(speech_rates) if speech_rates else 0

    # Sentiment analysis
    sentiment_counts = {}
    emotion_counts = {}
    for entry in rich_transcript:
        sentiment_counts[entry['sentiment']] = sentiment_counts.get(entry['sentiment'], 0) + 1
        emotion_counts[entry['emotion']] = emotion_counts.get(entry['emotion'], 0) + 1

    # Sentence complexity
    sentence_lengths = [entry['word_count'] for entry in rich_transcript]
    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0

    # Pause analysis (gaps between sentences)
    pauses = []
    for i in range(1, len(rich_transcript)):
        pause = rich_transcript[i]['timestamp'] - rich_transcript[i-1]['timestamp']
        pauses.append(pause)

    avg_pause_duration = np.mean(pauses) if pauses else 0

    return {
        'total_sentences': total_sentences,
        'total_words': total_words,
        'total_duration_seconds': total_duration,
        'unique_words': unique_words,
        'type_token_ratio': round(ttr, 3),
        'avg_sentence_length': round(avg_sentence_length, 1),
        'avg_speech_rate_wpm': round(avg_speech_rate, 1),
        'avg_pause_duration': round(avg_pause_duration, 1),
        'sentiment_distribution': sentiment_counts,
        'emotion_distribution': emotion_counts,
        'word_frequency': dict(sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]),
        'speech_rate_variability': round(np.std(speech_rates), 1) if speech_rates else 0,
        'speakers': speakers,
        'speaker_count': len(speakers)
    }
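
# Worked example of the Type-Token Ratio computed above (toy data, not a real
# transcript): for the 8 tokens "the dog saw the cat and the bird" there are 6
# unique words, so TTR = 6 / 8 = 0.75. Values closer to 1.0 indicate more diverse
# vocabulary; values near 0 indicate heavy repetition of the same words.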

def generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes=""):
    """Generate comprehensive analysis prompt using rich transcript data"""

    # Format rich transcript with timestamps and metadata
    transcript_lines = []
    for entry in rich_transcript:
        timestamp_str = f"{int(entry['timestamp']//60):02d}:{int(entry['timestamp']%60):02d}"
        transcript_lines.append(f"[{timestamp_str}] *{entry['speaker']}: {entry['sentence']}")

    transcript_text = '\n'.join(transcript_lines)

    # Format metrics for analysis
    metrics_text = f"""
TRANSCRIPT METRICS:
• Total sentences: {metrics['total_sentences']}
• Total words: {metrics['total_words']}
• Duration: {metrics['total_duration_seconds']:.1f} seconds
• Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
• Average sentence length: {metrics['avg_sentence_length']} words
• Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
• Speech rate variability: {metrics['speech_rate_variability']} wpm
• Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
• Number of speakers: {metrics['speaker_count']}

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

SPEAKER ANALYSIS:"""

    for speaker, data in metrics['speakers'].items():
        metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"

    metrics_text += f"\n\nMOST FREQUENT WORDS: {list(metrics['word_frequency'].keys())[:10]}"

    notes_section = f"\nSLP CLINICAL NOTES:\n{slp_notes}" if slp_notes else ""

    prompt = f"""
You are a speech-language pathologist conducting a comprehensive analysis of a speech transcript with rich temporal and affective metadata.

PATIENT: {age}-year-old {gender}

{metrics_text}

TRANSCRIPT WITH TIMESTAMPS AND METADATA:
{transcript_text}{notes_section}

Please provide a comprehensive analysis including:

1. TEMPORAL SPEECH PATTERNS:
- Analyze speech rate changes over time using timestamps
- Identify patterns in pause duration and frequency
- Assess temporal consistency in speech production
- Note any significant changes in speech patterns throughout the session

2. AFFECTIVE AND EMOTIONAL ANALYSIS:
- Analyze sentiment patterns throughout the transcript using timestamp data
- Identify emotional shifts and their potential causes
- Assess emotional regulation and expression
- Note any correlations between emotional state and speech characteristics

3. SPEAKER-SPECIFIC ANALYSIS (if multiple speakers):
- Compare speech patterns between speakers
- Analyze turn-taking patterns and timing
- Assess interaction dynamics
- Note speaker-specific emotional and sentiment patterns

4. SPEECH FLUENCY AND RATE ANALYSIS:
- Analyze speech rate variability using the provided metrics
- Identify periods of fluent vs. dysfluent speech
- Assess the impact of emotional state on speech rate
- Note any temporal patterns in speech rate changes

5. LANGUAGE COMPLEXITY ASSESSMENT:
- Analyze vocabulary diversity using Type-Token Ratio
- Assess sentence complexity and variety
- Identify patterns in word frequency and usage
- Note any temporal changes in language complexity

6. COMPLEX SENTENCE ANALYSIS:
- Count and analyze use of coordinating conjunctions (and, but, or, so, yet, for, nor)
- Count and analyze use of subordinating conjunctions (because, although, while, since, if, when, where, that, which, who, whom, whose)
- Identify compound, complex, and compound-complex sentences
- Assess sentence variety and complexity level for age

7. FIGURATIVE LANGUAGE ANALYSIS:
- Identify and count similes (comparisons using "like" or "as")
- Identify and count metaphors (direct comparisons without "like" or "as")
- Identify and count idioms (common expressions with non-literal meanings)
- Assess figurative language comprehension and use for age

8. CLINICAL IMPLICATIONS:
- Specific intervention targets based on temporal patterns
- Recommendations for emotional regulation if needed
- Suggestions for improving speech rate consistency
- Strategies for enhancing language complexity
- Age-appropriate development recommendations

9. COMPREHENSIVE SUMMARY:
- Overall communication profile with temporal considerations
- Assessment of emotional and affective communication
- Developmental appropriateness considering age
- Prognosis and treatment priorities

Use the temporal data, sentiment scores, and emotional labels to provide insights that would not be possible with a simple transcript. Reference specific timestamps and emotional states when making observations.
"""

    return prompt

def analyze_rich_transcript_with_llm(rich_transcript, age, gender, slp_notes=""):
    """Analyze rich transcript using LLM with comprehensive metadata"""
    if not rich_transcript:
        return "No transcript data available for analysis."

    # Calculate SLP metrics
    metrics = calculate_slp_metrics(rich_transcript)

    # Generate comprehensive analysis prompt
    prompt = generate_comprehensive_analysis_prompt(rich_transcript, metrics, age, gender, slp_notes)

    # Get analysis from Claude API
    if ANTHROPIC_API_KEY:
        result = call_claude_api(prompt)
    else:
        result = generate_demo_analysis(rich_transcript, metrics)

    return result
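
# End-to-end usage sketch (the file name is a placeholder; a real, non-demo report
# additionally requires the optional dependencies above and an ANTHROPIC_API_KEY):
#   rich, status = transcribe_audio_with_metadata("session.wav", enable_diarization=True)
#   report = analyze_rich_transcript_with_llm(rich, age=8, gender="male", slp_notes="")
#   print(report)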

def call_claude_api(prompt):
    """Call Claude API directly"""
    if not ANTHROPIC_API_KEY:
        return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."

    try:
        headers = {
            "Content-Type": "application/json",
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code == 200:
            response_json = response.json()
            return response_json['content'][0]['text']
        else:
            logger.error(f"Claude API error: {response.status_code} - {response.text}")
            return f"❌ Claude API Error: {response.status_code}"

    except Exception as e:
        logger.error(f"Error calling Claude API: {str(e)}")
        return f"❌ Error: {str(e)}"

def generate_demo_analysis(rich_transcript, metrics):
    """Generate demo analysis when API is not available"""
    return f"""## Comprehensive SLP Analysis with Temporal and Affective Data

### TEMPORAL SPEECH PATTERNS
**Speech Rate Analysis**: {metrics['avg_speech_rate_wpm']} words per minute (variability: {metrics['speech_rate_variability']} wpm)
- Speech rate appears {'within normal limits' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'below typical range' if metrics['avg_speech_rate_wpm'] < 120 else 'above typical range'}
- Variability suggests {'consistent' if metrics['speech_rate_variability'] < 20 else 'variable'} speech patterns

**Pause Analysis**: Average pause duration of {metrics['avg_pause_duration']:.1f} seconds
- {'Appropriate' if 0.5 <= metrics['avg_pause_duration'] <= 2.0 else 'Short' if metrics['avg_pause_duration'] < 0.5 else 'Long'} pauses between utterances

### AFFECTIVE AND EMOTIONAL ANALYSIS
**Sentiment Distribution**: {metrics['sentiment_distribution']}
**Emotion Distribution**: {metrics['emotion_distribution']}

The emotional patterns suggest {'positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'neutral' if 'neutral' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['neutral'] > 2 else 'mixed'} emotional expression throughout the session.

### LANGUAGE COMPLEXITY
**Vocabulary Diversity**: Type-Token Ratio of {metrics['type_token_ratio']}
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited' if metrics['type_token_ratio'] < 0.3 else 'Moderate'} vocabulary diversity

**Sentence Structure**: Average {metrics['avg_sentence_length']} words per sentence
- Sentence length appears {'age-appropriate' if 5 <= metrics['avg_sentence_length'] <= 12 else 'below age expectations' if metrics['avg_sentence_length'] < 5 else 'above age expectations'}

**Most Frequent Words**: {', '.join(list(metrics['word_frequency'].keys())[:5])}

### SPEAKER ANALYSIS
**Number of Speakers**: {metrics['speaker_count']}
{chr(10).join([f"• {speaker}: {data['sentences']} sentences, {data['words']} words" for speaker, data in metrics['speakers'].items()])}

### CLINICAL IMPLICATIONS
Based on the temporal and affective analysis, this patient shows:
- {'Good' if metrics['type_token_ratio'] > 0.4 else 'Limited'} vocabulary diversity
- {'Appropriate' if 120 <= metrics['avg_speech_rate_wpm'] <= 180 else 'Atypical'} speech rate
- {'Consistent' if metrics['speech_rate_variability'] < 20 else 'Variable'} speech patterns
- {'Positive' if 'positive' in metrics['sentiment_distribution'] and metrics['sentiment_distribution']['positive'] > 2 else 'Neutral'} emotional expression

### RECOMMENDATIONS
1. Focus on vocabulary expansion if TTR < 0.4
2. Address speech rate if outside normal range
3. Work on sentence complexity if below age expectations
4. Consider emotional regulation strategies based on sentiment patterns
5. Monitor temporal patterns in speech rate and fluency"""

def create_transcription_interface():
    """Create the transcription-focused Gradio interface"""
    with gr.Blocks(title="Advanced Transcription Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🎤 Advanced Transcription Tool")
        gr.Markdown("Transcribe audio/video with speaker diarization, timestamps, sentiment analysis, and comprehensive LLM analysis")

        with gr.Tabs():
            # Audio/Video Upload & Transcription Tab
            with gr.Tab("🎤 Audio/Video Transcription"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### File Upload")
                        gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")

                        file_input = gr.File(
                            label="Upload Audio or Video File",
                            file_types=["audio", "video"]
                        )

                        enable_diarization = gr.Checkbox(
                            label="Enable Speaker Diarization",
                            value=True,
                            info="Identify different speakers in the audio"
                        )

                        transcribe_btn = gr.Button(
                            "🎤 Transcribe File",
                            variant="primary",
                            size="lg"
                        )

                        transcription_status = gr.Markdown("")

                    with gr.Column(scale=2):
                        gr.Markdown("### Rich Transcript with Metadata")

                        rich_transcript_display = gr.Textbox(
                            label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
                            lines=15,
                            max_lines=20
                        )

            # Analysis Tab
            with gr.Tab("🔍 LLM Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Patient Information")

                        with gr.Row():
                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")

                        slp_notes = gr.Textbox(
                            label="SLP Clinical Notes (Optional)",
                            placeholder="Enter additional clinical observations...",
                            lines=3
                        )

                        analyze_btn = gr.Button(
                            "🔍 Analyze with LLM",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column(scale=2):
                        gr.Markdown("### Comprehensive LLM Analysis")

                        analysis_output = gr.Textbox(
                            label="LLM Analysis Report",
                            lines=25,
                            max_lines=30
                        )

            # Metrics Tab
            with gr.Tab("📊 Speech Metrics"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Quantitative Speech Metrics")

                        metrics_display = gr.Textbox(
                            label="SLP Metrics",
                            lines=15,
                            max_lines=20
                        )

                    with gr.Column():
                        gr.Markdown("### Word Frequency Analysis")

                        word_freq_display = gr.Dataframe(
                            headers=["Word", "Frequency"],
                            label="Most Frequent Words",
                            interactive=False
                        )

            # Raw Data Tab
            with gr.Tab("📋 Raw Data"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### JSON Data")

                        json_display = gr.Textbox(
                            label="Raw JSON Data",
                            lines=20,
                            max_lines=25
                        )

        # Event handlers
        def on_transcribe(file, diarization_enabled):
            """Handle file transcription"""
            if not file:
                return "", "", "", "Please upload a file first."

            rich_transcript, status = transcribe_audio_with_metadata(file.name, diarization_enabled)

            if rich_transcript:
                formatted = format_rich_transcript(rich_transcript)
                metrics = calculate_slp_metrics(rich_transcript)

                # Format metrics for display
                metrics_text = f"""SPEECH METRICS:
• Total sentences: {metrics['total_sentences']}
• Total words: {metrics['total_words']}
• Duration: {metrics['total_duration_seconds']:.1f} seconds
• Type-Token Ratio: {metrics['type_token_ratio']} (vocabulary diversity)
• Average sentence length: {metrics['avg_sentence_length']} words
• Average speech rate: {metrics['avg_speech_rate_wpm']} words per minute
• Speech rate variability: {metrics['speech_rate_variability']} wpm
• Average pause duration: {metrics['avg_pause_duration']:.1f} seconds
• Number of speakers: {metrics['speaker_count']}

SENTIMENT DISTRIBUTION: {metrics['sentiment_distribution']}
EMOTION DISTRIBUTION: {metrics['emotion_distribution']}

SPEAKER ANALYSIS:"""

                for speaker, data in metrics['speakers'].items():
                    metrics_text += f"\n• {speaker}: {data['sentences']} sentences, {data['words']} words"

                # Create word frequency dataframe
                word_freq_data = [[word, freq] for word, freq in list(metrics['word_frequency'].items())[:20]]

                # JSON data
                json_data = json.dumps(rich_transcript, indent=2)

                return formatted, metrics_text, word_freq_data, status
            else:
                return "", "", [], status

        def on_analyze(rich_transcript_text, age_val, gender_val, notes):
            """Handle LLM analysis"""
            if not rich_transcript_text or rich_transcript_text == "No transcript data available":
                return "Please transcribe audio first."

            # Convert formatted text back to rich transcript structure
            lines = rich_transcript_text.split('\n')
            rich_transcript = []

            for i, line in enumerate(lines):
                if line.strip():
                    # Extract data from the formatted line
                    timestamp_match = re.search(r'\[(\d{2}:\d{2})\]', line)
                    speaker_match = re.search(r'\*(\w+):', line)
                    sentence_match = re.search(r'\*\w+:\s*(.+?)(?=\s*\[|$)', line)

                    if timestamp_match and speaker_match and sentence_match:
                        timestamp_str = timestamp_match.group(1)
                        minutes, seconds = map(int, timestamp_str.split(':'))
                        timestamp = minutes * 60 + seconds

                        speaker = speaker_match.group(1)
                        sentence = sentence_match.group(1).strip()

                        rich_transcript.append({
                            'timestamp': timestamp,
                            'speaker': speaker,
                            'sentence': sentence,
                            'word_count': len(sentence.split()),
                            'avg_word_length': np.mean([len(word) for word in sentence.split()]) if sentence.split() else 0,
                            'speech_rate_wpm': 120.0,
                            'sentiment': 'neutral',
                            'sentiment_score': 0.5,
                            'emotion': 'neutral',
                            'emotion_score': 0.5
                        })

            return analyze_rich_transcript_with_llm(rich_transcript, age_val, gender_val, notes)
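
        # Round-trip sketch for the parsing above, using the hypothetical line produced
        # by format_rich_transcript earlier: for
        #   line = "[00:04] *SPEAKER_00: I WENT TO THE PARK [Words: 5, Rate: 2.5wpm] ..."
        # the three regexes recover ('00:04', 'SPEAKER_00', 'I WENT TO THE PARK'), and the
        # timestamp becomes 0 * 60 + 4 = 4 seconds. Sentiment and emotion scores are not
        # re-parsed here, so the reconstruction falls back to neutral defaults.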

        # Connect event handlers
        transcribe_btn.click(
            on_transcribe,
            inputs=[file_input, enable_diarization],
            outputs=[rich_transcript_display, metrics_display, word_freq_display, transcription_status]
        )

        analyze_btn.click(
            on_analyze,
            inputs=[rich_transcript_display, age, gender, slp_notes],
            outputs=[analysis_output]
        )

    return app

if __name__ == "__main__":
    print("🚀 Starting Advanced Transcription Tool...")

    if not MOVIEPY_AVAILABLE:
        print("⚠️ MoviePy not available - video processing will be limited")
        print("   Install with: pip install moviepy")
    else:
        print("✅ MoviePy available for video processing")

    if not DIARIZATION_AVAILABLE:
        print("⚠️ Pyannote.audio not available - speaker diarization will be disabled")
        print("   Install with: pip install pyannote.audio")
    else:
        print("✅ Pyannote.audio available for speaker diarization")
        if not os.getenv("HF_TOKEN"):
            print("⚠️ HF_TOKEN not set - set it to enable speaker diarization")
            print("   Get token from: https://huggingface.co/settings/tokens")
            print("   Accept model terms at: https://huggingface.co/pyannote/speaker-diarization")

    if not SPEECHBRAIN_AVAILABLE:
        print("⚠️ SpeechBrain not available - audio transcription will use demo mode")
        print("   Install with: pip install speechbrain transformers torch")
    else:
        print("✅ SpeechBrain and HuggingFace models loaded")

    app = create_transcription_interface()
    app.launch(show_api=False)