"""
Test Option B System with Physician Query

Tests: "what should a physician considering prescribing ianalumab for sjogren's disease know"

Run this file as a script. It loads ``foundation_engine``, runs one structured
query, logs a readable report, and writes the raw result to
``test_results_option_b.json``. The process exits with status 1 when the
engine cannot be imported or any step of the run raises.
"""

import os
import sys
import json
import logging

logger = logging.getLogger(__name__)

TEST_QUERY = "what should a physician considering prescribing ianalumab for sjogren's disease know"
OUTPUT_FILE = "test_results_option_b.json"
BANNER = "=" * 80


def log_banner(title: str) -> None:
    """Log *title* framed by two separator lines."""
    logger.info(BANNER)
    logger.info(title)
    logger.info(BANNER)


def rank_change_label(rank_before, rank_after) -> str:
    """Describe how re-ranking moved a trial.

    Returns an empty string when either rank is unknown (``None``).
    The check is against ``None`` rather than truthiness so that a rank
    of ``0`` is still treated as a real position.
    """
    if rank_before is None or rank_after is None:
        return ""
    change = rank_before - rank_after
    if change > 0:
        return f" (↑ improved by {change})"
    if change < 0:
        return f" (↓ dropped by {-change})"
    return " (→ no change)"


def log_benchmarks(result: dict) -> None:
    """Log the timing breakdown if the result carries a 'benchmarking' entry."""
    if 'benchmarking' not in result:
        return
    bench = result['benchmarking']
    logger.info("\n⏱️ PERFORMANCE:")
    logger.info(f" Query Parsing: {bench.get('query_parsing_time', 0):.2f}s")
    logger.info(f" RAG Search: {bench.get('rag_search_time', 0):.2f}s")
    logger.info(f" 355M Ranking: {bench.get('355m_ranking_time', 0):.2f}s")
    logger.info(f" TOTAL: {result.get('processing_time', 0):.2f}s")


def log_query_analysis(result: dict) -> None:
    """Log the extracted entities if the result carries a 'query_analysis' entry."""
    if 'query_analysis' not in result:
        return
    qa = result['query_analysis']
    entities = qa.get('extracted_entities', {})
    logger.info("\n🔍 QUERY ANALYSIS:")
    logger.info(f" Drugs: {entities.get('drugs', [])}")
    logger.info(f" Diseases: {entities.get('diseases', [])}")
    logger.info(f" Companies: {entities.get('companies', [])}")
    logger.info(f" Endpoints: {entities.get('endpoints', [])}")
    logger.info(f" Optimized: {qa.get('optimized_search', 'N/A')}")


def log_results_summary(result: dict) -> None:
    """Log the result counts if the result carries a 'results' entry."""
    if 'results' not in result:
        return
    res = result['results']
    logger.info("\n📊 SEARCH RESULTS:")
    logger.info(f" Total Found: {res.get('total_found', 0)}")
    logger.info(f" Returned: {res.get('returned', 0)}")
    logger.info(f" Top Relevance: {res.get('top_relevance_score', 0):.3f}")


def log_top_trials(result: dict, limit: int = 5) -> None:
    """Log up to *limit* trials from ``result['trials']``, if any are present.

    ``nct_id`` is read with strict indexing, so a trial without one raises
    ``KeyError``; ``main`` reports that as a failed run.
    """
    trials = result.get('trials') or []
    if not trials:
        return
    logger.info("\n🏥 TOP TRIALS:\n")
    for i, trial in enumerate(trials[:limit], 1):
        logger.info(f"{i}. NCT ID: {trial['nct_id']}")
        logger.info(f" Title: {trial.get('title', 'N/A')}")
        logger.info(f" Status: {trial.get('status', 'N/A')}")
        logger.info(f" Phase: {trial.get('phase', 'N/A')}")
        if 'scoring' in trial:
            scoring = trial['scoring']
            logger.info(" Scoring:")
            logger.info(f" Relevance: {scoring.get('relevance_score', 0):.3f}")
            logger.info(f" Perplexity: {scoring.get('perplexity', 'N/A')}")
            logger.info(f" Rank before: {scoring.get('rank_before_355m', 'N/A')}")
            logger.info(f" Rank after: {scoring.get('rank_after_355m', 'N/A')}")
            rank_change = rank_change_label(
                scoring.get('rank_before_355m'),
                scoring.get('rank_after_355m'),
            )
            # Only report an impact when both ranks were available.
            if rank_change:
                logger.info(f" Impact: {rank_change}")
        logger.info(f" URL: {trial.get('url', 'N/A')}")
        logger.info("")


def save_results(result: dict, path: str = OUTPUT_FILE) -> None:
    """Write the full result as indented JSON to *path*."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)
    logger.info(f"💾 Full results saved to: {path}")


def physician_summary_lines(result: dict) -> list:
    """Build the closing "summary for physician" text as a list of log lines.

    Missing 'results' or 'trials' entries fall back to zero/empty values, so
    the summary cannot raise ``KeyError`` after the run was already reported
    as successful.
    """
    res = result.get('results') or {}
    trials = result.get('trials') or []
    lines = [
        "\n📋 SUMMARY FOR PHYSICIAN:",
        " Based on the ranked trials, here's what the API returns:",
        f" - Found {res.get('returned', 0)} relevant trials",
        f" - Top trial has {res.get('top_relevance_score', 0):.1%} relevance",
        "",
        " ⚠️ NOTE: This API returns STRUCTURED DATA only",
        " The chatbot company would use their LLM to generate a response like:",
        "",
        " 'Based on clinical trial data, physicians prescribing ianalumab",
        " for Sjögren's disease should know:'",
        f" '- {len(trials)} clinical trials are available'",
    ]
    if trials:
        trial = trials[0]
        lines.extend([
            f" '- Primary trial: {trial.get('title', 'N/A')}'",
            f" '- Status: {trial.get('status', 'N/A')}'",
            f" '- Phase: {trial.get('phase', 'N/A')}'",
        ])
    lines.extend([
        "",
        " The client's LLM would generate this response using the JSON data.",
        "",
    ])
    return lines


def run_test(engine) -> dict:
    """Load data through *engine*, run the test query, and report the outcome.

    *engine* is the imported ``foundation_engine`` module. This function only
    relies on the attributes it touches: ``load_embeddings``, ``doc_chunks``,
    ``doc_embeddings``, ``inverted_index`` and ``process_query_structured``.
    Returns the raw result of ``process_query_structured``.
    """
    log_banner("TESTING OPTION B SYSTEM")

    # Load data (per the original note: will auto-download if needed)
    logger.info("Loading RAG data (will download from HF if needed)...")
    engine.load_embeddings()

    log_banner("DATA LOADED SUCCESSFULLY")
    logger.info(f"✓ Trials loaded: {len(engine.doc_chunks):,}")
    embeddings = engine.doc_embeddings
    shape = embeddings.shape if embeddings is not None else 'None'
    logger.info(f"✓ Embeddings shape: {shape}")
    index = engine.inverted_index
    # Keep the label even when the index is empty or missing, instead of
    # logging a bare "None".
    index_terms = f"{len(index):,}" if index else "None"
    logger.info(f"✓ Inverted index terms: {index_terms}")

    log_banner(f"TEST QUERY: {TEST_QUERY}")

    # Use the structured query processor (Option B!)
    logger.info("Processing with Option B pipeline...")
    result = engine.process_query_structured(TEST_QUERY, top_k=5)

    log_banner("RESULTS")
    log_benchmarks(result)
    log_query_analysis(result)
    log_results_summary(result)
    log_top_trials(result)

    # Save full results to JSON
    save_results(result)

    log_banner("TEST COMPLETED SUCCESSFULLY ✅")

    # Print what a physician should know
    for line in physician_summary_lines(result):
        logger.info(line)
    return result


def main() -> int:
    """Run the Option B smoke test and return a process exit code (0 or 1)."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )

    # Check if HF_TOKEN is set
    if not os.getenv("HF_TOKEN"):
        logger.warning("⚠️ HF_TOKEN not set! Query parsing will fail.")
        logger.warning(" Set it with: export HF_TOKEN=your_token_here")
        logger.warning(" Continuing with limited functionality...")

    # The import has its own try so that only a failure to import the engine
    # produces the "wrong directory" hint; ImportErrors raised later during
    # the run are reported with a traceback like any other failure.
    try:
        # Try to use the existing foundation_engine which has download capability
        logger.info("Loading foundation_engine (with auto-download)...")
        import foundation_engine
    except ImportError as e:
        logger.error(f"❌ Import failed: {e}")
        logger.error(" Make sure you're in the correct directory with foundation_engine.py")
        return 1

    try:
        run_test(foundation_engine)
    except Exception as e:  # top-level boundary: log with traceback, exit non-zero
        logger.exception(f"❌ Test failed: {e}")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())