""" Clinical Trial API - Option B (Simplified) =========================================== Clean foundational RAG with single LLM query parser Architecture: 1. Query Parser LLM (Llama-70B) - 3s, $0.001 2. RAG Search (BM25 + Semantic + Inverted Index) - 2s, free 3. 355M Perplexity Ranking - 2-5s, free 4. Structured JSON Output - instant, free Total: ~7-10s per query, $0.001 cost No response generation - clients use their own LLMs """ from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel import time import logging # Import Option B pipeline import foundation_rag_optionB as rag logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = FastAPI( title="Clinical Trial API - Option B", description="Foundational RAG API with query parser LLM + perplexity ranking", version="2.0.0", docs_url="/docs", redoc_url="/redoc" ) # CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # ============================================================================ # REQUEST/RESPONSE MODELS # ============================================================================ class SearchRequest(BaseModel): query: str top_k: int = 10 class Config: schema_extra = { "example": { "query": "What trials exist for ianalumab in Sjogren's syndrome?", "top_k": 10 } } class HealthResponse(BaseModel): status: str trials_loaded: int embeddings_loaded: bool api_version: str architecture: str # ============================================================================ # STARTUP # ============================================================================ @app.on_event("startup") async def startup_event(): """Initialize RAG system on startup""" logger.info("=" * 70) logger.info("CLINICAL TRIAL API - OPTION B") logger.info("=" * 70) logger.info("Loading RAG data...") try: rag.load_all_data() logger.info("=" * 70) logger.info("✓ API READY - Option B Architecture Active") logger.info("=" * 70) except Exception as e: logger.error(f"!!! Failed to load data: {e}") logger.error("!!! API will start but queries will fail") # ============================================================================ # ENDPOINTS # ============================================================================ @app.get("/") async def root(): """API information""" return { "service": "Clinical Trial API - Option B", "version": "2.0.0", "architecture": "1 LLM (Query Parser) + RAG + 355M Perplexity Ranking", "status": "healthy", "endpoints": { "POST /search": "Search clinical trials with structured JSON output", "GET /health": "Health check", "GET /docs": "Interactive API documentation (Swagger UI)", "GET /redoc": "Alternative API documentation (ReDoc)" }, "pipeline": [ "1. Query Parser LLM (Llama-70B) → Extract entities + synonyms (3s, $0.001)", "2. RAG Search (BM25 + Semantic + Inverted Index) → Retrieve (2s, free)", "3. 355M Perplexity Ranking → Rank by relevance (2-5s, free)", "4. Structured JSON Output → Return ranked trials (instant, free)" ], "performance": { "average_latency": "7-10 seconds", "cost_per_query": "$0.001", "no_response_generation": "Clients handle text generation with their own LLMs" } } @app.get("/health", response_model=HealthResponse) async def health_check(): """Health check endpoint""" embeddings_loaded = rag.doc_embeddings is not None chunks_loaded = len(rag.doc_chunks) if rag.doc_chunks else 0 return HealthResponse( status="healthy" if embeddings_loaded else "degraded", trials_loaded=chunks_loaded, embeddings_loaded=embeddings_loaded, api_version="2.0.0", architecture="Option B: Query Parser LLM + RAG + 355M Ranking" ) @app.post("/search") async def search_trials(request: SearchRequest): """ Search clinical trials using Option B pipeline **Pipeline:** 1. **Query Parser LLM** - Extracts entities (drugs, diseases, companies, endpoints) and expands with synonyms using Llama-70B 2. **RAG Search** - Hybrid search using BM25 + semantic embeddings + inverted index 3. **355M Perplexity Ranking** - Re-ranks using Clinical Trial GPT perplexity scores 4. **Structured JSON Output** - Returns ranked trials with all metadata **No Response Generation** - Returns raw trial data for client-side processing Args: - **query**: Your question about clinical trials - **top_k**: Number of trials to return (default: 10, max: 50) Returns: - Structured JSON with ranked trials - Query analysis (extracted entities, optimized search terms) - Benchmarking data (timing breakdown) - Trial metadata (NCT ID, title, status, phase, etc.) - Scoring details (relevance, perplexity, rank changes) **Example Query:** ``` { "query": "What trials exist for ianalumab in Sjogren's syndrome?", "top_k": 10 } ``` **Example Response:** ``` { "query": "What trials exist for ianalumab in Sjogren's syndrome?", "processing_time": 8.2, "query_analysis": { "extracted_entities": { "drugs": ["ianalumab", "VAY736"], "diseases": ["Sjogren's syndrome", "Sjögren's disease"], "companies": [], "endpoints": [] }, "optimized_search": "ianalumab VAY736 Sjogren's syndrome sjögren", "parsing_time": 3.1 }, "results": { "total_found": 30, "returned": 10, "top_relevance_score": 0.923 }, "trials": [ { "nct_id": "NCT02962895", "title": "Phase 2 Study of Ianalumab in Sjögren's Syndrome", "status": "Completed", "phase": "Phase 2", "conditions": "Sjögren's Syndrome", "interventions": "Ianalumab (VAY736)", "sponsor": "Novartis", "scoring": { "relevance_score": 0.923, "perplexity": 12.4, "rank_before_355m": 2, "rank_after_355m": 1 }, "url": "https://clinicaltrials.gov/study/NCT02962895" } ], "benchmarking": { "query_parsing_time": 3.1, "rag_search_time": 2.3, "355m_ranking_time": 2.8, "total_processing_time": 8.2 } } ``` """ try: logger.info(f"[SEARCH] Query: {request.query[:100]}...") # Validate top_k if request.top_k > 50: logger.warning(f"[SEARCH] top_k={request.top_k} exceeds max 50, capping") request.top_k = 50 elif request.top_k < 1: logger.warning(f"[SEARCH] top_k={request.top_k} invalid, using default 10") request.top_k = 10 start_time = time.time() # Process with Option B pipeline result = rag.process_query_option_b(request.query, top_k=request.top_k) processing_time = time.time() - start_time logger.info(f"[SEARCH] ✓ Completed in {processing_time:.2f}s") # Ensure processing_time is set if 'processing_time' not in result or result['processing_time'] == 0: result['processing_time'] = processing_time return result except Exception as e: logger.error(f"[SEARCH] Error: {str(e)}") import traceback return { "error": str(e), "traceback": traceback.format_exc(), "query": request.query, "processing_time": time.time() - start_time if 'start_time' in locals() else 0 } # ============================================================================ # RUN SERVER # ============================================================================ if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=7860)