# ==============================================================
# Environment configuration template (.env.example)
# Copy to .env and fill in the values you need; defaults are
# documented in config.py. Lines starting with # are comments.
# ==============================================================
# ============== LLM CONFIGURATION ==============
# Provider: "openai", "anthropic", or "huggingface"
LLM_PROVIDER=openai
# API Keys (at least one required for full LLM analysis)
OPENAI_API_KEY=sk-your-key-here
ANTHROPIC_API_KEY=sk-ant-your-key-here
# Model names (optional - sensible defaults set in config.py)
# OPENAI_MODEL=gpt-5.1
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
# ============== HUGGINGFACE CONFIGURATION ==============
# HuggingFace Token - enables gated models and higher rate limits
# Get yours at: https://huggingface.co/settings/tokens
#
# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
#
# For HuggingFace Spaces deployment:
# Set this as a "Secret" in Space Settings -> Variables and secrets
# Users/judges don't need their own token - the Space secret is used
#
HF_TOKEN=hf_your-token-here
# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
# Default HuggingFace model for inference (gated, requires auth)
# Can be overridden in UI dropdown
# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# Fallback models for HuggingFace Inference API (comma-separated)
# Models are tried in order until one succeeds
# Format: model1,model2,model3
# Latest reasoning models first, then reliable fallbacks
# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
# Override model/provider selection (optional, usually set via UI)
# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# HF_PROVIDER=hyperbolic
# ============== EMBEDDING CONFIGURATION ==============
# Embedding Provider: "openai", "local", or "huggingface"
# Default: "local" (no API key required)
EMBEDDING_PROVIDER=local
# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# ============== AGENT CONFIGURATION ==============
MAX_ITERATIONS=10
SEARCH_TIMEOUT=30
LOG_LEVEL=INFO
# Graph-based execution (experimental)
# USE_GRAPH_EXECUTION=false
# Budget & Rate Limiting
# DEFAULT_TOKEN_LIMIT=100000
# DEFAULT_TIME_LIMIT_MINUTES=10
# DEFAULT_ITERATIONS_LIMIT=10
# ============== WEB SEARCH CONFIGURATION ==============
# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
# Default: "duckduckgo" (no API key required)
WEB_SEARCH_PROVIDER=duckduckgo
# Serper API Key (for Google search via Serper)
# SERPER_API_KEY=your-serper-key-here
# SearchXNG Host URL (for self-hosted search)
# SEARCHXNG_HOST=http://localhost:8080
# Brave Search API Key
# BRAVE_API_KEY=your-brave-key-here
# Tavily API Key
# TAVILY_API_KEY=your-tavily-key-here
# ============== EXTERNAL SERVICES ==============
# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
NCBI_API_KEY=your-ncbi-key-here
# Modal (optional - for secure code execution sandbox)
# MODAL_TOKEN_ID=your-modal-token-id
# MODAL_TOKEN_SECRET=your-modal-token-secret
# ============== VECTOR DATABASE (ChromaDB) ==============
# ChromaDB storage path
CHROMA_DB_PATH=./chroma_db
# Persist ChromaDB to disk (default: true)
# CHROMA_DB_PERSIST=true
# Remote ChromaDB server (optional)
# CHROMA_DB_HOST=localhost
# CHROMA_DB_PORT=8000
# ============== RAG SERVICE CONFIGURATION ==============
# ChromaDB collection name for RAG
# RAG_COLLECTION_NAME=deepcritical_evidence
# Number of top results to retrieve from RAG
# RAG_SIMILARITY_TOP_K=5
# Automatically ingest evidence into RAG
# RAG_AUTO_INGEST=true