File size: 4,194 Bytes
9e9bc6b
 
cb48bd4
9e9bc6b
 
 
069f0a0
 
 
58ff8fb
 
cb48bd4
fcc601a
cb48bd4
7fab6d4
cb48bd4
7fab6d4
cb48bd4
 
 
7fab6d4
 
fcc601a
7fab6d4
 
 
cb48bd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fab6d4
9e9bc6b
069f0a0
 
9e9bc6b
069f0a0
3aa91e9
cb48bd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e9bc6b
 
cb48bd4
9e9bc6b
3aa91e9
cb48bd4
 
 
 
 
 
 
3aa91e9
cb48bd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# ============== LLM CONFIGURATION ==============

# Provider: "openai", "anthropic", or "huggingface"
LLM_PROVIDER=openai

# API Keys (at least one required for full LLM analysis)
OPENAI_API_KEY=sk-your-key-here
ANTHROPIC_API_KEY=sk-ant-your-key-here

# Model names (optional - sensible defaults set in config.py)
# OPENAI_MODEL=gpt-5.1
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929

# ============== HUGGINGFACE CONFIGURATION ==============

# HuggingFace Token - enables gated models and higher rate limits
# Get yours at: https://huggingface.co/settings/tokens
# 
# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
#
# For HuggingFace Spaces deployment:
#   Set this as a "Secret" in Space Settings -> Variables and secrets
#   Users/judges don't need their own token - the Space secret is used
#
HF_TOKEN=hf_your-token-here
# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)

# Default HuggingFace model for inference (gated, requires auth)
# Can be overridden in UI dropdown
# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking

# Fallback models for HuggingFace Inference API (comma-separated)
# Models are tried in order until one succeeds
# Format: model1,model2,model3
# Latest reasoning models first, then reliable fallbacks
# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct

# Override model/provider selection (optional, usually set via UI)
# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# HF_PROVIDER=hyperbolic

# ============== EMBEDDING CONFIGURATION ==============

# Embedding Provider: "openai", "local", or "huggingface"
# Default: "local" (no API key required)
EMBEDDING_PROVIDER=local

# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5

# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# ============== AGENT CONFIGURATION ==============

MAX_ITERATIONS=10
SEARCH_TIMEOUT=30
LOG_LEVEL=INFO

# Graph-based execution (experimental)
# USE_GRAPH_EXECUTION=false

# Budget & Rate Limiting
# DEFAULT_TOKEN_LIMIT=100000
# DEFAULT_TIME_LIMIT_MINUTES=10
# DEFAULT_ITERATIONS_LIMIT=10

# ============== WEB SEARCH CONFIGURATION ==============

# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
# Default: "duckduckgo" (no API key required)
WEB_SEARCH_PROVIDER=duckduckgo

# Serper API Key (for Google search via Serper)
# SERPER_API_KEY=your-serper-key-here

# SearXNG host URL (for self-hosted search; note the env var below keeps the SEARCHXNG spelling expected by the app)
# SEARCHXNG_HOST=http://localhost:8080

# Brave Search API Key
# BRAVE_API_KEY=your-brave-key-here

# Tavily API Key
# TAVILY_API_KEY=your-tavily-key-here

# ============== EXTERNAL SERVICES ==============

# PubMed / NCBI E-utilities (optional - with an API key: 10 req/sec; without: 3 req/sec)
NCBI_API_KEY=your-ncbi-key-here

# Modal (optional - for secure code execution sandbox)
# MODAL_TOKEN_ID=your-modal-token-id
# MODAL_TOKEN_SECRET=your-modal-token-secret

# ============== VECTOR DATABASE (ChromaDB) ==============

# ChromaDB storage path
CHROMA_DB_PATH=./chroma_db

# Persist ChromaDB to disk (default: true)
# CHROMA_DB_PERSIST=true

# Remote ChromaDB server (optional)
# CHROMA_DB_HOST=localhost
# CHROMA_DB_PORT=8000

# ============== RAG SERVICE CONFIGURATION ==============

# ChromaDB collection name for RAG
# RAG_COLLECTION_NAME=deepcritical_evidence

# Number of top results to retrieve from RAG
# RAG_SIMILARITY_TOP_K=5

# Automatically ingest evidence into RAG
# RAG_AUTO_INGEST=true