Spaces:
Sleeping
Sleeping
File size: 9,453 Bytes
7dfe46c 3911eb8 7dfe46c 3911eb8 672d4d3 3911eb8 672d4d3 3911eb8 7dfe46c 28b9ad5 672d4d3 7dfe46c 672d4d3 7dfe46c 498d859 672d4d3 7dfe46c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
import logging
import os
import sys
from pathlib import Path
from typing import Any, Dict, Optional

import yaml

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from .utilites import load_environment_variables, validate_api_keys
# Import logger here to avoid circular imports.
# Both branches bind a module-level ``logger`` named "config", so the rest of
# the module can log without caring which implementation was picked.
try:
    from logger.custom_logger import CustomLoggerTracker
    custom_log = CustomLoggerTracker()
    logger = custom_log.get_logger("config")
except ImportError:
    # Fall back to standard logging if the custom logger is not available.
    logger = logging.getLogger("config")
class Config:
    """Load, validate and expose application settings from a YAML file.

    Settings come from two places: the YAML file at ``config_path`` (general,
    model, RAG and storage settings) and environment variables (credentials
    and service URLs). For credentials and URLs the lookup order is
    environment variable, then YAML key, then a non-secret default.

    SECURITY NOTE: credentials must never be embedded in this module. Any key
    that was ever committed as a literal default must be treated as
    compromised and rotated with the provider.
    """

    # Public Groq endpoint used when neither GROQ_URL nor `groq_url` is set.
    _DEFAULT_GROQ_URL = 'https://api.groq.com/openai/v1'
    # Historical default cluster URL, kept so existing deployments that rely
    # on it keep resolving the same endpoint. It is not a credential.
    _DEFAULT_QDRANT_URL = "https://50f53cc8-bbb0-4939-8254-8f025a577222.us-west-2-0.aws.cloud.qdrant.io"

    def __init__(self, config_path: str = "config.yaml"):
        """Load environment variables, read the YAML file and validate it.

        Args:
            config_path: Path to the YAML configuration file.

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
            yaml.YAMLError: If the file is not valid YAML.
            ValueError: If the file is not a mapping or lacks required keys.
        """
        logger.info("Start Loading data from configs")
        load_environment_variables()
        self.config_path = Path(config_path)
        self.config = self._load_config()
        self._validate_config()
        # Missing API keys are reported but never fatal: some keys are
        # optional depending on which providers are actually used.
        api_validation = validate_api_keys()
        if not api_validation['valid']:
            logger.warning(f"Some API keys missing: {api_validation['missing_required']}")

    def _load_config(self) -> Dict[str, Any]:
        """Read and parse the YAML file into a dictionary.

        An empty file is treated as an empty mapping so that validation can
        report the missing sections with a clear message.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
            yaml.YAMLError: If the file cannot be parsed.
            ValueError: If the top-level YAML node is not a mapping.
        """
        # Checked outside the try block so the error is logged exactly once.
        if not self.config_path.exists():
            logger.error(f"Configuration file not found: {self.config_path}")
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")

        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except yaml.YAMLError as e:
            logger.error(f"Error parsing YAML configuration: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error loading configuration: {e}")
            raise

        # yaml.safe_load returns None for an empty document.
        if config is None:
            config = {}
        if not isinstance(config, dict):
            logger.error(f"Configuration root must be a mapping: {self.config_path}")
            raise ValueError(f"Configuration root must be a mapping: {self.config_path}")

        logger.info(f"Configuration loaded successfully from {self.config_path}")
        return config

    def _validate_config(self) -> None:
        """Validate configuration based on the actual YAML structure.

        Either the legacy ``gemini_model`` key or a ``models`` section must be
        present. When ``models`` is present it must define ``embedding_model``
        and ``llm_model``. The ``rag_system`` and ``vector_store`` sections
        are optional and only produce log messages.

        Raises:
            ValueError: If a required key or section is missing.
        """
        has_gemini = 'gemini_model' in self.config
        has_models_section = 'models' in self.config
        has_rag_section = 'rag_system' in self.config

        if not has_gemini and not has_models_section:
            logger.error("Missing required configuration: either 'gemini_model' or 'models' section must be configured")
            raise ValueError("Missing required configuration: either 'gemini_model' or 'models' section must be configured")

        # A section written as a bare key ("models:") parses to None; treat it
        # as empty so the checks below report missing keys instead of crashing.
        if has_models_section:
            models_config = self.config['models'] or {}
            for key in ('embedding_model', 'llm_model'):
                if key not in models_config:
                    logger.error(f"Missing required model configuration: models.{key}")
                    raise ValueError(f"Missing required model configuration: models.{key}")

        if has_rag_section:
            rag_config = self.config['rag_system'] or {}
            # These keys are optional; their absence is only noted at debug level.
            for key in ('chunk_size', 'chunk_overlap', 'max_context_chunks'):
                if key not in rag_config:
                    logger.debug(f"Optional RAG configuration key not found: rag_system.{key}")

        if 'vector_store' in self.config:
            vector_config = self.config['vector_store'] or {}
            if 'provider' in vector_config and vector_config['provider'] == 'qdrant':
                if 'collection_name' not in vector_config:
                    logger.warning("Qdrant collection_name not specified, will use default")

        logger.info("Configuration validation passed")

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by key, supporting nested keys with dot notation.

        Args:
            key: Key to look up, e.g. ``"models.llm_model"``.
            default: Value returned when any path component is missing.

        Returns:
            The stored value, or ``default`` if the path cannot be resolved.
        """
        value = self.config
        try:
            for k in key.split('.'):
                value = value[k]
        except (KeyError, TypeError):
            logger.debug(f"Config key '{key}' not found, returning default: {default}")
            return default
        # The value itself is deliberately not logged: it may be a credential.
        logger.debug(f"Retrieved config value for '{key}'")
        return value

    def get_env_var(self, key: str, required: bool = True) -> Optional[str]:
        """Read an environment variable.

        Args:
            key: Name of the environment variable.
            required: When true, a missing or empty variable is an error.

        Returns:
            The variable's value, or ``None`` when it is unset and not required.

        Raises:
            ValueError: If ``required`` is true and the variable is unset or empty.
        """
        value = os.getenv(key)
        if required and not value:
            logger.error(f"Required environment variable not found: {key}")
            raise ValueError(f"Required environment variable not found: {key}")
        if value:
            logger.info(f"Environment variable '{key}' loaded successfully")
        else:
            logger.warning(f"Optional environment variable '{key}' not found")
        return value

    def _resolve_setting(self, env_key: str, config_key: str, default: str = "") -> str:
        """Resolve a setting from the environment, then the YAML file, then ``default``."""
        env_value = os.getenv(env_key)
        if env_value:
            return env_value
        file_value = self.get(config_key)
        if file_value:
            return file_value
        return default

    @property
    def gemini_model(self) -> str:
        """Get Gemini model name (optional for RAG system)."""
        return self.get('gemini_model', 'models/gemini-2.5-flash')

    @property
    def google_api_key(self) -> str:
        """Get Google API key from environment, or an empty string if unset."""
        try:
            return self.get_env_var('GOOGLE_API_KEY')
        except ValueError:
            logger.warning("Google API key not found, this is optional for RAG-only usage")
            return ""

    # RAG System Properties
    @property
    def rag_config(self) -> Dict[str, Any]:
        """Get RAG system configuration, combining rag_system and models sections.

        Returns a new dictionary built from ``rag_system``, then updated with
        ``models`` and ``performance``; later sections win on key collisions.
        """
        # dict(... or {}) copies the section and tolerates a section set to null.
        rag_config = dict(self.get('rag_system') or {})
        models_config = self.get('models', {})
        if models_config:
            rag_config.update(models_config)
        performance_config = self.get('performance', {})
        if performance_config:
            rag_config.update(performance_config)
        return rag_config

    @property
    def groq_api_key(self) -> str:
        """Get Groq API key from GROQ_API_KEY or `groq_api_key`; empty string if unset."""
        return self._resolve_setting('GROQ_API_KEY', 'groq_api_key')

    @property
    def groq_url(self) -> str:
        """Get Groq URL from environment or config, defaulting to the public endpoint."""
        return self._resolve_setting('GROQ_URL', 'groq_url', self._DEFAULT_GROQ_URL)

    @property
    def siliconflow_api_key(self) -> str:
        """Get Silicon Flow API key from SILICONFLOW_API_KEY or `siliconflow_api_key`; empty string if unset."""
        return self._resolve_setting('SILICONFLOW_API_KEY', 'siliconflow_api_key')

    @property
    def qdrant_url(self) -> str:
        """Get Qdrant URL from QDRANT_URL or `qdrant_url`, else the historical default."""
        return self._resolve_setting('QDRANT_URL', 'qdrant_url', self._DEFAULT_QDRANT_URL)

    @property
    def qdrant_api_key(self) -> str:
        """Get Qdrant API key from QDRANT_API_KEY or `qdrant_api_key`; empty string if unset."""
        return self._resolve_setting('QDRANT_API_KEY', 'qdrant_api_key')

    @property
    def document_processing_config(self) -> Dict[str, Any]:
        """Get document processing configuration."""
        return self.get('document_processing', {})

    @property
    def storage_config(self) -> Dict[str, Any]:
        """Get storage configuration.

        Merges the ``vector_store``, ``cache`` and ``storage`` sections, in
        that order, into one new dictionary; later sections win on collisions.
        """
        storage_config: Dict[str, Any] = {}
        for section in ('vector_store', 'cache', 'storage'):
            section_config = self.get(section, {})
            # Skips absent sections and sections set to null.
            if section_config:
                storage_config.update(section_config)
        return storage_config
# Smoke test: load the default configuration and print a short summary.
# Only the presence of each API key is reported, never its value.
if __name__ == "__main__":
    try:
        config = Config()
        print("✅ Configuration loaded successfully!")
        print(f"RAG Config keys: {list(config.rag_config.keys())}")
        print(f"Has Groq API key: {'Yes' if config.groq_api_key else 'No'}")
        print(f"Has SiliconFlow API key: {'Yes' if config.siliconflow_api_key else 'No'}")
        print(f"Qdrant URL: {config.qdrant_url}")
    except Exception as e:
        # Any failure during loading or summary printing is reported here.
        print(f"❌ Configuration failed: {e}")