| """Main entry point for the Korean Q&A evaluation system.""" | |
| import argparse | |
| import sys | |
| from pathlib import Path | |
| import logging | |
| # Add src to path | |
| sys.path.append(str(Path(__file__).parent / "src")) | |
| from src.logger import setup_logging | |
| from src.config import Config | |
| from src.dataset_loader import DatasetLoader | |
| from src.evaluator import KoreanQAEvaluator | |


def main():
    """Main function to run the evaluation."""
    parser = argparse.ArgumentParser(description="Korean Q&A Evaluation System")
    parser.add_argument("--config", default=None,
                        help="Path to a config YAML file (defaults to src/config.yaml "
                             "or config.yaml next to this script)")
    parser.add_argument("--dataset",
                        default=str(Path(__file__).parent / "assets" / "bench_korean.csv"),
                        help="Path to the dataset CSV file")
    parser.add_argument("--output", help="Output path for results (optional)")
    parser.add_argument("--log-level", default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Logging level")
    parser.add_argument("--threshold", type=float, default=0.8,
                        help="Minimum score for a test case to count as passed")
    parser.add_argument("--verbose", action="store_true",
                        help="Enable verbose evaluation mode")
    args = parser.parse_args()

    # Set up logging
    logger_setup = setup_logging(log_level=args.log_level)
    logger = logger_setup.get_logger(__name__)
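    # logger_setup is assumed to expose the log_evaluation_start() and
    # log_evaluation_end() hooks used below, per src.logger's interface.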

    try:
        logger.info("Starting Korean Q&A Evaluation System")

        # Load configuration
        logger.info("Loading configuration...")
        if args.config is None:
            # Try to find the config file in the expected locations
            script_dir = Path(__file__).parent
            possible_configs = [
                script_dir / "src" / "config.yaml",
                script_dir / "config.yaml",
            ]
            config_path = None
            for path in possible_configs:
                if path.exists():
                    config_path = str(path)
                    break
            if config_path is None:
                raise FileNotFoundError(
                    "No config.yaml found in expected locations: "
                    + ", ".join(str(p) for p in possible_configs)
                )
        else:
            config_path = args.config
        config = Config(config_path)
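        # Assumed config.yaml shape, inferred from the Config attributes used
        # below (actual keys may differ):
        #   gemini_model: gemini-1.5-flash   # illustrative model name
        #   google_api_key: <your-api-key>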

        # Log evaluation start
        logger_setup.log_evaluation_start(args.dataset, config.gemini_model)

        # Load dataset
        logger.info("Loading dataset...")
        dataset_loader = DatasetLoader()
        dataset = dataset_loader.load_from_csv(args.dataset)

        # Get dataset statistics
        stats = dataset_loader.get_dataset_stats()
        logger.info(f"Dataset loaded: {stats}")

        # Initialize evaluator
        logger.info("Initializing evaluator...")
        evaluator = KoreanQAEvaluator(
            model_name=config.gemini_model,
            api_key=config.google_api_key,
            threshold=args.threshold,
            verbose_mode=args.verbose,
        )

        # Run evaluation
        logger.info("Running evaluation...")
        results = evaluator.evaluate_dataset(dataset)
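        # `results` is expected to be a dict carrying the summary fields
        # printed below (total_cases, passed_cases, pass_rate, and so on).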

        # Save results (the evaluator picks a default path when --output is omitted)
        if args.output:
            output_path = evaluator.save_results(results, args.output)
        else:
            output_path = evaluator.save_results(results)

        # Log evaluation end
        logger_setup.log_evaluation_end(results)
        logger.info(f"Evaluation completed successfully. Results saved to: {output_path}")

        # Print summary to console
        print("\n" + "=" * 60)
        print("EVALUATION SUMMARY")
        print("=" * 60)
        print(f"Total test cases: {results['total_cases']}")
        print(f"Passed cases: {results['passed_cases']}")
        print(f"Failed cases: {results['failed_cases']}")
        print(f"Pass rate: {results['pass_rate']}%")
        print(f"Average score: {results['average_score']}")
        print(f"Threshold: {results['threshold']}")
        print(f"Model: {results['model_name']}")
        print(f"Results saved to: {output_path}")
        print("=" * 60)

    except Exception as e:
        logger.error(f"Evaluation failed: {e}", exc_info=True)
        sys.exit(1)


if __name__ == "__main__":
    main()
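
# Example invocation (paths and values are illustrative):
#   python main.py --dataset assets/bench_korean.csv --threshold 0.8 --verbose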