Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Test agent capabilities against local test cases | |
| """ | |
| import json | |
| from pathlib import Path | |
# Directory that holds the public test-case task files, located next to this script.
test_base = Path(__file__).parent.joinpath("TEST CASE", "public")
def analyze_task_requirements(base_dir=None):
    """Load each known task file and summarise its metadata.

    Args:
        base_dir: Directory containing the task files. When omitted, the
            module-level ``test_base`` is used, so existing callers that
            pass no argument behave as before.

    Returns:
        A dict mapping task name to a dict with the keys ``description``
        (the hard-coded summary below), ``difficulty`` (int; 0 when the
        marker is absent or not a number), ``personalized`` (bool) and
        ``content`` (the full file text). Tasks whose file is not present
        as a regular file are omitted.
    """
    if base_dir is None:
        base_dir = test_base
    base_dir = Path(base_dir)

    task_files = [
        ("project2-csv", "CSV processing with date parsing, sorting, JSON output"),
        ("project2-audio-passphrase", "Audio transcription (opus format)"),
        ("project2-heatmap", "Image analysis - find most frequent RGB color"),
        ("project2-invoice", "PDF text extraction and calculation"),
        ("project2-chart", "Chart type selection (reasoning)"),
        ("project2-rag", "JSON processing with weighted scoring"),
        ("project2-diff", "Image comparison - pixel difference count"),
        ("project2-logs", "ZIP file extraction, JSON parsing, personalized calculation"),
        ("project2-git", "Git command generation"),
        ("project2-md", "Markdown file location/path finding"),
        ("project2-uv", "Command string generation"),
        ("project2-embed", "Embedding/vector operations"),
        ("project2-f1", "F1 score calculation from JSON"),
        ("project2-gh-tree", "GitHub tree API parsing"),
        ("project2-guard", "Security/validation logic"),
        ("project2-orders", "CSV data aggregation"),
        ("project2-rate", "Rate/percentage calculation"),
        ("project2-shards", "Data sharding logic"),
        ("project2-tools", "Tool selection/recommendation"),
        ("project2-cache", "Caching strategy selection"),
    ]

    tasks = {}
    for task_name, description in task_files:
        task_file = base_dir / task_name
        # is_file() rather than exists(): a directory with the task's name
        # would otherwise reach the read call and raise.
        if not task_file.is_file():
            continue
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding; undecodable bytes are replaced
        # instead of aborting the whole analysis.
        content = task_file.read_text(encoding="utf-8", errors="replace")
        tasks[task_name] = {
            "description": description,
            "difficulty": _parse_difficulty(content),
            "personalized": "Personalized:</strong> Yes" in content,
            "content": content,
        }
    return tasks


def _parse_difficulty(content):
    """Return the integer after the ``Difficulty:</strong> `` marker, or 0."""
    _, marker, tail = content.partition("Difficulty:</strong> ")
    if not marker:
        return 0
    # Only the text up to the next tag belongs to the difficulty field.
    tokens = tail.split("<", 1)[0].split()
    try:
        return int(tokens[0])
    except (IndexError, ValueError):
        # Empty or non-numeric field: treat as "unknown" instead of crashing.
        return 0
def check_required_tools():
    """Return a status line for every tool the agent is expected to expose.

    The table is hard-coded; nothing is probed at runtime.

    Returns:
        Dict mapping tool name to its human-readable status string.
    """
    tool_summaries = [
        ("get_rendered_html", "Web scraping with JS"),
        ("download_file", "Download any file type"),
        ("post_request", "HTTP POST with headers"),
        ("run_code", "Execute Python code"),
        ("add_dependencies", "Install packages on-demand"),
        ("transcribe_audio", "Audio to text"),
    ]
    # Every entry carries the same "available" prefix, so attach it once here.
    return {name: f"β Available - {summary}" for name, summary in tool_summaries}
def check_python_libraries():
    """Return the libraries the tasks may need, with purpose and install status.

    The table is hard-coded; pyproject.toml is not actually inspected.

    Returns:
        Dict mapping library name to a ``(purpose, status)`` tuple.
    """
    library_rows = [
        ("pandas", "CSV/Excel processing", "β In pyproject.toml"),
        ("PIL/Pillow", "Image processing", "β οΈ NOT in pyproject.toml - Need to add"),
        ("PyPDF2/pdfplumber", "PDF extraction", "β οΈ NOT in pyproject.toml - Need to add"),
        ("opencv-cv2", "Image comparison", "β οΈ NOT in pyproject.toml - Optional"),
        ("numpy", "Numerical operations", "β οΈ NOT in pyproject.toml - Need to add"),
        ("zipfile", "ZIP extraction", "β Built-in"),
        ("json", "JSON processing", "β Built-in"),
    ]
    return {name: (purpose, status) for name, purpose, status in library_rows}
def identify_missing_capabilities(tasks):
    """Flag capabilities each task appears to need, based on keywords in its text.

    Args:
        tasks: Dict mapping task name to a dict that has a ``"content"`` string.

    Returns:
        A list of dicts with the keys ``task``, ``capability``, ``library``
        and ``priority``. Entries are grouped by task in input order; within
        a task they follow the order of the rules below.
    """
    # (predicate over the task text, capability, library, priority)
    rules = (
        (lambda text: ".pdf" in text,
         "PDF extraction", "PyPDF2 or pdfplumber", "HIGH"),
        (lambda text: ".png" in text and "color" in text,
         "Image color analysis", "Pillow (PIL)", "HIGH"),
        (lambda text: "differ" in text or "Compare" in text,
         "Image comparison", "Pillow + numpy", "MEDIUM"),
        (lambda text: ".zip" in text,
         "ZIP extraction", "zipfile (built-in)", "LOW"),
    )

    findings = []
    for name, details in tasks.items():
        text = details["content"]
        findings.extend(
            {
                "task": name,
                "capability": capability,
                "library": library,
                "priority": priority,
            }
            for applies, capability, library, priority in rules
            if applies(text)
        )
    return findings
def generate_test_report():
    """Print the full capability analysis report to stdout.

    Sections, in order: task inventory (with difficulty and personalization
    tallies), available tools, Python libraries, missing capabilities sorted
    by priority, and a fixed list of recommendations. Returns ``None``.
    """
    banner = "=" * 80
    rule = "-" * 80

    print(banner)
    print("QUIZ SOLVER AGENT - CAPABILITY ANALYSIS REPORT")
    print(banner)

    # Analyze tasks
    print("\nπ TASK INVENTORY")
    print(rule)
    tasks = analyze_task_requirements()
    difficulty_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
    personalized_count = 0
    for task_name, info in sorted(tasks.items()):
        difficulty = info["difficulty"]
        # A task without a difficulty marker is recorded as 0, which is not
        # one of the pre-seeded keys; .get() tallies it instead of raising
        # KeyError.
        difficulty_counts[difficulty] = difficulty_counts.get(difficulty, 0) + 1
        if info["personalized"]:
            personalized_count += 1
        status = "π" if difficulty >= 3 else "π"
        personal = "π€" if info["personalized"] else "π"
        print(f"{status} {personal} [{info['difficulty']}] {task_name:<30} - {info['description']}")
    print(f"\nTotal tasks: {len(tasks)}")
    print(f"Difficulty distribution: {difficulty_counts}")
    print(f"Personalized tasks: {personalized_count}")

    # Check tools
    print("\nπ§ AVAILABLE TOOLS")
    print(rule)
    tools = check_required_tools()
    # Only the status text is printed, so iterate the values directly.
    for status in tools.values():
        print(f"{status}")

    # Check libraries
    print("\nπ PYTHON LIBRARIES")
    print(rule)
    libs = check_python_libraries()
    for lib, (purpose, status) in libs.items():
        print(f"{status:<40} {lib:<20} - {purpose}")

    # Missing capabilities
    print("\nβ οΈ MISSING CAPABILITIES")
    print(rule)
    missing = identify_missing_capabilities(tasks)
    if missing:
        priority_order = {"HIGH": 1, "MEDIUM": 2, "LOW": 3}
        # sorted() is stable, so entries of equal priority keep task order.
        for item in sorted(missing, key=lambda x: priority_order[x["priority"]]):
            print(f"[{item['priority']}] {item['task']:<30} - {item['capability']:<25} β {item['library']}")
    else:
        print("β All capabilities available!")

    # Recommendations
    print("\nπ‘ RECOMMENDATIONS")
    print(rule)
    print("1. Add to pyproject.toml:")
    print(" - Pillow>=10.0.0 (for image processing)")
    print(" - PyPDF2>=3.0.0 or pdfplumber>=0.10.0 (for PDF extraction)")
    print(" - numpy>=1.24.0 (for numerical operations)")
    print()
    print("2. Update system prompt to handle:")
    print(" - PDF text extraction workflows")
    print(" - Image processing (color analysis, pixel comparison)")
    print(" - ZIP file extraction and processing")
    print()
    print("3. Test Priority:")
    print(" - HIGH: project2-csv (already improved)")
    print(" - HIGH: project2-invoice (needs PDF support)")
    print(" - HIGH: project2-heatmap (needs image processing)")
    print(" - MEDIUM: project2-diff (needs image comparison)")
    print("\n" + banner)
    print("LOCAL TEST SERVER: http://localhost:8000/project2.html")
    print(banner)
# Print the report only when this file is run as a script, not when imported.
if __name__ == "__main__":
    generate_test_report()