Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Structured JSON output test suite. | |
| Tests the model's ability to produce valid, structured JSON responses. | |
| """ | |
| import json | |
| from typing import List, Dict, Any, Optional | |
| from testing.core.base_tester import BaseTester, TestCase | |
class JSONStructuredTester(BaseTester):
    """Test structured JSON output capabilities.

    Supplies five prompts that each ask the model for a specific JSON
    shape, and checks that the model's reply parses as JSON and contains
    the keys named in the corresponding prompt.
    """

    def load_test_cases(self) -> List[TestCase]:
        """Load JSON structured output test cases.

        Returns:
            One ``TestCase`` per JSON shape under test. The ``name`` of each
            case selects the structural validator used by
            ``validate_response``.
        """
        return [
            TestCase(
                name="simple_json",
                prompt="Return a JSON object with the following structure: {\"company_name\": \"string\", \"industry\": \"string\", \"risk_level\": \"string\"} for a typical insurance company.",
                expected_keys=["response"],
                max_tokens=100,
            ),
            TestCase(
                name="complex_json",
                prompt="Create a JSON object representing a Solvency II risk assessment with the following structure: {\"company\": {\"name\": \"string\", \"type\": \"string\"}, \"risks\": [{\"type\": \"string\", \"level\": \"string\", \"mitigation\": \"string\"}], \"capital_requirement\": {\"amount\": \"number\", \"currency\": \"string\"}}",
                expected_keys=["response"],
                max_tokens=200,
            ),
            TestCase(
                name="array_json",
                prompt="Return a JSON array of 3 insurance products with each object containing: {\"name\": \"string\", \"category\": \"string\", \"premium_range\": \"string\"}",
                expected_keys=["response"],
                max_tokens=150,
            ),
            TestCase(
                name="nested_json",
                prompt="Create a JSON object for an insurance company's regulatory compliance status: {\"company\": {\"name\": \"string\", \"license_number\": \"string\"}, \"compliance\": {\"solvency_ii\": {\"status\": \"string\", \"last_review\": \"string\"}, \"basel_iii\": {\"status\": \"string\", \"last_review\": \"string\"}}, \"next_review_date\": \"string\"}",
                expected_keys=["response"],
                max_tokens=180,
            ),
            TestCase(
                name="error_json",
                prompt="Return a JSON error response with the structure: {\"error\": {\"code\": \"string\", \"message\": \"string\", \"details\": \"string\"}} for an invalid insurance policy request.",
                expected_keys=["response"],
                max_tokens=120,
            ),
        ]

    def validate_response(self, response: Dict[str, Any], test_case: TestCase) -> bool:
        """Validate structured JSON response.

        Args:
            response: Mapping expected to hold the model's raw text under the
                ``"response"`` key.
            test_case: The case being checked; only its ``name`` is read, to
                pick the structural validator.

        Returns:
            True when the text parses as JSON (after removing a surrounding
            Markdown code fence) and matches the structure for the test
            case. A test case whose name has no dedicated validator passes
            as soon as the text parses. False in every other situation;
            this method does not raise.
        """
        try:
            if "response" not in response:
                return False

            response_text = response["response"]

            # Only text can be parsed; anything shorter than five characters
            # is treated as an empty answer.
            if not isinstance(response_text, str) or len(response_text.strip()) < 5:
                return False

            try:
                parsed_json = json.loads(self._strip_code_fence(response_text))
            except json.JSONDecodeError:
                return False

            validators = {
                "simple_json": self._validate_simple_json,
                "complex_json": self._validate_complex_json,
                "array_json": self._validate_array_json,
                "nested_json": self._validate_nested_json,
                "error_json": self._validate_error_json,
            }
            validator = validators.get(test_case.name)
            if validator is None:
                # No structural rules for this case: valid JSON is enough.
                return True
            return validator(parsed_json)
        except Exception as e:
            # Deliberate catch-all: a malformed response or test case must
            # count as a failed validation rather than abort the caller.
            print(f"Validation error: {e}")
            return False

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return *text* without a surrounding Markdown code fence.

        Handles an opening fence with or without a ``json`` language tag
        (matched case-insensitively) and an optional closing fence. Text
        without fences is returned stripped of outer whitespace.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # The language tag is optional and models vary its letter case.
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    @staticmethod
    def _has_keys(candidate: Any, keys: List[str]) -> bool:
        """Return True when *candidate* is a dict containing every key in *keys*."""
        return isinstance(candidate, dict) and all(key in candidate for key in keys)

    def _validate_simple_json(self, data: Any) -> bool:
        """Validate simple JSON structure.

        Requires a dict with ``company_name``, ``industry`` and
        ``risk_level``.
        """
        return self._has_keys(data, ["company_name", "industry", "risk_level"])

    def _validate_complex_json(self, data: Any) -> bool:
        """Validate complex JSON structure.

        Requires a dict with a ``company`` dict (``name``, ``type``), a
        non-empty ``risks`` list whose items are dicts with ``type``,
        ``level`` and ``mitigation``, and a ``capital_requirement`` dict
        (``amount``, ``currency``).
        """
        if not self._has_keys(data, ["company", "risks", "capital_requirement"]):
            return False

        if not self._has_keys(data["company"], ["name", "type"]):
            return False

        risks = data["risks"]
        if not isinstance(risks, list) or not risks:
            return False
        if not all(self._has_keys(risk, ["type", "level", "mitigation"]) for risk in risks):
            return False

        return self._has_keys(data["capital_requirement"], ["amount", "currency"])

    def _validate_array_json(self, data: Any) -> bool:
        """Validate JSON array structure.

        Requires a list of exactly three dicts, each with ``name``,
        ``category`` and ``premium_range``.
        """
        if not isinstance(data, list) or len(data) != 3:
            return False
        return all(
            self._has_keys(item, ["name", "category", "premium_range"])
            for item in data
        )

    def _validate_nested_json(self, data: Any) -> bool:
        """Validate nested JSON structure.

        Requires a dict with ``company`` (``name``, ``license_number``),
        ``next_review_date``, and ``compliance`` holding ``solvency_ii`` and
        ``basel_iii`` dicts that each carry ``status`` and ``last_review``.
        """
        if not self._has_keys(data, ["company", "compliance", "next_review_date"]):
            return False

        if not self._has_keys(data["company"], ["name", "license_number"]):
            return False

        compliance = data["compliance"]
        if not self._has_keys(compliance, ["solvency_ii", "basel_iii"]):
            return False

        return all(
            self._has_keys(compliance[regulation], ["status", "last_review"])
            for regulation in ["solvency_ii", "basel_iii"]
        )

    def _validate_error_json(self, data: Any) -> bool:
        """Validate error JSON structure.

        Requires a dict whose ``error`` entry is a dict with ``code``,
        ``message`` and ``details``.
        """
        if not isinstance(data, dict):
            return False
        # A missing "error" key yields None, which _has_keys rejects.
        return self._has_keys(data.get("error"), ["code", "message", "details"])