# dragonllm-finance-models/testing/suites/json_structured_test.py
# jeanbaptdzd
# feat: Clean deployment to HuggingFace Space with model config test endpoint
# 8c0b652
#!/usr/bin/env python3
"""
Structured JSON output test suite.
Tests the model's ability to produce valid, structured JSON responses.
"""
import json
from typing import List, Dict, Any, Optional
from testing.core.base_tester import BaseTester, TestCase
class JSONStructuredTester(BaseTester):
    """Test structured JSON output capabilities.

    Each test case prompts the model for a JSON document of a given shape.
    ``validate_response`` parses the model's answer and checks that the
    required keys and container types for that test case are present.
    Only key presence and container types are checked, never the values.
    """

    # Maps a test-case name to the name of the method that checks its shape.
    # Method names (rather than function objects) are stored so the lookup
    # happens on the instance at call time.
    _SHAPE_VALIDATORS = {
        "simple_json": "_validate_simple_json",
        "complex_json": "_validate_complex_json",
        "array_json": "_validate_array_json",
        "nested_json": "_validate_nested_json",
        "error_json": "_validate_error_json",
    }

    # Responses shorter than this (after stripping whitespace) are rejected
    # without attempting to parse them.
    _MIN_RESPONSE_LENGTH = 5

    def load_test_cases(self) -> List[TestCase]:
        """Load JSON structured output test cases.

        Returns:
            Five test cases (simple, complex, array, nested, error), each
            with its own prompt and ``max_tokens`` budget.
        """
        return [
            TestCase(
                name="simple_json",
                prompt="Return a JSON object with the following structure: {\"company_name\": \"string\", \"industry\": \"string\", \"risk_level\": \"string\"} for a typical insurance company.",
                expected_keys=["response"],
                max_tokens=100,
            ),
            TestCase(
                name="complex_json",
                prompt="Create a JSON object representing a Solvency II risk assessment with the following structure: {\"company\": {\"name\": \"string\", \"type\": \"string\"}, \"risks\": [{\"type\": \"string\", \"level\": \"string\", \"mitigation\": \"string\"}], \"capital_requirement\": {\"amount\": \"number\", \"currency\": \"string\"}}",
                expected_keys=["response"],
                max_tokens=200,
            ),
            TestCase(
                name="array_json",
                prompt="Return a JSON array of 3 insurance products with each object containing: {\"name\": \"string\", \"category\": \"string\", \"premium_range\": \"string\"}",
                expected_keys=["response"],
                max_tokens=150,
            ),
            TestCase(
                name="nested_json",
                prompt="Create a JSON object for an insurance company's regulatory compliance status: {\"company\": {\"name\": \"string\", \"license_number\": \"string\"}, \"compliance\": {\"solvency_ii\": {\"status\": \"string\", \"last_review\": \"string\"}, \"basel_iii\": {\"status\": \"string\", \"last_review\": \"string\"}}, \"next_review_date\": \"string\"}",
                expected_keys=["response"],
                max_tokens=180,
            ),
            TestCase(
                name="error_json",
                prompt="Return a JSON error response with the structure: {\"error\": {\"code\": \"string\", \"message\": \"string\", \"details\": \"string\"}} for an invalid insurance policy request.",
                expected_keys=["response"],
                max_tokens=120,
            ),
        ]

    def validate_response(self, response: Dict[str, Any], test_case: TestCase) -> bool:
        """Validate structured JSON response.

        Args:
            response: Mapping expected to carry the model output as a string
                under the ``"response"`` key.
            test_case: The test case being checked; its ``name`` selects the
                shape validator.

        Returns:
            True when the response text parses as JSON (optionally wrapped
            in a Markdown code fence) and satisfies the shape required by the
            test case. Test cases without a dedicated validator pass on any
            parseable JSON. Returns False in every other situation,
            including unexpected errors.
        """
        try:
            if "response" not in response:
                return False

            response_text = response["response"]
            # A non-string payload (None, dict, bytes, ...) is not JSON text.
            if not isinstance(response_text, str):
                return False
            if len(response_text.strip()) < self._MIN_RESPONSE_LENGTH:
                return False

            try:
                parsed_json = json.loads(self._strip_code_fence(response_text))
            except json.JSONDecodeError:
                return False

            validator_name = self._SHAPE_VALIDATORS.get(test_case.name)
            if validator_name is None:
                # No shape rules for this test case: valid JSON is enough.
                return True
            return getattr(self, validator_name)(parsed_json)
        except Exception as e:
            # Best-effort boundary: a malformed input must fail the test
            # case rather than abort the run.
            print(f"Validation error: {e}")
            return False

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return ``text`` without a surrounding Markdown code fence.

        Handles a bare opening fence as well as one tagged ``json`` in any
        letter case. A trailing fence is removed even when there is no
        opening fence.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # Drop the optional language tag that follows the opening fence.
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    @staticmethod
    def _has_keys(obj: Any, keys: List[str]) -> bool:
        """Return True when ``obj`` is a dict containing every key in ``keys``."""
        return isinstance(obj, dict) and all(key in obj for key in keys)

    def _validate_simple_json(self, data: Any) -> bool:
        """Validate simple JSON structure.

        Requires a dict with ``company_name``, ``industry`` and
        ``risk_level``.
        """
        return self._has_keys(data, ["company_name", "industry", "risk_level"])

    def _validate_complex_json(self, data: Any) -> bool:
        """Validate complex JSON structure.

        Requires a dict with a ``company`` dict (``name``, ``type``), a
        non-empty ``risks`` list whose items are dicts with ``type``,
        ``level`` and ``mitigation``, and a ``capital_requirement`` dict
        (``amount``, ``currency``).
        """
        if not self._has_keys(data, ["company", "risks", "capital_requirement"]):
            return False
        if not self._has_keys(data["company"], ["name", "type"]):
            return False

        risks = data["risks"]
        if not isinstance(risks, list) or not risks:
            return False
        if not all(
            self._has_keys(risk, ["type", "level", "mitigation"]) for risk in risks
        ):
            return False

        return self._has_keys(data["capital_requirement"], ["amount", "currency"])

    def _validate_array_json(self, data: Any) -> bool:
        """Validate JSON array structure.

        Requires a list of exactly three dicts, each with ``name``,
        ``category`` and ``premium_range``.
        """
        if not isinstance(data, list) or len(data) != 3:
            return False
        return all(
            self._has_keys(item, ["name", "category", "premium_range"])
            for item in data
        )

    def _validate_nested_json(self, data: Any) -> bool:
        """Validate nested JSON structure.

        Requires a dict with ``company`` (``name``, ``license_number``),
        ``next_review_date``, and ``compliance`` holding ``solvency_ii`` and
        ``basel_iii`` dicts that each carry ``status`` and ``last_review``.
        """
        if not self._has_keys(data, ["company", "compliance", "next_review_date"]):
            return False
        if not self._has_keys(data["company"], ["name", "license_number"]):
            return False

        regulations = ["solvency_ii", "basel_iii"]
        compliance = data["compliance"]
        if not self._has_keys(compliance, regulations):
            return False
        return all(
            self._has_keys(compliance[regulation], ["status", "last_review"])
            for regulation in regulations
        )

    def _validate_error_json(self, data: Any) -> bool:
        """Validate error JSON structure.

        Requires a dict whose ``error`` entry is a dict with ``code``,
        ``message`` and ``details``.
        """
        if not self._has_keys(data, ["error"]):
            return False
        return self._has_keys(data["error"], ["code", "message", "details"])