| import json | |
| import logging | |
| import polars as pl | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Catalogue of known features.
# Outer keys are group names; each group maps a dotted feature name to its
# declared type, which is one of "continuous", "categorical", "text" or
# "boolean". Insertion order of groups and of features within a group is
# preserved and is what the derived lookups below iterate over.
FEATURES: dict[str, dict[str, str]] = {
    # Benchmark result metrics.
    "Performance": {
        "metrics.result": "continuous",
        "metrics.result_per_accelerator": "continuous",
        "metrics.accuracy": "continuous",
    },
    # Model description.
    "Model": {
        "model.name": "categorical",
        "model.mlperf_name": "categorical",
        "model.architecture": "categorical",
        "model.number_of_parameters": "continuous",
        "model.weight_data_types": "categorical",
    },
    # Accelerator hardware and the accelerator interconnect.
    "Accelerator": {
        "system.accelerator.vendor": "categorical",
        "system.accelerator.name": "categorical",
        "system.accelerator.count_per_node": "continuous",
        "system.accelerator.total_count": "continuous",
        "system.accelerator.memory_capacity": "continuous",
        "system.accelerator.memory_config": "text",
        "system.interconnect.accelerator": "categorical",
    },
    # Host CPU.
    "CPU": {
        "system.cpu.vendor": "categorical",
        "system.cpu.model": "categorical",
        "system.cpu.core_count": "continuous",
        "system.cpu.count_per_node": "continuous",
        "system.cpu.frequency": "continuous",
        "system.cpu.caches": "text",
        "system.cpu.vcpu_count": "continuous",
    },
    # System-level attributes, host memory and the accelerator/host link.
    "System": {
        "system.name": "text",
        "system.type": "categorical",
        "system.cooling": "categorical",
        "system.number_of_nodes": "continuous",
        "system.memory.capacity": "continuous",
        "system.memory.configuration": "text",
        "system.interconnect.accelerator_host": "categorical",
    },
    # Software stack.
    "Software": {
        "software.framework": "categorical",
        "software.version": "categorical",
        "software.operating_system": "categorical",
    },
    # Submission metadata.
    "Submission": {
        "submission.organization": "categorical",
        "submission.division": "categorical",
        "submission.scenario": "categorical",
        "submission.availability": "boolean",
    },
}
def get_features_by_type(feature_type: str) -> list[str]:
    """Return the names of every feature in ``FEATURES`` declared as *feature_type*.

    Groups are walked in declaration order and, within each group, features
    are walked in declaration order, so the result order is deterministic.
    An unknown *feature_type* simply yields an empty list.
    """
    return [
        name
        for members in FEATURES.values()
        for name, kind in members.items()
        if kind == feature_type
    ]
# Feature names bucketed by declared type: type name -> list of feature names.
# Key order is fixed by the tuple below.
FEATURE_TYPES = {
    kind: get_features_by_type(kind)
    for kind in ("continuous", "categorical", "boolean", "text")
}
# Group name -> list of the feature names declared in that group, in
# declaration order (the per-feature types are dropped).
UI_FEATURE_GROUPS = {title: list(members) for title, members in FEATURES.items()}
def get_feature_type(feature_name: str) -> str:
    """Look up the declared type of *feature_name* in ``FEATURES``.

    Groups are searched in declaration order and the first group that lists
    the feature supplies the answer. A name that appears in no group is
    reported as "categorical".
    """
    return next(
        (
            members[feature_name]
            for members in FEATURES.values()
            if feature_name in members
        ),
        "categorical",
    )
def _coerce_numeric(value: object) -> object:
    """Return *value* as an int/float if it is an unsigned numeric string, else unchanged."""
    if not isinstance(value, str):
        return value
    try:
        if value.isdigit():
            return int(value)
        # Allow exactly one decimal point ("1.5", ".5", "5.").
        if value.replace(".", "", 1).isdigit():
            return float(value)
    except ValueError:
        # str.isdigit() is true for characters such as "²" that int()/float()
        # cannot parse; keep the original string instead of aborting the load.
        pass
    return value


def load_data(file_path: str = "data.json") -> pl.DataFrame:
    """Load processed benchmark data from a JSON file into a polars DataFrame.

    The file is expected to contain a JSON array of objects (one per result).
    Every string value that looks like an unsigned integer or an unsigned
    decimal number is converted to ``int`` / ``float`` before the frame is
    built. The conversion is applied to all fields, regardless of the type
    declared for them in ``FEATURES``. Strings that cannot be converted are
    kept as they are.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A DataFrame with one row per record. If the file cannot be read,
        parsed or turned into a DataFrame, the error is logged with its
        traceback and an empty DataFrame is returned instead of raising.
    """
    logger.info("Loading processed data from %s", file_path)
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        for item in data:
            # Reassigning existing keys while iterating is safe: the dict's
            # size never changes.
            for key, value in item.items():
                item[key] = _coerce_numeric(value)
        # infer_schema_length=None asks polars to infer the schema from all rows.
        df = pl.DataFrame(data, infer_schema_length=None)
    except Exception as e:
        # Deliberate best-effort boundary: callers receive an empty frame.
        logger.exception("Error loading data: %s", e)
        return pl.DataFrame()
    logger.info("Loaded %s benchmark results", len(df))
    return df