# FlexBoard / recommender.py
# Grigori Fursin
# first commit
# 4d2d78e unverified
"""Configuration recommendation module for MLPerf benchmarks."""
import logging
import pandas as pd
from utils import get_feature_type
logger = logging.getLogger(__name__)
class ConfigurationFinder:
    """Finds optimal hardware configurations based on user requirements.

    The finder wraps a benchmark ``DataFrame`` and offers three steps:
    filtering rows by a dictionary of constraints, ranking the surviving
    rows by a metric column, and a convenience ``recommend`` call that does
    both and keeps the best ``top_n`` rows.
    """

    # Column compared against the "min_accelerators" / "max_accelerators"
    # constraint keys.
    _ACCELERATOR_COUNT_COLUMN = "system.accelerator.total_count"

    # Constraint value that means "do not filter on this feature".
    _ANY = "Any"

    def __init__(self, dataset: pd.DataFrame):
        """Initialize with benchmark dataset.

        Args:
            dataset: Benchmark results, one configuration per row. The frame
                is stored by reference; filtering works on a copy.
        """
        self.df = dataset
        # Metric used by ``recommend`` to rank configurations.
        self.perf_metric = "metrics.result_per_accelerator"
        # The two names below are stored for callers; nothing in this class
        # reads them.
        self.cost_metric = "cost_per_million_tokens"
        self.total_perf_metric = "metrics.result"

    def is_within_tolerance(
        self, value1: float, value2: float, tolerance: float = 0.1
    ) -> bool:
        """Check if two values are within a specified percentage tolerance.

        The difference is measured relative to the larger magnitude of the
        two values. When either value is zero the relative difference is
        undefined, so the values must then be exactly equal.

        Args:
            value1: First value; ``None`` never matches.
            value2: Second value; ``None`` never matches.
            tolerance: Maximum allowed relative difference (0.1 means 10%).

        Returns:
            ``True`` when the values are close enough, ``False`` otherwise,
            including when the values cannot be compared numerically.
        """
        if value1 is None or value2 is None:
            return False
        try:
            if value1 == 0 or value2 == 0:
                return bool(value1 == value2)
            largest = max(abs(value1), abs(value2))
            return bool(abs(value1 - value2) / largest <= tolerance)
        except (TypeError, ValueError, ArithmeticError):
            # Non-numeric or otherwise incomparable inputs are "not within
            # tolerance" rather than an error.
            return False

    @classmethod
    def _is_unset(cls, value) -> bool:
        """Return True when a constraint value means "no constraint"."""
        return value is None or (isinstance(value, str) and value == cls._ANY)

    @staticmethod
    def _tolerance_bounds(target: float, tolerance: float) -> tuple:
        """Return the ``(lower, upper)`` range of ``target`` +/- ``tolerance``.

        The two ends are ordered explicitly: for a negative target,
        ``target * (1 - tolerance)`` is the larger of the two products, and
        an unordered pair would describe an empty range.
        """
        first = target * (1 - tolerance)
        second = target * (1 + tolerance)
        return (first, second) if first <= second else (second, first)

    def _filter_continuous(
        self, frame: pd.DataFrame, feature: str, value, tolerance: float
    ) -> pd.DataFrame:
        """Keep rows whose ``feature`` lies within ``tolerance`` of ``value``.

        Falls back to exact equality when ``value`` is not numeric or the
        column cannot be compared with a number.
        """
        try:
            lower, upper = self._tolerance_bounds(float(value), tolerance)
            column = frame[feature]
            return frame[(column >= lower) & (column <= upper)]
        except (TypeError, ValueError):
            return frame[frame[feature] == value]

    def _filter_accelerator_count(
        self, frame: pd.DataFrame, bound, *, is_minimum: bool
    ) -> pd.DataFrame:
        """Apply a lower or upper bound on the accelerator-count column.

        A falsy bound (missing, ``None``, ``0``) or ``"Any"`` leaves the
        frame untouched. A dataset without the accelerator-count column is
        also left untouched, with a warning, instead of raising ``KeyError``.
        Numeric strings are accepted; other non-numeric bounds raise
        ``ValueError`` from ``float``.
        """
        if not bound or self._is_unset(bound):
            return frame
        column_name = self._ACCELERATOR_COUNT_COLUMN
        if column_name not in frame.columns:
            # getLogger(__name__) resolves to this module's logger.
            logging.getLogger(__name__).warning(
                "Column %r not in dataset; ignoring accelerator bound %r",
                column_name,
                bound,
            )
            return frame
        limit = float(bound)
        counts = frame[column_name]
        mask = counts >= limit if is_minimum else counts <= limit
        return frame[mask]

    def find_configurations(
        self, constraints: dict, tolerance: float = 0.1
    ) -> pd.DataFrame:
        """Find configurations matching the given constraints.

        Args:
            constraints: Maps a feature (column) name to the wanted value.
                Entries whose key is not a column, or whose value is
                ``None`` or ``"Any"``, are ignored. Features that
                ``get_feature_type`` reports as ``"continuous"`` match
                within ``tolerance``; all others must match exactly. The
                special keys ``"min_accelerators"`` and
                ``"max_accelerators"`` bound the accelerator-count column.
            tolerance: Relative tolerance for continuous features.

        Returns:
            A new frame with the matching rows; an empty frame when the
            dataset itself is empty.
        """
        if self.df.empty:
            return pd.DataFrame()

        # Work on a copy so the caller can modify the result freely.
        filtered_df = self.df.copy()

        for feature, value in constraints.items():
            if feature not in filtered_df.columns or self._is_unset(value):
                continue
            if get_feature_type(feature) == "continuous":
                filtered_df = self._filter_continuous(
                    filtered_df, feature, value, tolerance
                )
            else:
                filtered_df = filtered_df[filtered_df[feature] == value]

        filtered_df = self._filter_accelerator_count(
            filtered_df, constraints.get("min_accelerators"), is_minimum=True
        )
        filtered_df = self._filter_accelerator_count(
            filtered_df, constraints.get("max_accelerators"), is_minimum=False
        )
        return filtered_df

    def rank_configurations(
        self,
        df: pd.DataFrame,
        metric: str = "metrics.result_per_accelerator",
        ascending: bool = False,
    ) -> pd.DataFrame:
        """Rank configurations by the specified metric.

        Args:
            df: Configurations to rank.
            metric: Column to sort by.
            ascending: Sort direction; the default puts the largest metric
                values first.

        Returns:
            The sorted frame, or ``df`` unchanged when it is empty or has no
            ``metric`` column.
        """
        if df.empty or metric not in df.columns:
            return df
        return df.sort_values(by=metric, ascending=ascending)

    def recommend(
        self, constraints: dict, top_n: int = 10, tolerance: float = 0.1
    ) -> pd.DataFrame:
        """Find and rank configurations based on constraints.

        Args:
            constraints: Passed to ``find_configurations``.
            top_n: Maximum number of rows to return.
            tolerance: Relative tolerance for continuous features, passed to
                ``find_configurations``.

        Returns:
            Up to ``top_n`` matching rows, ordered by ``self.perf_metric``
            from highest to lowest.
        """
        filtered_configs = self.find_configurations(
            constraints, tolerance=tolerance
        )
        ranked_configs = self.rank_configurations(
            filtered_configs, metric=self.perf_metric, ascending=False
        )
        # head() already returns every row when there are fewer than top_n.
        return ranked_configs.head(top_n)