Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| import pingouin as pg | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
# Folder that receives the generated ICC heatmap images. It is created at
# import time (a no-op when it already exists) so saving a figure later does
# not fail on a missing directory.
HEATMAPS_FOLDER = "icc_heatmaps/"
os.makedirs(HEATMAPS_FOLDER, exist_ok=True)
def preprocess_data(df, selected_assessors, selected_respondents, selected_criteria):
    """
    Filter the dataset to the selected assessors, respondents, and criteria.

    Args:
        df: DataFrame with "assessor" and "respondent" columns plus one
            column per criterion.
        selected_assessors: Assessor values to keep.
        selected_respondents: Respondent values to keep.
        selected_criteria: Names of the criterion columns to keep (any
            sequence, not only a list).

    Returns:
        A new DataFrame holding only the matching rows and the columns
        "assessor", "respondent" and the selected criteria. Criterion
        columns are float (comma decimals such as "1,5" are accepted);
        "assessor" and "respondent" are converted to str. The input frame
        is left untouched.

    Raises:
        ValueError: If a criterion value cannot be parsed as a number.
    """
    criteria = list(selected_criteria)
    keep_rows = df["assessor"].isin(selected_assessors) & df["respondent"].isin(selected_respondents)

    # Work on an explicit copy: assigning into a filtered slice triggers
    # chained-assignment warnings and may or may not touch the caller's data.
    result = df.loc[keep_rows, ["assessor", "respondent"] + criteria].copy()

    for col in criteria:
        column = result[col]
        # Only text columns need the comma -> dot fix; the .str accessor
        # raises on columns that were already parsed as numbers.
        if not pd.api.types.is_numeric_dtype(column):
            as_text = column.astype(str).str.replace(",", ".", regex=False)
            # astype(str) turns missing values into text; restore them as NaN.
            column = as_text.where(column.notna())
        result[col] = column.astype(float)

    # Identifiers are used as grouping labels downstream, so keep them as strings.
    result["assessor"] = result["assessor"].astype(str)
    result["respondent"] = result["respondent"].astype(str)
    return result
def compute_icc(df):
    """
    Compute the overall ICC (Intraclass Correlation Coefficient) table.

    All criterion columns are stacked into a single "Score" column, with
    respondents as targets and assessors as raters.

    Returns:
        The pingouin ICC result rounded to 3 decimals, or None when the data
        contains fewer than 5 distinct respondents.
    """
    long_scores = df.melt(
        id_vars=["assessor", "respondent"],
        var_name="Criterion",
        value_name="Score",
    )

    # Guard clause: with too few respondents no ICC is computed.
    if long_scores["respondent"].nunique() < 5:
        return None

    icc_table = pg.intraclass_corr(
        data=long_scores,
        targets="respondent",
        raters="assessor",
        ratings="Score",
    )
    return icc_table.round(3)
def compute_assessor_icc(df):
    """
    Compute pairwise ICC matrices between assessors.

    For every pair of distinct assessors the criterion scores are stacked
    into a single "Score" column and pingouin's ICC is computed with
    respondents as targets and the two assessors as raters.

    Args:
        df: DataFrame with "assessor" and "respondent" columns plus one
            numeric column per criterion.

    Returns:
        A dict mapping "ICC1", "ICC2" and "ICC3" to a square float DataFrame
        indexed and labelled by assessor. Cell (a, b) holds the ICC of that
        type for the pair, rounded to 3 decimals. The diagonal, and any pair
        with fewer than 5 distinct respondents, stays NaN.
    """
    from itertools import combinations

    icc_types = ["ICC1", "ICC2", "ICC3"]
    melted_df = df.melt(id_vars=["assessor", "respondent"], var_name="Criterion", value_name="Score")
    assessors = df["assessor"].unique()
    icc_matrix_types = {
        icc_type: pd.DataFrame(index=assessors, columns=assessors, dtype=float)
        for icc_type in icc_types
    }

    # The subset for (a, b) is the same as for (b, a), so each unordered pair
    # is evaluated once and the value written to both cells.
    for assessor1, assessor2 in combinations(assessors, 2):
        subset = melted_df[melted_df["assessor"].isin([assessor1, assessor2])]
        # NOTE(review): this counts respondents rated by either assessor, not
        # by both; pingouin's default nan_policy appears to raise on
        # unbalanced data, so confirm the inputs are balanced.
        if subset["respondent"].nunique() < 5:
            continue

        icc_results = pg.intraclass_corr(
            data=subset, targets="respondent", raters="assessor", ratings="Score"
        ).round(3)
        # Index by ICC type once instead of once per type.
        icc_by_type = icc_results.set_index("Type")["ICC"]

        for icc_type in icc_types:
            value = icc_by_type.loc[icc_type]
            matrix = icc_matrix_types[icc_type]
            matrix.loc[assessor1, assessor2] = value
            matrix.loc[assessor2, assessor1] = value

    return icc_matrix_types
def generate_heatmaps(icc_matrix_types):
    """
    Render one annotated heatmap per ICC matrix and save it as a PNG.

    Args:
        icc_matrix_types: Mapping of ICC type name (e.g. "ICC1") to a square
            assessor-by-assessor matrix.

    Returns:
        A dict mapping each ICC type name to the path of its saved image,
        "icc_matrix_<type>.png" inside HEATMAPS_FOLDER. An empty mapping
        yields an empty dict.
    """
    heatmap_files = {}
    for icc_type, icc_matrix in icc_matrix_types.items():
        heatmap_file = os.path.join(HEATMAPS_FOLDER, f"icc_matrix_{icc_type}.png")
        fig = plt.figure(figsize=(8, 6))
        try:
            sns.heatmap(icc_matrix.astype(float), annot=True, cmap="coolwarm", linewidths=0.5, fmt=".2f")
            plt.title(f"Assessor ICC Matrix ({icc_type})")
            plt.xlabel("Assessor (LLM)")
            plt.ylabel("Assessor (LLM)")
            plt.xticks(rotation=45)
            plt.yticks(rotation=0)
            plt.savefig(heatmap_file)
        finally:
            # Close this figure even when drawing or saving fails; otherwise
            # it stays registered with pyplot and is never released.
            plt.close(fig)
        heatmap_files[icc_type] = heatmap_file
    return heatmap_files