Update app.py
app.py CHANGED
@@ -1,3 +1,10 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jan 28 09:12:48 2025
+
+@author: Ashmitha
+"""
+
 # -*- coding: utf-8 -*-
 """
 Created on Sun Nov 24 12:47:37 2024
@@ -55,22 +62,14 @@ def RandomForestFeatureSelection(trainX, trainy, num_features=60):
     indices = np.argsort(importances)[-num_features:]
     return indices
 #----------------------------------------------------------GRU Model---------------------------------------------------------------------
-
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import GRU, Dense, BatchNormalization, Dropout, LeakyReLU
-from tensorflow.keras.optimizers import Adam
-from tensorflow.keras import regularizers
-from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.feature_selection import SelectFromModel
+

 def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2, feature_selection=True):

     # Apply feature selection using Random Forest Regressor
     if feature_selection:
         # Use RandomForestRegressor to rank features by importance
-        rf = RandomForestRegressor(n_estimators=100, random_state=
+        rf = RandomForestRegressor(n_estimators=100, random_state=42)
         rf.fit(trainX, trainy)

         # Select features with importance greater than a threshold (e.g., mean importance)
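Note: the Keras and scikit-learn imports removed above presumably already live near the top of app.py, since GRUModel still uses RandomForestRegressor in the unchanged context. As a rough sketch of the kind of regressor these layers imply (layer widths and defaults are illustrative assumptions, not this Space's actual GRUModel):

# A minimal, illustrative GRU regressor built from the layers imported above.
# Widths and hyperparameter defaults are assumptions for the sketch only.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

def build_gru(n_timesteps, n_features, learning_rate=0.0001,
              l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
    model = Sequential()
    model.add(GRU(64, input_shape=(n_timesteps, n_features),
                  kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(BatchNormalization())
    model.add(Dense(32))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))  # one continuous phenotype value
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model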
@@ -184,10 +183,6 @@ def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     model = Sequential()

     # Convolutional layers
-    model.add(Conv1D(512, kernel_size=3, activation='relu', input_shape=(trainX.shape[1], 1), kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
-    model.add(MaxPooling1D(pool_size=2))
-    model.add(Dropout(dropout_rate))
-
     model.add(Conv1D(256, kernel_size=3, activation='relu', input_shape=(trainX.shape[1], 1), kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(MaxPooling1D(pool_size=2))
     model.add(Dropout(dropout_rate))
@@ -195,14 +190,10 @@ def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     model.add(Conv1D(128, kernel_size=3, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(MaxPooling1D(pool_size=2))
     model.add(Dropout(dropout_rate))
-
-    model.add(Conv1D(64, kernel_size=3, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
-    model.add(MaxPooling1D(pool_size=2))
-    model.add(Dropout(dropout_rate))

     # Flatten and Dense layers
     model.add(Flatten())
-    model.add(Dense(
+    model.add(Dense(64, kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(LeakyReLU(alpha=0.1))
     model.add(Dropout(dropout_rate))

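Note: each MaxPooling1D(pool_size=2) halves the sequence length, and each Conv1D with kernel_size=3 and Keras's default 'valid' padding trims two steps, so dropping the 512- and 64-filter blocks also preserves temporal resolution on short inputs. A quick self-contained shape check, assuming an input of about 60 steps (roughly what the feature selector keeps):

# Track sequence length through a Conv1D (kernel_size=3, 'valid' padding)
# plus MaxPooling1D (pool_size=2) stack.
def out_len(n_steps, n_blocks, kernel_size=3, pool_size=2):
    for _ in range(n_blocks):
        n_steps -= kernel_size - 1  # Conv1D with 'valid' padding
        n_steps //= pool_size       # MaxPooling1D
    return n_steps

print(out_len(60, 4))  # 1  -> the old four-block stack leaves a single step
print(out_len(60, 2))  # 13 -> the remaining two blocks keep more resolution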
@@ -320,45 +311,57 @@ def read_csv_file(uploaded_file):

 #-----------------------------------------------------------------calculate topsis score--------------------------------------------------------

-
 def calculate_topsis_score(df):
-    # Normalize the
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Calculate Euclidean distance to ideal best and worst
-    dist_to_best = np.sqrt(((norm_metrics - ideal_best) ** 2).sum(axis=1))
-    dist_to_worst = np.sqrt(((norm_metrics - ideal_worst) ** 2).sum(axis=1))
-
-    # Calculate TOPSIS score
-    topsis_score = dist_to_worst / (dist_to_best + dist_to_worst)
-    df['TOPSIS_Score'] = np.nan  # Initialize with NaN
-    df.loc[metrics.index, 'TOPSIS_Score'] = topsis_score  # Assign TOPSIS scores
+    # Normalize the data
+    norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
+
+    # Calculate the positive and negative ideal solutions
+    ideal_positive = norm_df.max(axis=0)
+    ideal_negative = norm_df.min(axis=0)
+
+    # Calculate the Euclidean distances
+    dist_positive = np.sqrt(((norm_df - ideal_positive) ** 2).sum(axis=1))
+    dist_negative = np.sqrt(((norm_df - ideal_negative) ** 2).sum(axis=1))
+
+    # Calculate the TOPSIS score
+    topsis_score = dist_negative / (dist_positive + dist_negative)
+
+    # Add the TOPSIS score to the dataframe
+    df['TOPSIS_Score'] = topsis_score
+
     return df
+# Calculate the TOPSIS score for the average metrics
+

 #--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------

-def NestedKFoldCrossValidation(
-
-
-
-
-
+def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
+                               training_dominance, testing_dominance, epochs, learning_rate, min_child_weight, batch_size=64,
+                               outer_n_splits=2, output_file='cross_validation_results.csv',
+                               predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True):
+
+    # Define calculate_topsis_score before using it
+    def calculate_topsis_score(df):
+        # Normalize the data
+        norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
+
+        # Calculate the positive and negative ideal solutions
+        ideal_positive = norm_df.max(axis=0)
+        ideal_negative = norm_df.min(axis=0)
+
+        # Calculate the Euclidean distances
+        dist_positive = np.sqrt(((norm_df - ideal_positive) ** 2).sum(axis=1))
+        dist_negative = np.sqrt(((norm_df - ideal_negative) ** 2).sum(axis=1))
+
+        # Calculate the TOPSIS score
+        topsis_score = dist_negative / (dist_positive + dist_negative)
+
+        # Add the TOPSIS score to the dataframe
+        df['TOPSIS_Score'] = topsis_score
+
+        return df

+    # Original function logic continues here
     if 'phenotypes' not in training_data.columns:
         raise ValueError("Training data does not contain the 'phenotypes' column.")

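Note: the rewritten helper min-max normalizes each metric column, then scores each model as dist_negative / (dist_positive + dist_negative), i.e. its relative closeness to the column-wise best values. One caveat worth flagging: classic TOPSIS treats error metrics such as MSE and RMSE as cost criteria (lower is better), so those columns would need inverting before the column maximum can serve as the ideal point. A tiny worked example with illustrative numbers:

import numpy as np
import pandas as pd

# Two models, two benefit criteria (higher is better). With only two rows,
# min-max normalization maps each column to {0, 1}, so the scores are 1 and 0.
df = pd.DataFrame({'Model': ['GRU', 'CNN'],
                   'Test_R2': [0.80, 0.60],
                   'Test_Corr': [0.90, 0.75]})

norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
ideal_positive = norm_df.max(axis=0)  # best value per column
ideal_negative = norm_df.min(axis=0)  # worst value per column
dist_positive = np.sqrt(((norm_df - ideal_positive) ** 2).sum(axis=1))
dist_negative = np.sqrt(((norm_df - ideal_negative) ** 2).sum(axis=1))
df['TOPSIS_Score'] = dist_negative / (dist_positive + dist_negative)
print(df)  # GRU -> 1.0, CNN -> 0.0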
@@ -381,7 +384,7 @@ def NestedKFoldCrossValidation(

     # Feature selection
     if feature_selection:
-        rf = RandomForestRegressor(n_estimators=
+        rf = RandomForestRegressor(n_estimators=100, random_state=60)
         rf.fit(training_genotypic_data_merged, phenotypic_info)
         selector = SelectFromModel(rf, threshold="mean", prefit=True)
         training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
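Note: isolated from this hunk, the selection step works as in the sketch below (synthetic data for illustration; threshold="mean" keeps features whose importance exceeds the average importance, and prefit=True reuses the already-fitted forest instead of refitting):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 50))          # 100 samples, 50 markers
y = 2 * X[:, 0] + rng.normal(size=100)  # only the first marker is informative

rf = RandomForestRegressor(n_estimators=100, random_state=60)
rf.fit(X, y)

selector = SelectFromModel(rf, threshold="mean", prefit=True)
X_selected = selector.transform(X)
print(X.shape, '->', X_selected.shape)  # far fewer than 50 columns survive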
@@ -455,44 +458,33 @@ def NestedKFoldCrossValidation(

     # Compile results
     results_df = pd.DataFrame(results)
-    avg_results_df = results_df.groupby('Model').agg({
-        'Train_MSE': 'mean',
-        'Train_RMSE': 'mean',
-        'Train_R2': 'mean',
-        'Train_Corr': 'mean',
-        'Test_MSE': 'mean',
-        'Test_RMSE': 'mean',
-        'Test_R2': 'mean',
-        'Test_Corr': 'mean'
-    }).reset_index()
-
-    # Calculate the TOPSIS score for the average metrics (considering only MSE, RMSE, R², and Correlation)
-    def calculate_topsis_score(df):
-        # Normalize the data
-        norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
-
-        # Calculate the positive and negative ideal solutions
-        ideal_positive = norm_df.max(axis=0)
-        ideal_negative = norm_df.min(axis=0)
-
-        # Calculate the Euclidean distances
-        dist_positive = np.sqrt(((norm_df - ideal_positive) ** 2).sum(axis=1))
-        dist_negative = np.sqrt(((norm_df - ideal_negative) ** 2).sum(axis=1))
-
-        # Calculate the TOPSIS score
-        topsis_score = dist_negative / (dist_positive + dist_negative)

-
-
-
-
+    # Calculate the average metrics for each model
+    if 'phenotypes' in testing_data.columns:
+        avg_results_df = results_df.groupby('Model').agg({
+            'Train_MSE': 'mean',
+            'Train_RMSE': 'mean',
+            'Train_R2': 'mean',
+            'Train_Corr': 'mean',
+            'Test_MSE': 'mean',
+            'Test_RMSE': 'mean',
+            'Test_R2': 'mean',
+            'Test_Corr': 'mean'
+        }).reset_index()
+    else:
+        avg_results_df = results_df.groupby('Model').agg({
+            'Train_MSE': 'mean',
+            'Train_RMSE': 'mean',
+            'Train_R2': 'mean',
+            'Train_Corr': 'mean'
+        }).reset_index()

     avg_results_df = calculate_topsis_score(avg_results_df)

     # Save the results with TOPSIS scores to the file
     avg_results_df.to_csv(output_file, index=False)

-# Save predicted phenotypes
+    # Save predicted phenotypes
     if all_predicted_phenotypes:
         predicted_all_df = pd.concat(all_predicted_phenotypes, axis=0, ignore_index=True)
         predicted_all_df.to_csv(predicted_phenotype_file, index=False)
@@ -500,15 +492,6 @@ def NestedKFoldCrossValidation(
     return avg_results_df, predicted_all_df if all_predicted_phenotypes else None


-    # Save the results to the file
-    #results_df.to_csv(output_file, index=False)
-
-    # Save predicted phenotypes
-    #if all_predicted_phenotypes:
-        # predicted_all_df = pd.concat(all_predicted_phenotypes, axis=0, ignore_index=True)
-        #predicted_all_df.to_csv(predicted_phenotype_file, index=False)
-
-    # return results_df, predicted_all_df if all_predicted_phenotypes else None

 #--------------------------------------------------------------------Gradio interface---------------------------------------------------------------

@@ -519,7 +502,7 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
     epochs = 1000
     batch_size = 64
     outer_n_splits = 2
-    inner_n_splits = 2
+    #inner_n_splits = 2
     min_child_weight=5
     learning_rate=0.001
     #learning_rate=learning_rate
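Note: with inner_n_splits commented out here (and in the call below), the procedure is effectively a single-level outer k-fold rather than a truly nested one. For reference, nested cross-validation tunes hyperparameters inside each outer fold; a minimal sketch with an illustrative model and grid, not this Space's models:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, KFold

X, y = np.random.rand(40, 10), np.random.rand(40)
outer_cv = KFold(n_splits=2, shuffle=True, random_state=60)

scores = []
for train_idx, test_idx in outer_cv.split(X):
    # Inner loop: tune hyperparameters on the outer-training portion only
    search = GridSearchCV(RandomForestRegressor(random_state=60),
                          param_grid={'n_estimators': [50, 100]},
                          cv=KFold(n_splits=2, shuffle=True, random_state=60))
    search.fit(X[train_idx], y[train_idx])
    # Outer loop: score the tuned model on held-out data
    scores.append(search.score(X[test_idx], y[test_idx]))
print(np.mean(scores))  # estimate of tuned-model performance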
@@ -543,7 +526,7 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
         testing_dominance=testing_dominance,
         epochs=epochs,
         batch_size=batch_size,
-
+        outer_n_splits=outer_n_splits,
         #inner_n_splits=inner_n_splits,
         learning_rate=learning_rate,
         min_child_weight=min_child_weight,
@@ -591,6 +574,3 @@ with gr.Blocks() as interface:

 # Launch the interface
 interface.launch()
-
-
-
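Note: interface is the gr.Blocks app built earlier in the file. For context, a minimal skeleton following the same pattern (the file input and callback are illustrative stand-ins for this Space's actual components):

import gradio as gr

def run(training_file):
    # Illustrative stand-in for run_cross_validation
    return "no file" if training_file is None else f"received: {training_file}"

with gr.Blocks() as interface:
    training_file = gr.File(label="Training data (CSV)")
    status = gr.Textbox(label="Status")
    run_btn = gr.Button("Run cross-validation")
    run_btn.click(run, inputs=training_file, outputs=status)

# Launch the interface
interface.launch()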