AshmithaIRRI committed on
Commit 5214b25 · verified · 1 Parent(s): 122d978

Update app.py

Files changed (1)
  1. app.py +184 -116
app.py CHANGED
@@ -1,17 +1,10 @@
  # -*- coding: utf-8 -*-
  """
- Created on Fri Jan 31 13:24:37 2025

  @author: Ashmitha
  """
-
- import tensorflow as tf
- from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization
- from tensorflow.keras.optimizers import Adam
- from tensorflow.keras.models import Model
- from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
- from sklearn.preprocessing import MinMaxScaler
- import pandas as pd
  import pandas as pd
  import numpy as np
  import gradio as gr
@@ -36,123 +29,195 @@ from sklearn.feature_selection import SelectFromModel
  import tempfile
  import matplotlib.pyplot as plt
  import seaborn as sns

- import os
- import tempfile
-
- # Set a new temp directory inside /home/user
- os.environ["GRADIO_CACHE"] = "/home/user/tmp"
- tempfile.tempdir = "/home/user/tmp"
-
- # Ensure the directory exists
- os.makedirs(tempfile.tempdir, exist_ok=True)
-
- # Positional Encoding Function
- def positional_encoding(seq_len, d_model):
-     pos = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
-     div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * (-tf.math.log(10000.0) / d_model))
-     pos_encoding = tf.concat([tf.sin(pos * div_term), tf.cos(pos * div_term)], axis=-1)
-     return pos_encoding[tf.newaxis, ...]
-
- # Multi-Head Self-Attention Layer
- class MultiHeadSelfAttention(tf.keras.layers.Layer):
-     def __init__(self, embed_dim, num_heads):
-         super().__init__()
-         self.num_heads = num_heads
-         self.embed_dim = embed_dim
-         assert embed_dim % num_heads == 0, "Embedding dimension must be divisible by number of heads"
-
-         self.depth = embed_dim // num_heads
-         self.wq = Dense(embed_dim)
-         self.wk = Dense(embed_dim)
-         self.wv = Dense(embed_dim)
-         self.dense = Dense(embed_dim)
-
-     def split_heads(self, x, batch_size):
-         x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
-         return tf.transpose(x, perm=[0, 2, 1, 3])  # (batch_size, num_heads, seq_length, depth)
-
-     def call(self, inputs):
-         batch_size = tf.shape(inputs)[0]
-         q = self.split_heads(self.wq(inputs), batch_size)
-         k = self.split_heads(self.wk(inputs), batch_size)
-         v = self.split_heads(self.wv(inputs), batch_size)
-
-         attention_scores = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(float(self.depth))
-         attention_weights = tf.nn.softmax(attention_scores, axis=-1)
-         attention_output = tf.matmul(attention_weights, v)
-
-         attention_output = tf.transpose(attention_output, perm=[0, 2, 1, 3])
-         concat_attention = tf.reshape(attention_output, (batch_size, -1, self.embed_dim))
-         output = self.dense(concat_attention)
-         return output
-
- # Transformer Block
- class TransformerBlock(tf.keras.layers.Layer):
-     def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.1):
-         super().__init__()
-         self.att = MultiHeadSelfAttention(embed_dim, num_heads)
-         self.norm1 = LayerNormalization(epsilon=1e-6)
-         self.norm2 = LayerNormalization(epsilon=1e-6)
-         self.ffn = tf.keras.Sequential([
-             Dense(ff_dim, activation="relu"),
-             Dense(embed_dim),
-         ])
-         self.dropout1 = Dropout(dropout_rate)
-         self.dropout2 = Dropout(dropout_rate)
-
-     def call(self, inputs, training):
-         attn_output = self.att(inputs)
-         attn_output = self.dropout1(attn_output, training=training)
-         out1 = self.norm1(inputs + attn_output)
-
-         ffn_output = self.ffn(out1)
-         ffn_output = self.dropout2(ffn_output, training=training)
-         return self.norm2(out1 + ffn_output)
-
- # Transformer Model
- def TransformerModel(trainX, trainy, testX, testy, embed_dim=128, num_heads=8, ff_dim=256,
-                      epochs=1, batch_size=64, learning_rate=0.0001, dropout_rate=0.3):
-
-     # Feature Scaling
      scaler = MinMaxScaler()
      trainX_scaled = scaler.fit_transform(trainX)
      testX_scaled = scaler.transform(testX) if testX is not None else None

-     # Ensure correct input shape
-     seq_len = trainX.shape[1]
-
-     # Define Model
-     inputs = Input(shape=(seq_len, 1))  # Input reshaped to (batch, seq_len, 1)
-     x = Dense(embed_dim)(inputs)  # Feature projection
-     pos_encoding = positional_encoding(seq_len, embed_dim)
-     x += tf.broadcast_to(pos_encoding, tf.shape(x))  # Ensure shape compatibility

-     # Transformer Blocks
-     for _ in range(3):
-         x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)

-     x = Dense(64, activation="relu")(x)
-     x = Dropout(dropout_rate)(x)
-     outputs = Dense(1, activation="linear")(tf.reduce_mean(x, axis=1))  # Reduce along sequence length

-     model = Model(inputs, outputs)
-     model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate), metrics=["mse"])

      # Callbacks
-     lr_reduction = ReduceLROnPlateau(monitor="val_loss", patience=5, factor=0.5, min_lr=1e-6, verbose=1)
-     early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=1)

-     # Train Model
-     history = model.fit(trainX_scaled[..., np.newaxis], trainy, validation_split=0.1,
-                         epochs=epochs, batch_size=batch_size, callbacks=[lr_reduction, early_stopping], verbose=1)

      # Predictions
-     predicted_train = model.predict(trainX_scaled[..., np.newaxis]).flatten()
-     predicted_test = model.predict(testX_scaled[..., np.newaxis]).flatten() if testX is not None else None

      return predicted_train, predicted_test, history

  def calculate_topsis_score(df):
      # Normalize the data
      norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
@@ -172,6 +237,7 @@ def calculate_topsis_score(df):
      df['TOPSIS_Score'] = topsis_score

      return df
  def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
                                 training_dominance, testing_dominance, epochs, learning_rate, min_child_weight, batch_size=64,
                                 outer_n_splits=2, output_file='cross_validation_results.csv',
@@ -214,8 +280,10 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
          return mse, rmse, r2, corr

      models = [
-
-         ('TransformerModel', TransformerModel)
      ]

      for outer_fold, (outer_train_index, outer_test_index) in enumerate(outer_kf.split(phenotypic_info), 1):
@@ -242,10 +310,10 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t

          for model_name, model_func in models:
              print(f"Running model: {model_name} for fold {outer_fold}")
-             if model_name in ['TransformerModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, epochs=epochs, batch_size=batch_size)
-             #elif model_name in ['RFModel']:
-             #    predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
              else:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate, min_child_weight)
@@ -333,7 +401,7 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
                          training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight):

      # Default parameters
-     epochs = 1
      batch_size = 64
      outer_n_splits = 2
 
  # -*- coding: utf-8 -*-
  """
+ Created on Fri Jan 31 14:12:26 2025

  @author: Ashmitha
  """
+ #-------------------------------------Libraries-------------------------
  import pandas as pd
  import numpy as np
  import gradio as gr
 
  import tempfile
  import matplotlib.pyplot as plt
  import seaborn as sns
+ #--------------------------------------------------------------------FNNModel----------------------------------------------------
+ def FNNModel(trainX, trainy, testX=None, testy=None, epochs=1000, batch_size=64, learning_rate=0.0001,
+              l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
+
+     # Scale the input data
      scaler = MinMaxScaler()
      trainX_scaled = scaler.fit_transform(trainX)
      testX_scaled = scaler.transform(testX) if testX is not None else None

+     # Scale the target variable
+     target_scaler = MinMaxScaler()
+     trainy_scaled = target_scaler.fit_transform(trainy.reshape(-1, 1))
+
+     # Model definition
+     model = Sequential()
+
+     # Input Layer
+     model.add(Dense(512, input_shape=(trainX.shape[1],), kernel_initializer='he_normal',
+                     kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(BatchNormalization())
+     model.add(Dropout(dropout_rate))
+     model.add(LeakyReLU(alpha=0.1))
+
+     # Hidden Layers
+     model.add(Dense(256, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(BatchNormalization())
+     model.add(Dropout(dropout_rate))
+     model.add(LeakyReLU(alpha=0.1))

+     model.add(Dense(128, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(BatchNormalization())
+     model.add(Dropout(dropout_rate))
+     model.add(LeakyReLU(alpha=0.1))

+     model.add(Dense(64, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(BatchNormalization())
+     model.add(Dropout(dropout_rate))
+     model.add(LeakyReLU(alpha=0.1))

+     model.add(Dense(32, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(BatchNormalization())
+     model.add(Dropout(dropout_rate))
+     model.add(LeakyReLU(alpha=0.1))
+
+     # Output Layer
+     model.add(Dense(1, activation="relu"))
+
+     # Compile Model
+     model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])

      # Callbacks
+     callbacks = [
+         ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=1, factor=0.5, min_lr=1e-6),
+         EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)
+     ]

+     # Train model
+     history = model.fit(trainX_scaled, trainy_scaled, epochs=epochs, batch_size=batch_size, validation_split=0.1,
+                         verbose=1, callbacks=callbacks)

      # Predictions
+     predicted_train = model.predict(trainX_scaled).flatten()
+     predicted_test = model.predict(testX_scaled).flatten() if testX is not None else None
+
+     # Inverse transform predictions
+     predicted_train = target_scaler.inverse_transform(predicted_train.reshape(-1, 1)).flatten()
+     if predicted_test is not None:
+         predicted_test = target_scaler.inverse_transform(predicted_test.reshape(-1, 1)).flatten()
+
+     return predicted_train, predicted_test, history
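A minimal usage sketch for the new FNNModel (the array shapes and names below are illustrative only, and it assumes the Keras/sklearn imports elided from this hunk are in scope):

import numpy as np

# Hypothetical genotype matrix: 200 lines x 500 markers, continuous phenotype
trainX = np.random.randint(0, 3, size=(200, 500)).astype(float)
trainy = np.random.rand(200)
testX = np.random.randint(0, 3, size=(50, 500)).astype(float)

# Two epochs only, to keep the sketch quick; the function's default is 1000 with early stopping
pred_train, pred_test, history = FNNModel(trainX, trainy, testX=testX,
                                          epochs=2, batch_size=32)
print(pred_test[:5])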
+ #--------------------------------------------------CNNModel-------------------------------------------
+ def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3, feature_selection=True):
+
+     # Scaling the inputs
+     scaler = MinMaxScaler()
+     trainX_scaled = scaler.fit_transform(trainX)
+     if testX is not None:
+         testX_scaled = scaler.transform(testX)
+
+     # Reshape for CNN input (samples, features, channels)
+     trainX = trainX_scaled.reshape((trainX.shape[0], trainX.shape[1], 1))
+     if testX is not None:
+         testX = testX_scaled.reshape((testX.shape[0], testX.shape[1], 1))
+
+     model = Sequential()
+
+     # Convolutional layers
+     model.add(Conv1D(512, kernel_size=3, activation='relu', input_shape=(trainX.shape[1], 1), kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(MaxPooling1D(pool_size=2))
+     model.add(Dropout(dropout_rate))
+
+     model.add(Conv1D(256, kernel_size=3, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(MaxPooling1D(pool_size=2))
+     model.add(Dropout(dropout_rate))

+     model.add(Conv1D(128, kernel_size=3, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(MaxPooling1D(pool_size=2))
+     model.add(Dropout(dropout_rate))
+
+     # Flatten and Dense layers
+     model.add(Flatten())
+     model.add(Dense(64, kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+     model.add(LeakyReLU(alpha=0.1))
+     model.add(Dropout(dropout_rate))
+
+     model.add(Dense(1, activation='linear'))
+
+     # Compile the model
+     model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])
+
+     # Callbacks
+     learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5, min_lr=1e-6)
+     early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)
+
+     # Train the model
+     history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1,
+                         callbacks=[learning_rate_reduction, early_stopping])
+
+     predicted_train = model.predict(trainX).flatten()
+     predicted_test = model.predict(testX).flatten() if testX is not None else None
+
      return predicted_train, predicted_test, history
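The reshape step above is what lets a flat marker matrix feed the Conv1D stack; a small illustration with hypothetical shapes:

import numpy as np

X = np.random.rand(100, 1200)                    # (samples, markers)
X_cnn = X.reshape((X.shape[0], X.shape[1], 1))   # (samples, markers, channels)
print(X_cnn.shape)                               # (100, 1200, 1)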
+ #------------------------------------------RFModel---------------------------------------------------
+ def RFModel(trainX, trainy, testX, testy, n_estimators=100, max_depth=None, feature_selection=True):
+
+     # Log transformation of the target variable
+
+     # Scaling the feature data
+     scaler = MinMaxScaler()
+     trainX_scaled = scaler.fit_transform(trainX)
+     if testX is not None:
+         testX_scaled = scaler.transform(testX)
+
+     # Define and train the RandomForest model
+     rf_model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
+     history = rf_model.fit(trainX_scaled, trainy)
+
+     # Predictions
+     predicted_train = rf_model.predict(trainX_scaled)
+     predicted_test = rf_model.predict(testX_scaled) if testX is not None else None
+
+     return predicted_train, predicted_test, history
+ #-------------------------------------------------XGBoost--------------------------------------------
+ def XGBoostModel(trainX, trainy, testX, testy, learning_rate, min_child_weight, feature_selection=True, n_estimators=100, max_depth=None):
+
+     # Scale the features
+     scaler = MinMaxScaler()
+     trainX_scaled = scaler.fit_transform(trainX)
+     if testX is not None:
+         testX_scaled = scaler.transform(testX)
+
+     xgb_model = XGBRegressor(objective="reg:squarederror", random_state=42)
+     history = xgb_model.fit(trainX, trainy)
+     #param_grid = {
+     #    "learning_rate": 0.01,
+     #    "max_depth": 10,
+     #    "n_estimators": 100,
+     #    "min_child_weight": 10
+     #}
+
+     # Predictions
+     predicted_train = xgb_model.predict(trainX_scaled)
+     predicted_test = xgb_model.predict(testX_scaled) if testX is not None else None
+
+     return predicted_train, predicted_test, history
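The commented-out param_grid above is never applied, so XGBRegressor is fit with its defaults. If those values were intended, one option (standard XGBoost keyword arguments, not code from this commit) is to pass them to the constructor:

from xgboost import XGBRegressor

xgb_model = XGBRegressor(objective="reg:squarederror", random_state=42,
                         learning_rate=0.01, max_depth=10,
                         n_estimators=100, min_child_weight=10)
xgb_model.fit(trainX_scaled, trainy)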
+ #------------------------------------------------------------------File--------------------------------------------
+ def read_csv_file(uploaded_file):
+     if uploaded_file is not None:
+         if hasattr(uploaded_file, 'data'):    # For NamedBytes
+             return pd.read_csv(io.BytesIO(uploaded_file.data))
+         elif hasattr(uploaded_file, 'name'):  # For NamedString
+             return pd.read_csv(uploaded_file.name)
+     return None
+
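read_csv_file covers the two ways Gradio has handed uploads to a callback: an object carrying raw bytes (.data) or one carrying a temp-file path (.name). A hedged wiring sketch (component choices are illustrative; the exact upload object type depends on the Gradio version):

def preview_csv(uploaded_file):
    df = read_csv_file(uploaded_file)
    return df.head() if df is not None else None

demo = gr.Interface(fn=preview_csv, inputs=gr.File(label="CSV file"), outputs=gr.Dataframe())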

+ #_-------------------------------------------------------------NestedKFold Cross Validation---------------------
  def calculate_topsis_score(df):
      # Normalize the data
      norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())

      df['TOPSIS_Score'] = topsis_score

      return df
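Only the normalization and the final score assignment are visible in this hunk; the elided middle of calculate_topsis_score presumably follows the standard (unweighted) TOPSIS recipe, sketched generically below rather than copied from the commit:

import numpy as np

ideal      = norm_df.max()                                     # best value per criterion
anti_ideal = norm_df.min()                                     # worst value per criterion
d_plus  = np.sqrt(((norm_df - ideal) ** 2).sum(axis=1))        # distance to the ideal point
d_minus = np.sqrt(((norm_df - anti_ideal) ** 2).sum(axis=1))   # distance to the anti-ideal point
topsis_score = d_minus / (d_plus + d_minus)                    # closer to 1 = closer to the ideal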
+ #_-------------------------------------------------------------NestedKFold Cross Validation---------------------
  def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
                                 training_dominance, testing_dominance, epochs, learning_rate, min_child_weight, batch_size=64,
                                 outer_n_splits=2, output_file='cross_validation_results.csv',

          return mse, rmse, r2, corr

      models = [
+         ('FNNModel', FNNModel),
+         ('CNNModel', CNNModel),
+         ('RFModel', RFModel),
+         ('XGBoostModel', XGBoostModel)
      ]

      for outer_fold, (outer_train_index, outer_test_index) in enumerate(outer_kf.split(phenotypic_info), 1):
 
          for model_name, model_func in models:
              print(f"Running model: {model_name} for fold {outer_fold}")
+             if model_name in ['FNNModel', 'CNNModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, epochs=epochs, batch_size=batch_size)
+             elif model_name in ['RFModel']:
+                 predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
              else:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate, min_child_weight)
 
                          training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight):

      # Default parameters
+     epochs = 1000
      batch_size = 64
      outer_n_splits = 2