KPLabs committed on
Commit
72f5e8c
·
verified ·
1 Parent(s): 08ee3a8

Create pca.py

Browse files
Files changed (1) hide show
  1. pca.py +33 -0
pca.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fit a StandardScaler + PCA pipeline on hyperspectral training data.

Loads every masked-array ``.npz`` in ``DATA_DIR``, flattens each cube to a
(pixels, bands) matrix, stacks all cubes, standardizes the features, fits a
PCA, and saves both fitted models together in ``pca_pipeline.pkl`` so that
inference can apply the identical preprocessing.
"""

import os

import joblib
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm  # plain tqdm works in scripts, not only notebooks

# Directory containing your .npz files
DATA_DIR = "./train_data"  # change this to your directory path
N_BANDS = 150  # spectral bands per cube (assumes band axis first — TODO confirm)
N_COMPONENTS = 16  # change number of components as needed


def _load_pixels(data_dir: str, n_bands: int = N_BANDS) -> np.ndarray:
    """Return an (n_pixels, n_bands) array of all unmasked pixels in data_dir.

    Each ``.npz`` is expected to hold ``MaskedArray`` kwargs (data/mask).
    Rows containing any masked band are dropped: the original code's comment
    claimed masked values were removed, but ``np.vstack`` silently discards
    the mask — ``np.ma.compress_rows`` actually removes them.

    Raises:
        FileNotFoundError: if ``data_dir`` contains no ``.npz`` files.
    """
    data_list = []
    # sorted() makes the stacking order (and thus the fit) deterministic
    for file in tqdm(sorted(os.listdir(data_dir))):
        if not file.endswith(".npz"):
            continue
        hsi_path = os.path.join(data_dir, file)
        with np.load(hsi_path) as npz:
            arr = np.ma.MaskedArray(**npz)
        # (bands, H, W) -> (H*W, bands), then drop rows with masked values
        pixels = arr.reshape(n_bands, -1).transpose()
        data_list.append(np.ma.compress_rows(pixels))
    if not data_list:
        raise FileNotFoundError(f"no .npz files found in {data_dir!r}")
    return np.vstack(data_list)


def main() -> None:
    """Fit scaler + PCA on all training pixels and persist both models."""
    x = _load_pixels(DATA_DIR)
    print("\n\n")
    print(x.shape)

    # Apply standard scaling before PCA so every band contributes equally
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)

    # Fit PCA
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(x_scaled)

    # Save both scaler and PCA model as one artifact
    joblib.dump({"scaler": scaler, "pca": pca}, "pca_pipeline.pkl")


if __name__ == "__main__":
    main()