Create pca.py
pca.py
ADDED
import os

import joblib  # import joblib directly (no longer bundled as sklearn.externals.joblib)
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tqdm.auto import tqdm  # picks the notebook or console progress bar automatically

# Directory containing your .npz files
data_dir = "./train_data"  # change this to your directory path

# Collect all arrays from the .npz files; each file holds a masked
# hyperspectral cube with 150 spectral bands
data_list = []
for file in tqdm(os.listdir(data_dir)):
    if file.endswith(".npz"):
        hsi_path = os.path.join(data_dir, file)
        with np.load(hsi_path) as npz:
            arr = np.ma.MaskedArray(**npz)
        # Flatten to (pixels, 150) and drop any pixel with masked values
        pixels = arr.reshape(150, -1).transpose()
        valid = ~np.ma.getmaskarray(pixels).any(axis=1)
        data_list.append(np.asarray(pixels[valid]))

# Stack all into a single (n_pixels, 150) dataset
x = np.vstack(data_list)
print("\n\n")
print(x.shape)

# Apply standard scaling so every band has zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(x)

# Fit PCA
pca = PCA(n_components=16)  # change number of components as needed
pca.fit(X_scaled)

# Save both scaler and PCA model so the full pipeline can be reapplied later
joblib.dump({"scaler": scaler, "pca": pca}, "pca_pipeline.pkl")
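For completeness, a minimal sketch of how the saved pipeline might be reused on unseen data. The file name new_cube.npz is hypothetical, and the sketch assumes the same 150-band masked-cube layout as the training files:

import joblib
import numpy as np

# Load the fitted scaler and PCA saved by pca.py
bundle = joblib.load("pca_pipeline.pkl")
scaler, pca = bundle["scaler"], bundle["pca"]

# Sanity check: total variance retained by the 16 components
print("explained variance retained:", pca.explained_variance_ratio_.sum())

# Project a new masked cube (hypothetical input file) into PCA space
with np.load("new_cube.npz") as npz:
    cube = np.ma.MaskedArray(**npz)

pixels = cube.reshape(150, -1).transpose()       # (n_pixels, 150)
valid = ~np.ma.getmaskarray(pixels).any(axis=1)  # keep fully unmasked pixels
X_new = pca.transform(scaler.transform(np.asarray(pixels[valid])))
print(X_new.shape)                               # (n_valid_pixels, 16)

Transforming with the same StandardScaler that was fit on the training pixels is what makes the saved bundle self-contained: scaling new data with a freshly fit scaler would shift it relative to the PCA basis.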