Spaces:

orva06
/

IndoBERT-MultiPredict

Sleeping

App Files Files Community

orva06 committed 6 days ago

Commit

291773b

verified ·

1 Parent(s): 198772a

english cancel

Browse files

Files changed (1) hide show

app.py +5 -38

app.py CHANGED Viewed

@@ -22,24 +22,6 @@ def load_engine():
     eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
     return eng
-TOPIC_TRANSLATION = {
-    "Gaya": "Forces",
-    "Produksi pangan": "Food production",
-    "Listrik": "Electricity",
-    "Zat adiktif aditif": "Additives & addictive substances",
-    "Getaran": "Vibration",
-    "Objek ipa": "Science object",
-    "Gelombang": "Waves",
-    "Klasifikasi makhluk hidup": "Classification of living things",
-    "Materi perubahannya": "Matter & its changes",
-    "Suhu pemuaian": "Temperature & expansion",
-    "Bioteknologi": "Biotechnology",
-    "Materi dampaknya": "Matter & its impact",
-    "Sistem pernapasan manusia": "Human respiratory system",
-    "Gerak benda": "Motion of objects",
-    "Usaha pesawat sederhana": "Work & simple machines"
-}
 st.title("IndoBERT — Topic & Taxonomy")
 st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
@@ -56,18 +38,12 @@ with c1:
     if st.button("Predict question"):
         start = time.time()
         res = eng.predict_texts([text])[0]
-        # translate topic label to English
-        if res['topic_label'] in TOPIC_TRANSLATION:
-            res['topic_label_en'] = TOPIC_TRANSLATION[res['topic_label']]
-        else:
-            res['topic_label_en'] = res['topic_label']  # fallback
         runtime = time.time() - start
         # show result
         st.subheader("Result")
         if res['topic_conf'] < 0.5:
             st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
-        # st.metric("Topic", f"{res['topic_label']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
-        st.metric("Topic", f"{res['topic_label_en']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
         st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
         # optional: probability bar
         st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
@@ -98,21 +74,12 @@ with c2:
                 results = eng.predict_texts(texts)
                 elapsed = time.time() - t0
                 # join results into dataframe
-                out["topic_label_en"] = out["topic_label"].apply(
-                    lambda x: TOPIC_TRANSLATION.get(x, x)
-                )
                 out = pd.DataFrame(results)
-                # out = out.rename(columns={
-                #     "topic_label":"pred_topic",
-                #     "topic_conf":"pred_topic_conf",
-                #     "tax_label":"pred_tax",
-                #     "tax_conf":"pred_tax_conf"
-                # })
                 out = out.rename(columns={
-                    "topic_label_en": "pred_topic",
-                    "topic_conf": "pred_topic_conf",
-                    "tax_label": "pred_tax",
-                    "tax_conf": "pred_tax_conf"
                 })
                 # attach to original
                 df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)

     eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
     return eng
 st.title("IndoBERT — Topic & Taxonomy")
 st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
     if st.button("Predict question"):
         start = time.time()
         res = eng.predict_texts([text])[0]
         runtime = time.time() - start
         # show result
         st.subheader("Result")
         if res['topic_conf'] < 0.5:
             st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
+        st.metric("Topic", f"{res['topic_label']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
         st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
         # optional: probability bar
         st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
                 results = eng.predict_texts(texts)
                 elapsed = time.time() - t0
                 # join results into dataframe
                 out = pd.DataFrame(results)
                 out = out.rename(columns={
+                    "topic_label":"pred_topic",
+                    "topic_conf":"pred_topic_conf",
+                    "tax_label":"pred_tax",
+                    "tax_conf":"pred_tax_conf"
                 })
                 # attach to original
                 df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)