Spaces:
Sleeping
Sleeping
english cancel
Browse files
app.py
CHANGED
|
@@ -22,24 +22,6 @@ def load_engine():
|
|
| 22 |
eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
|
| 23 |
return eng
|
| 24 |
|
| 25 |
-
TOPIC_TRANSLATION = {
|
| 26 |
-
"Gaya": "Forces",
|
| 27 |
-
"Produksi pangan": "Food production",
|
| 28 |
-
"Listrik": "Electricity",
|
| 29 |
-
"Zat adiktif aditif": "Additives & addictive substances",
|
| 30 |
-
"Getaran": "Vibration",
|
| 31 |
-
"Objek ipa": "Science object",
|
| 32 |
-
"Gelombang": "Waves",
|
| 33 |
-
"Klasifikasi makhluk hidup": "Classification of living things",
|
| 34 |
-
"Materi perubahannya": "Matter & its changes",
|
| 35 |
-
"Suhu pemuaian": "Temperature & expansion",
|
| 36 |
-
"Bioteknologi": "Biotechnology",
|
| 37 |
-
"Materi dampaknya": "Matter & its impact",
|
| 38 |
-
"Sistem pernapasan manusia": "Human respiratory system",
|
| 39 |
-
"Gerak benda": "Motion of objects",
|
| 40 |
-
"Usaha pesawat sederhana": "Work & simple machines"
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
|
| 44 |
st.title("IndoBERT — Topic & Taxonomy")
|
| 45 |
st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
|
|
@@ -56,18 +38,12 @@ with c1:
|
|
| 56 |
if st.button("Predict question"):
|
| 57 |
start = time.time()
|
| 58 |
res = eng.predict_texts([text])[0]
|
| 59 |
-
# translate topic label to English
|
| 60 |
-
if res['topic_label'] in TOPIC_TRANSLATION:
|
| 61 |
-
res['topic_label_en'] = TOPIC_TRANSLATION[res['topic_label']]
|
| 62 |
-
else:
|
| 63 |
-
res['topic_label_en'] = res['topic_label'] # fallback
|
| 64 |
runtime = time.time() - start
|
| 65 |
# show result
|
| 66 |
st.subheader("Result")
|
| 67 |
if res['topic_conf'] < 0.5:
|
| 68 |
st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
|
| 69 |
-
|
| 70 |
-
st.metric("Topic", f"{res['topic_label_en']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
|
| 71 |
st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
|
| 72 |
# optional: probability bar
|
| 73 |
st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
|
|
@@ -98,21 +74,12 @@ with c2:
|
|
| 98 |
results = eng.predict_texts(texts)
|
| 99 |
elapsed = time.time() - t0
|
| 100 |
# join results into dataframe
|
| 101 |
-
out["topic_label_en"] = out["topic_label"].apply(
|
| 102 |
-
lambda x: TOPIC_TRANSLATION.get(x, x)
|
| 103 |
-
)
|
| 104 |
out = pd.DataFrame(results)
|
| 105 |
-
# out = out.rename(columns={
|
| 106 |
-
# "topic_label":"pred_topic",
|
| 107 |
-
# "topic_conf":"pred_topic_conf",
|
| 108 |
-
# "tax_label":"pred_tax",
|
| 109 |
-
# "tax_conf":"pred_tax_conf"
|
| 110 |
-
# })
|
| 111 |
out = out.rename(columns={
|
| 112 |
-
"
|
| 113 |
-
"topic_conf":
|
| 114 |
-
"tax_label":
|
| 115 |
-
"tax_conf":
|
| 116 |
})
|
| 117 |
# attach to original
|
| 118 |
df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)
|
|
|
|
| 22 |
eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
|
| 23 |
return eng
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
st.title("IndoBERT — Topic & Taxonomy")
|
| 27 |
st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
|
|
|
|
| 38 |
if st.button("Predict question"):
|
| 39 |
start = time.time()
|
| 40 |
res = eng.predict_texts([text])[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
runtime = time.time() - start
|
| 42 |
# show result
|
| 43 |
st.subheader("Result")
|
| 44 |
if res['topic_conf'] < 0.5:
|
| 45 |
st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
|
| 46 |
+
st.metric("Topic", f"{res['topic_label']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
|
|
|
|
| 47 |
st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
|
| 48 |
# optional: probability bar
|
| 49 |
st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
|
|
|
|
| 74 |
results = eng.predict_texts(texts)
|
| 75 |
elapsed = time.time() - t0
|
| 76 |
# join results into dataframe
|
|
|
|
|
|
|
|
|
|
| 77 |
out = pd.DataFrame(results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
out = out.rename(columns={
|
| 79 |
+
"topic_label":"pred_topic",
|
| 80 |
+
"topic_conf":"pred_topic_conf",
|
| 81 |
+
"tax_label":"pred_tax",
|
| 82 |
+
"tax_conf":"pred_tax_conf"
|
| 83 |
})
|
| 84 |
# attach to original
|
| 85 |
df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)
|