orva06 committed on
Commit
291773b
·
verified ·
1 Parent(s): 198772a

english cancel

Browse files
Files changed (1) hide show
  1. app.py +5 -38
app.py CHANGED
@@ -22,24 +22,6 @@ def load_engine():
22
  eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
23
  return eng
24
 
25
- TOPIC_TRANSLATION = {
26
- "Gaya": "Forces",
27
- "Produksi pangan": "Food production",
28
- "Listrik": "Electricity",
29
- "Zat adiktif aditif": "Additives & addictive substances",
30
- "Getaran": "Vibration",
31
- "Objek ipa": "Science object",
32
- "Gelombang": "Waves",
33
- "Klasifikasi makhluk hidup": "Classification of living things",
34
- "Materi perubahannya": "Matter & its changes",
35
- "Suhu pemuaian": "Temperature & expansion",
36
- "Bioteknologi": "Biotechnology",
37
- "Materi dampaknya": "Matter & its impact",
38
- "Sistem pernapasan manusia": "Human respiratory system",
39
- "Gerak benda": "Motion of objects",
40
- "Usaha pesawat sederhana": "Work & simple machines"
41
- }
42
-
43
 
44
  st.title("IndoBERT — Topic & Taxonomy")
45
  st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
@@ -56,18 +38,12 @@ with c1:
56
  if st.button("Predict question"):
57
  start = time.time()
58
  res = eng.predict_texts([text])[0]
59
- # translate topic label to English
60
- if res['topic_label'] in TOPIC_TRANSLATION:
61
- res['topic_label_en'] = TOPIC_TRANSLATION[res['topic_label']]
62
- else:
63
- res['topic_label_en'] = res['topic_label'] # fallback
64
  runtime = time.time() - start
65
  # show result
66
  st.subheader("Result")
67
  if res['topic_conf'] < 0.5:
68
  st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
69
- # st.metric("Topic", f"{res['topic_label']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
70
- st.metric("Topic", f"{res['topic_label_en']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
71
  st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
72
  # optional: probability bar
73
  st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
@@ -98,21 +74,12 @@ with c2:
98
  results = eng.predict_texts(texts)
99
  elapsed = time.time() - t0
100
  # join results into dataframe
101
- out["topic_label_en"] = out["topic_label"].apply(
102
- lambda x: TOPIC_TRANSLATION.get(x, x)
103
- )
104
  out = pd.DataFrame(results)
105
- # out = out.rename(columns={
106
- # "topic_label":"pred_topic",
107
- # "topic_conf":"pred_topic_conf",
108
- # "tax_label":"pred_tax",
109
- # "tax_conf":"pred_tax_conf"
110
- # })
111
  out = out.rename(columns={
112
- "topic_label_en": "pred_topic",
113
- "topic_conf": "pred_topic_conf",
114
- "tax_label": "pred_tax",
115
- "tax_conf": "pred_tax_conf"
116
  })
117
  # attach to original
118
  df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)
 
22
  eng = InferenceEngine(ckpt_path=MODEL_CKPT, tokenizer_name=TOKENIZER_NAME)
23
  return eng
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  st.title("IndoBERT — Topic & Taxonomy")
27
  st.caption("Shared encoder → 2 output heads. Fast inference (CPU/GPU).")
 
38
  if st.button("Predict question"):
39
  start = time.time()
40
  res = eng.predict_texts([text])[0]
 
 
 
 
 
41
  runtime = time.time() - start
42
  # show result
43
  st.subheader("Result")
44
  if res['topic_conf'] < 0.5:
45
  st.warning("⚠️ Model ini dirancang untuk soal IPA (Biologi, Fisika, Kimia).\n\nJika soal di luar kategori tersebut, hasil dapat tidak akurat.")
46
+ st.metric("Topic", f"{res['topic_label']} ({res['topic_idx']})", delta=f"{res['topic_conf']:.3f}")
 
47
  st.metric("Taxonomy", f"{res['tax_label']} ({res['tax_idx']})", delta=f"{res['tax_conf']:.3f}")
48
  # optional: probability bar
49
  st.write("Topic confidence:", f"{res['topic_conf']:.3f}")
 
74
  results = eng.predict_texts(texts)
75
  elapsed = time.time() - t0
76
  # join results into dataframe
 
 
 
77
  out = pd.DataFrame(results)
 
 
 
 
 
 
78
  out = out.rename(columns={
79
+ "topic_label":"pred_topic",
80
+ "topic_conf":"pred_topic_conf",
81
+ "tax_label":"pred_tax",
82
+ "tax_conf":"pred_tax_conf"
83
  })
84
  # attach to original
85
  df_out = pd.concat([df.reset_index(drop=True), out[["pred_topic","pred_topic_conf","pred_tax","pred_tax_conf"]]], axis=1)