Space status: Sleeping
Commit: Update app.py
File changed: app.py
|
@@ -13,6 +13,7 @@ def tokenize(input_text):
|
|
| 13 |
phi3_tokens = len(phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 14 |
t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 15 |
gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
|
|
|
| 16 |
command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 17 |
qwen_tokens = len(qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 18 |
codeqwen_tokens = len(codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
|
@@ -33,6 +34,7 @@ def tokenize(input_text):
|
|
| 33 |
"Phi-3": phi3_tokens,
|
| 34 |
"T5": t5_tokens,
|
| 35 |
"Gemma": gemma_tokens,
|
|
|
|
| 36 |
"Command-R": command_r_tokens,
|
| 37 |
"Qwen/Qwen1.5": qwen_tokens,
|
| 38 |
"CodeQwen": codeqwen_tokens,
|
|
@@ -60,6 +62,7 @@ if __name__ == "__main__":
|
|
| 60 |
phi3_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
|
| 61 |
t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
|
| 62 |
gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
|
|
|
|
| 63 |
command_r_tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
|
| 64 |
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B")
|
| 65 |
codeqwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B")
|
|
@@ -69,5 +72,5 @@ if __name__ == "__main__":
|
|
| 69 |
internlm_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-20b", trust_remote_code=True)
|
| 70 |
internlm2_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-20b", trust_remote_code=True)
|
| 71 |
|
| 72 |
-
iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=
|
| 73 |
iface.launch()
|
|
|
|
| 13 |
phi3_tokens = len(phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 14 |
t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 15 |
gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 16 |
+
gemma2_tokens = len(gemma2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 17 |
command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 18 |
qwen_tokens = len(qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
| 19 |
codeqwen_tokens = len(codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
|
|
|
|
| 34 |
"Phi-3": phi3_tokens,
|
| 35 |
"T5": t5_tokens,
|
| 36 |
"Gemma": gemma_tokens,
|
| 37 |
+
"Gemma-2": gemma2_tokens,
|
| 38 |
"Command-R": command_r_tokens,
|
| 39 |
"Qwen/Qwen1.5": qwen_tokens,
|
| 40 |
"CodeQwen": codeqwen_tokens,
|
|
|
|
| 62 |
phi3_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
|
| 63 |
t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
|
| 64 |
gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
|
| 65 |
+
gemma2_tokenizer = AutoTokenizer.from_pretrained("UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3")
|
| 66 |
command_r_tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
|
| 67 |
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B")
|
| 68 |
codeqwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B")
|
|
|
|
| 72 |
internlm_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-20b", trust_remote_code=True)
|
| 73 |
internlm2_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-20b", trust_remote_code=True)
|
| 74 |
|
| 75 |
+
iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=20), outputs="text")
|
| 76 |
iface.launch()
|