mohalisad committed
Commit e35f8fc · verified · 1 parent: 5f3c12c

Update leaderboard_data.jsonl

Files changed (1)
  1. leaderboard_data.jsonl +70 -30
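For context, each line of leaderboard_data.jsonl is a self-contained JSON object describing one model: its name, parameter count in billions, precision, Hub metadata, and one numeric score per benchmark (GeneralKnowledge, GSM8K, MMLU Pro, and so on). A minimal sketch of how such a file can be read, assuming a local copy and using only the Python standard library; the ranking metric chosen here, GSM8K, is just one of the score columns:

    import json

    # Each line of leaderboard_data.jsonl holds one model's record:
    # identification fields plus numeric per-benchmark scores.
    with open("leaderboard_data.jsonl", encoding="utf-8") as f:
        rows = [json.loads(line) for line in f if line.strip()]

    # Example: print the five models with the highest GSM8K score.
    # Score fields are numeric in every row, so they sort directly.
    for row in sorted(rows, key=lambda r: r["GSM8K"], reverse=True)[:5]:
        print(f'{row["Model"]}: GSM8K={row["GSM8K"]}')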
leaderboard_data.jsonl CHANGED
@@ -1,38 +1,78 @@
- {"Model": "aya-23-35B", "#Params (B)": 34.98, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": 63.27, "GSM8K": 10.0, "DC-Homograph": 55.56, "MC-Homograph": 83.64, "PiQA": 89.49, "Proverb-Quiz": 67.03, "VerbEval": 47.32, "Winogrande": 65.81, "Arc-Challenge": 77.56, "Arc-Easy": 90.16, "Feqh": 30.29, "Hallucination (Truthfulness)": 11.72, "P-Hellaswag": 79.87, "Law": 32.0, "AUT Multiple Choice": 48.7, "Parsi Literature": 31.92, "BoolQA": 86.2, "Reading Comprehension": 62.82, "PartExpert": 37.44, "MMLU Pro": 24.1, "Iranian Social Norms": 65.0, "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
- {"Model": "aya-23-8B", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": 52.3, "GSM8K": 6.1, "DC-Homograph": 52.78, "MC-Homograph": 76.27, "PiQA": 80.78, "Proverb-Quiz": 44.32, "VerbEval": 39.3, "Winogrande": 57.13, "Arc-Challenge": 63.68, "Arc-Easy": 81.39, "Feqh": 29.14, "Hallucination (Truthfulness)": 0.6, "P-Hellaswag": 75.83, "Law": 28.33, "AUT Multiple Choice": 42.9, "Parsi Literature": 31.27, "BoolQA": 72.3, "Reading Comprehension": 60.31, "PartExpert": 33.33, "MMLU Pro": 19.9, "Iranian Social Norms": 70.73, "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
- {"Model": "aya-expanse-32b", "#Params (B)": 32.29, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": 73.72, "GSM8K": 17.5, "DC-Homograph": 62.96, "MC-Homograph": 87.56, "PiQA": 91.19, "Proverb-Quiz": 77.03, "VerbEval": 61.95, "Winogrande": 70.5, "Arc-Challenge": 85.15, "Arc-Easy": 93.37, "Feqh": 37.14, "Hallucination (Truthfulness)": 44.84, "P-Hellaswag": 81.7, "Law": 38.67, "AUT Multiple Choice": 54.7, "Parsi Literature": 34.75, "BoolQA": 89.7, "Reading Comprehension": 67.25, "PartExpert": 44.29, "MMLU Pro": 32.1, "Iranian Social Norms": 74.94, "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
- {"Model": "aya-expanse-8b", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": 58.67, "GSM8K": 9.8, "DC-Homograph": 51.85, "MC-Homograph": 80.65, "PiQA": 80.18, "Proverb-Quiz": 60.0, "VerbEval": 48.06, "Winogrande": 64.04, "Arc-Challenge": 71.47, "Arc-Easy": 84.6, "Feqh": 29.71, "Hallucination (Truthfulness)": 23.52, "P-Hellaswag": 76.49, "Law": 32.33, "AUT Multiple Choice": 45.8, "Parsi Literature": 34.49, "BoolQA": 82.3, "Reading Comprehension": 61.98, "PartExpert": 35.56, "MMLU Pro": 21.9, "Iranian Social Norms": 71.71, "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
  {"Model": "deepseek-v3-03-24", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 85.71, "GSM8K": 53.1, "DC-Homograph": 83.33, "MC-Homograph": 94.24, "PiQA": 91.39, "Proverb-Quiz": 84.86, "VerbEval": 81.11, "Winogrande": 76.71, "Arc-Challenge": 92.31, "Arc-Easy": 96.58, "Feqh": 42.29, "Hallucination (Truthfulness)": 55.54, "P-Hellaswag": 85.3, "Law": 46.0, "AUT Multiple Choice": 65.6, "Parsi Literature": 44.66, "BoolQA": 95.3, "Reading Comprehension": 63.79, "PartExpert": 58.46, "MMLU Pro": 53.4, "Iranian Social Norms": 71.71, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "Dorna-Llama3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 64.85, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
- {"Model": "Dorna2-Llama3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": 48.72, "GSM8K": 11.9, "DC-Homograph": 44.44, "MC-Homograph": 72.81, "PiQA": 69.97, "Proverb-Quiz": 42.97, "VerbEval": 42.06, "Winogrande": 54.47, "Arc-Challenge": 67.63, "Arc-Easy": 78.72, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.91, "P-Hellaswag": 78.91, "Law": 29.67, "AUT Multiple Choice": 41.0, "Parsi Literature": 27.28, "BoolQA": 81.8, "Reading Comprehension": 56.84, "PartExpert": 35.65, "MMLU Pro": 22.7, "Iranian Social Norms": 49.82, "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
  {"Model": "gemma-2-27b-it", "#Params (B)": 27.22, "Precision": "BF16", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": 68.11, "GSM8K": 26.7, "DC-Homograph": 60.19, "MC-Homograph": 91.24, "PiQA": 89.69, "Proverb-Quiz": 73.51, "VerbEval": 61.16, "Winogrande": 76.44, "Arc-Challenge": 86.75, "Arc-Easy": 94.22, "Feqh": 24.0, "Hallucination (Truthfulness)": 13.05, "P-Hellaswag": 83.69, "Law": 34.67, "AUT Multiple Choice": 50.8, "Parsi Literature": 35.91, "BoolQA": 89.8, "Reading Comprehension": 56.76, "PartExpert": 46.6, "MMLU Pro": 36.9, "Iranian Social Norms": 77.38, "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
- {"Model": "gemma-2-2b-it", "#Params (B)": 2.61, "Precision": "BF16", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 41.79, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
  {"Model": "gemma-2-9b-it", "#Params (B)": 9.24, "Precision": "BF16", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": 64.03, "GSM8K": 17.4, "DC-Homograph": 59.26, "MC-Homograph": 90.55, "PiQA": 87.09, "Proverb-Quiz": 69.19, "VerbEval": 58.25, "Winogrande": 72.01, "Arc-Challenge": 84.29, "Arc-Easy": 93.16, "Feqh": 29.71, "Hallucination (Truthfulness)": 50.58, "P-Hellaswag": 80.82, "Law": 33.67, "AUT Multiple Choice": 48.5, "Parsi Literature": 38.1, "BoolQA": 89.7, "Reading Comprehension": 56.43, "PartExpert": 43.03, "MMLU Pro": 33.2, "Iranian Social Norms": 73.84, "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
- {"Model": "gemma-3-12b-it", "#Params (B)": 12.18, "Precision": "BF16", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": 68.37, "GSM8K": 20.2, "DC-Homograph": 67.59, "MC-Homograph": 91.24, "PiQA": 87.19, "Proverb-Quiz": 72.97, "VerbEval": 63.39, "Winogrande": 73.96, "Arc-Challenge": 83.33, "Arc-Easy": 93.26, "Feqh": 25.14, "Hallucination (Truthfulness)": 46.1, "P-Hellaswag": 83.17, "Law": 36.33, "AUT Multiple Choice": 49.0, "Parsi Literature": 40.03, "BoolQA": 87.6, "Reading Comprehension": 55.26, "PartExpert": 44.12, "MMLU Pro": 32.6, "Iranian Social Norms": 75.55, "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
- {"Model": "gemma-3-1b-it", "#Params (B)": 0.99, "Precision": "BF16", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 31.98, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
- {"Model": "gemma-3-27b-it", "#Params (B)": 27.43, "Precision": "BF16", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": 73.72, "GSM8K": 28.3, "DC-Homograph": 63.89, "MC-Homograph": 92.4, "PiQA": 87.29, "Proverb-Quiz": 78.92, "VerbEval": 66.02, "Winogrande": 78.12, "Arc-Challenge": 88.35, "Arc-Easy": 94.22, "Feqh": 24.57, "Hallucination (Truthfulness)": 60.15, "P-Hellaswag": 83.39, "Law": 36.33, "AUT Multiple Choice": 55.2, "Parsi Literature": 40.93, "BoolQA": 91.4, "Reading Comprehension": 58.01, "PartExpert": 49.32, "MMLU Pro": 36.6, "Iranian Social Norms": 70.49, "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
- {"Model": "gemma-3-4b-it", "#Params (B)": 4.3, "Precision": "BF16", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": 45.92, "GSM8K": 9.6, "DC-Homograph": 42.59, "MC-Homograph": 72.58, "PiQA": 72.77, "Proverb-Quiz": 53.78, "VerbEval": 45.3, "Winogrande": 55.09, "Arc-Challenge": 63.46, "Arc-Easy": 79.57, "Feqh": 21.14, "Hallucination (Truthfulness)": 46.04, "P-Hellaswag": 73.84, "Law": 27.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 30.24, "BoolQA": 78.6, "Reading Comprehension": 47.28, "PartExpert": 34.7, "MMLU Pro": 22.8, "Iranian Social Norms": 65.55, "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
- {"Model": "gemini-2.0-flash-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 87.76, "GSM8K": 53.7, "DC-Homograph": 79.63, "MC-Homograph": 91.71, "PiQA": 90.59, "Proverb-Quiz": 95.14, "VerbEval": 85.15, "Winogrande": 78.74, "Arc-Challenge": 91.35, "Arc-Easy": 97.22, "Feqh": 53.14, "Hallucination (Truthfulness)": 68.87, "P-Hellaswag": 82.95, "Law": 45.67, "AUT Multiple Choice": 60.9, "Parsi Literature": 44.02, "BoolQA": 91.3, "Reading Comprehension": 67.92, "PartExpert": 59.5, "MMLU Pro": 47.8, "Iranian Social Norms": 77.68, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "gemini-2.0-flash-lite-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 84.18, "GSM8K": 39.7, "DC-Homograph": 60.19, "MC-Homograph": 87.79, "PiQA": 85.29, "Proverb-Quiz": 91.35, "VerbEval": 81.39, "Winogrande": 75.64, "Arc-Challenge": 89.64, "Arc-Easy": 93.48, "Feqh": 41.71, "Hallucination (Truthfulness)": 67.32, "P-Hellaswag": 83.54, "Law": 43.0, "AUT Multiple Choice": 58.5, "Parsi Literature": 43.89, "BoolQA": 92.6, "Reading Comprehension": 65.92, "PartExpert": 54.15, "MMLU Pro": 41.2, "Iranian Social Norms": 70.49, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "gpt-4.1-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 25.3, "DC-Homograph": 89.81, "MC-Homograph": 95.39, "PiQA": 95.9, "Proverb-Quiz": 95.14, "VerbEval": 83.04, "Winogrande": 85.92, "Arc-Challenge": 95.3, "Arc-Easy": 96.68, "Feqh": 52.0, "Hallucination (Truthfulness)": 77.43, "P-Hellaswag": 85.67, "Law": 53.67, "AUT Multiple Choice": 66.6, "Parsi Literature": 45.82, "BoolQA": 94.7, "Reading Comprehension": 44.82, "PartExpert": 59.92, "MMLU Pro": 50.5, "Iranian Social Norms": 77.56, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "gpt-4.1-mini-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.34, "GSM8K": 60.3, "DC-Homograph": 66.67, "MC-Homograph": 94.24, "PiQA": 92.69, "Proverb-Quiz": 82.97, "VerbEval": 77.99, "Winogrande": 80.07, "Arc-Challenge": 91.88, "Arc-Easy": 96.15, "Feqh": 37.71, "Hallucination (Truthfulness)": 66.55, "P-Hellaswag": 84.57, "Law": 44.33, "AUT Multiple Choice": 53.5, "Parsi Literature": 41.18, "BoolQA": 93.7, "Reading Comprehension": 51.85, "PartExpert": 54.37, "MMLU Pro": 47.8, "Iranian Social Norms": 73.35, "Model sha": "unknown", "Hub License": "unknown"}
  {"Model": "gpt-4.1-nano-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 68.11, "GSM8K": 58.4, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 84.58, "Proverb-Quiz": 67.84, "VerbEval": 66.21, "Winogrande": 60.32, "Arc-Challenge": 81.41, "Arc-Easy": 91.55, "Feqh": 32.0, "Hallucination (Truthfulness)": 51.24, "P-Hellaswag": 77.96, "Law": 32.67, "AUT Multiple Choice": 46.1, "Parsi Literature": 36.42, "BoolQA": 81.7, "Reading Comprehension": 50.66, "PartExpert": 42.49, "MMLU Pro": 29.9, "Iranian Social Norms": 74.76, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "gpt-4o-2024-08-06", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 73.1, "DC-Homograph": 87.04, "MC-Homograph": 95.62, "PiQA": 95.1, "Proverb-Quiz": 96.76, "VerbEval": 85.89, "Winogrande": 86.18, "Arc-Challenge": 95.09, "Arc-Easy": 97.22, "Feqh": 46.86, "Hallucination (Truthfulness)": 74.64, "P-Hellaswag": 85.53, "Law": 47.67, "AUT Multiple Choice": 67.7, "Parsi Literature": 45.95, "BoolQA": 94.1, "Reading Comprehension": 55.34, "PartExpert": 57.36, "MMLU Pro": 47.1, "Iranian Social Norms": 76.89, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "gpt-4o-mini-2024-07-18", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.08, "GSM8K": 60.9, "DC-Homograph": 68.52, "MC-Homograph": 90.09, "PiQA": 90.89, "Proverb-Quiz": 84.05, "VerbEval": 74.23, "Winogrande": 75.73, "Arc-Challenge": 86.43, "Arc-Easy": 94.01, "Feqh": 41.71, "Hallucination (Truthfulness)": 82.04, "P-Hellaswag": 83.84, "Law": 34.0, "AUT Multiple Choice": 54.8, "Parsi Literature": 40.93, "BoolQA": 93.3, "Reading Comprehension": 63.29, "PartExpert": 42.54, "MMLU Pro": 34.8, "Iranian Social Norms": 71.59, "Model sha": "unknown", "Hub License": "unknown"}
- {"Model": "Hermes-3-Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": 49.49, "GSM8K": 10.2, "DC-Homograph": 44.44, "MC-Homograph": 79.72, "PiQA": 70.37, "Proverb-Quiz": 47.84, "VerbEval": 48.94, "Winogrande": 55.18, "Arc-Challenge": 65.28, "Arc-Easy": 78.07, "Feqh": 30.29, "Hallucination (Truthfulness)": 45.2, "P-Hellaswag": 73.99, "Law": 31.67, "AUT Multiple Choice": 42.1, "Parsi Literature": 30.63, "BoolQA": 83.5, "Reading Comprehension": 56.4, "PartExpert": 35.61, "MMLU Pro": 24.1, "Iranian Social Norms": 54.88, "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
  {"Model": "Hormoz-8B", "#Params (B)": 8.02, "Precision": "F32", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": 58.42, "GSM8K": 10.0, "DC-Homograph": 50.93, "MC-Homograph": 80.65, "PiQA": 80.68, "Proverb-Quiz": 60.27, "VerbEval": 47.29, "Winogrande": 64.39, "Arc-Challenge": 70.41, "Arc-Easy": 84.28, "Feqh": 28.57, "Hallucination (Truthfulness)": 23.66, "P-Hellaswag": 76.05, "Law": 30.33, "AUT Multiple Choice": 46.7, "Parsi Literature": 33.08, "BoolQA": 79.8, "Reading Comprehension": 61.11, "PartExpert": 35.68, "MMLU Pro": 21.5, "Iranian Social Norms": 70.3, "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
  {"Model": "Llama-3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": 52.55, "GSM8K": 12.0, "DC-Homograph": 43.52, "MC-Homograph": 79.03, "PiQA": 70.07, "Proverb-Quiz": 47.57, "VerbEval": 42.91, "Winogrande": 54.21, "Arc-Challenge": 68.91, "Arc-Easy": 80.11, "Feqh": 29.71, "Hallucination (Truthfulness)": 6.76, "P-Hellaswag": 79.79, "Law": 32.67, "AUT Multiple Choice": 44.9, "Parsi Literature": 32.3, "BoolQA": 82.7, "Reading Comprehension": 62.45, "PartExpert": 37.62, "MMLU Pro": 25.7, "Iranian Social Norms": 70.98, "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
  {"Model": "Llama-3.2-1B-Instruct", "#Params (B)": 1.23, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 38.0, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
  {"Model": "Maral-7B-alpha-1", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 42.04, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
- {"Model": "Meta-Llama-3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 10.4, "DC-Homograph": 41.67, "MC-Homograph": 81.11, "PiQA": 70.97, "Proverb-Quiz": 42.97, "VerbEval": 38.93, "Winogrande": 56.95, "Arc-Challenge": 66.77, "Arc-Easy": 76.47, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.23, "P-Hellaswag": 76.71, "Law": 32.0, "AUT Multiple Choice": 45.0, "Parsi Literature": 29.99, "BoolQA": 82.5, "Reading Comprehension": 54.79, "PartExpert": 36.3, "MMLU Pro": 26.0, "Iranian Social Norms": 70.06, "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
- {"Model": "PersianMind-v1.0", "#Params (B)": 0.0, "Precision": "F32", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": 0.0, "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
- {"Model": "Qwen2-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 14.5, "DC-Homograph": 54.63, "MC-Homograph": 72.81, "PiQA": 70.97, "Proverb-Quiz": 50.54, "VerbEval": 40.62, "Winogrande": 60.94, "Arc-Challenge": 69.12, "Arc-Easy": 80.75, "Feqh": 28.0, "Hallucination (Truthfulness)": 25.93, "P-Hellaswag": 76.71, "Law": 28.33, "AUT Multiple Choice": 40.4, "Parsi Literature": 31.4, "BoolQA": 79.0, "Reading Comprehension": 50.14, "PartExpert": 36.31, "MMLU Pro": 23.8, "Iranian Social Norms": 62.2, "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
- {"Model": "Qwen2.5-32B-Instruct", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": 61.73, "GSM8K": 50.1, "DC-Homograph": 67.59, "MC-Homograph": 91.47, "PiQA": 83.98, "Proverb-Quiz": 63.24, "VerbEval": 54.58, "Winogrande": 80.07, "Arc-Challenge": 85.15, "Arc-Easy": 91.87, "Feqh": 38.86, "Hallucination (Truthfulness)": 59.22, "P-Hellaswag": 82.07, "Law": 42.33, "AUT Multiple Choice": 50.4, "Parsi Literature": 40.41, "BoolQA": 93.4, "Reading Comprehension": 28.11, "PartExpert": 46.78, "MMLU Pro": 37.4, "Iranian Social Norms": 70.0, "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
- {"Model": "Qwen2.5-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": 51.02, "GSM8K": 18.0, "DC-Homograph": 52.78, "MC-Homograph": 79.26, "PiQA": 71.07, "Proverb-Quiz": 47.84, "VerbEval": 44.44, "Winogrande": 61.91, "Arc-Challenge": 72.33, "Arc-Easy": 81.5, "Feqh": 36.57, "Hallucination (Truthfulness)": 34.89, "P-Hellaswag": 74.8, "Law": 32.33, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.27, "BoolQA": 82.5, "Reading Comprehension": 58.43, "PartExpert": 37.24, "MMLU Pro": 26.7, "Iranian Social Norms": 64.51, "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
- {"Model": "Qwen3-14B", "#Params (B)": 14.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": 56.38, "GSM8K": 31.1, "DC-Homograph": 55.56, "MC-Homograph": 87.56, "PiQA": 77.18, "Proverb-Quiz": 53.78, "VerbEval": 54.36, "Winogrande": 67.32, "Arc-Challenge": 84.29, "Arc-Easy": 91.02, "Feqh": 29.14, "Hallucination (Truthfulness)": 44.54, "P-Hellaswag": 80.97, "Law": 34.67, "AUT Multiple Choice": 44.8, "Parsi Literature": 35.39, "BoolQA": 87.6, "Reading Comprehension": 44.36, "PartExpert": 43.22, "MMLU Pro": 35.5, "Iranian Social Norms": 74.51, "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
- {"Model": "Qwen3-30B-A3B", "#Params (B)": 30.53, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": 65.05, "GSM8K": 28.8, "DC-Homograph": 57.41, "MC-Homograph": 86.41, "PiQA": 72.47, "Proverb-Quiz": 50.81, "VerbEval": 48.09, "Winogrande": 65.28, "Arc-Challenge": 87.39, "Arc-Easy": 93.58, "Feqh": 23.43, "Hallucination (Truthfulness)": 3.54, "P-Hellaswag": 83.1, "Law": 35.33, "AUT Multiple Choice": 48.0, "Parsi Literature": 36.55, "BoolQA": 86.2, "Reading Comprehension": 66.24, "PartExpert": 41.13, "MMLU Pro": 36.3, "Iranian Social Norms": 44.21, "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
- {"Model": "Qwen3-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": 67.6, "GSM8K": 37.9, "DC-Homograph": 51.85, "MC-Homograph": 89.63, "PiQA": 87.69, "Proverb-Quiz": 64.59, "VerbEval": 56.35, "Winogrande": 71.48, "Arc-Challenge": 91.13, "Arc-Easy": 94.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 47.5, "P-Hellaswag": 83.47, "Law": 37.0, "AUT Multiple Choice": 48.3, "Parsi Literature": 39.12, "BoolQA": 91.1, "Reading Comprehension": 63.96, "PartExpert": 50.06, "MMLU Pro": 42.8, "Iranian Social Norms": 73.48, "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
- {"Model": "Qwen3-4B", "#Params (B)": 4.02, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": 43.88, "GSM8K": 20.1, "DC-Homograph": 38.89, "MC-Homograph": 76.27, "PiQA": 66.07, "Proverb-Quiz": 45.41, "VerbEval": 41.23, "Winogrande": 54.56, "Arc-Challenge": 73.61, "Arc-Easy": 83.42, "Feqh": 30.29, "Hallucination (Truthfulness)": 25.29, "P-Hellaswag": 78.03, "Law": 30.33, "AUT Multiple Choice": 40.6, "Parsi Literature": 31.79, "BoolQA": 81.9, "Reading Comprehension": 63.43, "PartExpert": 37.28, "MMLU Pro": 28.9, "Iranian Social Norms": 68.72, "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
- {"Model": "Qwen3-8B", "#Params (B)": 8.19, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": 49.23, "GSM8K": 25.7, "DC-Homograph": 50.93, "MC-Homograph": 82.95, "PiQA": 75.98, "Proverb-Quiz": 51.89, "VerbEval": 47.93, "Winogrande": 61.91, "Arc-Challenge": 80.24, "Arc-Easy": 87.38, "Feqh": 28.0, "Hallucination (Truthfulness)": 38.46, "P-Hellaswag": 80.38, "Law": 29.67, "AUT Multiple Choice": 46.0, "Parsi Literature": 33.2, "BoolQA": 86.4, "Reading Comprehension": 66.38, "PartExpert": 38.31, "MMLU Pro": 31.1, "Iranian Social Norms": 63.41, "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
- {"Model": "QwQ-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": 60.71, "GSM8K": 29.3, "DC-Homograph": 58.33, "MC-Homograph": 88.25, "PiQA": 81.68, "Proverb-Quiz": 59.19, "VerbEval": 52.31, "Winogrande": 73.07, "Arc-Challenge": 84.94, "Arc-Easy": 90.8, "Feqh": 41.71, "Hallucination (Truthfulness)": 48.93, "P-Hellaswag": 82.22, "Law": 38.0, "AUT Multiple Choice": 49.3, "Parsi Literature": 37.71, "BoolQA": 88.5, "Reading Comprehension": 50.25, "PartExpert": 46.75, "MMLU Pro": 39.0, "Iranian Social Norms": 70.73, "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
- {"Model": "QwQ-32B-Preview", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": 63.27, "GSM8K": 34.7, "DC-Homograph": 61.11, "MC-Homograph": 88.25, "PiQA": 81.28, "Proverb-Quiz": 58.11, "VerbEval": 51.97, "Winogrande": 75.64, "Arc-Challenge": 85.58, "Arc-Easy": 91.44, "Feqh": 41.14, "Hallucination (Truthfulness)": 38.84, "P-Hellaswag": 84.13, "Law": 43.0, "AUT Multiple Choice": 50.6, "Parsi Literature": 39.77, "BoolQA": 88.5, "Reading Comprehension": 65.38, "PartExpert": 47.39, "MMLU Pro": 37.3, "Iranian Social Norms": 72.26, "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
+ {"Model": "gpt-4o-2024-08-06", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 73.1, "DC-Homograph": 87.04, "MC-Homograph": 95.62, "PiQA": 95.1, "Proverb-Quiz": 96.76, "VerbEval": 85.89, "Winogrande": 86.18, "Arc-Challenge": 95.09, "Arc-Easy": 97.22, "Feqh": 46.86, "Hallucination (Truthfulness)": 74.64, "P-Hellaswag": 85.53, "Law": 47.67, "AUT Multiple Choice": 67.7, "Parsi Literature": 45.95, "BoolQA": 94.1, "Reading Comprehension": 55.34, "PartExpert": 57.36, "MMLU Pro": 47.1, "Iranian Social Norms": 76.89, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "gpt-4.1-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 25.3, "DC-Homograph": 89.81, "MC-Homograph": 95.39, "PiQA": 95.9, "Proverb-Quiz": 95.14, "VerbEval": 83.04, "Winogrande": 85.92, "Arc-Challenge": 95.3, "Arc-Easy": 96.68, "Feqh": 52.0, "Hallucination (Truthfulness)": 77.43, "P-Hellaswag": 85.67, "Law": 53.67, "AUT Multiple Choice": 66.6, "Parsi Literature": 45.82, "BoolQA": 94.7, "Reading Comprehension": 44.82, "PartExpert": 59.92, "MMLU Pro": 50.5, "Iranian Social Norms": 77.56, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "google__gemini-2.0-flash-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 87.76, "GSM8K": 53.7, "DC-Homograph": 79.63, "MC-Homograph": 91.71, "PiQA": 90.59, "Proverb-Quiz": 95.14, "VerbEval": 85.15, "Winogrande": 78.74, "Arc-Challenge": 91.35, "Arc-Easy": 97.22, "Feqh": 53.14, "Hallucination (Truthfulness)": 68.87, "P-Hellaswag": 82.95, "Law": 45.67, "AUT Multiple Choice": 60.9, "Parsi Literature": 44.02, "BoolQA": 91.3, "Reading Comprehension": 67.92, "PartExpert": 59.5, "MMLU Pro": 47.8, "Iranian Social Norms": 77.68, "Model sha": "unknown", "Hub License": "unknown"}
  {"Model": "deepseek-v3-03-24", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 85.71, "GSM8K": 53.1, "DC-Homograph": 83.33, "MC-Homograph": 94.24, "PiQA": 91.39, "Proverb-Quiz": 84.86, "VerbEval": 81.11, "Winogrande": 76.71, "Arc-Challenge": 92.31, "Arc-Easy": 96.58, "Feqh": 42.29, "Hallucination (Truthfulness)": 55.54, "P-Hellaswag": 85.3, "Law": 46.0, "AUT Multiple Choice": 65.6, "Parsi Literature": 44.66, "BoolQA": 95.3, "Reading Comprehension": 63.79, "PartExpert": 58.46, "MMLU Pro": 53.4, "Iranian Social Norms": 71.71, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "gpt-4.1-mini-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.34, "GSM8K": 60.3, "DC-Homograph": 66.67, "MC-Homograph": 94.24, "PiQA": 92.69, "Proverb-Quiz": 82.97, "VerbEval": 77.99, "Winogrande": 80.07, "Arc-Challenge": 91.88, "Arc-Easy": 96.15, "Feqh": 37.71, "Hallucination (Truthfulness)": 66.55, "P-Hellaswag": 84.57, "Law": 44.33, "AUT Multiple Choice": 53.5, "Parsi Literature": 41.18, "BoolQA": 93.7, "Reading Comprehension": 51.85, "PartExpert": 54.37, "MMLU Pro": 47.8, "Iranian Social Norms": 73.35, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "gpt-4-turbo-2024-04-09", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 80.87, "GSM8K": 30.6, "DC-Homograph": 88.89, "MC-Homograph": 93.78, "PiQA": 94.19, "Proverb-Quiz": 86.76, "VerbEval": 74.29, "Winogrande": 83.08, "Arc-Challenge": 91.35, "Arc-Easy": 96.47, "Feqh": 40.0, "Hallucination (Truthfulness)": 55.69, "P-Hellaswag": 80.68, "Law": 42.0, "AUT Multiple Choice": 62.6, "Parsi Literature": 40.93, "BoolQA": 92.2, "Reading Comprehension": 67.17, "PartExpert": 53.18, "MMLU Pro": 40.1, "Iranian Social Norms": 75.91, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "c4ai-command-a-03-2025", "#Params (B)": 111.05, "Precision": "BF16", "model_name_for_query": "CohereLabs/c4ai-command-a-03-2025", "GeneralKnowledge": 79.34, "GSM8K": 35.6, "DC-Homograph": 75.93, "MC-Homograph": 94.93, "PiQA": 93.99, "Proverb-Quiz": 87.3, "VerbEval": 70.65, "Winogrande": 80.34, "Arc-Challenge": 91.03, "Arc-Easy": 96.9, "Feqh": 46.86, "Hallucination (Truthfulness)": 54.64, "P-Hellaswag": 84.28, "Law": 45.0, "AUT Multiple Choice": 61.3, "Parsi Literature": 41.57, "BoolQA": 92.6, "Reading Comprehension": 66.81, "PartExpert": 52.44, "MMLU Pro": 41.3, "Iranian Social Norms": 77.56, "Model sha": "11fe7d565f727a58cf33092d9d3163b3414c9594", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "google__gemini-2.0-flash-lite-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 84.18, "GSM8K": 39.7, "DC-Homograph": 60.19, "MC-Homograph": 87.79, "PiQA": 85.29, "Proverb-Quiz": 91.35, "VerbEval": 81.39, "Winogrande": 75.64, "Arc-Challenge": 89.64, "Arc-Easy": 93.48, "Feqh": 41.71, "Hallucination (Truthfulness)": 67.32, "P-Hellaswag": 83.54, "Law": 43.0, "AUT Multiple Choice": 58.5, "Parsi Literature": 43.89, "BoolQA": 92.6, "Reading Comprehension": 65.92, "PartExpert": 54.15, "MMLU Pro": 41.2, "Iranian Social Norms": 70.49, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "Llama-4-Scout-17B-16E-Instruct", "#Params (B)": 108.64, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "GeneralKnowledge": 81.63, "GSM8K": 35.4, "DC-Homograph": 71.3, "MC-Homograph": 93.09, "PiQA": 90.29, "Proverb-Quiz": 83.78, "VerbEval": 79.25, "Winogrande": 78.3, "Arc-Challenge": 88.78, "Arc-Easy": 95.19, "Feqh": 41.14, "Hallucination (Truthfulness)": 53.18, "P-Hellaswag": 84.42, "Law": 38.67, "AUT Multiple Choice": 55.2, "Parsi Literature": 42.21, "BoolQA": 93.8, "Reading Comprehension": 66.62, "PartExpert": 54.55, "MMLU Pro": 47.0, "Iranian Social Norms": 75.18, "Model sha": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", "Hub License": "other"}
+ {"Model": "gpt-4o-mini-2024-07-18", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.08, "GSM8K": 60.9, "DC-Homograph": 68.52, "MC-Homograph": 90.09, "PiQA": 90.89, "Proverb-Quiz": 84.05, "VerbEval": 74.23, "Winogrande": 75.73, "Arc-Challenge": 86.43, "Arc-Easy": 94.01, "Feqh": 41.71, "Hallucination (Truthfulness)": 82.04, "P-Hellaswag": 83.84, "Law": 34.0, "AUT Multiple Choice": 54.8, "Parsi Literature": 40.93, "BoolQA": 93.3, "Reading Comprehension": 63.29, "PartExpert": 42.54, "MMLU Pro": 34.8, "Iranian Social Norms": 71.59, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "Llama-3.3-70B-Instruct", "#Params (B)": 70.55, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.3-70B-Instruct", "GeneralKnowledge": 78.83, "GSM8K": 31.5, "DC-Homograph": 66.67, "MC-Homograph": 92.17, "PiQA": 90.29, "Proverb-Quiz": 78.92, "VerbEval": 68.32, "Winogrande": 81.93, "Arc-Challenge": 89.53, "Arc-Easy": 95.94, "Feqh": 39.43, "Hallucination (Truthfulness)": 66.68, "P-Hellaswag": 83.91, "Law": 45.0, "AUT Multiple Choice": 57.3, "Parsi Literature": 39.9, "BoolQA": 94.8, "Reading Comprehension": 64.57, "PartExpert": 52.62, "MMLU Pro": 42.7, "Iranian Social Norms": 76.83, "Model sha": "6f6073b423013f6a7d4d9f39144961bfbfbc386b", "Hub License": "llama3.3"}
+ {"Model": "Meta-Llama-3.1-70B-Instruct", "#Params (B)": 70.55, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3.1-70B-Instruct", "GeneralKnowledge": 78.83, "GSM8K": 29.3, "DC-Homograph": 63.89, "MC-Homograph": 92.17, "PiQA": 89.89, "Proverb-Quiz": 77.57, "VerbEval": 65.38, "Winogrande": 81.93, "Arc-Challenge": 89.74, "Arc-Easy": 95.61, "Feqh": 42.29, "Hallucination (Truthfulness)": 68.23, "P-Hellaswag": 85.67, "Law": 45.0, "AUT Multiple Choice": 57.2, "Parsi Literature": 38.61, "BoolQA": 95.2, "Reading Comprehension": 59.32, "PartExpert": 53.3, "MMLU Pro": 43.2, "Iranian Social Norms": 76.4, "Model sha": "1605565b47bb9346c5515c34102e054115b4f98b", "Hub License": "llama3.1"}
+ {"Model": "Meta-Llama-3-70B-Instruct", "#Params (B)": 70.55, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-70B-Instruct", "GeneralKnowledge": 79.34, "GSM8K": 28.7, "DC-Homograph": 67.59, "MC-Homograph": 94.01, "PiQA": 89.59, "Proverb-Quiz": 72.43, "VerbEval": 59.41, "Winogrande": 75.91, "Arc-Challenge": 88.35, "Arc-Easy": 95.19, "Feqh": 36.57, "Hallucination (Truthfulness)": 54.64, "P-Hellaswag": 82.51, "Law": 44.33, "AUT Multiple Choice": 56.8, "Parsi Literature": 36.94, "BoolQA": 92.2, "Reading Comprehension": 66.88, "PartExpert": 51.12, "MMLU Pro": 38.7, "Iranian Social Norms": 73.05, "Model sha": "50fd307e57011801c7833c87efa1984ddf2db42f", "Hub License": "llama3"}
+ {"Model": "Qwen2.5-72B-Instruct", "#Params (B)": 72.7, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-72B-Instruct", "GeneralKnowledge": 76.79, "GSM8K": 40.4, "DC-Homograph": 66.67, "MC-Homograph": 91.24, "PiQA": 88.59, "Proverb-Quiz": 68.92, "VerbEval": 62.17, "Winogrande": 78.03, "Arc-Challenge": 91.35, "Arc-Easy": 94.22, "Feqh": 34.29, "Hallucination (Truthfulness)": 63.25, "P-Hellaswag": 84.13, "Law": 39.0, "AUT Multiple Choice": 55.9, "Parsi Literature": 37.71, "BoolQA": 95.6, "Reading Comprehension": 43.67, "PartExpert": 53.03, "MMLU Pro": 39.1, "Iranian Social Norms": 73.11, "Model sha": "495f39366efef23836d0cfae4fbe635880d2be31", "Hub License": "other"}
+ {"Model": "Qwen2-72B-Instruct", "#Params (B)": 72.7, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-72B-Instruct", "GeneralKnowledge": 75.26, "GSM8K": 35.6, "DC-Homograph": 72.22, "MC-Homograph": 91.71, "PiQA": 89.79, "Proverb-Quiz": 70.0, "VerbEval": 53.26, "Winogrande": 80.87, "Arc-Challenge": 88.03, "Arc-Easy": 94.65, "Feqh": 31.43, "Hallucination (Truthfulness)": 62.76, "P-Hellaswag": 83.1, "Law": 38.0, "AUT Multiple Choice": 55.0, "Parsi Literature": 35.78, "BoolQA": 88.8, "Reading Comprehension": 63.97, "PartExpert": 48.63, "MMLU Pro": 34.6, "Iranian Social Norms": 74.45, "Model sha": "c867f763ef53f2ea9d9b31ee8501273dedd391eb", "Hub License": "other"}
+ {"Model": "Mistral-Large-Instruct-2407", "#Params (B)": 122.61, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-Large-Instruct-2407", "GeneralKnowledge": 72.45, "GSM8K": 35.3, "DC-Homograph": 73.15, "MC-Homograph": 93.09, "PiQA": 88.29, "Proverb-Quiz": 65.95, "VerbEval": 53.81, "Winogrande": 74.93, "Arc-Challenge": 89.0, "Arc-Easy": 94.87, "Feqh": 26.86, "Hallucination (Truthfulness)": 61.28, "P-Hellaswag": 84.64, "Law": 36.0, "AUT Multiple Choice": 55.4, "Parsi Literature": 37.71, "BoolQA": 87.3, "Reading Comprehension": 67.9, "PartExpert": 48.62, "MMLU Pro": 40.0, "Iranian Social Norms": 74.39, "Model sha": "a286006d554cb37a61d13c7ae61bc90cc1d372fc", "Hub License": "other"}
+ {"Model": "gemma-3-27b-it", "#Params (B)": 27.43, "Precision": "BF16", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": 73.72, "GSM8K": 28.3, "DC-Homograph": 63.89, "MC-Homograph": 92.4, "PiQA": 87.29, "Proverb-Quiz": 78.92, "VerbEval": 66.02, "Winogrande": 78.12, "Arc-Challenge": 88.35, "Arc-Easy": 94.22, "Feqh": 24.57, "Hallucination (Truthfulness)": 60.15, "P-Hellaswag": 83.39, "Law": 36.33, "AUT Multiple Choice": 55.2, "Parsi Literature": 40.93, "BoolQA": 91.4, "Reading Comprehension": 58.01, "PartExpert": 49.32, "MMLU Pro": 36.6, "Iranian Social Norms": 70.49, "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
+ {"Model": "Mistral-Large-Instruct-2411", "#Params (B)": 122.61, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-Large-Instruct-2411", "GeneralKnowledge": 71.94, "GSM8K": 34.6, "DC-Homograph": 75.93, "MC-Homograph": 94.01, "PiQA": 88.29, "Proverb-Quiz": 65.41, "VerbEval": 54.58, "Winogrande": 76.71, "Arc-Challenge": 88.35, "Arc-Easy": 94.33, "Feqh": 31.43, "Hallucination (Truthfulness)": 45.51, "P-Hellaswag": 83.84, "Law": 37.33, "AUT Multiple Choice": 55.1, "Parsi Literature": 37.19, "BoolQA": 87.5, "Reading Comprehension": 67.21, "PartExpert": 48.67, "MMLU Pro": 37.1, "Iranian Social Norms": 74.33, "Model sha": "ba78820945ae22361b0274cf0ae6d696c967c1a4", "Hub License": "other"}
+ {"Model": "c4ai-command-r-plus-08-2024", "#Params (B)": 103.81, "Precision": "F16", "model_name_for_query": "CohereLabs/c4ai-command-r-plus-08-2024", "GeneralKnowledge": 73.21, "GSM8K": 26.0, "DC-Homograph": 62.96, "MC-Homograph": 92.86, "PiQA": 86.29, "Proverb-Quiz": 82.97, "VerbEval": 63.18, "Winogrande": 68.02, "Arc-Challenge": 80.34, "Arc-Easy": 91.34, "Feqh": 33.71, "Hallucination (Truthfulness)": 54.39, "P-Hellaswag": 81.26, "Law": 40.0, "AUT Multiple Choice": 55.1, "Parsi Literature": 37.45, "BoolQA": 84.0, "Reading Comprehension": 64.9, "PartExpert": 44.08, "MMLU Pro": 29.8, "Iranian Social Norms": 70.43, "Model sha": "7d73ba43cd05c39ecb7e35cf414ac8fde277789f", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "aya-expanse-32b", "#Params (B)": 32.29, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": 73.72, "GSM8K": 17.5, "DC-Homograph": 62.96, "MC-Homograph": 87.56, "PiQA": 91.19, "Proverb-Quiz": 77.03, "VerbEval": 61.95, "Winogrande": 70.5, "Arc-Challenge": 85.15, "Arc-Easy": 93.37, "Feqh": 37.14, "Hallucination (Truthfulness)": 44.84, "P-Hellaswag": 81.7, "Law": 38.67, "AUT Multiple Choice": 54.7, "Parsi Literature": 34.75, "BoolQA": 89.7, "Reading Comprehension": 67.25, "PartExpert": 44.29, "MMLU Pro": 32.1, "Iranian Social Norms": 74.94, "Model sha": "2963cf90c3a69ca9b65db9224804f377e0e11141", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "Qwen3-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": 67.6, "GSM8K": 37.9, "DC-Homograph": 51.85, "MC-Homograph": 89.63, "PiQA": 87.69, "Proverb-Quiz": 64.59, "VerbEval": 56.35, "Winogrande": 71.48, "Arc-Challenge": 91.13, "Arc-Easy": 94.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 47.5, "P-Hellaswag": 83.47, "Law": 37.0, "AUT Multiple Choice": 48.3, "Parsi Literature": 39.12, "BoolQA": 91.1, "Reading Comprehension": 63.96, "PartExpert": 50.06, "MMLU Pro": 42.8, "Iranian Social Norms": 73.48, "Model sha": "9216db5781bf21249d130ec9da846c4624c16137", "Hub License": "apache-2.0"}
+ {"Model": "Qwen2.5-32B-Instruct", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": 61.73, "GSM8K": 50.1, "DC-Homograph": 67.59, "MC-Homograph": 91.47, "PiQA": 83.98, "Proverb-Quiz": 63.24, "VerbEval": 54.58, "Winogrande": 80.07, "Arc-Challenge": 85.15, "Arc-Easy": 91.87, "Feqh": 38.86, "Hallucination (Truthfulness)": 59.22, "P-Hellaswag": 82.07, "Law": 42.33, "AUT Multiple Choice": 50.4, "Parsi Literature": 40.41, "BoolQA": 93.4, "Reading Comprehension": 28.11, "PartExpert": 46.78, "MMLU Pro": 37.4, "Iranian Social Norms": 70.0, "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
+ {"Model": "QwQ-32B-Preview", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": 63.27, "GSM8K": 34.7, "DC-Homograph": 61.11, "MC-Homograph": 88.25, "PiQA": 81.28, "Proverb-Quiz": 58.11, "VerbEval": 51.97, "Winogrande": 75.64, "Arc-Challenge": 85.58, "Arc-Easy": 91.44, "Feqh": 41.14, "Hallucination (Truthfulness)": 38.84, "P-Hellaswag": 84.13, "Law": 43.0, "AUT Multiple Choice": 50.6, "Parsi Literature": 39.77, "BoolQA": 88.5, "Reading Comprehension": 65.38, "PartExpert": 47.39, "MMLU Pro": 37.3, "Iranian Social Norms": 72.26, "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
+ {"Model": "gemma-3-12b-it", "#Params (B)": 12.18, "Precision": "BF16", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": 68.37, "GSM8K": 20.2, "DC-Homograph": 67.59, "MC-Homograph": 91.24, "PiQA": 87.19, "Proverb-Quiz": 72.97, "VerbEval": 63.39, "Winogrande": 73.96, "Arc-Challenge": 83.33, "Arc-Easy": 93.26, "Feqh": 25.14, "Hallucination (Truthfulness)": 46.1, "P-Hellaswag": 83.17, "Law": 36.33, "AUT Multiple Choice": 49.0, "Parsi Literature": 40.03, "BoolQA": 87.6, "Reading Comprehension": 55.26, "PartExpert": 44.12, "MMLU Pro": 32.6, "Iranian Social Norms": 75.55, "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
  {"Model": "gemma-2-27b-it", "#Params (B)": 27.22, "Precision": "BF16", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": 68.11, "GSM8K": 26.7, "DC-Homograph": 60.19, "MC-Homograph": 91.24, "PiQA": 89.69, "Proverb-Quiz": 73.51, "VerbEval": 61.16, "Winogrande": 76.44, "Arc-Challenge": 86.75, "Arc-Easy": 94.22, "Feqh": 24.0, "Hallucination (Truthfulness)": 13.05, "P-Hellaswag": 83.69, "Law": 34.67, "AUT Multiple Choice": 50.8, "Parsi Literature": 35.91, "BoolQA": 89.8, "Reading Comprehension": 56.76, "PartExpert": 46.6, "MMLU Pro": 36.9, "Iranian Social Norms": 77.38, "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
  {"Model": "gemma-2-9b-it", "#Params (B)": 9.24, "Precision": "BF16", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": 64.03, "GSM8K": 17.4, "DC-Homograph": 59.26, "MC-Homograph": 90.55, "PiQA": 87.09, "Proverb-Quiz": 69.19, "VerbEval": 58.25, "Winogrande": 72.01, "Arc-Challenge": 84.29, "Arc-Easy": 93.16, "Feqh": 29.71, "Hallucination (Truthfulness)": 50.58, "P-Hellaswag": 80.82, "Law": 33.67, "AUT Multiple Choice": 48.5, "Parsi Literature": 38.1, "BoolQA": 89.7, "Reading Comprehension": 56.43, "PartExpert": 43.03, "MMLU Pro": 33.2, "Iranian Social Norms": 73.84, "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
+ {"Model": "QwQ-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": 60.71, "GSM8K": 29.3, "DC-Homograph": 58.33, "MC-Homograph": 88.25, "PiQA": 81.68, "Proverb-Quiz": 59.19, "VerbEval": 52.31, "Winogrande": 73.07, "Arc-Challenge": 84.94, "Arc-Easy": 90.8, "Feqh": 41.71, "Hallucination (Truthfulness)": 48.93, "P-Hellaswag": 82.22, "Law": 38.0, "AUT Multiple Choice": 49.3, "Parsi Literature": 37.71, "BoolQA": 88.5, "Reading Comprehension": 50.25, "PartExpert": 46.75, "MMLU Pro": 39.0, "Iranian Social Norms": 70.73, "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
+ {"Model": "c4ai-command-r-plus", "#Params (B)": 103.81, "Precision": "F16", "model_name_for_query": "CohereLabs/c4ai-command-r-plus", "GeneralKnowledge": 69.9, "GSM8K": 20.9, "DC-Homograph": 62.96, "MC-Homograph": 91.24, "PiQA": 85.49, "Proverb-Quiz": 80.27, "VerbEval": 55.68, "Winogrande": 69.97, "Arc-Challenge": 77.14, "Arc-Easy": 89.41, "Feqh": 33.14, "Hallucination (Truthfulness)": 27.45, "P-Hellaswag": 80.46, "Law": 40.0, "AUT Multiple Choice": 49.5, "Parsi Literature": 35.14, "BoolQA": 87.7, "Reading Comprehension": 67.08, "PartExpert": 42.59, "MMLU Pro": 27.8, "Iranian Social Norms": 70.37, "Model sha": "3de12c88b4c6c882389a6f424cc2074383aa17d8", "Hub License": "cc-by-nc-4.0"}
  {"Model": "gpt-4.1-nano-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 68.11, "GSM8K": 58.4, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 84.58, "Proverb-Quiz": 67.84, "VerbEval": 66.21, "Winogrande": 60.32, "Arc-Challenge": 81.41, "Arc-Easy": 91.55, "Feqh": 32.0, "Hallucination (Truthfulness)": 51.24, "P-Hellaswag": 77.96, "Law": 32.67, "AUT Multiple Choice": 46.1, "Parsi Literature": 36.42, "BoolQA": 81.7, "Reading Comprehension": 50.66, "PartExpert": 42.49, "MMLU Pro": 29.9, "Iranian Social Norms": 74.76, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "c4ai-command-r-08-2024", "#Params (B)": 32.29, "Precision": "F16", "model_name_for_query": "CohereLabs/c4ai-command-r-08-2024", "GeneralKnowledge": 72.45, "GSM8K": 18.1, "DC-Homograph": 50.93, "MC-Homograph": 80.88, "PiQA": 86.09, "Proverb-Quiz": 77.57, "VerbEval": 56.26, "Winogrande": 67.58, "Arc-Challenge": 81.09, "Arc-Easy": 89.84, "Feqh": 33.14, "Hallucination (Truthfulness)": 49.16, "P-Hellaswag": 79.65, "Law": 36.33, "AUT Multiple Choice": 51.4, "Parsi Literature": 32.95, "BoolQA": 81.4, "Reading Comprehension": 66.03, "PartExpert": 43.04, "MMLU Pro": 30.1, "Iranian Social Norms": 71.59, "Model sha": "96b61ca90ba9a25548d3d4bf68e1938b13506852", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "Qwen3-14B", "#Params (B)": 14.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": 56.38, "GSM8K": 31.1, "DC-Homograph": 55.56, "MC-Homograph": 87.56, "PiQA": 77.18, "Proverb-Quiz": 53.78, "VerbEval": 54.36, "Winogrande": 67.32, "Arc-Challenge": 84.29, "Arc-Easy": 91.02, "Feqh": 29.14, "Hallucination (Truthfulness)": 44.54, "P-Hellaswag": 80.97, "Law": 34.67, "AUT Multiple Choice": 44.8, "Parsi Literature": 35.39, "BoolQA": 87.6, "Reading Comprehension": 44.36, "PartExpert": 43.22, "MMLU Pro": 35.5, "Iranian Social Norms": 74.51, "Model sha": "40c069824f4251a91eefaf281ebe4c544efd3e18", "Hub License": "apache-2.0"}
+ {"Model": "Qwen2-57B-A14B-Instruct", "#Params (B)": 57.4, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-57B-A14B-Instruct", "GeneralKnowledge": 60.97, "GSM8K": 22.1, "DC-Homograph": 56.48, "MC-Homograph": 85.02, "PiQA": 76.88, "Proverb-Quiz": 58.11, "VerbEval": 52.31, "Winogrande": 65.63, "Arc-Challenge": 76.71, "Arc-Easy": 85.35, "Feqh": 28.0, "Hallucination (Truthfulness)": 46.75, "P-Hellaswag": 78.1, "Law": 30.67, "AUT Multiple Choice": 48.2, "Parsi Literature": 33.85, "BoolQA": 85.2, "Reading Comprehension": 57.74, "PartExpert": 40.2, "MMLU Pro": 27.0, "Iranian Social Norms": 71.89, "Model sha": "50896d66b39f1425d63720541a66c7df13e053c0", "Hub License": "apache-2.0"}
+ {"Model": "Qwen2.5-14B-Instruct", "#Params (B)": 14.77, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-14B-Instruct", "GeneralKnowledge": 60.46, "GSM8K": 38.8, "DC-Homograph": 59.26, "MC-Homograph": 86.87, "PiQA": 76.98, "Proverb-Quiz": 54.59, "VerbEval": 51.21, "Winogrande": 51.21, "Arc-Challenge": 82.26, "Arc-Easy": 87.91, "Feqh": 32.0, "Hallucination (Truthfulness)": 26.99, "P-Hellaswag": 78.99, "Law": 34.0, "AUT Multiple Choice": 49.7, "Parsi Literature": 33.85, "BoolQA": 91.1, "Reading Comprehension": 22.46, "PartExpert": 43.41, "MMLU Pro": 34.6, "Iranian Social Norms": 70.37, "Model sha": "cf98f3b3bbb457ad9e2bb7baf9a0125b6b88caa8", "Hub License": "apache-2.0"}
+ {"Model": "c4ai-command-r-v01", "#Params (B)": 34.98, "Precision": "F16", "model_name_for_query": "CohereLabs/c4ai-command-r-v01", "GeneralKnowledge": 66.07, "GSM8K": 8.4, "DC-Homograph": 50.0, "MC-Homograph": 83.18, "PiQA": 84.38, "Proverb-Quiz": 71.35, "VerbEval": 45.42, "Winogrande": 61.2, "Arc-Challenge": 75.11, "Arc-Easy": 86.31, "Feqh": 25.71, "Hallucination (Truthfulness)": 45.51, "P-Hellaswag": 79.28, "Law": 36.67, "AUT Multiple Choice": 46.8, "Parsi Literature": 32.18, "BoolQA": 82.1, "Reading Comprehension": 64.52, "PartExpert": 39.08, "MMLU Pro": 27.9, "Iranian Social Norms": 51.22, "Model sha": "760ddb6c203d87ebdbe3c9785b49570e1bf95585", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "aya-23-35B", "#Params (B)": 34.98, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": 63.27, "GSM8K": 10.0, "DC-Homograph": 55.56, "MC-Homograph": 83.64, "PiQA": 89.49, "Proverb-Quiz": 67.03, "VerbEval": 47.32, "Winogrande": 65.81, "Arc-Challenge": 77.56, "Arc-Easy": 90.16, "Feqh": 30.29, "Hallucination (Truthfulness)": 11.72, "P-Hellaswag": 79.87, "Law": 32.0, "AUT Multiple Choice": 48.7, "Parsi Literature": 31.92, "BoolQA": 86.2, "Reading Comprehension": 62.82, "PartExpert": 37.44, "MMLU Pro": 24.1, "Iranian Social Norms": 65.0, "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "Qwen3-30B-A3B", "#Params (B)": 30.53, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": 65.05, "GSM8K": 28.8, "DC-Homograph": 57.41, "MC-Homograph": 86.41, "PiQA": 72.47, "Proverb-Quiz": 50.81, "VerbEval": 48.09, "Winogrande": 65.28, "Arc-Challenge": 87.39, "Arc-Easy": 93.58, "Feqh": 23.43, "Hallucination (Truthfulness)": 3.54, "P-Hellaswag": 83.1, "Law": 35.33, "AUT Multiple Choice": 48.0, "Parsi Literature": 36.55, "BoolQA": 86.2, "Reading Comprehension": 66.24, "PartExpert": 41.13, "MMLU Pro": 36.3, "Iranian Social Norms": 44.21, "Model sha": "ad44e777bcd18fa416d9da3bd8f70d33ebb85d39", "Hub License": "apache-2.0"}
+ {"Model": "Qwen3-8B", "#Params (B)": 8.19, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": 49.23, "GSM8K": 25.7, "DC-Homograph": 50.93, "MC-Homograph": 82.95, "PiQA": 75.98, "Proverb-Quiz": 51.89, "VerbEval": 47.93, "Winogrande": 61.91, "Arc-Challenge": 80.24, "Arc-Easy": 87.38, "Feqh": 28.0, "Hallucination (Truthfulness)": 38.46, "P-Hellaswag": 80.38, "Law": 29.67, "AUT Multiple Choice": 46.0, "Parsi Literature": 33.2, "BoolQA": 86.4, "Reading Comprehension": 66.38, "PartExpert": 38.31, "MMLU Pro": 31.1, "Iranian Social Norms": 63.41, "Model sha": "b968826d9c46dd6066d109eabc6255188de91218", "Hub License": "apache-2.0"}
+ {"Model": "gemma-3n-E4B-it", "#Params (B)": 7.84, "Precision": "BF16", "model_name_for_query": "google/gemma-3n-E4B-it", "GeneralKnowledge": 59.44, "GSM8K": 10.0, "DC-Homograph": 52.78, "MC-Homograph": 78.57, "PiQA": 82.28, "Proverb-Quiz": 65.95, "VerbEval": 50.87, "Winogrande": 65.46, "Arc-Challenge": 75.85, "Arc-Easy": 85.45, "Feqh": 28.57, "Hallucination (Truthfulness)": 44.93, "P-Hellaswag": 77.96, "Law": 33.0, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.79, "BoolQA": 83.6, "Reading Comprehension": 46.92, "PartExpert": 38.8, "MMLU Pro": 27.3, "Iranian Social Norms": 72.26, "Model sha": "c1221e9c62e34a43ab7ffacd1be0ea71f126ef10", "Hub License": "gemma"}
+ {"Model": "aya-expanse-8b", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": 58.67, "GSM8K": 9.8, "DC-Homograph": 51.85, "MC-Homograph": 80.65, "PiQA": 80.18, "Proverb-Quiz": 60.0, "VerbEval": 48.06, "Winogrande": 64.04, "Arc-Challenge": 71.47, "Arc-Easy": 84.6, "Feqh": 29.71, "Hallucination (Truthfulness)": 23.52, "P-Hellaswag": 76.49, "Law": 32.33, "AUT Multiple Choice": 45.8, "Parsi Literature": 34.49, "BoolQA": 82.3, "Reading Comprehension": 61.98, "PartExpert": 35.56, "MMLU Pro": 21.9, "Iranian Social Norms": 71.71, "Model sha": "574bdb00b4dbbacae3d9666906045bafe5a5b44f", "Hub License": "cc-by-nc-4.0"}
  {"Model": "Hormoz-8B", "#Params (B)": 8.02, "Precision": "F32", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": 58.42, "GSM8K": 10.0, "DC-Homograph": 50.93, "MC-Homograph": 80.65, "PiQA": 80.68, "Proverb-Quiz": 60.27, "VerbEval": 47.29, "Winogrande": 64.39, "Arc-Challenge": 70.41, "Arc-Easy": 84.28, "Feqh": 28.57, "Hallucination (Truthfulness)": 23.66, "P-Hellaswag": 76.05, "Law": 30.33, "AUT Multiple Choice": 46.7, "Parsi Literature": 33.08, "BoolQA": 79.8, "Reading Comprehension": 61.11, "PartExpert": 35.68, "MMLU Pro": 21.5, "Iranian Social Norms": 70.3, "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
+ {"Model": "Qwen2.5-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": 51.02, "GSM8K": 18.0, "DC-Homograph": 52.78, "MC-Homograph": 79.26, "PiQA": 71.07, "Proverb-Quiz": 47.84, "VerbEval": 44.44, "Winogrande": 61.91, "Arc-Challenge": 72.33, "Arc-Easy": 81.5, "Feqh": 36.57, "Hallucination (Truthfulness)": 34.89, "P-Hellaswag": 74.8, "Law": 32.33, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.27, "BoolQA": 82.5, "Reading Comprehension": 58.43, "PartExpert": 37.24, "MMLU Pro": 26.7, "Iranian Social Norms": 64.51, "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
+ {"Model": "gemma-3n-E2B-it", "#Params (B)": 5.43, "Precision": "BF16", "model_name_for_query": "google/gemma-3n-E2B-it", "GeneralKnowledge": 51.79, "GSM8K": 6.9, "DC-Homograph": 49.07, "MC-Homograph": 75.35, "PiQA": 78.78, "Proverb-Quiz": 59.73, "VerbEval": 47.69, "Winogrande": 62.36, "Arc-Challenge": 70.83, "Arc-Easy": 84.17, "Feqh": 21.14, "Hallucination (Truthfulness)": 45.6, "P-Hellaswag": 77.08, "Law": 32.33, "AUT Multiple Choice": 42.0, "Parsi Literature": 31.53, "BoolQA": 83.6, "Reading Comprehension": 41.88, "PartExpert": 36.04, "MMLU Pro": 23.3, "Iranian Social Norms": 71.1, "Model sha": "5e092ebca197cdcd8d8b195040accf22693501bc", "Hub License": "gemma"}
+ {"Model": "gpt-3.5-turbo-0125\r\n", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 55.1, "GSM8K": 18.9, "DC-Homograph": 50.93, "MC-Homograph": 73.04, "PiQA": 70.87, "Proverb-Quiz": 48.65, "VerbEval": 47.75, "Winogrande": 55.27, "Arc-Challenge": 60.04, "Arc-Easy": 76.68, "Feqh": 27.43, "Hallucination (Truthfulness)": 39.93, "P-Hellaswag": 66.94, "Law": 30.0, "AUT Multiple Choice": 40.9, "Parsi Literature": 32.3, "BoolQA": 77.3, "Reading Comprehension": 63.17, "PartExpert": 35.28, "MMLU Pro": 23.4, "Iranian Social Norms": 70.37, "Model sha": "unknown", "Hub License": "unknown"}
+ {"Model": "Qwen3-4B", "#Params (B)": 4.02, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": 43.88, "GSM8K": 20.1, "DC-Homograph": 38.89, "MC-Homograph": 76.27, "PiQA": 66.07, "Proverb-Quiz": 45.41, "VerbEval": 41.23, "Winogrande": 54.56, "Arc-Challenge": 73.61, "Arc-Easy": 83.42, "Feqh": 30.29, "Hallucination (Truthfulness)": 25.29, "P-Hellaswag": 78.03, "Law": 30.33, "AUT Multiple Choice": 40.6, "Parsi Literature": 31.79, "BoolQA": 81.9, "Reading Comprehension": 63.43, "PartExpert": 37.28, "MMLU Pro": 28.9, "Iranian Social Norms": 68.72, "Model sha": "1cfa9a7208912126459214e8b04321603b3df60c", "Hub License": "apache-2.0"}
+ {"Model": "Meta-Llama-3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 10.4, "DC-Homograph": 41.67, "MC-Homograph": 81.11, "PiQA": 70.97, "Proverb-Quiz": 42.97, "VerbEval": 38.93, "Winogrande": 56.95, "Arc-Challenge": 66.77, "Arc-Easy": 76.47, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.23, "P-Hellaswag": 76.71, "Law": 32.0, "AUT Multiple Choice": 45.0, "Parsi Literature": 29.99, "BoolQA": 82.5, "Reading Comprehension": 54.79, "PartExpert": 36.3, "MMLU Pro": 26.0, "Iranian Social Norms": 70.06, "Model sha": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2", "Hub License": "llama3"}
+ {"Model": "Hermes-3-Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": 49.49, "GSM8K": 10.2, "DC-Homograph": 44.44, "MC-Homograph": 79.72, "PiQA": 70.37, "Proverb-Quiz": 47.84, "VerbEval": 48.94, "Winogrande": 55.18, "Arc-Challenge": 65.28, "Arc-Easy": 78.07, "Feqh": 30.29, "Hallucination (Truthfulness)": 45.2, "P-Hellaswag": 73.99, "Law": 31.67, "AUT Multiple Choice": 42.1, "Parsi Literature": 30.63, "BoolQA": 83.5, "Reading Comprehension": 56.4, "PartExpert": 35.61, "MMLU Pro": 24.1, "Iranian Social Norms": 54.88, "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
+ {"Model": "c4ai-command-r7b-12-2024", "#Params (B)": 8.02, "Precision": "BF16", "model_name_for_query": "CohereLabs/c4ai-command-r7b-12-2024", "GeneralKnowledge": 53.06, "GSM8K": 9.6, "DC-Homograph": 47.22, "MC-Homograph": 71.43, "PiQA": 75.98, "Proverb-Quiz": 56.22, "VerbEval": 42.52, "Winogrande": 58.99, "Arc-Challenge": 68.7, "Arc-Easy": 81.28, "Feqh": 26.29, "Hallucination (Truthfulness)": 42.34, "P-Hellaswag": 72.3, "Law": 31.33, "AUT Multiple Choice": 44.6, "Parsi Literature": 32.3, "BoolQA": 67.3, "Reading Comprehension": 61.38, "PartExpert": 35.53, "MMLU Pro": 23.1, "Iranian Social Norms": 56.28, "Model sha": "c3e86d9049f42adc1e1ee729286bca126e71f30e", "Hub License": "cc-by-nc-4.0"}
  {"Model": "Llama-3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": 52.55, "GSM8K": 12.0, "DC-Homograph": 43.52, "MC-Homograph": 79.03, "PiQA": 70.07, "Proverb-Quiz": 47.57, "VerbEval": 42.91, "Winogrande": 54.21, "Arc-Challenge": 68.91, "Arc-Easy": 80.11, "Feqh": 29.71, "Hallucination (Truthfulness)": 6.76, "P-Hellaswag": 79.79, "Law": 32.67, "AUT Multiple Choice": 44.9, "Parsi Literature": 32.3, "BoolQA": 82.7, "Reading Comprehension": 62.45, "PartExpert": 37.62, "MMLU Pro": 25.7, "Iranian Social Norms": 70.98, "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
+ {"Model": "Qwen2-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 14.5, "DC-Homograph": 54.63, "MC-Homograph": 72.81, "PiQA": 70.97, "Proverb-Quiz": 50.54, "VerbEval": 40.62, "Winogrande": 60.94, "Arc-Challenge": 69.12, "Arc-Easy": 80.75, "Feqh": 28.0, "Hallucination (Truthfulness)": 25.93, "P-Hellaswag": 76.71, "Law": 28.33, "AUT Multiple Choice": 40.4, "Parsi Literature": 31.4, "BoolQA": 79.0, "Reading Comprehension": 50.14, "PartExpert": 36.31, "MMLU Pro": 23.8, "Iranian Social Norms": 62.2, "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
+ {"Model": "gemma-3-4b-it", "#Params (B)": 4.3, "Precision": "BF16", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": 45.92, "GSM8K": 9.6, "DC-Homograph": 42.59, "MC-Homograph": 72.58, "PiQA": 72.77, "Proverb-Quiz": 53.78, "VerbEval": 45.3, "Winogrande": 55.09, "Arc-Challenge": 63.46, "Arc-Easy": 79.57, "Feqh": 21.14, "Hallucination (Truthfulness)": 46.04, "P-Hellaswag": 73.84, "Law": 27.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 30.24, "BoolQA": 78.6, "Reading Comprehension": 47.28, "PartExpert": 34.7, "MMLU Pro": 22.8, "Iranian Social Norms": 65.55, "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
+ {"Model": "Dorna2-Llama3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": 48.72, "GSM8K": 11.9, "DC-Homograph": 44.44, "MC-Homograph": 72.81, "PiQA": 69.97, "Proverb-Quiz": 42.97, "VerbEval": 42.06, "Winogrande": 54.47, "Arc-Challenge": 67.63, "Arc-Easy": 78.72, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.91, "P-Hellaswag": 78.91, "Law": 29.67, "AUT Multiple Choice": 41.0, "Parsi Literature": 27.28, "BoolQA": 81.8, "Reading Comprehension": 56.84, "PartExpert": 35.65, "MMLU Pro": 22.7, "Iranian Social Norms": 49.82, "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
+ {"Model": "Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": 49.23, "GSM8K": 10.8, "DC-Homograph": 46.3, "MC-Homograph": 72.12, "PiQA": 66.47, "Proverb-Quiz": 35.95, "VerbEval": 39.91, "Winogrande": 54.92, "Arc-Challenge": 63.35, "Arc-Easy": 75.08, "Feqh": 30.86, "Hallucination (Truthfulness)": 45.02, "P-Hellaswag": 76.34, "Law": 33.0, "AUT Multiple Choice": 42.6, "Parsi Literature": 27.41, "BoolQA": 71.6, "Reading Comprehension": 58.39, "PartExpert": 35.92, "MMLU Pro": 22.2, "Iranian Social Norms": 61.83, "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
+ {"Model": "aya-23-8B", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": 52.3, "GSM8K": 6.1, "DC-Homograph": 52.78, "MC-Homograph": 76.27, "PiQA": 80.78, "Proverb-Quiz": 44.32, "VerbEval": 39.3, "Winogrande": 57.13, "Arc-Challenge": 63.68, "Arc-Easy": 81.39, "Feqh": 29.14, "Hallucination (Truthfulness)": 0.6, "P-Hellaswag": 75.83, "Law": 28.33, "AUT Multiple Choice": 42.9, "Parsi Literature": 31.27, "BoolQA": 72.3, "Reading Comprehension": 60.31, "PartExpert": 33.33, "MMLU Pro": 19.9, "Iranian Social Norms": 70.73, "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
+ {"Model": "Meta-Llama-3-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": 47.7, "GSM8K": 10.3, "DC-Homograph": 41.67, "MC-Homograph": 74.42, "PiQA": 64.16, "Proverb-Quiz": 37.3, "VerbEval": 39.46, "Winogrande": 55.36, "Arc-Challenge": 62.07, "Arc-Easy": 75.83, "Feqh": 27.43, "Hallucination (Truthfulness)": 37.0, "P-Hellaswag": 76.49, "Law": 35.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 28.19, "BoolQA": 75.2, "Reading Comprehension": 57.69, "PartExpert": 35.1, "MMLU Pro": 22.8, "Iranian Social Norms": 54.02, "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
+ {"Model": "Dorna-Llama3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 64.85, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
+ {"Model": "Mistral-Nemo-Instruct-2407", "#Params (B)": 12.24, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-Nemo-Instruct-2407", "GeneralKnowledge": 41.07, "GSM8K": 12.8, "DC-Homograph": 51.85, "MC-Homograph": 80.88, "PiQA": 71.57, "Proverb-Quiz": 40.0, "VerbEval": 37.13, "Winogrande": 37.13, "Arc-Challenge": 65.92, "Arc-Easy": 79.89, "Feqh": 29.71, "Hallucination (Truthfulness)": 3.15, "P-Hellaswag": 76.93, "Law": 30.67, "AUT Multiple Choice": 38.7, "Parsi Literature": 30.89, "BoolQA": 69.0, "Reading Comprehension": 62.32, "PartExpert": 33.46, "MMLU Pro": 20.4, "Iranian Social Norms": 67.13, "Model sha": "04d8a90549d23fc6bd7f642064003592df51e9b3", "Hub License": "apache-2.0"}
+ {"Model": "Mixtral-8x7B-Instruct-v0.1", "#Params (B)": 46.7, "Precision": "BF16", "model_name_for_query": "mistralai/Mixtral-8x7B-Instruct-v0.1", "GeneralKnowledge": 35.97, "GSM8K": 11.4, "DC-Homograph": 50.0, "MC-Homograph": 68.43, "PiQA": 66.97, "Proverb-Quiz": 43.24, "VerbEval": 38.75, "Winogrande": 55.89, "Arc-Challenge": 55.88, "Arc-Easy": 65.45, "Feqh": 26.29, "Hallucination (Truthfulness)": 36.56, "P-Hellaswag": 74.06, "Law": 28.0, "AUT Multiple Choice": 33.2, "Parsi Literature": 27.16, "BoolQA": 76.5, "Reading Comprehension": 59.79, "PartExpert": 32.46, "MMLU Pro": 23.2, "Iranian Social Norms": 59.27, "Model sha": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61", "Hub License": "apache-2.0"}
+ {"Model": "gpt-oss-20b", "#Params (B)": 1.8, "Precision": "BF16", "model_name_for_query": "openai/gpt-oss-20b", "GeneralKnowledge": 54.85, "GSM8K": 9.0, "DC-Homograph": 61.11, "MC-Homograph": 61.29, "PiQA": 68.17, "Proverb-Quiz": 34.86, "VerbEval": 52.56, "Winogrande": 55.27, "Arc-Challenge": 70.19, "Arc-Easy": 83.32, "Feqh": 28.57, "Hallucination (Truthfulness)": 45.6, "P-Hellaswag": 46.22, "Law": 28.67, "AUT Multiple Choice": 42.0, "Parsi Literature": 30.24, "BoolQA": 64.0, "Reading Comprehension": 12.62, "PartExpert": 37.06, "MMLU Pro": 19.9, "Iranian Social Norms": 51.04, "Model sha": "d666cf3b67006cf8227666739edf25164aaffdeb", "Hub License": "apache-2.0"}
+ {"Model": "gemma-2-2b-it", "#Params (B)": 2.61, "Precision": "BF16", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 41.79, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
+ {"Model": "Mistral-Small-Instruct-2409", "#Params (B)": 22.24, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-Small-Instruct-2409", "GeneralKnowledge": 34.95, "GSM8K": 12.2, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 61.36, "Proverb-Quiz": 37.3, "VerbEval": 31.8, "Winogrande": 31.8, "Arc-Challenge": 55.24, "Arc-Easy": 61.6, "Feqh": 20.57, "Hallucination (Truthfulness)": 28.1, "P-Hellaswag": 75.46, "Law": 28.67, "AUT Multiple Choice": 34.6, "Parsi Literature": 28.7, "BoolQA": 84.8, "Reading Comprehension": 64.56, "PartExpert": 31.52, "MMLU Pro": 21.3, "Iranian Social Norms": 53.29, "Model sha": "4600506f6b13c7ef89e61a54263f4c9bf483de30", "Hub License": "other"}
+ {"Model": "Qwen2.5-3B-Instruct", "#Params (B)": 3.08, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-3B-Instruct", "GeneralKnowledge": 40.56, "GSM8K": 34.1, "DC-Homograph": 54.63, "MC-Homograph": 70.97, "PiQA": 62.16, "Proverb-Quiz": 41.08, "VerbEval": 38.66, "Winogrande": 38.66, "Arc-Challenge": 51.5, "Arc-Easy": 67.27, "Feqh": 24.57, "Hallucination (Truthfulness)": 33.09, "P-Hellaswag": 66.5, "Law": 30.33, "AUT Multiple Choice": 38.9, "Parsi Literature": 29.73, "BoolQA": 64.9, "Reading Comprehension": 40.41, "PartExpert": 33.35, "MMLU Pro": 21.1, "Iranian Social Norms": 30.24, "Model sha": "aa8e72537993ba99e69dfaafa59ed015b17504d1", "Hub License": "other"}
+ {"Model": "Qwen3-1.7B", "#Params (B)": 2.03, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-1.7B", "GeneralKnowledge": 32.91, "GSM8K": 7.9, "DC-Homograph": 48.15, "MC-Homograph": 63.13, "PiQA": 58.66, "Proverb-Quiz": 38.11, "VerbEval": 33.67, "Winogrande": 33.67, "Arc-Challenge": 52.78, "Arc-Easy": 64.17, "Feqh": 26.29, "Hallucination (Truthfulness)": 32.04, "P-Hellaswag": 69.14, "Law": 31.0, "AUT Multiple Choice": 32.6, "Parsi Literature": 29.99, "BoolQA": 72.1, "Reading Comprehension": 52.6, "PartExpert": 31.85, "MMLU Pro": 20.5, "Iranian Social Norms": 36.16, "Model sha": "70d244cc86ccca08cf5af4e1e306ecf908b1ad5e", "Hub License": "apache-2.0"}
+ {"Model": "Mistral-7B-Instruct-v0.3", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-7B-Instruct-v0.3", "GeneralKnowledge": 30.61, "GSM8K": 5.6, "DC-Homograph": 47.22, "MC-Homograph": 64.52, "PiQA": 59.96, "Proverb-Quiz": 38.38, "VerbEval": 32.57, "Winogrande": 50.93, "Arc-Challenge": 40.49, "Arc-Easy": 48.88, "Feqh": 33.71, "Hallucination (Truthfulness)": 28.8, "P-Hellaswag": 68.99, "Law": 24.67, "AUT Multiple Choice": 30.5, "Parsi Literature": 27.41, "BoolQA": 68.7, "Reading Comprehension": 55.47, "PartExpert": 29.19, "MMLU Pro": 17.5, "Iranian Social Norms": 56.83, "Model sha": "0d4b76e1efeb5eb6f6b5e757c79870472e04bd3a", "Hub License": "apache-2.0"}
+ {"Model": "Mistral-7B-Instruct-v0.2", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-7B-Instruct-v0.2", "GeneralKnowledge": 32.65, "GSM8K": 4.5, "DC-Homograph": 45.37, "MC-Homograph": 58.29, "PiQA": 56.46, "Proverb-Quiz": 35.41, "VerbEval": 32.05, "Winogrande": 50.04, "Arc-Challenge": 37.07, "Arc-Easy": 45.03, "Feqh": 24.57, "Hallucination (Truthfulness)": 54.96, "P-Hellaswag": 63.56, "Law": 27.33, "AUT Multiple Choice": 27.1, "Parsi Literature": 24.32, "BoolQA": 64.0, "Reading Comprehension": 48.19, "PartExpert": 28.26, "MMLU Pro": 16.4, "Iranian Social Norms": 51.04, "Model sha": "63a8b081895390a26e140280378bc85ec8bce07a", "Hub License": "apache-2.0"}
+ {"Model": "gemma-3-1b-it", "#Params (B)": 0.99, "Precision": "BF16", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 31.98, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
+ {"Model": "gpt-oss-120b", "#Params (B)": 1.8, "Precision": "BF16", "model_name_for_query": "openai/gpt-oss-20b", "GeneralKnowledge": 51.02, "GSM8K": 9.2, "DC-Homograph": 40.74, "MC-Homograph": 29.03, "PiQA": 55.36, "Proverb-Quiz": 24.05, "VerbEval": 55.19, "Winogrande": 58.9, "Arc-Challenge": 63.25, "Arc-Easy": 80.11, "Feqh": 34.86, "Hallucination (Truthfulness)": 25.62, "P-Hellaswag": 27.92, "Law": 25.0, "AUT Multiple Choice": 31.7, "Parsi Literature": 30.63, "BoolQA": 46.5, "Reading Comprehension": 5.32, "PartExpert": 31.82, "MMLU Pro": 18.6, "Iranian Social Norms": 38.23, "Model sha": "d666cf3b67006cf8227666739edf25164aaffdeb", "Hub License": "apache-2.0"}
+ {"Model": "PersianMind-v1.0", "#Params (B)": 0.0, "Precision": "F32", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": 0.0, "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
+ {"Model": "Qwen2-1.5B-Instruct", "#Params (B)": 1.54, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-1.5B-Instruct", "GeneralKnowledge": 28.06, "GSM8K": 3.8, "DC-Homograph": 50.93, "MC-Homograph": 56.45, "PiQA": 56.16, "Proverb-Quiz": 36.49, "VerbEval": 32.57, "Winogrande": 32.57, "Arc-Challenge": 37.18, "Arc-Easy": 48.98, "Feqh": 28.57, "Hallucination (Truthfulness)": 30.69, "P-Hellaswag": 64.07, "Law": 28.67, "AUT Multiple Choice": 33.1, "Parsi Literature": 26.77, "BoolQA": 63.8, "Reading Comprehension": 38.91, "PartExpert": 29.96, "MMLU Pro": 15.1, "Iranian Social Norms": 29.39, "Model sha": "ba1cf1846d7df0a0591d6c00649f57e798519da8", "Hub License": "apache-2.0"}
+ {"Model": "Qwen2.5-1.5B-Instruct", "#Params (B)": 1.54, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-1.5B-Instruct", "GeneralKnowledge": 28.32, "GSM8K": 5.7, "DC-Homograph": 47.22, "MC-Homograph": 61.29, "PiQA": 56.16, "Proverb-Quiz": 35.14, "VerbEval": 31.99, "Winogrande": 31.99, "Arc-Challenge": 42.63, "Arc-Easy": 48.02, "Feqh": 21.71, "Hallucination (Truthfulness)": 15.7, "P-Hellaswag": 70.1, "Law": 24.67, "AUT Multiple Choice": 34.2, "Parsi Literature": 27.67, "BoolQA": 70.0, "Reading Comprehension": 42.47, "PartExpert": 30.07, "MMLU Pro": 18.2, "Iranian Social Norms": 28.78, "Model sha": "989aa7980e4cf806f80c7fef2b1adb7bc71aa306", "Hub License": "apache-2.0"}
+ {"Model": "Mistral-7B-Instruct-v0.1", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "mistralai/Mistral-7B-Instruct-v0.1", "GeneralKnowledge": 27.3, "GSM8K": 4.5, "DC-Homograph": 53.7, "MC-Homograph": 40.78, "PiQA": 55.16, "Proverb-Quiz": 33.24, "VerbEval": 26.6, "Winogrande": 51.46, "Arc-Challenge": 29.7, "Arc-Easy": 33.48, "Feqh": 30.86, "Hallucination (Truthfulness)": 47.41, "P-Hellaswag": 65.61, "Law": 23.67, "AUT Multiple Choice": 28.6, "Parsi Literature": 24.45, "BoolQA": 63.6, "Reading Comprehension": 34.45, "PartExpert": 27.17, "MMLU Pro": 13.8, "Iranian Social Norms": 50.73, "Model sha": "ec5deb64f2c6e6fa90c1abf74a91d5c93a9669ca", "Hub License": "apache-2.0"}
+ {"Model": "Qwen3-0.6B", "#Params (B)": 0.75, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-0.6B", "GeneralKnowledge": 24.49, "GSM8K": 4.4, "DC-Homograph": 53.7, "MC-Homograph": 46.77, "PiQA": 52.85, "Proverb-Quiz": 34.59, "VerbEval": 23.14, "Winogrande": 23.14, "Arc-Challenge": 31.73, "Arc-Easy": 37.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 54.16, "P-Hellaswag": 62.23, "Law": 26.0, "AUT Multiple Choice": 27.3, "Parsi Literature": 28.96, "BoolQA": 67.6, "Reading Comprehension": 38.1, "PartExpert": 27.6, "MMLU Pro": 17.1, "Iranian Social Norms": 50.55, "Model sha": "c1899de289a04d12100db370d81485cdf75e47ca", "Hub License": "apache-2.0"}
  {"Model": "Llama-3.2-1B-Instruct", "#Params (B)": 1.23, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 38.0, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
  {"Model": "Maral-7B-alpha-1", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 42.04, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
+ {"Model": "Qwen2.5-0.5B-Instruct", "#Params (B)": 0.49, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-0.5B-Instruct", "GeneralKnowledge": 28.32, "GSM8K": 2.5, "DC-Homograph": 53.7, "MC-Homograph": 43.09, "PiQA": 53.35, "Proverb-Quiz": 34.32, "VerbEval": 28.41, "Winogrande": 28.41, "Arc-Challenge": 28.42, "Arc-Easy": 35.08, "Feqh": 26.86, "Hallucination (Truthfulness)": 63.75, "P-Hellaswag": 51.73, "Law": 24.67, "AUT Multiple Choice": 26.9, "Parsi Literature": 27.28, "BoolQA": 46.4, "Reading Comprehension": 25.22, "PartExpert": 27.01, "MMLU Pro": 14.2, "Iranian Social Norms": 30.3, "Model sha": "7ae557604adf67be50417f59c2c2f167def9a775", "Hub License": "apache-2.0"}
+ {"Model": "Llama-2-70b-chat-hf", "#Params (B)": 68.97, "Precision": "F16", "model_name_for_query": "meta-llama/Llama-2-70b-chat-hf", "GeneralKnowledge": 23.98, "GSM8K": 4.9, "DC-Homograph": 44.44, "MC-Homograph": 53.46, "PiQA": 54.75, "Proverb-Quiz": 28.38, "VerbEval": 24.03, "Winogrande": 49.16, "Arc-Challenge": 31.94, "Arc-Easy": 33.26, "Feqh": 30.29, "Hallucination (Truthfulness)": 2.18, "P-Hellaswag": 63.7, "Law": 27.67, "AUT Multiple Choice": 27.2, "Parsi Literature": 23.29, "BoolQA": 67.6, "Reading Comprehension": 0.0, "PartExpert": 26.58, "MMLU Pro": 17.1, "Iranian Social Norms": 51.65, "Model sha": "e9149a12809580e8602995856f8098ce973d1080", "Hub License": "llama2"}
+ {"Model": "Llama-2-7b-chat-hf", "#Params (B)": 6.73, "Precision": "F16", "model_name_for_query": "meta-llama/Llama-2-7b-chat-hf", "GeneralKnowledge": 26.53, "GSM8K": 3.3, "DC-Homograph": 46.3, "MC-Homograph": 37.56, "PiQA": 49.25, "Proverb-Quiz": 33.51, "VerbEval": 26.11, "Winogrande": 48.8, "Arc-Challenge": 27.99, "Arc-Easy": 29.52, "Feqh": 25.71, "Hallucination (Truthfulness)": 20.23, "P-Hellaswag": 57.75, "Law": 26.0, "AUT Multiple Choice": 26.4, "Parsi Literature": 25.35, "BoolQA": 64.1, "Reading Comprehension": 0.0, "PartExpert": 26.2, "MMLU Pro": 15.2, "Iranian Social Norms": 50.61, "Model sha": "f5db02db724555f92da89c216ac04704f23d4590", "Hub License": "llama2"}
+ {"Model": "Qwen2-0.5B-Instruct", "#Params (B)": 0.49, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-0.5B-Instruct", "GeneralKnowledge": 27.3, "GSM8K": 3.5, "DC-Homograph": 42.59, "MC-Homograph": 41.01, "PiQA": 54.75, "Proverb-Quiz": 32.43, "VerbEval": 27.15, "Winogrande": 27.15, "Arc-Challenge": 28.63, "Arc-Easy": 31.76, "Feqh": 25.14, "Hallucination (Truthfulness)": 22.78, "P-Hellaswag": 50.48, "Law": 27.33, "AUT Multiple Choice": 28.1, "Parsi Literature": 26.38, "BoolQA": 37.1, "Reading Comprehension": 22.28, "PartExpert": 27.35, "MMLU Pro": 13.6, "Iranian Social Norms": 46.4, "Model sha": "c540970f9e29518b1d8f06ab8b24cba66ad77b6d", "Hub License": "apache-2.0"}
+ {"Model": "Llama-2-13b-chat-hf", "#Params (B)": 0.0, "Precision": "F32", "model_name_for_query": "meta-llama/Llama-2-13b-chat-hf", "GeneralKnowledge": 28.57, "GSM8K": 3.3, "DC-Homograph": 44.44, "MC-Homograph": 37.33, "PiQA": 50.15, "Proverb-Quiz": 30.0, "VerbEval": 26.17, "Winogrande": 49.16, "Arc-Challenge": 28.42, "Arc-Easy": 29.52, "Feqh": 22.86, "Hallucination (Truthfulness)": 30.26, "P-Hellaswag": 36.96, "Law": 19.67, "AUT Multiple Choice": 25.4, "Parsi Literature": 26.13, "BoolQA": 63.9, "Reading Comprehension": 0.0, "PartExpert": 25.94, "MMLU Pro": 13.8, "Iranian Social Norms": 25.49, "Model sha": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "Hub License": "llama2"}