Files changed (1)
  1. README.md +121 -7

README.md CHANGED
@@ -8,15 +8,115 @@ model-index:
 - name: NinjaDolphin-7B
   results:
   - task:
-      type: text-generation # Required. Example: automatic-speech-recognition
+      type: text-generation
     dataset:
-      type: openai_humaneval # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: HumanEval # Required. A pretty name for the dataset. Example: Common Voice (French)
+      name: HumanEval
+      type: openai_humaneval
     metrics:
-    - type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 52.4390243902439 # Required. Example: 20.90
-      name: pass@1 # Optional. Example: Test WER
-      verified: false
+    - type: pass@1
+      value: 52.4390243902439
+      name: pass@1
+      verified: false
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: AI2 Reasoning Challenge (25-Shot)
+      type: ai2_arc
+      config: ARC-Challenge
+      split: test
+      args:
+        num_few_shot: 25
+    metrics:
+    - type: acc_norm
+      value: 65.61
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: HellaSwag (10-Shot)
+      type: hellaswag
+      split: validation
+      args:
+        num_few_shot: 10
+    metrics:
+    - type: acc_norm
+      value: 85.35
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU (5-Shot)
+      type: cais/mmlu
+      config: all
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 64.43
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: mc2
+      value: 54.94
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: Winogrande (5-shot)
+      type: winogrande
+      config: winogrande_xl
+      split: validation
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 80.27
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GSM8k (5-shot)
+      type: gsm8k
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 67.85
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=FelixChao/NinjaDolphin-7B
+      name: Open LLM Leaderboard
 ---
 
 # NinjaDolphin-7B
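For anyone who wants to consume the `model-index` block added above programmatically rather than by eye, here is a minimal sketch. It assumes a local copy of this README.md (the file path is an assumption for illustration) and relies only on PyYAML; the keys it walks (`model-index`, `results`, `dataset`, `metrics`) are exactly those shown in the hunk.

```python
# Illustrative sketch (not part of the card): parse the YAML front matter of a
# model card README and print each reported benchmark result.
# Assumes "README.md" is a local copy of this card.
import yaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two "---" markers.
_, front_matter, _body = text.split("---", 2)
metadata = yaml.safe_load(front_matter)

for entry in metadata["model-index"]:
    print(entry["name"])
    for result in entry["results"]:
        dataset_name = result["dataset"]["name"]
        for metric in result["metrics"]:
            print(f"  {dataset_name}: {metric['type']} = {metric['value']}")
```

Run against the new front matter, this would list the HumanEval pass@1 entry alongside the six Open LLM Leaderboard results this change adds.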
 
@@ -93,3 +193,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 |Winogrande (5-shot) |80.27|
 |GSM8k (5-shot) |67.85|
 
+
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_FelixChao__NinjaDolphin-7B)
+
+| Metric                          |Value|
+|---------------------------------|----:|
+|Avg.                             |69.74|
+|AI2 Reasoning Challenge (25-Shot)|65.61|
+|HellaSwag (10-Shot)              |85.35|
+|MMLU (5-Shot)                    |64.43|
+|TruthfulQA (0-shot)              |54.94|
+|Winogrande (5-shot)              |80.27|
+|GSM8k (5-shot)                   |67.85|
+
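As a quick sanity check on the table added above, the `Avg.` row appears to be the plain arithmetic mean of the six benchmark scores; the snippet below (illustrative only, not part of the card) reproduces it.

```python
# Mean of the six Open LLM Leaderboard scores reported in the table above.
scores = [65.61, 85.35, 64.43, 54.94, 80.27, 67.85]
print(round(sum(scores) / len(scores), 2))  # -> 69.74
```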