AaronWu901225 committed on
Commit
8047cd0
·
verified ·
1 Parent(s): 8bb92a7

Upload LoRA adapter folder

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +12 -0
  2. README.md +209 -0
  3. adapter_config.json +42 -0
  4. adapter_model.safetensors +3 -0
  5. checkpoint-1434/README.md +209 -0
  6. checkpoint-1434/adapter_config.json +42 -0
  7. checkpoint-1434/adapter_model.safetensors +3 -0
  8. checkpoint-1434/optimizer.pt +3 -0
  9. checkpoint-1434/rng_state.pth +3 -0
  10. checkpoint-1434/scheduler.pt +3 -0
  11. checkpoint-1434/special_tokens_map.json +27 -0
  12. checkpoint-1434/tokenizer.json +3 -0
  13. checkpoint-1434/tokenizer_config.json +2070 -0
  14. checkpoint-1434/trainer_state.json +1058 -0
  15. checkpoint-1434/training_args.bin +3 -0
  16. checkpoint-1912/README.md +209 -0
  17. checkpoint-1912/adapter_config.json +42 -0
  18. checkpoint-1912/adapter_model.safetensors +3 -0
  19. checkpoint-1912/optimizer.pt +3 -0
  20. checkpoint-1912/rng_state.pth +3 -0
  21. checkpoint-1912/scheduler.pt +3 -0
  22. checkpoint-1912/special_tokens_map.json +27 -0
  23. checkpoint-1912/tokenizer.json +3 -0
  24. checkpoint-1912/tokenizer_config.json +2070 -0
  25. checkpoint-1912/trainer_state.json +1402 -0
  26. checkpoint-1912/training_args.bin +3 -0
  27. checkpoint-2390/README.md +209 -0
  28. checkpoint-2390/adapter_config.json +42 -0
  29. checkpoint-2390/adapter_model.safetensors +3 -0
  30. checkpoint-2390/optimizer.pt +3 -0
  31. checkpoint-2390/rng_state.pth +3 -0
  32. checkpoint-2390/scheduler.pt +3 -0
  33. checkpoint-2390/special_tokens_map.json +27 -0
  34. checkpoint-2390/tokenizer.json +3 -0
  35. checkpoint-2390/tokenizer_config.json +2070 -0
  36. checkpoint-2390/trainer_state.json +1746 -0
  37. checkpoint-2390/training_args.bin +3 -0
  38. checkpoint-2868/README.md +209 -0
  39. checkpoint-2868/adapter_config.json +42 -0
  40. checkpoint-2868/adapter_model.safetensors +3 -0
  41. checkpoint-2868/optimizer.pt +3 -0
  42. checkpoint-2868/rng_state.pth +3 -0
  43. checkpoint-2868/scheduler.pt +3 -0
  44. checkpoint-2868/special_tokens_map.json +27 -0
  45. checkpoint-2868/tokenizer.json +3 -0
  46. checkpoint-2868/tokenizer_config.json +2070 -0
  47. checkpoint-2868/trainer_state.json +2083 -0
  48. checkpoint-2868/training_args.bin +3 -0
  49. checkpoint-3346/README.md +209 -0
  50. checkpoint-3346/adapter_config.json +42 -0
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1434/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1912/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-2390/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-2868/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-3346/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-3824/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ checkpoint-4302/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ checkpoint-4305/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ checkpoint-478/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ checkpoint-956/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ final/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c6008e1c411944d60a2071b0e6d0b690a8c2a1e05987e988f1e4e5734424128
3
+ size 671149168
checkpoint-1434/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
checkpoint-1434/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
checkpoint-1434/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46afe0209d02a435adc4b628228c72683504c7d141f1aa1ccec1726b3afcde89
3
+ size 671149168
checkpoint-1434/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4614214155c667f00d9c097d1259286474e1968780f10eca63e08af1841a8ad6
3
+ size 1342555602
checkpoint-1434/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d96c98f1c4a1d4739ec65fce3a9e15bd4c6cacdc12cb5f504e466ed2d3ff9005
3
+ size 14244
checkpoint-1434/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d95869f7c3821e99bd35ab74d4aa35ac511220b477bfdfeaceb13fe5c8e9e4d
3
+ size 1064
checkpoint-1434/special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|eot_id|>",
4
+ "<|eom_id|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin_of_text|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|eot_id|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|eot_id|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
checkpoint-1434/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a55c1a4c5e3af7f2fb2bc0cd245a09dabd742dc24e7cb3741db1e11c7fe1a52
3
+ size 17210019
checkpoint-1434/tokenizer_config.json ADDED
@@ -0,0 +1,2070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "additional_special_tokens": [
2053
+ "<|eot_id|>",
2054
+ "<|eom_id|>"
2055
+ ],
2056
+ "bos_token": "<|begin_of_text|>",
2057
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. 
The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}",
2058
+ "clean_up_tokenization_spaces": true,
2059
+ "eos_token": "<|eot_id|>",
2060
+ "extra_special_tokens": {},
2061
+ "model_input_names": [
2062
+ "input_ids",
2063
+ "attention_mask"
2064
+ ],
2065
+ "model_max_length": 16384,
2066
+ "pad_token": "<|eot_id|>",
2067
+ "padding_side": "right",
2068
+ "split_special_tokens": false,
2069
+ "tokenizer_class": "PreTrainedTokenizerFast"
2070
+ }
checkpoint-1434/trainer_state.json ADDED
@@ -0,0 +1,1058 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.21637588739395142,
3
+ "best_model_checkpoint": "./xlam_Qlora_out_eval/checkpoint-1434",
4
+ "epoch": 0.9986941760250718,
5
+ "eval_steps": 478,
6
+ "global_step": 1434,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006964394532950292,
13
+ "grad_norm": 0.29273906350135803,
14
+ "learning_rate": 1.5384615384615387e-06,
15
+ "loss": 1.8834,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.013928789065900584,
20
+ "grad_norm": 0.3107926845550537,
21
+ "learning_rate": 3.0769230769230774e-06,
22
+ "loss": 1.876,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.020893183598850874,
27
+ "grad_norm": 0.3098163902759552,
28
+ "learning_rate": 4.615384615384616e-06,
29
+ "loss": 1.9099,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.027857578131801168,
34
+ "grad_norm": 0.36715859174728394,
35
+ "learning_rate": 6.153846153846155e-06,
36
+ "loss": 1.8142,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.034821972664751455,
41
+ "grad_norm": 0.3511175215244293,
42
+ "learning_rate": 7.692307692307694e-06,
43
+ "loss": 1.7271,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.04178636719770175,
48
+ "grad_norm": 0.3458055853843689,
49
+ "learning_rate": 9.230769230769232e-06,
50
+ "loss": 1.6073,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.04875076173065204,
55
+ "grad_norm": 0.37719297409057617,
56
+ "learning_rate": 1.076923076923077e-05,
57
+ "loss": 1.4997,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.055715156263602336,
62
+ "grad_norm": 0.30273741483688354,
63
+ "learning_rate": 1.230769230769231e-05,
64
+ "loss": 1.2914,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.06267955079655263,
69
+ "grad_norm": 0.3690374195575714,
70
+ "learning_rate": 1.3846153846153847e-05,
71
+ "loss": 1.0086,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.06964394532950291,
76
+ "grad_norm": 0.3111755847930908,
77
+ "learning_rate": 1.5384615384615387e-05,
78
+ "loss": 0.7469,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0766083398624532,
83
+ "grad_norm": 0.13778822124004364,
84
+ "learning_rate": 1.6923076923076924e-05,
85
+ "loss": 0.5848,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.0835727343954035,
90
+ "grad_norm": 0.10629754513502121,
91
+ "learning_rate": 1.8461538461538465e-05,
92
+ "loss": 0.5366,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.09053712892835379,
97
+ "grad_norm": 0.09681010991334915,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5038,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.09750152346130408,
104
+ "grad_norm": 0.10756956040859222,
105
+ "learning_rate": 1.9952095808383235e-05,
106
+ "loss": 0.5002,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.10446591799425438,
111
+ "grad_norm": 0.10488647222518921,
112
+ "learning_rate": 1.9904191616766468e-05,
113
+ "loss": 0.5107,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.11143031252720467,
118
+ "grad_norm": 0.09359394758939743,
119
+ "learning_rate": 1.98562874251497e-05,
120
+ "loss": 0.4604,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.11839470706015495,
125
+ "grad_norm": 0.10392932593822479,
126
+ "learning_rate": 1.9808383233532935e-05,
127
+ "loss": 0.484,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.12535910159310526,
132
+ "grad_norm": 0.12986360490322113,
133
+ "learning_rate": 1.9760479041916168e-05,
134
+ "loss": 0.4527,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.13232349612605554,
139
+ "grad_norm": 0.14294420182704926,
140
+ "learning_rate": 1.97125748502994e-05,
141
+ "loss": 0.4661,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.13928789065900582,
146
+ "grad_norm": 0.18483737111091614,
147
+ "learning_rate": 1.9664670658682635e-05,
148
+ "loss": 0.4395,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.14625228519195613,
153
+ "grad_norm": 0.21114414930343628,
154
+ "learning_rate": 1.961676646706587e-05,
155
+ "loss": 0.4071,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.1532166797249064,
160
+ "grad_norm": 0.21779198944568634,
161
+ "learning_rate": 1.95688622754491e-05,
162
+ "loss": 0.3846,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.16018107425785671,
167
+ "grad_norm": 0.1294960230588913,
168
+ "learning_rate": 1.9520958083832338e-05,
169
+ "loss": 0.3952,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.167145468790807,
174
+ "grad_norm": 0.0927898958325386,
175
+ "learning_rate": 1.9473053892215568e-05,
176
+ "loss": 0.4112,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.1741098633237573,
181
+ "grad_norm": 0.10358591377735138,
182
+ "learning_rate": 1.9425149700598805e-05,
183
+ "loss": 0.406,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.18107425785670758,
188
+ "grad_norm": 0.13144026696681976,
189
+ "learning_rate": 1.9377245508982038e-05,
190
+ "loss": 0.4058,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.18803865238965786,
195
+ "grad_norm": 0.12745942175388336,
196
+ "learning_rate": 1.932934131736527e-05,
197
+ "loss": 0.3938,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.19500304692260817,
202
+ "grad_norm": 0.09832354635000229,
203
+ "learning_rate": 1.9281437125748505e-05,
204
+ "loss": 0.3819,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.20196744145555845,
209
+ "grad_norm": 0.0927552729845047,
210
+ "learning_rate": 1.9233532934131738e-05,
211
+ "loss": 0.3944,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.20893183598850876,
216
+ "grad_norm": 0.09627388417720795,
217
+ "learning_rate": 1.918562874251497e-05,
218
+ "loss": 0.348,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.21589623052145904,
223
+ "grad_norm": 0.09034324437379837,
224
+ "learning_rate": 1.9137724550898205e-05,
225
+ "loss": 0.3521,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.22286062505440934,
230
+ "grad_norm": 0.09156423062086105,
231
+ "learning_rate": 1.9089820359281438e-05,
232
+ "loss": 0.3663,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.22982501958735962,
237
+ "grad_norm": 0.0996803268790245,
238
+ "learning_rate": 1.904191616766467e-05,
239
+ "loss": 0.3347,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.2367894141203099,
244
+ "grad_norm": 0.09962292015552521,
245
+ "learning_rate": 1.8994011976047905e-05,
246
+ "loss": 0.373,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.2437538086532602,
251
+ "grad_norm": 0.08211114257574081,
252
+ "learning_rate": 1.894610778443114e-05,
253
+ "loss": 0.3641,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.2507182031862105,
258
+ "grad_norm": 0.10449781268835068,
259
+ "learning_rate": 1.889820359281437e-05,
260
+ "loss": 0.3358,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.2576825977191608,
265
+ "grad_norm": 0.11216866970062256,
266
+ "learning_rate": 1.8850299401197608e-05,
267
+ "loss": 0.372,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.2646469922521111,
272
+ "grad_norm": 0.11125332862138748,
273
+ "learning_rate": 1.8802395209580838e-05,
274
+ "loss": 0.3521,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.27161138678506136,
279
+ "grad_norm": 0.1620815545320511,
280
+ "learning_rate": 1.8754491017964075e-05,
281
+ "loss": 0.3726,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.27857578131801164,
286
+ "grad_norm": 0.11439554393291473,
287
+ "learning_rate": 1.8706586826347305e-05,
288
+ "loss": 0.3287,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.285540175850962,
293
+ "grad_norm": 0.10733096301555634,
294
+ "learning_rate": 1.865868263473054e-05,
295
+ "loss": 0.3459,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.29250457038391225,
300
+ "grad_norm": 0.12426561117172241,
301
+ "learning_rate": 1.8610778443113775e-05,
302
+ "loss": 0.3705,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.29946896491686253,
307
+ "grad_norm": 0.1148887649178505,
308
+ "learning_rate": 1.8562874251497008e-05,
309
+ "loss": 0.3216,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.3064333594498128,
314
+ "grad_norm": 0.10842527449131012,
315
+ "learning_rate": 1.851497005988024e-05,
316
+ "loss": 0.3502,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.31339775398276315,
321
+ "grad_norm": 0.11736515164375305,
322
+ "learning_rate": 1.8467065868263475e-05,
323
+ "loss": 0.3394,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.32036214851571343,
328
+ "grad_norm": 0.1381833553314209,
329
+ "learning_rate": 1.8419161676646708e-05,
330
+ "loss": 0.3166,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.3273265430486637,
335
+ "grad_norm": 0.10973569750785828,
336
+ "learning_rate": 1.837125748502994e-05,
337
+ "loss": 0.3245,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.3328980586750239,
342
+ "eval_loss": 0.3075329661369324,
343
+ "eval_runtime": 356.0276,
344
+ "eval_samples_per_second": 3.396,
345
+ "eval_steps_per_second": 1.699,
346
+ "step": 478
347
+ },
348
+ {
349
+ "epoch": 0.334290937581614,
350
+ "grad_norm": 0.13474580645561218,
351
+ "learning_rate": 1.8323353293413175e-05,
352
+ "loss": 0.2914,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.34125533211456427,
357
+ "grad_norm": 0.12812362611293793,
358
+ "learning_rate": 1.827544910179641e-05,
359
+ "loss": 0.3327,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.3482197266475146,
364
+ "grad_norm": 0.11661785840988159,
365
+ "learning_rate": 1.822754491017964e-05,
366
+ "loss": 0.3468,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.3551841211804649,
371
+ "grad_norm": 0.12990142405033112,
372
+ "learning_rate": 1.8179640718562878e-05,
373
+ "loss": 0.3318,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.36214851571341516,
378
+ "grad_norm": 0.1309327632188797,
379
+ "learning_rate": 1.8131736526946108e-05,
380
+ "loss": 0.2942,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.36911291024636544,
385
+ "grad_norm": 0.12246479839086533,
386
+ "learning_rate": 1.8083832335329345e-05,
387
+ "loss": 0.3751,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.3760773047793157,
392
+ "grad_norm": 0.14134123921394348,
393
+ "learning_rate": 1.8035928143712575e-05,
394
+ "loss": 0.3248,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.38304169931226606,
399
+ "grad_norm": 0.1433170884847641,
400
+ "learning_rate": 1.798802395209581e-05,
401
+ "loss": 0.3344,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.39000609384521634,
406
+ "grad_norm": 0.13364745676517487,
407
+ "learning_rate": 1.7940119760479045e-05,
408
+ "loss": 0.2928,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.3969704883781666,
413
+ "grad_norm": 0.13446496427059174,
414
+ "learning_rate": 1.7892215568862278e-05,
415
+ "loss": 0.3421,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.4039348829111169,
420
+ "grad_norm": 0.1586250364780426,
421
+ "learning_rate": 1.784431137724551e-05,
422
+ "loss": 0.3099,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.4108992774440672,
427
+ "grad_norm": 0.1283930391073227,
428
+ "learning_rate": 1.7796407185628745e-05,
429
+ "loss": 0.3123,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.4178636719770175,
434
+ "grad_norm": 0.14261852204799652,
435
+ "learning_rate": 1.7748502994011978e-05,
436
+ "loss": 0.2886,
437
+ "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.4248280665099678,
441
+ "grad_norm": 0.13329675793647766,
442
+ "learning_rate": 1.770059880239521e-05,
443
+ "loss": 0.3255,
444
+ "step": 610
445
+ },
446
+ {
447
+ "epoch": 0.4317924610429181,
448
+ "grad_norm": 0.13880160450935364,
449
+ "learning_rate": 1.7652694610778445e-05,
450
+ "loss": 0.326,
451
+ "step": 620
452
+ },
453
+ {
454
+ "epoch": 0.43875685557586835,
455
+ "grad_norm": 0.14388258755207062,
456
+ "learning_rate": 1.7604790419161678e-05,
457
+ "loss": 0.3269,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 0.4457212501088187,
462
+ "grad_norm": 0.1571163535118103,
463
+ "learning_rate": 1.755688622754491e-05,
464
+ "loss": 0.2919,
465
+ "step": 640
466
+ },
467
+ {
468
+ "epoch": 0.45268564464176897,
469
+ "grad_norm": 0.15307635068893433,
470
+ "learning_rate": 1.7508982035928145e-05,
471
+ "loss": 0.2773,
472
+ "step": 650
473
+ },
474
+ {
475
+ "epoch": 0.45965003917471925,
476
+ "grad_norm": 0.1438165158033371,
477
+ "learning_rate": 1.7461077844311378e-05,
478
+ "loss": 0.2847,
479
+ "step": 660
480
+ },
481
+ {
482
+ "epoch": 0.46661443370766953,
483
+ "grad_norm": 0.15620216727256775,
484
+ "learning_rate": 1.741317365269461e-05,
485
+ "loss": 0.29,
486
+ "step": 670
487
+ },
488
+ {
489
+ "epoch": 0.4735788282406198,
490
+ "grad_norm": 0.1414933055639267,
491
+ "learning_rate": 1.7365269461077845e-05,
492
+ "loss": 0.2754,
493
+ "step": 680
494
+ },
495
+ {
496
+ "epoch": 0.48054322277357014,
497
+ "grad_norm": 0.15853798389434814,
498
+ "learning_rate": 1.7317365269461078e-05,
499
+ "loss": 0.3062,
500
+ "step": 690
501
+ },
502
+ {
503
+ "epoch": 0.4875076173065204,
504
+ "grad_norm": 0.17159980535507202,
505
+ "learning_rate": 1.726946107784431e-05,
506
+ "loss": 0.2953,
507
+ "step": 700
508
+ },
509
+ {
510
+ "epoch": 0.4944720118394707,
511
+ "grad_norm": 0.16937299072742462,
512
+ "learning_rate": 1.7221556886227548e-05,
513
+ "loss": 0.2895,
514
+ "step": 710
515
+ },
516
+ {
517
+ "epoch": 0.501436406372421,
518
+ "grad_norm": 0.1455833464860916,
519
+ "learning_rate": 1.717365269461078e-05,
520
+ "loss": 0.2912,
521
+ "step": 720
522
+ },
523
+ {
524
+ "epoch": 0.5084008009053713,
525
+ "grad_norm": 0.15824872255325317,
526
+ "learning_rate": 1.7125748502994015e-05,
527
+ "loss": 0.3012,
528
+ "step": 730
529
+ },
530
+ {
531
+ "epoch": 0.5153651954383216,
532
+ "grad_norm": 0.1499851644039154,
533
+ "learning_rate": 1.7077844311377248e-05,
534
+ "loss": 0.263,
535
+ "step": 740
536
+ },
537
+ {
538
+ "epoch": 0.5223295899712719,
539
+ "grad_norm": 0.1611323356628418,
540
+ "learning_rate": 1.702994011976048e-05,
541
+ "loss": 0.2595,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 0.5292939845042222,
546
+ "grad_norm": 0.16144199669361115,
547
+ "learning_rate": 1.6982035928143714e-05,
548
+ "loss": 0.2939,
549
+ "step": 760
550
+ },
551
+ {
552
+ "epoch": 0.5362583790371724,
553
+ "grad_norm": 0.13433364033699036,
554
+ "learning_rate": 1.6934131736526948e-05,
555
+ "loss": 0.2817,
556
+ "step": 770
557
+ },
558
+ {
559
+ "epoch": 0.5432227735701227,
560
+ "grad_norm": 0.15188777446746826,
561
+ "learning_rate": 1.688622754491018e-05,
562
+ "loss": 0.2804,
563
+ "step": 780
564
+ },
565
+ {
566
+ "epoch": 0.550187168103073,
567
+ "grad_norm": 0.1530751883983612,
568
+ "learning_rate": 1.6838323353293414e-05,
569
+ "loss": 0.298,
570
+ "step": 790
571
+ },
572
+ {
573
+ "epoch": 0.5571515626360233,
574
+ "grad_norm": 0.14582635462284088,
575
+ "learning_rate": 1.6790419161676648e-05,
576
+ "loss": 0.3177,
577
+ "step": 800
578
+ },
579
+ {
580
+ "epoch": 0.5641159571689737,
581
+ "grad_norm": 0.174430251121521,
582
+ "learning_rate": 1.674251497005988e-05,
583
+ "loss": 0.2624,
584
+ "step": 810
585
+ },
586
+ {
587
+ "epoch": 0.571080351701924,
588
+ "grad_norm": 0.1558169573545456,
589
+ "learning_rate": 1.6694610778443114e-05,
590
+ "loss": 0.2631,
591
+ "step": 820
592
+ },
593
+ {
594
+ "epoch": 0.5780447462348742,
595
+ "grad_norm": 0.17108450829982758,
596
+ "learning_rate": 1.6646706586826348e-05,
597
+ "loss": 0.2903,
598
+ "step": 830
599
+ },
600
+ {
601
+ "epoch": 0.5850091407678245,
602
+ "grad_norm": 0.16796821355819702,
603
+ "learning_rate": 1.659880239520958e-05,
604
+ "loss": 0.2928,
605
+ "step": 840
606
+ },
607
+ {
608
+ "epoch": 0.5919735353007748,
609
+ "grad_norm": 0.16556940972805023,
610
+ "learning_rate": 1.6550898203592814e-05,
611
+ "loss": 0.2882,
612
+ "step": 850
613
+ },
614
+ {
615
+ "epoch": 0.5989379298337251,
616
+ "grad_norm": 0.1716109961271286,
617
+ "learning_rate": 1.650299401197605e-05,
618
+ "loss": 0.2814,
619
+ "step": 860
620
+ },
621
+ {
622
+ "epoch": 0.6059023243666753,
623
+ "grad_norm": 0.1695253700017929,
624
+ "learning_rate": 1.645508982035928e-05,
625
+ "loss": 0.2909,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 0.6128667188996256,
630
+ "grad_norm": 0.1896178424358368,
631
+ "learning_rate": 1.6407185628742518e-05,
632
+ "loss": 0.2909,
633
+ "step": 880
634
+ },
635
+ {
636
+ "epoch": 0.6198311134325759,
637
+ "grad_norm": 0.1624131202697754,
638
+ "learning_rate": 1.6359281437125748e-05,
639
+ "loss": 0.2672,
640
+ "step": 890
641
+ },
642
+ {
643
+ "epoch": 0.6267955079655263,
644
+ "grad_norm": 0.16821731626987457,
645
+ "learning_rate": 1.6311377245508984e-05,
646
+ "loss": 0.2626,
647
+ "step": 900
648
+ },
649
+ {
650
+ "epoch": 0.6337599024984766,
651
+ "grad_norm": 0.17026785016059875,
652
+ "learning_rate": 1.6263473053892214e-05,
653
+ "loss": 0.2735,
654
+ "step": 910
655
+ },
656
+ {
657
+ "epoch": 0.6407242970314269,
658
+ "grad_norm": 0.19429761171340942,
659
+ "learning_rate": 1.621556886227545e-05,
660
+ "loss": 0.2605,
661
+ "step": 920
662
+ },
663
+ {
664
+ "epoch": 0.6476886915643771,
665
+ "grad_norm": 0.16931040585041046,
666
+ "learning_rate": 1.6167664670658684e-05,
667
+ "loss": 0.2722,
668
+ "step": 930
669
+ },
670
+ {
671
+ "epoch": 0.6546530860973274,
672
+ "grad_norm": 0.20588335394859314,
673
+ "learning_rate": 1.6119760479041918e-05,
674
+ "loss": 0.301,
675
+ "step": 940
676
+ },
677
+ {
678
+ "epoch": 0.6616174806302777,
679
+ "grad_norm": 0.16699948906898499,
680
+ "learning_rate": 1.607185628742515e-05,
681
+ "loss": 0.2646,
682
+ "step": 950
683
+ },
684
+ {
685
+ "epoch": 0.6657961173500478,
686
+ "eval_loss": 0.2465677410364151,
687
+ "eval_runtime": 356.1016,
688
+ "eval_samples_per_second": 3.395,
689
+ "eval_steps_per_second": 1.699,
690
+ "step": 956
691
+ },
692
+ {
693
+ "epoch": 0.668581875163228,
694
+ "grad_norm": 0.20734217762947083,
695
+ "learning_rate": 1.6023952095808384e-05,
696
+ "loss": 0.2476,
697
+ "step": 960
698
+ },
699
+ {
700
+ "epoch": 0.6755462696961783,
701
+ "grad_norm": 0.21317210793495178,
702
+ "learning_rate": 1.5976047904191618e-05,
703
+ "loss": 0.2434,
704
+ "step": 970
705
+ },
706
+ {
707
+ "epoch": 0.6825106642291285,
708
+ "grad_norm": 0.19125515222549438,
709
+ "learning_rate": 1.592814371257485e-05,
710
+ "loss": 0.2823,
711
+ "step": 980
712
+ },
713
+ {
714
+ "epoch": 0.6894750587620788,
715
+ "grad_norm": 0.1651381254196167,
716
+ "learning_rate": 1.5880239520958084e-05,
717
+ "loss": 0.2769,
718
+ "step": 990
719
+ },
720
+ {
721
+ "epoch": 0.6964394532950292,
722
+ "grad_norm": 0.18061324954032898,
723
+ "learning_rate": 1.583233532934132e-05,
724
+ "loss": 0.2788,
725
+ "step": 1000
726
+ },
727
+ {
728
+ "epoch": 0.7034038478279795,
729
+ "grad_norm": 0.17788180708885193,
730
+ "learning_rate": 1.578443113772455e-05,
731
+ "loss": 0.2596,
732
+ "step": 1010
733
+ },
734
+ {
735
+ "epoch": 0.7103682423609298,
736
+ "grad_norm": 0.19635842740535736,
737
+ "learning_rate": 1.5736526946107788e-05,
738
+ "loss": 0.2391,
739
+ "step": 1020
740
+ },
741
+ {
742
+ "epoch": 0.71733263689388,
743
+ "grad_norm": 0.18000468611717224,
744
+ "learning_rate": 1.5688622754491018e-05,
745
+ "loss": 0.2589,
746
+ "step": 1030
747
+ },
748
+ {
749
+ "epoch": 0.7242970314268303,
750
+ "grad_norm": 0.18016283214092255,
751
+ "learning_rate": 1.5640718562874254e-05,
752
+ "loss": 0.2673,
753
+ "step": 1040
754
+ },
755
+ {
756
+ "epoch": 0.7312614259597806,
757
+ "grad_norm": 0.2085445076227188,
758
+ "learning_rate": 1.5592814371257484e-05,
759
+ "loss": 0.295,
760
+ "step": 1050
761
+ },
762
+ {
763
+ "epoch": 0.7382258204927309,
764
+ "grad_norm": 0.1937682181596756,
765
+ "learning_rate": 1.554491017964072e-05,
766
+ "loss": 0.2741,
767
+ "step": 1060
768
+ },
769
+ {
770
+ "epoch": 0.7451902150256812,
771
+ "grad_norm": 0.18496188521385193,
772
+ "learning_rate": 1.5497005988023954e-05,
773
+ "loss": 0.2485,
774
+ "step": 1070
775
+ },
776
+ {
777
+ "epoch": 0.7521546095586314,
778
+ "grad_norm": 0.21024686098098755,
779
+ "learning_rate": 1.5449101796407188e-05,
780
+ "loss": 0.2569,
781
+ "step": 1080
782
+ },
783
+ {
784
+ "epoch": 0.7591190040915818,
785
+ "grad_norm": 0.1960270255804062,
786
+ "learning_rate": 1.540119760479042e-05,
787
+ "loss": 0.2335,
788
+ "step": 1090
789
+ },
790
+ {
791
+ "epoch": 0.7660833986245321,
792
+ "grad_norm": 0.19557876884937286,
793
+ "learning_rate": 1.5353293413173654e-05,
794
+ "loss": 0.279,
795
+ "step": 1100
796
+ },
797
+ {
798
+ "epoch": 0.7730477931574824,
799
+ "grad_norm": 0.17300210893154144,
800
+ "learning_rate": 1.5305389221556888e-05,
801
+ "loss": 0.249,
802
+ "step": 1110
803
+ },
804
+ {
805
+ "epoch": 0.7800121876904327,
806
+ "grad_norm": 0.1768539994955063,
807
+ "learning_rate": 1.5257485029940121e-05,
808
+ "loss": 0.2415,
809
+ "step": 1120
810
+ },
811
+ {
812
+ "epoch": 0.786976582223383,
813
+ "grad_norm": 0.20632915198802948,
814
+ "learning_rate": 1.5209580838323354e-05,
815
+ "loss": 0.2312,
816
+ "step": 1130
817
+ },
818
+ {
819
+ "epoch": 0.7939409767563332,
820
+ "grad_norm": 0.1948348730802536,
821
+ "learning_rate": 1.516167664670659e-05,
822
+ "loss": 0.2415,
823
+ "step": 1140
824
+ },
825
+ {
826
+ "epoch": 0.8009053712892835,
827
+ "grad_norm": 0.16288821399211884,
828
+ "learning_rate": 1.511377245508982e-05,
829
+ "loss": 0.2571,
830
+ "step": 1150
831
+ },
832
+ {
833
+ "epoch": 0.8078697658222338,
834
+ "grad_norm": 0.21365194022655487,
835
+ "learning_rate": 1.5065868263473056e-05,
836
+ "loss": 0.2594,
837
+ "step": 1160
838
+ },
839
+ {
840
+ "epoch": 0.8148341603551841,
841
+ "grad_norm": 0.20960865914821625,
842
+ "learning_rate": 1.5017964071856287e-05,
843
+ "loss": 0.2537,
844
+ "step": 1170
845
+ },
846
+ {
847
+ "epoch": 0.8217985548881344,
848
+ "grad_norm": 0.20458345115184784,
849
+ "learning_rate": 1.4970059880239522e-05,
850
+ "loss": 0.2675,
851
+ "step": 1180
852
+ },
853
+ {
854
+ "epoch": 0.8287629494210847,
855
+ "grad_norm": 0.22388549149036407,
856
+ "learning_rate": 1.4922155688622754e-05,
857
+ "loss": 0.2525,
858
+ "step": 1190
859
+ },
860
+ {
861
+ "epoch": 0.835727343954035,
862
+ "grad_norm": 0.2139691859483719,
863
+ "learning_rate": 1.4874251497005989e-05,
864
+ "loss": 0.251,
865
+ "step": 1200
866
+ },
867
+ {
868
+ "epoch": 0.8426917384869853,
869
+ "grad_norm": 0.18761980533599854,
870
+ "learning_rate": 1.4826347305389224e-05,
871
+ "loss": 0.2328,
872
+ "step": 1210
873
+ },
874
+ {
875
+ "epoch": 0.8496561330199356,
876
+ "grad_norm": 0.183973029255867,
877
+ "learning_rate": 1.4778443113772456e-05,
878
+ "loss": 0.2239,
879
+ "step": 1220
880
+ },
881
+ {
882
+ "epoch": 0.8566205275528859,
883
+ "grad_norm": 0.23718492686748505,
884
+ "learning_rate": 1.473053892215569e-05,
885
+ "loss": 0.2423,
886
+ "step": 1230
887
+ },
888
+ {
889
+ "epoch": 0.8635849220858361,
890
+ "grad_norm": 0.23875045776367188,
891
+ "learning_rate": 1.4682634730538922e-05,
892
+ "loss": 0.2365,
893
+ "step": 1240
894
+ },
895
+ {
896
+ "epoch": 0.8705493166187864,
897
+ "grad_norm": 0.21570812165737152,
898
+ "learning_rate": 1.4634730538922157e-05,
899
+ "loss": 0.268,
900
+ "step": 1250
901
+ },
902
+ {
903
+ "epoch": 0.8775137111517367,
904
+ "grad_norm": 0.20839928090572357,
905
+ "learning_rate": 1.4586826347305389e-05,
906
+ "loss": 0.2502,
907
+ "step": 1260
908
+ },
909
+ {
910
+ "epoch": 0.884478105684687,
911
+ "grad_norm": 0.21605758368968964,
912
+ "learning_rate": 1.4538922155688624e-05,
913
+ "loss": 0.2365,
914
+ "step": 1270
915
+ },
916
+ {
917
+ "epoch": 0.8914425002176374,
918
+ "grad_norm": 0.23300880193710327,
919
+ "learning_rate": 1.4491017964071859e-05,
920
+ "loss": 0.2404,
921
+ "step": 1280
922
+ },
923
+ {
924
+ "epoch": 0.8984068947505877,
925
+ "grad_norm": 0.21796733140945435,
926
+ "learning_rate": 1.444311377245509e-05,
927
+ "loss": 0.2704,
928
+ "step": 1290
929
+ },
930
+ {
931
+ "epoch": 0.9053712892835379,
932
+ "grad_norm": 0.25555598735809326,
933
+ "learning_rate": 1.4395209580838326e-05,
934
+ "loss": 0.2674,
935
+ "step": 1300
936
+ },
937
+ {
938
+ "epoch": 0.9123356838164882,
939
+ "grad_norm": 0.22284413874149323,
940
+ "learning_rate": 1.4347305389221557e-05,
941
+ "loss": 0.2471,
942
+ "step": 1310
943
+ },
944
+ {
945
+ "epoch": 0.9193000783494385,
946
+ "grad_norm": 0.22234566509723663,
947
+ "learning_rate": 1.4299401197604792e-05,
948
+ "loss": 0.2529,
949
+ "step": 1320
950
+ },
951
+ {
952
+ "epoch": 0.9262644728823888,
953
+ "grad_norm": 0.221883624792099,
954
+ "learning_rate": 1.4251497005988024e-05,
955
+ "loss": 0.1932,
956
+ "step": 1330
957
+ },
958
+ {
959
+ "epoch": 0.9332288674153391,
960
+ "grad_norm": 0.22873520851135254,
961
+ "learning_rate": 1.4203592814371259e-05,
962
+ "loss": 0.2412,
963
+ "step": 1340
964
+ },
965
+ {
966
+ "epoch": 0.9401932619482893,
967
+ "grad_norm": 0.21307863295078278,
968
+ "learning_rate": 1.4155688622754492e-05,
969
+ "loss": 0.2384,
970
+ "step": 1350
971
+ },
972
+ {
973
+ "epoch": 0.9471576564812396,
974
+ "grad_norm": 0.21296364068984985,
975
+ "learning_rate": 1.4107784431137726e-05,
976
+ "loss": 0.2318,
977
+ "step": 1360
978
+ },
979
+ {
980
+ "epoch": 0.95412205101419,
981
+ "grad_norm": 0.21060685813426971,
982
+ "learning_rate": 1.405988023952096e-05,
983
+ "loss": 0.2323,
984
+ "step": 1370
985
+ },
986
+ {
987
+ "epoch": 0.9610864455471403,
988
+ "grad_norm": 0.23930418491363525,
989
+ "learning_rate": 1.4011976047904192e-05,
990
+ "loss": 0.2398,
991
+ "step": 1380
992
+ },
993
+ {
994
+ "epoch": 0.9680508400800906,
995
+ "grad_norm": 0.22837185859680176,
996
+ "learning_rate": 1.3964071856287427e-05,
997
+ "loss": 0.233,
998
+ "step": 1390
999
+ },
1000
+ {
1001
+ "epoch": 0.9750152346130408,
1002
+ "grad_norm": 0.21798169612884521,
1003
+ "learning_rate": 1.3916167664670659e-05,
1004
+ "loss": 0.2161,
1005
+ "step": 1400
1006
+ },
1007
+ {
1008
+ "epoch": 0.9819796291459911,
1009
+ "grad_norm": 0.2846814692020416,
1010
+ "learning_rate": 1.3868263473053894e-05,
1011
+ "loss": 0.2301,
1012
+ "step": 1410
1013
+ },
1014
+ {
1015
+ "epoch": 0.9889440236789414,
1016
+ "grad_norm": 0.22030217945575714,
1017
+ "learning_rate": 1.3820359281437127e-05,
1018
+ "loss": 0.2017,
1019
+ "step": 1420
1020
+ },
1021
+ {
1022
+ "epoch": 0.9959084182118917,
1023
+ "grad_norm": 0.25554022192955017,
1024
+ "learning_rate": 1.377245508982036e-05,
1025
+ "loss": 0.2296,
1026
+ "step": 1430
1027
+ },
1028
+ {
1029
+ "epoch": 0.9986941760250718,
1030
+ "eval_loss": 0.21637588739395142,
1031
+ "eval_runtime": 356.0721,
1032
+ "eval_samples_per_second": 3.395,
1033
+ "eval_steps_per_second": 1.699,
1034
+ "step": 1434
1035
+ }
1036
+ ],
1037
+ "logging_steps": 10,
1038
+ "max_steps": 4305,
1039
+ "num_input_tokens_seen": 0,
1040
+ "num_train_epochs": 3,
1041
+ "save_steps": 478,
1042
+ "stateful_callbacks": {
1043
+ "TrainerControl": {
1044
+ "args": {
1045
+ "should_epoch_stop": false,
1046
+ "should_evaluate": false,
1047
+ "should_log": false,
1048
+ "should_save": true,
1049
+ "should_training_stop": false
1050
+ },
1051
+ "attributes": {}
1052
+ }
1053
+ },
1054
+ "total_flos": 6.68888676426793e+17,
1055
+ "train_batch_size": 2,
1056
+ "trial_name": null,
1057
+ "trial_params": null
1058
+ }
checkpoint-1434/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:956bd3fe04334201160cff0d2e3faf62e32c6c8f709339d63e01a7d4b73b24d0
3
+ size 5560
checkpoint-1912/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
checkpoint-1912/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
checkpoint-1912/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ea3d29d25191471cf841402ee92d3a3e6e8703263d92a49b5cf251f1843ee8
3
+ size 671149168
checkpoint-1912/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600dfa6960d41b89b6a1d41f9b3bd5b791aba4b0e6554bb90e731934413cf6d7
3
+ size 1342555602
checkpoint-1912/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2b1a8f992a3995f905827a704361cb8085a3ed3a353f94109da49c5641e67c
3
+ size 14244
checkpoint-1912/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd3c490c908becf27d1f2471f166f418ed21eeda9029b219116d716d1a232247
3
+ size 1064
checkpoint-1912/special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|eot_id|>",
4
+ "<|eom_id|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin_of_text|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|eot_id|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|eot_id|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
checkpoint-1912/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a55c1a4c5e3af7f2fb2bc0cd245a09dabd742dc24e7cb3741db1e11c7fe1a52
3
+ size 17210019
checkpoint-1912/tokenizer_config.json ADDED
@@ -0,0 +1,2070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "additional_special_tokens": [
2053
+ "<|eot_id|>",
2054
+ "<|eom_id|>"
2055
+ ],
2056
+ "bos_token": "<|begin_of_text|>",
2057
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. 
The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}",
2058
+ "clean_up_tokenization_spaces": true,
2059
+ "eos_token": "<|eot_id|>",
2060
+ "extra_special_tokens": {},
2061
+ "model_input_names": [
2062
+ "input_ids",
2063
+ "attention_mask"
2064
+ ],
2065
+ "model_max_length": 16384,
2066
+ "pad_token": "<|eot_id|>",
2067
+ "padding_side": "right",
2068
+ "split_special_tokens": false,
2069
+ "tokenizer_class": "PreTrainedTokenizerFast"
2070
+ }
checkpoint-1912/trainer_state.json ADDED
@@ -0,0 +1,1402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.19295775890350342,
3
+ "best_model_checkpoint": "./xlam_Qlora_out_eval/checkpoint-1912",
4
+ "epoch": 1.3317663445634196,
5
+ "eval_steps": 478,
6
+ "global_step": 1912,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006964394532950292,
13
+ "grad_norm": 0.29273906350135803,
14
+ "learning_rate": 1.5384615384615387e-06,
15
+ "loss": 1.8834,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.013928789065900584,
20
+ "grad_norm": 0.3107926845550537,
21
+ "learning_rate": 3.0769230769230774e-06,
22
+ "loss": 1.876,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.020893183598850874,
27
+ "grad_norm": 0.3098163902759552,
28
+ "learning_rate": 4.615384615384616e-06,
29
+ "loss": 1.9099,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.027857578131801168,
34
+ "grad_norm": 0.36715859174728394,
35
+ "learning_rate": 6.153846153846155e-06,
36
+ "loss": 1.8142,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.034821972664751455,
41
+ "grad_norm": 0.3511175215244293,
42
+ "learning_rate": 7.692307692307694e-06,
43
+ "loss": 1.7271,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.04178636719770175,
48
+ "grad_norm": 0.3458055853843689,
49
+ "learning_rate": 9.230769230769232e-06,
50
+ "loss": 1.6073,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.04875076173065204,
55
+ "grad_norm": 0.37719297409057617,
56
+ "learning_rate": 1.076923076923077e-05,
57
+ "loss": 1.4997,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.055715156263602336,
62
+ "grad_norm": 0.30273741483688354,
63
+ "learning_rate": 1.230769230769231e-05,
64
+ "loss": 1.2914,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.06267955079655263,
69
+ "grad_norm": 0.3690374195575714,
70
+ "learning_rate": 1.3846153846153847e-05,
71
+ "loss": 1.0086,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.06964394532950291,
76
+ "grad_norm": 0.3111755847930908,
77
+ "learning_rate": 1.5384615384615387e-05,
78
+ "loss": 0.7469,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0766083398624532,
83
+ "grad_norm": 0.13778822124004364,
84
+ "learning_rate": 1.6923076923076924e-05,
85
+ "loss": 0.5848,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.0835727343954035,
90
+ "grad_norm": 0.10629754513502121,
91
+ "learning_rate": 1.8461538461538465e-05,
92
+ "loss": 0.5366,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.09053712892835379,
97
+ "grad_norm": 0.09681010991334915,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5038,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.09750152346130408,
104
+ "grad_norm": 0.10756956040859222,
105
+ "learning_rate": 1.9952095808383235e-05,
106
+ "loss": 0.5002,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.10446591799425438,
111
+ "grad_norm": 0.10488647222518921,
112
+ "learning_rate": 1.9904191616766468e-05,
113
+ "loss": 0.5107,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.11143031252720467,
118
+ "grad_norm": 0.09359394758939743,
119
+ "learning_rate": 1.98562874251497e-05,
120
+ "loss": 0.4604,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.11839470706015495,
125
+ "grad_norm": 0.10392932593822479,
126
+ "learning_rate": 1.9808383233532935e-05,
127
+ "loss": 0.484,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.12535910159310526,
132
+ "grad_norm": 0.12986360490322113,
133
+ "learning_rate": 1.9760479041916168e-05,
134
+ "loss": 0.4527,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.13232349612605554,
139
+ "grad_norm": 0.14294420182704926,
140
+ "learning_rate": 1.97125748502994e-05,
141
+ "loss": 0.4661,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.13928789065900582,
146
+ "grad_norm": 0.18483737111091614,
147
+ "learning_rate": 1.9664670658682635e-05,
148
+ "loss": 0.4395,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.14625228519195613,
153
+ "grad_norm": 0.21114414930343628,
154
+ "learning_rate": 1.961676646706587e-05,
155
+ "loss": 0.4071,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.1532166797249064,
160
+ "grad_norm": 0.21779198944568634,
161
+ "learning_rate": 1.95688622754491e-05,
162
+ "loss": 0.3846,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.16018107425785671,
167
+ "grad_norm": 0.1294960230588913,
168
+ "learning_rate": 1.9520958083832338e-05,
169
+ "loss": 0.3952,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.167145468790807,
174
+ "grad_norm": 0.0927898958325386,
175
+ "learning_rate": 1.9473053892215568e-05,
176
+ "loss": 0.4112,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.1741098633237573,
181
+ "grad_norm": 0.10358591377735138,
182
+ "learning_rate": 1.9425149700598805e-05,
183
+ "loss": 0.406,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.18107425785670758,
188
+ "grad_norm": 0.13144026696681976,
189
+ "learning_rate": 1.9377245508982038e-05,
190
+ "loss": 0.4058,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.18803865238965786,
195
+ "grad_norm": 0.12745942175388336,
196
+ "learning_rate": 1.932934131736527e-05,
197
+ "loss": 0.3938,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.19500304692260817,
202
+ "grad_norm": 0.09832354635000229,
203
+ "learning_rate": 1.9281437125748505e-05,
204
+ "loss": 0.3819,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.20196744145555845,
209
+ "grad_norm": 0.0927552729845047,
210
+ "learning_rate": 1.9233532934131738e-05,
211
+ "loss": 0.3944,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.20893183598850876,
216
+ "grad_norm": 0.09627388417720795,
217
+ "learning_rate": 1.918562874251497e-05,
218
+ "loss": 0.348,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.21589623052145904,
223
+ "grad_norm": 0.09034324437379837,
224
+ "learning_rate": 1.9137724550898205e-05,
225
+ "loss": 0.3521,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.22286062505440934,
230
+ "grad_norm": 0.09156423062086105,
231
+ "learning_rate": 1.9089820359281438e-05,
232
+ "loss": 0.3663,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.22982501958735962,
237
+ "grad_norm": 0.0996803268790245,
238
+ "learning_rate": 1.904191616766467e-05,
239
+ "loss": 0.3347,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.2367894141203099,
244
+ "grad_norm": 0.09962292015552521,
245
+ "learning_rate": 1.8994011976047905e-05,
246
+ "loss": 0.373,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.2437538086532602,
251
+ "grad_norm": 0.08211114257574081,
252
+ "learning_rate": 1.894610778443114e-05,
253
+ "loss": 0.3641,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.2507182031862105,
258
+ "grad_norm": 0.10449781268835068,
259
+ "learning_rate": 1.889820359281437e-05,
260
+ "loss": 0.3358,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.2576825977191608,
265
+ "grad_norm": 0.11216866970062256,
266
+ "learning_rate": 1.8850299401197608e-05,
267
+ "loss": 0.372,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.2646469922521111,
272
+ "grad_norm": 0.11125332862138748,
273
+ "learning_rate": 1.8802395209580838e-05,
274
+ "loss": 0.3521,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.27161138678506136,
279
+ "grad_norm": 0.1620815545320511,
280
+ "learning_rate": 1.8754491017964075e-05,
281
+ "loss": 0.3726,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.27857578131801164,
286
+ "grad_norm": 0.11439554393291473,
287
+ "learning_rate": 1.8706586826347305e-05,
288
+ "loss": 0.3287,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.285540175850962,
293
+ "grad_norm": 0.10733096301555634,
294
+ "learning_rate": 1.865868263473054e-05,
295
+ "loss": 0.3459,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.29250457038391225,
300
+ "grad_norm": 0.12426561117172241,
301
+ "learning_rate": 1.8610778443113775e-05,
302
+ "loss": 0.3705,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.29946896491686253,
307
+ "grad_norm": 0.1148887649178505,
308
+ "learning_rate": 1.8562874251497008e-05,
309
+ "loss": 0.3216,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.3064333594498128,
314
+ "grad_norm": 0.10842527449131012,
315
+ "learning_rate": 1.851497005988024e-05,
316
+ "loss": 0.3502,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.31339775398276315,
321
+ "grad_norm": 0.11736515164375305,
322
+ "learning_rate": 1.8467065868263475e-05,
323
+ "loss": 0.3394,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.32036214851571343,
328
+ "grad_norm": 0.1381833553314209,
329
+ "learning_rate": 1.8419161676646708e-05,
330
+ "loss": 0.3166,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.3273265430486637,
335
+ "grad_norm": 0.10973569750785828,
336
+ "learning_rate": 1.837125748502994e-05,
337
+ "loss": 0.3245,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.3328980586750239,
342
+ "eval_loss": 0.3075329661369324,
343
+ "eval_runtime": 356.0276,
344
+ "eval_samples_per_second": 3.396,
345
+ "eval_steps_per_second": 1.699,
346
+ "step": 478
347
+ },
348
+ {
349
+ "epoch": 0.334290937581614,
350
+ "grad_norm": 0.13474580645561218,
351
+ "learning_rate": 1.8323353293413175e-05,
352
+ "loss": 0.2914,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.34125533211456427,
357
+ "grad_norm": 0.12812362611293793,
358
+ "learning_rate": 1.827544910179641e-05,
359
+ "loss": 0.3327,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.3482197266475146,
364
+ "grad_norm": 0.11661785840988159,
365
+ "learning_rate": 1.822754491017964e-05,
366
+ "loss": 0.3468,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.3551841211804649,
371
+ "grad_norm": 0.12990142405033112,
372
+ "learning_rate": 1.8179640718562878e-05,
373
+ "loss": 0.3318,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.36214851571341516,
378
+ "grad_norm": 0.1309327632188797,
379
+ "learning_rate": 1.8131736526946108e-05,
380
+ "loss": 0.2942,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.36911291024636544,
385
+ "grad_norm": 0.12246479839086533,
386
+ "learning_rate": 1.8083832335329345e-05,
387
+ "loss": 0.3751,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.3760773047793157,
392
+ "grad_norm": 0.14134123921394348,
393
+ "learning_rate": 1.8035928143712575e-05,
394
+ "loss": 0.3248,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.38304169931226606,
399
+ "grad_norm": 0.1433170884847641,
400
+ "learning_rate": 1.798802395209581e-05,
401
+ "loss": 0.3344,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.39000609384521634,
406
+ "grad_norm": 0.13364745676517487,
407
+ "learning_rate": 1.7940119760479045e-05,
408
+ "loss": 0.2928,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.3969704883781666,
413
+ "grad_norm": 0.13446496427059174,
414
+ "learning_rate": 1.7892215568862278e-05,
415
+ "loss": 0.3421,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.4039348829111169,
420
+ "grad_norm": 0.1586250364780426,
421
+ "learning_rate": 1.784431137724551e-05,
422
+ "loss": 0.3099,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.4108992774440672,
427
+ "grad_norm": 0.1283930391073227,
428
+ "learning_rate": 1.7796407185628745e-05,
429
+ "loss": 0.3123,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.4178636719770175,
434
+ "grad_norm": 0.14261852204799652,
435
+ "learning_rate": 1.7748502994011978e-05,
436
+ "loss": 0.2886,
437
+ "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.4248280665099678,
441
+ "grad_norm": 0.13329675793647766,
442
+ "learning_rate": 1.770059880239521e-05,
443
+ "loss": 0.3255,
444
+ "step": 610
445
+ },
446
+ {
447
+ "epoch": 0.4317924610429181,
448
+ "grad_norm": 0.13880160450935364,
449
+ "learning_rate": 1.7652694610778445e-05,
450
+ "loss": 0.326,
451
+ "step": 620
452
+ },
453
+ {
454
+ "epoch": 0.43875685557586835,
455
+ "grad_norm": 0.14388258755207062,
456
+ "learning_rate": 1.7604790419161678e-05,
457
+ "loss": 0.3269,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 0.4457212501088187,
462
+ "grad_norm": 0.1571163535118103,
463
+ "learning_rate": 1.755688622754491e-05,
464
+ "loss": 0.2919,
465
+ "step": 640
466
+ },
467
+ {
468
+ "epoch": 0.45268564464176897,
469
+ "grad_norm": 0.15307635068893433,
470
+ "learning_rate": 1.7508982035928145e-05,
471
+ "loss": 0.2773,
472
+ "step": 650
473
+ },
474
+ {
475
+ "epoch": 0.45965003917471925,
476
+ "grad_norm": 0.1438165158033371,
477
+ "learning_rate": 1.7461077844311378e-05,
478
+ "loss": 0.2847,
479
+ "step": 660
480
+ },
481
+ {
482
+ "epoch": 0.46661443370766953,
483
+ "grad_norm": 0.15620216727256775,
484
+ "learning_rate": 1.741317365269461e-05,
485
+ "loss": 0.29,
486
+ "step": 670
487
+ },
488
+ {
489
+ "epoch": 0.4735788282406198,
490
+ "grad_norm": 0.1414933055639267,
491
+ "learning_rate": 1.7365269461077845e-05,
492
+ "loss": 0.2754,
493
+ "step": 680
494
+ },
495
+ {
496
+ "epoch": 0.48054322277357014,
497
+ "grad_norm": 0.15853798389434814,
498
+ "learning_rate": 1.7317365269461078e-05,
499
+ "loss": 0.3062,
500
+ "step": 690
501
+ },
502
+ {
503
+ "epoch": 0.4875076173065204,
504
+ "grad_norm": 0.17159980535507202,
505
+ "learning_rate": 1.726946107784431e-05,
506
+ "loss": 0.2953,
507
+ "step": 700
508
+ },
509
+ {
510
+ "epoch": 0.4944720118394707,
511
+ "grad_norm": 0.16937299072742462,
512
+ "learning_rate": 1.7221556886227548e-05,
513
+ "loss": 0.2895,
514
+ "step": 710
515
+ },
516
+ {
517
+ "epoch": 0.501436406372421,
518
+ "grad_norm": 0.1455833464860916,
519
+ "learning_rate": 1.717365269461078e-05,
520
+ "loss": 0.2912,
521
+ "step": 720
522
+ },
523
+ {
524
+ "epoch": 0.5084008009053713,
525
+ "grad_norm": 0.15824872255325317,
526
+ "learning_rate": 1.7125748502994015e-05,
527
+ "loss": 0.3012,
528
+ "step": 730
529
+ },
530
+ {
531
+ "epoch": 0.5153651954383216,
532
+ "grad_norm": 0.1499851644039154,
533
+ "learning_rate": 1.7077844311377248e-05,
534
+ "loss": 0.263,
535
+ "step": 740
536
+ },
537
+ {
538
+ "epoch": 0.5223295899712719,
539
+ "grad_norm": 0.1611323356628418,
540
+ "learning_rate": 1.702994011976048e-05,
541
+ "loss": 0.2595,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 0.5292939845042222,
546
+ "grad_norm": 0.16144199669361115,
547
+ "learning_rate": 1.6982035928143714e-05,
548
+ "loss": 0.2939,
549
+ "step": 760
550
+ },
551
+ {
552
+ "epoch": 0.5362583790371724,
553
+ "grad_norm": 0.13433364033699036,
554
+ "learning_rate": 1.6934131736526948e-05,
555
+ "loss": 0.2817,
556
+ "step": 770
557
+ },
558
+ {
559
+ "epoch": 0.5432227735701227,
560
+ "grad_norm": 0.15188777446746826,
561
+ "learning_rate": 1.688622754491018e-05,
562
+ "loss": 0.2804,
563
+ "step": 780
564
+ },
565
+ {
566
+ "epoch": 0.550187168103073,
567
+ "grad_norm": 0.1530751883983612,
568
+ "learning_rate": 1.6838323353293414e-05,
569
+ "loss": 0.298,
570
+ "step": 790
571
+ },
572
+ {
573
+ "epoch": 0.5571515626360233,
574
+ "grad_norm": 0.14582635462284088,
575
+ "learning_rate": 1.6790419161676648e-05,
576
+ "loss": 0.3177,
577
+ "step": 800
578
+ },
579
+ {
580
+ "epoch": 0.5641159571689737,
581
+ "grad_norm": 0.174430251121521,
582
+ "learning_rate": 1.674251497005988e-05,
583
+ "loss": 0.2624,
584
+ "step": 810
585
+ },
586
+ {
587
+ "epoch": 0.571080351701924,
588
+ "grad_norm": 0.1558169573545456,
589
+ "learning_rate": 1.6694610778443114e-05,
590
+ "loss": 0.2631,
591
+ "step": 820
592
+ },
593
+ {
594
+ "epoch": 0.5780447462348742,
595
+ "grad_norm": 0.17108450829982758,
596
+ "learning_rate": 1.6646706586826348e-05,
597
+ "loss": 0.2903,
598
+ "step": 830
599
+ },
600
+ {
601
+ "epoch": 0.5850091407678245,
602
+ "grad_norm": 0.16796821355819702,
603
+ "learning_rate": 1.659880239520958e-05,
604
+ "loss": 0.2928,
605
+ "step": 840
606
+ },
607
+ {
608
+ "epoch": 0.5919735353007748,
609
+ "grad_norm": 0.16556940972805023,
610
+ "learning_rate": 1.6550898203592814e-05,
611
+ "loss": 0.2882,
612
+ "step": 850
613
+ },
614
+ {
615
+ "epoch": 0.5989379298337251,
616
+ "grad_norm": 0.1716109961271286,
617
+ "learning_rate": 1.650299401197605e-05,
618
+ "loss": 0.2814,
619
+ "step": 860
620
+ },
621
+ {
622
+ "epoch": 0.6059023243666753,
623
+ "grad_norm": 0.1695253700017929,
624
+ "learning_rate": 1.645508982035928e-05,
625
+ "loss": 0.2909,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 0.6128667188996256,
630
+ "grad_norm": 0.1896178424358368,
631
+ "learning_rate": 1.6407185628742518e-05,
632
+ "loss": 0.2909,
633
+ "step": 880
634
+ },
635
+ {
636
+ "epoch": 0.6198311134325759,
637
+ "grad_norm": 0.1624131202697754,
638
+ "learning_rate": 1.6359281437125748e-05,
639
+ "loss": 0.2672,
640
+ "step": 890
641
+ },
642
+ {
643
+ "epoch": 0.6267955079655263,
644
+ "grad_norm": 0.16821731626987457,
645
+ "learning_rate": 1.6311377245508984e-05,
646
+ "loss": 0.2626,
647
+ "step": 900
648
+ },
649
+ {
650
+ "epoch": 0.6337599024984766,
651
+ "grad_norm": 0.17026785016059875,
652
+ "learning_rate": 1.6263473053892214e-05,
653
+ "loss": 0.2735,
654
+ "step": 910
655
+ },
656
+ {
657
+ "epoch": 0.6407242970314269,
658
+ "grad_norm": 0.19429761171340942,
659
+ "learning_rate": 1.621556886227545e-05,
660
+ "loss": 0.2605,
661
+ "step": 920
662
+ },
663
+ {
664
+ "epoch": 0.6476886915643771,
665
+ "grad_norm": 0.16931040585041046,
666
+ "learning_rate": 1.6167664670658684e-05,
667
+ "loss": 0.2722,
668
+ "step": 930
669
+ },
670
+ {
671
+ "epoch": 0.6546530860973274,
672
+ "grad_norm": 0.20588335394859314,
673
+ "learning_rate": 1.6119760479041918e-05,
674
+ "loss": 0.301,
675
+ "step": 940
676
+ },
677
+ {
678
+ "epoch": 0.6616174806302777,
679
+ "grad_norm": 0.16699948906898499,
680
+ "learning_rate": 1.607185628742515e-05,
681
+ "loss": 0.2646,
682
+ "step": 950
683
+ },
684
+ {
685
+ "epoch": 0.6657961173500478,
686
+ "eval_loss": 0.2465677410364151,
687
+ "eval_runtime": 356.1016,
688
+ "eval_samples_per_second": 3.395,
689
+ "eval_steps_per_second": 1.699,
690
+ "step": 956
691
+ },
692
+ {
693
+ "epoch": 0.668581875163228,
694
+ "grad_norm": 0.20734217762947083,
695
+ "learning_rate": 1.6023952095808384e-05,
696
+ "loss": 0.2476,
697
+ "step": 960
698
+ },
699
+ {
700
+ "epoch": 0.6755462696961783,
701
+ "grad_norm": 0.21317210793495178,
702
+ "learning_rate": 1.5976047904191618e-05,
703
+ "loss": 0.2434,
704
+ "step": 970
705
+ },
706
+ {
707
+ "epoch": 0.6825106642291285,
708
+ "grad_norm": 0.19125515222549438,
709
+ "learning_rate": 1.592814371257485e-05,
710
+ "loss": 0.2823,
711
+ "step": 980
712
+ },
713
+ {
714
+ "epoch": 0.6894750587620788,
715
+ "grad_norm": 0.1651381254196167,
716
+ "learning_rate": 1.5880239520958084e-05,
717
+ "loss": 0.2769,
718
+ "step": 990
719
+ },
720
+ {
721
+ "epoch": 0.6964394532950292,
722
+ "grad_norm": 0.18061324954032898,
723
+ "learning_rate": 1.583233532934132e-05,
724
+ "loss": 0.2788,
725
+ "step": 1000
726
+ },
727
+ {
728
+ "epoch": 0.7034038478279795,
729
+ "grad_norm": 0.17788180708885193,
730
+ "learning_rate": 1.578443113772455e-05,
731
+ "loss": 0.2596,
732
+ "step": 1010
733
+ },
734
+ {
735
+ "epoch": 0.7103682423609298,
736
+ "grad_norm": 0.19635842740535736,
737
+ "learning_rate": 1.5736526946107788e-05,
738
+ "loss": 0.2391,
739
+ "step": 1020
740
+ },
741
+ {
742
+ "epoch": 0.71733263689388,
743
+ "grad_norm": 0.18000468611717224,
744
+ "learning_rate": 1.5688622754491018e-05,
745
+ "loss": 0.2589,
746
+ "step": 1030
747
+ },
748
+ {
749
+ "epoch": 0.7242970314268303,
750
+ "grad_norm": 0.18016283214092255,
751
+ "learning_rate": 1.5640718562874254e-05,
752
+ "loss": 0.2673,
753
+ "step": 1040
754
+ },
755
+ {
756
+ "epoch": 0.7312614259597806,
757
+ "grad_norm": 0.2085445076227188,
758
+ "learning_rate": 1.5592814371257484e-05,
759
+ "loss": 0.295,
760
+ "step": 1050
761
+ },
762
+ {
763
+ "epoch": 0.7382258204927309,
764
+ "grad_norm": 0.1937682181596756,
765
+ "learning_rate": 1.554491017964072e-05,
766
+ "loss": 0.2741,
767
+ "step": 1060
768
+ },
769
+ {
770
+ "epoch": 0.7451902150256812,
771
+ "grad_norm": 0.18496188521385193,
772
+ "learning_rate": 1.5497005988023954e-05,
773
+ "loss": 0.2485,
774
+ "step": 1070
775
+ },
776
+ {
777
+ "epoch": 0.7521546095586314,
778
+ "grad_norm": 0.21024686098098755,
779
+ "learning_rate": 1.5449101796407188e-05,
780
+ "loss": 0.2569,
781
+ "step": 1080
782
+ },
783
+ {
784
+ "epoch": 0.7591190040915818,
785
+ "grad_norm": 0.1960270255804062,
786
+ "learning_rate": 1.540119760479042e-05,
787
+ "loss": 0.2335,
788
+ "step": 1090
789
+ },
790
+ {
791
+ "epoch": 0.7660833986245321,
792
+ "grad_norm": 0.19557876884937286,
793
+ "learning_rate": 1.5353293413173654e-05,
794
+ "loss": 0.279,
795
+ "step": 1100
796
+ },
797
+ {
798
+ "epoch": 0.7730477931574824,
799
+ "grad_norm": 0.17300210893154144,
800
+ "learning_rate": 1.5305389221556888e-05,
801
+ "loss": 0.249,
802
+ "step": 1110
803
+ },
804
+ {
805
+ "epoch": 0.7800121876904327,
806
+ "grad_norm": 0.1768539994955063,
807
+ "learning_rate": 1.5257485029940121e-05,
808
+ "loss": 0.2415,
809
+ "step": 1120
810
+ },
811
+ {
812
+ "epoch": 0.786976582223383,
813
+ "grad_norm": 0.20632915198802948,
814
+ "learning_rate": 1.5209580838323354e-05,
815
+ "loss": 0.2312,
816
+ "step": 1130
817
+ },
818
+ {
819
+ "epoch": 0.7939409767563332,
820
+ "grad_norm": 0.1948348730802536,
821
+ "learning_rate": 1.516167664670659e-05,
822
+ "loss": 0.2415,
823
+ "step": 1140
824
+ },
825
+ {
826
+ "epoch": 0.8009053712892835,
827
+ "grad_norm": 0.16288821399211884,
828
+ "learning_rate": 1.511377245508982e-05,
829
+ "loss": 0.2571,
830
+ "step": 1150
831
+ },
832
+ {
833
+ "epoch": 0.8078697658222338,
834
+ "grad_norm": 0.21365194022655487,
835
+ "learning_rate": 1.5065868263473056e-05,
836
+ "loss": 0.2594,
837
+ "step": 1160
838
+ },
839
+ {
840
+ "epoch": 0.8148341603551841,
841
+ "grad_norm": 0.20960865914821625,
842
+ "learning_rate": 1.5017964071856287e-05,
843
+ "loss": 0.2537,
844
+ "step": 1170
845
+ },
846
+ {
847
+ "epoch": 0.8217985548881344,
848
+ "grad_norm": 0.20458345115184784,
849
+ "learning_rate": 1.4970059880239522e-05,
850
+ "loss": 0.2675,
851
+ "step": 1180
852
+ },
853
+ {
854
+ "epoch": 0.8287629494210847,
855
+ "grad_norm": 0.22388549149036407,
856
+ "learning_rate": 1.4922155688622754e-05,
857
+ "loss": 0.2525,
858
+ "step": 1190
859
+ },
860
+ {
861
+ "epoch": 0.835727343954035,
862
+ "grad_norm": 0.2139691859483719,
863
+ "learning_rate": 1.4874251497005989e-05,
864
+ "loss": 0.251,
865
+ "step": 1200
866
+ },
867
+ {
868
+ "epoch": 0.8426917384869853,
869
+ "grad_norm": 0.18761980533599854,
870
+ "learning_rate": 1.4826347305389224e-05,
871
+ "loss": 0.2328,
872
+ "step": 1210
873
+ },
874
+ {
875
+ "epoch": 0.8496561330199356,
876
+ "grad_norm": 0.183973029255867,
877
+ "learning_rate": 1.4778443113772456e-05,
878
+ "loss": 0.2239,
879
+ "step": 1220
880
+ },
881
+ {
882
+ "epoch": 0.8566205275528859,
883
+ "grad_norm": 0.23718492686748505,
884
+ "learning_rate": 1.473053892215569e-05,
885
+ "loss": 0.2423,
886
+ "step": 1230
887
+ },
888
+ {
889
+ "epoch": 0.8635849220858361,
890
+ "grad_norm": 0.23875045776367188,
891
+ "learning_rate": 1.4682634730538922e-05,
892
+ "loss": 0.2365,
893
+ "step": 1240
894
+ },
895
+ {
896
+ "epoch": 0.8705493166187864,
897
+ "grad_norm": 0.21570812165737152,
898
+ "learning_rate": 1.4634730538922157e-05,
899
+ "loss": 0.268,
900
+ "step": 1250
901
+ },
902
+ {
903
+ "epoch": 0.8775137111517367,
904
+ "grad_norm": 0.20839928090572357,
905
+ "learning_rate": 1.4586826347305389e-05,
906
+ "loss": 0.2502,
907
+ "step": 1260
908
+ },
909
+ {
910
+ "epoch": 0.884478105684687,
911
+ "grad_norm": 0.21605758368968964,
912
+ "learning_rate": 1.4538922155688624e-05,
913
+ "loss": 0.2365,
914
+ "step": 1270
915
+ },
916
+ {
917
+ "epoch": 0.8914425002176374,
918
+ "grad_norm": 0.23300880193710327,
919
+ "learning_rate": 1.4491017964071859e-05,
920
+ "loss": 0.2404,
921
+ "step": 1280
922
+ },
923
+ {
924
+ "epoch": 0.8984068947505877,
925
+ "grad_norm": 0.21796733140945435,
926
+ "learning_rate": 1.444311377245509e-05,
927
+ "loss": 0.2704,
928
+ "step": 1290
929
+ },
930
+ {
931
+ "epoch": 0.9053712892835379,
932
+ "grad_norm": 0.25555598735809326,
933
+ "learning_rate": 1.4395209580838326e-05,
934
+ "loss": 0.2674,
935
+ "step": 1300
936
+ },
937
+ {
938
+ "epoch": 0.9123356838164882,
939
+ "grad_norm": 0.22284413874149323,
940
+ "learning_rate": 1.4347305389221557e-05,
941
+ "loss": 0.2471,
942
+ "step": 1310
943
+ },
944
+ {
945
+ "epoch": 0.9193000783494385,
946
+ "grad_norm": 0.22234566509723663,
947
+ "learning_rate": 1.4299401197604792e-05,
948
+ "loss": 0.2529,
949
+ "step": 1320
950
+ },
951
+ {
952
+ "epoch": 0.9262644728823888,
953
+ "grad_norm": 0.221883624792099,
954
+ "learning_rate": 1.4251497005988024e-05,
955
+ "loss": 0.1932,
956
+ "step": 1330
957
+ },
958
+ {
959
+ "epoch": 0.9332288674153391,
960
+ "grad_norm": 0.22873520851135254,
961
+ "learning_rate": 1.4203592814371259e-05,
962
+ "loss": 0.2412,
963
+ "step": 1340
964
+ },
965
+ {
966
+ "epoch": 0.9401932619482893,
967
+ "grad_norm": 0.21307863295078278,
968
+ "learning_rate": 1.4155688622754492e-05,
969
+ "loss": 0.2384,
970
+ "step": 1350
971
+ },
972
+ {
973
+ "epoch": 0.9471576564812396,
974
+ "grad_norm": 0.21296364068984985,
975
+ "learning_rate": 1.4107784431137726e-05,
976
+ "loss": 0.2318,
977
+ "step": 1360
978
+ },
979
+ {
980
+ "epoch": 0.95412205101419,
981
+ "grad_norm": 0.21060685813426971,
982
+ "learning_rate": 1.405988023952096e-05,
983
+ "loss": 0.2323,
984
+ "step": 1370
985
+ },
986
+ {
987
+ "epoch": 0.9610864455471403,
988
+ "grad_norm": 0.23930418491363525,
989
+ "learning_rate": 1.4011976047904192e-05,
990
+ "loss": 0.2398,
991
+ "step": 1380
992
+ },
993
+ {
994
+ "epoch": 0.9680508400800906,
995
+ "grad_norm": 0.22837185859680176,
996
+ "learning_rate": 1.3964071856287427e-05,
997
+ "loss": 0.233,
998
+ "step": 1390
999
+ },
1000
+ {
1001
+ "epoch": 0.9750152346130408,
1002
+ "grad_norm": 0.21798169612884521,
1003
+ "learning_rate": 1.3916167664670659e-05,
1004
+ "loss": 0.2161,
1005
+ "step": 1400
1006
+ },
1007
+ {
1008
+ "epoch": 0.9819796291459911,
1009
+ "grad_norm": 0.2846814692020416,
1010
+ "learning_rate": 1.3868263473053894e-05,
1011
+ "loss": 0.2301,
1012
+ "step": 1410
1013
+ },
1014
+ {
1015
+ "epoch": 0.9889440236789414,
1016
+ "grad_norm": 0.22030217945575714,
1017
+ "learning_rate": 1.3820359281437127e-05,
1018
+ "loss": 0.2017,
1019
+ "step": 1420
1020
+ },
1021
+ {
1022
+ "epoch": 0.9959084182118917,
1023
+ "grad_norm": 0.25554022192955017,
1024
+ "learning_rate": 1.377245508982036e-05,
1025
+ "loss": 0.2296,
1026
+ "step": 1430
1027
+ },
1028
+ {
1029
+ "epoch": 0.9986941760250718,
1030
+ "eval_loss": 0.21637588739395142,
1031
+ "eval_runtime": 356.0721,
1032
+ "eval_samples_per_second": 3.395,
1033
+ "eval_steps_per_second": 1.699,
1034
+ "step": 1434
1035
+ },
1036
+ {
1037
+ "epoch": 1.0030469226081657,
1038
+ "grad_norm": 0.2862723469734192,
1039
+ "learning_rate": 1.3724550898203594e-05,
1040
+ "loss": 0.2538,
1041
+ "step": 1440
1042
+ },
1043
+ {
1044
+ "epoch": 1.010011317141116,
1045
+ "grad_norm": 0.2589089870452881,
1046
+ "learning_rate": 1.3676646706586827e-05,
1047
+ "loss": 0.2196,
1048
+ "step": 1450
1049
+ },
1050
+ {
1051
+ "epoch": 1.0169757116740663,
1052
+ "grad_norm": 0.27893784642219543,
1053
+ "learning_rate": 1.362874251497006e-05,
1054
+ "loss": 0.1935,
1055
+ "step": 1460
1056
+ },
1057
+ {
1058
+ "epoch": 1.0239401062070166,
1059
+ "grad_norm": 0.2530602514743805,
1060
+ "learning_rate": 1.3580838323353294e-05,
1061
+ "loss": 0.2248,
1062
+ "step": 1470
1063
+ },
1064
+ {
1065
+ "epoch": 1.0309045007399669,
1066
+ "grad_norm": 0.24902446568012238,
1067
+ "learning_rate": 1.3532934131736529e-05,
1068
+ "loss": 0.224,
1069
+ "step": 1480
1070
+ },
1071
+ {
1072
+ "epoch": 1.0378688952729171,
1073
+ "grad_norm": 0.27132269740104675,
1074
+ "learning_rate": 1.3485029940119762e-05,
1075
+ "loss": 0.2208,
1076
+ "step": 1490
1077
+ },
1078
+ {
1079
+ "epoch": 1.0448332898058674,
1080
+ "grad_norm": 0.2570977807044983,
1081
+ "learning_rate": 1.3437125748502996e-05,
1082
+ "loss": 0.2103,
1083
+ "step": 1500
1084
+ },
1085
+ {
1086
+ "epoch": 1.0517976843388177,
1087
+ "grad_norm": 0.2994561493396759,
1088
+ "learning_rate": 1.3389221556886229e-05,
1089
+ "loss": 0.2243,
1090
+ "step": 1510
1091
+ },
1092
+ {
1093
+ "epoch": 1.058762078871768,
1094
+ "grad_norm": 0.22994251549243927,
1095
+ "learning_rate": 1.3341317365269462e-05,
1096
+ "loss": 0.2217,
1097
+ "step": 1520
1098
+ },
1099
+ {
1100
+ "epoch": 1.0657264734047183,
1101
+ "grad_norm": 0.22706159949302673,
1102
+ "learning_rate": 1.3293413173652696e-05,
1103
+ "loss": 0.2219,
1104
+ "step": 1530
1105
+ },
1106
+ {
1107
+ "epoch": 1.0726908679376688,
1108
+ "grad_norm": 0.26584964990615845,
1109
+ "learning_rate": 1.3245508982035929e-05,
1110
+ "loss": 0.2329,
1111
+ "step": 1540
1112
+ },
1113
+ {
1114
+ "epoch": 1.079655262470619,
1115
+ "grad_norm": 0.2549090087413788,
1116
+ "learning_rate": 1.3197604790419162e-05,
1117
+ "loss": 0.2235,
1118
+ "step": 1550
1119
+ },
1120
+ {
1121
+ "epoch": 1.0866196570035693,
1122
+ "grad_norm": 0.27169135212898254,
1123
+ "learning_rate": 1.3149700598802397e-05,
1124
+ "loss": 0.2332,
1125
+ "step": 1560
1126
+ },
1127
+ {
1128
+ "epoch": 1.0935840515365196,
1129
+ "grad_norm": 0.26733773946762085,
1130
+ "learning_rate": 1.3101796407185629e-05,
1131
+ "loss": 0.2062,
1132
+ "step": 1570
1133
+ },
1134
+ {
1135
+ "epoch": 1.1005484460694699,
1136
+ "grad_norm": 0.3222082853317261,
1137
+ "learning_rate": 1.3053892215568864e-05,
1138
+ "loss": 0.2111,
1139
+ "step": 1580
1140
+ },
1141
+ {
1142
+ "epoch": 1.1075128406024202,
1143
+ "grad_norm": 0.20403257012367249,
1144
+ "learning_rate": 1.3005988023952097e-05,
1145
+ "loss": 0.226,
1146
+ "step": 1590
1147
+ },
1148
+ {
1149
+ "epoch": 1.1144772351353704,
1150
+ "grad_norm": 0.2658451199531555,
1151
+ "learning_rate": 1.295808383233533e-05,
1152
+ "loss": 0.2013,
1153
+ "step": 1600
1154
+ },
1155
+ {
1156
+ "epoch": 1.1214416296683207,
1157
+ "grad_norm": 0.2719573676586151,
1158
+ "learning_rate": 1.2910179640718564e-05,
1159
+ "loss": 0.2036,
1160
+ "step": 1610
1161
+ },
1162
+ {
1163
+ "epoch": 1.128406024201271,
1164
+ "grad_norm": 0.29076331853866577,
1165
+ "learning_rate": 1.2862275449101797e-05,
1166
+ "loss": 0.2162,
1167
+ "step": 1620
1168
+ },
1169
+ {
1170
+ "epoch": 1.1353704187342213,
1171
+ "grad_norm": 0.2324078530073166,
1172
+ "learning_rate": 1.2814371257485032e-05,
1173
+ "loss": 0.2002,
1174
+ "step": 1630
1175
+ },
1176
+ {
1177
+ "epoch": 1.1423348132671716,
1178
+ "grad_norm": 0.27219468355178833,
1179
+ "learning_rate": 1.2766467065868264e-05,
1180
+ "loss": 0.2238,
1181
+ "step": 1640
1182
+ },
1183
+ {
1184
+ "epoch": 1.1492992078001218,
1185
+ "grad_norm": 0.22101271152496338,
1186
+ "learning_rate": 1.2718562874251499e-05,
1187
+ "loss": 0.1845,
1188
+ "step": 1650
1189
+ },
1190
+ {
1191
+ "epoch": 1.1562636023330721,
1192
+ "grad_norm": 0.25101369619369507,
1193
+ "learning_rate": 1.267065868263473e-05,
1194
+ "loss": 0.2296,
1195
+ "step": 1660
1196
+ },
1197
+ {
1198
+ "epoch": 1.1632279968660224,
1199
+ "grad_norm": 0.25895142555236816,
1200
+ "learning_rate": 1.2622754491017965e-05,
1201
+ "loss": 0.2271,
1202
+ "step": 1670
1203
+ },
1204
+ {
1205
+ "epoch": 1.1701923913989727,
1206
+ "grad_norm": 0.26405826210975647,
1207
+ "learning_rate": 1.2574850299401197e-05,
1208
+ "loss": 0.2159,
1209
+ "step": 1680
1210
+ },
1211
+ {
1212
+ "epoch": 1.177156785931923,
1213
+ "grad_norm": 0.23231615126132965,
1214
+ "learning_rate": 1.2526946107784432e-05,
1215
+ "loss": 0.1916,
1216
+ "step": 1690
1217
+ },
1218
+ {
1219
+ "epoch": 1.1841211804648732,
1220
+ "grad_norm": 0.3211207985877991,
1221
+ "learning_rate": 1.2479041916167665e-05,
1222
+ "loss": 0.2154,
1223
+ "step": 1700
1224
+ },
1225
+ {
1226
+ "epoch": 1.1910855749978237,
1227
+ "grad_norm": 0.2948794662952423,
1228
+ "learning_rate": 1.2431137724550899e-05,
1229
+ "loss": 0.1999,
1230
+ "step": 1710
1231
+ },
1232
+ {
1233
+ "epoch": 1.198049969530774,
1234
+ "grad_norm": 0.28081706166267395,
1235
+ "learning_rate": 1.2383233532934134e-05,
1236
+ "loss": 0.2301,
1237
+ "step": 1720
1238
+ },
1239
+ {
1240
+ "epoch": 1.2050143640637243,
1241
+ "grad_norm": 0.296283096075058,
1242
+ "learning_rate": 1.2335329341317365e-05,
1243
+ "loss": 0.2164,
1244
+ "step": 1730
1245
+ },
1246
+ {
1247
+ "epoch": 1.2119787585966746,
1248
+ "grad_norm": 0.29658278822898865,
1249
+ "learning_rate": 1.22874251497006e-05,
1250
+ "loss": 0.1916,
1251
+ "step": 1740
1252
+ },
1253
+ {
1254
+ "epoch": 1.2189431531296249,
1255
+ "grad_norm": 0.2917250990867615,
1256
+ "learning_rate": 1.2239520958083832e-05,
1257
+ "loss": 0.2045,
1258
+ "step": 1750
1259
+ },
1260
+ {
1261
+ "epoch": 1.2259075476625751,
1262
+ "grad_norm": 0.35840919613838196,
1263
+ "learning_rate": 1.2191616766467067e-05,
1264
+ "loss": 0.1965,
1265
+ "step": 1760
1266
+ },
1267
+ {
1268
+ "epoch": 1.2328719421955254,
1269
+ "grad_norm": 0.2819244861602783,
1270
+ "learning_rate": 1.2143712574850299e-05,
1271
+ "loss": 0.2032,
1272
+ "step": 1770
1273
+ },
1274
+ {
1275
+ "epoch": 1.2398363367284757,
1276
+ "grad_norm": 0.3518809974193573,
1277
+ "learning_rate": 1.2095808383233534e-05,
1278
+ "loss": 0.2043,
1279
+ "step": 1780
1280
+ },
1281
+ {
1282
+ "epoch": 1.246800731261426,
1283
+ "grad_norm": 0.3232385516166687,
1284
+ "learning_rate": 1.2047904191616769e-05,
1285
+ "loss": 0.23,
1286
+ "step": 1790
1287
+ },
1288
+ {
1289
+ "epoch": 1.2537651257943763,
1290
+ "grad_norm": 0.23131519556045532,
1291
+ "learning_rate": 1.2e-05,
1292
+ "loss": 0.2106,
1293
+ "step": 1800
1294
+ },
1295
+ {
1296
+ "epoch": 1.2607295203273265,
1297
+ "grad_norm": 0.30877017974853516,
1298
+ "learning_rate": 1.1952095808383235e-05,
1299
+ "loss": 0.2088,
1300
+ "step": 1810
1301
+ },
1302
+ {
1303
+ "epoch": 1.2676939148602768,
1304
+ "grad_norm": 0.36684754490852356,
1305
+ "learning_rate": 1.1904191616766467e-05,
1306
+ "loss": 0.2024,
1307
+ "step": 1820
1308
+ },
1309
+ {
1310
+ "epoch": 1.274658309393227,
1311
+ "grad_norm": 0.21124888956546783,
1312
+ "learning_rate": 1.1856287425149702e-05,
1313
+ "loss": 0.1929,
1314
+ "step": 1830
1315
+ },
1316
+ {
1317
+ "epoch": 1.2816227039261774,
1318
+ "grad_norm": 0.2815339267253876,
1319
+ "learning_rate": 1.1808383233532934e-05,
1320
+ "loss": 0.1973,
1321
+ "step": 1840
1322
+ },
1323
+ {
1324
+ "epoch": 1.2885870984591277,
1325
+ "grad_norm": 0.3017849922180176,
1326
+ "learning_rate": 1.1760479041916169e-05,
1327
+ "loss": 0.233,
1328
+ "step": 1850
1329
+ },
1330
+ {
1331
+ "epoch": 1.295551492992078,
1332
+ "grad_norm": 0.3456547260284424,
1333
+ "learning_rate": 1.1712574850299404e-05,
1334
+ "loss": 0.1769,
1335
+ "step": 1860
1336
+ },
1337
+ {
1338
+ "epoch": 1.3025158875250282,
1339
+ "grad_norm": 0.28210344910621643,
1340
+ "learning_rate": 1.1664670658682635e-05,
1341
+ "loss": 0.204,
1342
+ "step": 1870
1343
+ },
1344
+ {
1345
+ "epoch": 1.3094802820579785,
1346
+ "grad_norm": 0.3701432943344116,
1347
+ "learning_rate": 1.161676646706587e-05,
1348
+ "loss": 0.1923,
1349
+ "step": 1880
1350
+ },
1351
+ {
1352
+ "epoch": 1.3164446765909288,
1353
+ "grad_norm": 0.3656926453113556,
1354
+ "learning_rate": 1.1568862275449102e-05,
1355
+ "loss": 0.2136,
1356
+ "step": 1890
1357
+ },
1358
+ {
1359
+ "epoch": 1.323409071123879,
1360
+ "grad_norm": 0.29142090678215027,
1361
+ "learning_rate": 1.1520958083832337e-05,
1362
+ "loss": 0.1957,
1363
+ "step": 1900
1364
+ },
1365
+ {
1366
+ "epoch": 1.3303734656568293,
1367
+ "grad_norm": 0.2799660563468933,
1368
+ "learning_rate": 1.1473053892215569e-05,
1369
+ "loss": 0.2009,
1370
+ "step": 1910
1371
+ },
1372
+ {
1373
+ "epoch": 1.3317663445634196,
1374
+ "eval_loss": 0.19295775890350342,
1375
+ "eval_runtime": 356.0153,
1376
+ "eval_samples_per_second": 3.396,
1377
+ "eval_steps_per_second": 1.699,
1378
+ "step": 1912
1379
+ }
1380
+ ],
1381
+ "logging_steps": 10,
1382
+ "max_steps": 4305,
1383
+ "num_input_tokens_seen": 0,
1384
+ "num_train_epochs": 3,
1385
+ "save_steps": 478,
1386
+ "stateful_callbacks": {
1387
+ "TrainerControl": {
1388
+ "args": {
1389
+ "should_epoch_stop": false,
1390
+ "should_evaluate": false,
1391
+ "should_log": false,
1392
+ "should_save": true,
1393
+ "should_training_stop": false
1394
+ },
1395
+ "attributes": {}
1396
+ }
1397
+ },
1398
+ "total_flos": 8.929647234254438e+17,
1399
+ "train_batch_size": 2,
1400
+ "trial_name": null,
1401
+ "trial_params": null
1402
+ }
checkpoint-1912/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:956bd3fe04334201160cff0d2e3faf62e32c6c8f709339d63e01a7d4b73b24d0
3
+ size 5560
checkpoint-2390/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
checkpoint-2390/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
checkpoint-2390/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25fe76b45b61c738c5af50c849eb346dfaeeb3102c0a7abd8c16d6d6142a277
3
+ size 671149168
checkpoint-2390/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b30748271c159d0ef28ff2f39abf3a369e9ae422ab05f0facb7717c6f1212b
3
+ size 1342555602
checkpoint-2390/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e45c7e2a85335ad5036342a2571df9086fcca4257cbf7e27bd3cb7490778b40e
3
+ size 14244
checkpoint-2390/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32caa7f4861b8b3bb4853ddaed10fc51e3a5729a353a3fa812005040b9a0cd89
3
+ size 1064
checkpoint-2390/special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|eot_id|>",
4
+ "<|eom_id|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin_of_text|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|eot_id|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|eot_id|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
checkpoint-2390/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a55c1a4c5e3af7f2fb2bc0cd245a09dabd742dc24e7cb3741db1e11c7fe1a52
3
+ size 17210019
checkpoint-2390/tokenizer_config.json ADDED
@@ -0,0 +1,2070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "additional_special_tokens": [
2053
+ "<|eot_id|>",
2054
+ "<|eom_id|>"
2055
+ ],
2056
+ "bos_token": "<|begin_of_text|>",
2057
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. 
The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}",
2058
+ "clean_up_tokenization_spaces": true,
2059
+ "eos_token": "<|eot_id|>",
2060
+ "extra_special_tokens": {},
2061
+ "model_input_names": [
2062
+ "input_ids",
2063
+ "attention_mask"
2064
+ ],
2065
+ "model_max_length": 16384,
2066
+ "pad_token": "<|eot_id|>",
2067
+ "padding_side": "right",
2068
+ "split_special_tokens": false,
2069
+ "tokenizer_class": "PreTrainedTokenizerFast"
2070
+ }
checkpoint-2390/trainer_state.json ADDED
@@ -0,0 +1,1746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.17429304122924805,
3
+ "best_model_checkpoint": "./xlam_Qlora_out_eval/checkpoint-2390",
4
+ "epoch": 1.6646644032384434,
5
+ "eval_steps": 478,
6
+ "global_step": 2390,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006964394532950292,
13
+ "grad_norm": 0.29273906350135803,
14
+ "learning_rate": 1.5384615384615387e-06,
15
+ "loss": 1.8834,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.013928789065900584,
20
+ "grad_norm": 0.3107926845550537,
21
+ "learning_rate": 3.0769230769230774e-06,
22
+ "loss": 1.876,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.020893183598850874,
27
+ "grad_norm": 0.3098163902759552,
28
+ "learning_rate": 4.615384615384616e-06,
29
+ "loss": 1.9099,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.027857578131801168,
34
+ "grad_norm": 0.36715859174728394,
35
+ "learning_rate": 6.153846153846155e-06,
36
+ "loss": 1.8142,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.034821972664751455,
41
+ "grad_norm": 0.3511175215244293,
42
+ "learning_rate": 7.692307692307694e-06,
43
+ "loss": 1.7271,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.04178636719770175,
48
+ "grad_norm": 0.3458055853843689,
49
+ "learning_rate": 9.230769230769232e-06,
50
+ "loss": 1.6073,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.04875076173065204,
55
+ "grad_norm": 0.37719297409057617,
56
+ "learning_rate": 1.076923076923077e-05,
57
+ "loss": 1.4997,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.055715156263602336,
62
+ "grad_norm": 0.30273741483688354,
63
+ "learning_rate": 1.230769230769231e-05,
64
+ "loss": 1.2914,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.06267955079655263,
69
+ "grad_norm": 0.3690374195575714,
70
+ "learning_rate": 1.3846153846153847e-05,
71
+ "loss": 1.0086,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.06964394532950291,
76
+ "grad_norm": 0.3111755847930908,
77
+ "learning_rate": 1.5384615384615387e-05,
78
+ "loss": 0.7469,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0766083398624532,
83
+ "grad_norm": 0.13778822124004364,
84
+ "learning_rate": 1.6923076923076924e-05,
85
+ "loss": 0.5848,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.0835727343954035,
90
+ "grad_norm": 0.10629754513502121,
91
+ "learning_rate": 1.8461538461538465e-05,
92
+ "loss": 0.5366,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.09053712892835379,
97
+ "grad_norm": 0.09681010991334915,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5038,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.09750152346130408,
104
+ "grad_norm": 0.10756956040859222,
105
+ "learning_rate": 1.9952095808383235e-05,
106
+ "loss": 0.5002,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.10446591799425438,
111
+ "grad_norm": 0.10488647222518921,
112
+ "learning_rate": 1.9904191616766468e-05,
113
+ "loss": 0.5107,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.11143031252720467,
118
+ "grad_norm": 0.09359394758939743,
119
+ "learning_rate": 1.98562874251497e-05,
120
+ "loss": 0.4604,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.11839470706015495,
125
+ "grad_norm": 0.10392932593822479,
126
+ "learning_rate": 1.9808383233532935e-05,
127
+ "loss": 0.484,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.12535910159310526,
132
+ "grad_norm": 0.12986360490322113,
133
+ "learning_rate": 1.9760479041916168e-05,
134
+ "loss": 0.4527,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.13232349612605554,
139
+ "grad_norm": 0.14294420182704926,
140
+ "learning_rate": 1.97125748502994e-05,
141
+ "loss": 0.4661,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.13928789065900582,
146
+ "grad_norm": 0.18483737111091614,
147
+ "learning_rate": 1.9664670658682635e-05,
148
+ "loss": 0.4395,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.14625228519195613,
153
+ "grad_norm": 0.21114414930343628,
154
+ "learning_rate": 1.961676646706587e-05,
155
+ "loss": 0.4071,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.1532166797249064,
160
+ "grad_norm": 0.21779198944568634,
161
+ "learning_rate": 1.95688622754491e-05,
162
+ "loss": 0.3846,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.16018107425785671,
167
+ "grad_norm": 0.1294960230588913,
168
+ "learning_rate": 1.9520958083832338e-05,
169
+ "loss": 0.3952,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.167145468790807,
174
+ "grad_norm": 0.0927898958325386,
175
+ "learning_rate": 1.9473053892215568e-05,
176
+ "loss": 0.4112,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.1741098633237573,
181
+ "grad_norm": 0.10358591377735138,
182
+ "learning_rate": 1.9425149700598805e-05,
183
+ "loss": 0.406,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.18107425785670758,
188
+ "grad_norm": 0.13144026696681976,
189
+ "learning_rate": 1.9377245508982038e-05,
190
+ "loss": 0.4058,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.18803865238965786,
195
+ "grad_norm": 0.12745942175388336,
196
+ "learning_rate": 1.932934131736527e-05,
197
+ "loss": 0.3938,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.19500304692260817,
202
+ "grad_norm": 0.09832354635000229,
203
+ "learning_rate": 1.9281437125748505e-05,
204
+ "loss": 0.3819,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.20196744145555845,
209
+ "grad_norm": 0.0927552729845047,
210
+ "learning_rate": 1.9233532934131738e-05,
211
+ "loss": 0.3944,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.20893183598850876,
216
+ "grad_norm": 0.09627388417720795,
217
+ "learning_rate": 1.918562874251497e-05,
218
+ "loss": 0.348,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.21589623052145904,
223
+ "grad_norm": 0.09034324437379837,
224
+ "learning_rate": 1.9137724550898205e-05,
225
+ "loss": 0.3521,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.22286062505440934,
230
+ "grad_norm": 0.09156423062086105,
231
+ "learning_rate": 1.9089820359281438e-05,
232
+ "loss": 0.3663,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.22982501958735962,
237
+ "grad_norm": 0.0996803268790245,
238
+ "learning_rate": 1.904191616766467e-05,
239
+ "loss": 0.3347,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.2367894141203099,
244
+ "grad_norm": 0.09962292015552521,
245
+ "learning_rate": 1.8994011976047905e-05,
246
+ "loss": 0.373,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.2437538086532602,
251
+ "grad_norm": 0.08211114257574081,
252
+ "learning_rate": 1.894610778443114e-05,
253
+ "loss": 0.3641,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.2507182031862105,
258
+ "grad_norm": 0.10449781268835068,
259
+ "learning_rate": 1.889820359281437e-05,
260
+ "loss": 0.3358,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.2576825977191608,
265
+ "grad_norm": 0.11216866970062256,
266
+ "learning_rate": 1.8850299401197608e-05,
267
+ "loss": 0.372,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.2646469922521111,
272
+ "grad_norm": 0.11125332862138748,
273
+ "learning_rate": 1.8802395209580838e-05,
274
+ "loss": 0.3521,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.27161138678506136,
279
+ "grad_norm": 0.1620815545320511,
280
+ "learning_rate": 1.8754491017964075e-05,
281
+ "loss": 0.3726,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.27857578131801164,
286
+ "grad_norm": 0.11439554393291473,
287
+ "learning_rate": 1.8706586826347305e-05,
288
+ "loss": 0.3287,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.285540175850962,
293
+ "grad_norm": 0.10733096301555634,
294
+ "learning_rate": 1.865868263473054e-05,
295
+ "loss": 0.3459,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.29250457038391225,
300
+ "grad_norm": 0.12426561117172241,
301
+ "learning_rate": 1.8610778443113775e-05,
302
+ "loss": 0.3705,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.29946896491686253,
307
+ "grad_norm": 0.1148887649178505,
308
+ "learning_rate": 1.8562874251497008e-05,
309
+ "loss": 0.3216,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.3064333594498128,
314
+ "grad_norm": 0.10842527449131012,
315
+ "learning_rate": 1.851497005988024e-05,
316
+ "loss": 0.3502,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.31339775398276315,
321
+ "grad_norm": 0.11736515164375305,
322
+ "learning_rate": 1.8467065868263475e-05,
323
+ "loss": 0.3394,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.32036214851571343,
328
+ "grad_norm": 0.1381833553314209,
329
+ "learning_rate": 1.8419161676646708e-05,
330
+ "loss": 0.3166,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.3273265430486637,
335
+ "grad_norm": 0.10973569750785828,
336
+ "learning_rate": 1.837125748502994e-05,
337
+ "loss": 0.3245,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.3328980586750239,
342
+ "eval_loss": 0.3075329661369324,
343
+ "eval_runtime": 356.0276,
344
+ "eval_samples_per_second": 3.396,
345
+ "eval_steps_per_second": 1.699,
346
+ "step": 478
347
+ },
348
+ {
349
+ "epoch": 0.334290937581614,
350
+ "grad_norm": 0.13474580645561218,
351
+ "learning_rate": 1.8323353293413175e-05,
352
+ "loss": 0.2914,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.34125533211456427,
357
+ "grad_norm": 0.12812362611293793,
358
+ "learning_rate": 1.827544910179641e-05,
359
+ "loss": 0.3327,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.3482197266475146,
364
+ "grad_norm": 0.11661785840988159,
365
+ "learning_rate": 1.822754491017964e-05,
366
+ "loss": 0.3468,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.3551841211804649,
371
+ "grad_norm": 0.12990142405033112,
372
+ "learning_rate": 1.8179640718562878e-05,
373
+ "loss": 0.3318,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.36214851571341516,
378
+ "grad_norm": 0.1309327632188797,
379
+ "learning_rate": 1.8131736526946108e-05,
380
+ "loss": 0.2942,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.36911291024636544,
385
+ "grad_norm": 0.12246479839086533,
386
+ "learning_rate": 1.8083832335329345e-05,
387
+ "loss": 0.3751,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.3760773047793157,
392
+ "grad_norm": 0.14134123921394348,
393
+ "learning_rate": 1.8035928143712575e-05,
394
+ "loss": 0.3248,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.38304169931226606,
399
+ "grad_norm": 0.1433170884847641,
400
+ "learning_rate": 1.798802395209581e-05,
401
+ "loss": 0.3344,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.39000609384521634,
406
+ "grad_norm": 0.13364745676517487,
407
+ "learning_rate": 1.7940119760479045e-05,
408
+ "loss": 0.2928,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.3969704883781666,
413
+ "grad_norm": 0.13446496427059174,
414
+ "learning_rate": 1.7892215568862278e-05,
415
+ "loss": 0.3421,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.4039348829111169,
420
+ "grad_norm": 0.1586250364780426,
421
+ "learning_rate": 1.784431137724551e-05,
422
+ "loss": 0.3099,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.4108992774440672,
427
+ "grad_norm": 0.1283930391073227,
428
+ "learning_rate": 1.7796407185628745e-05,
429
+ "loss": 0.3123,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.4178636719770175,
434
+ "grad_norm": 0.14261852204799652,
435
+ "learning_rate": 1.7748502994011978e-05,
436
+ "loss": 0.2886,
437
+ "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.4248280665099678,
441
+ "grad_norm": 0.13329675793647766,
442
+ "learning_rate": 1.770059880239521e-05,
443
+ "loss": 0.3255,
444
+ "step": 610
445
+ },
446
+ {
447
+ "epoch": 0.4317924610429181,
448
+ "grad_norm": 0.13880160450935364,
449
+ "learning_rate": 1.7652694610778445e-05,
450
+ "loss": 0.326,
451
+ "step": 620
452
+ },
453
+ {
454
+ "epoch": 0.43875685557586835,
455
+ "grad_norm": 0.14388258755207062,
456
+ "learning_rate": 1.7604790419161678e-05,
457
+ "loss": 0.3269,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 0.4457212501088187,
462
+ "grad_norm": 0.1571163535118103,
463
+ "learning_rate": 1.755688622754491e-05,
464
+ "loss": 0.2919,
465
+ "step": 640
466
+ },
467
+ {
468
+ "epoch": 0.45268564464176897,
469
+ "grad_norm": 0.15307635068893433,
470
+ "learning_rate": 1.7508982035928145e-05,
471
+ "loss": 0.2773,
472
+ "step": 650
473
+ },
474
+ {
475
+ "epoch": 0.45965003917471925,
476
+ "grad_norm": 0.1438165158033371,
477
+ "learning_rate": 1.7461077844311378e-05,
478
+ "loss": 0.2847,
479
+ "step": 660
480
+ },
481
+ {
482
+ "epoch": 0.46661443370766953,
483
+ "grad_norm": 0.15620216727256775,
484
+ "learning_rate": 1.741317365269461e-05,
485
+ "loss": 0.29,
486
+ "step": 670
487
+ },
488
+ {
489
+ "epoch": 0.4735788282406198,
490
+ "grad_norm": 0.1414933055639267,
491
+ "learning_rate": 1.7365269461077845e-05,
492
+ "loss": 0.2754,
493
+ "step": 680
494
+ },
495
+ {
496
+ "epoch": 0.48054322277357014,
497
+ "grad_norm": 0.15853798389434814,
498
+ "learning_rate": 1.7317365269461078e-05,
499
+ "loss": 0.3062,
500
+ "step": 690
501
+ },
502
+ {
503
+ "epoch": 0.4875076173065204,
504
+ "grad_norm": 0.17159980535507202,
505
+ "learning_rate": 1.726946107784431e-05,
506
+ "loss": 0.2953,
507
+ "step": 700
508
+ },
509
+ {
510
+ "epoch": 0.4944720118394707,
511
+ "grad_norm": 0.16937299072742462,
512
+ "learning_rate": 1.7221556886227548e-05,
513
+ "loss": 0.2895,
514
+ "step": 710
515
+ },
516
+ {
517
+ "epoch": 0.501436406372421,
518
+ "grad_norm": 0.1455833464860916,
519
+ "learning_rate": 1.717365269461078e-05,
520
+ "loss": 0.2912,
521
+ "step": 720
522
+ },
523
+ {
524
+ "epoch": 0.5084008009053713,
525
+ "grad_norm": 0.15824872255325317,
526
+ "learning_rate": 1.7125748502994015e-05,
527
+ "loss": 0.3012,
528
+ "step": 730
529
+ },
530
+ {
531
+ "epoch": 0.5153651954383216,
532
+ "grad_norm": 0.1499851644039154,
533
+ "learning_rate": 1.7077844311377248e-05,
534
+ "loss": 0.263,
535
+ "step": 740
536
+ },
537
+ {
538
+ "epoch": 0.5223295899712719,
539
+ "grad_norm": 0.1611323356628418,
540
+ "learning_rate": 1.702994011976048e-05,
541
+ "loss": 0.2595,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 0.5292939845042222,
546
+ "grad_norm": 0.16144199669361115,
547
+ "learning_rate": 1.6982035928143714e-05,
548
+ "loss": 0.2939,
549
+ "step": 760
550
+ },
551
+ {
552
+ "epoch": 0.5362583790371724,
553
+ "grad_norm": 0.13433364033699036,
554
+ "learning_rate": 1.6934131736526948e-05,
555
+ "loss": 0.2817,
556
+ "step": 770
557
+ },
558
+ {
559
+ "epoch": 0.5432227735701227,
560
+ "grad_norm": 0.15188777446746826,
561
+ "learning_rate": 1.688622754491018e-05,
562
+ "loss": 0.2804,
563
+ "step": 780
564
+ },
565
+ {
566
+ "epoch": 0.550187168103073,
567
+ "grad_norm": 0.1530751883983612,
568
+ "learning_rate": 1.6838323353293414e-05,
569
+ "loss": 0.298,
570
+ "step": 790
571
+ },
572
+ {
573
+ "epoch": 0.5571515626360233,
574
+ "grad_norm": 0.14582635462284088,
575
+ "learning_rate": 1.6790419161676648e-05,
576
+ "loss": 0.3177,
577
+ "step": 800
578
+ },
579
+ {
580
+ "epoch": 0.5641159571689737,
581
+ "grad_norm": 0.174430251121521,
582
+ "learning_rate": 1.674251497005988e-05,
583
+ "loss": 0.2624,
584
+ "step": 810
585
+ },
586
+ {
587
+ "epoch": 0.571080351701924,
588
+ "grad_norm": 0.1558169573545456,
589
+ "learning_rate": 1.6694610778443114e-05,
590
+ "loss": 0.2631,
591
+ "step": 820
592
+ },
593
+ {
594
+ "epoch": 0.5780447462348742,
595
+ "grad_norm": 0.17108450829982758,
596
+ "learning_rate": 1.6646706586826348e-05,
597
+ "loss": 0.2903,
598
+ "step": 830
599
+ },
600
+ {
601
+ "epoch": 0.5850091407678245,
602
+ "grad_norm": 0.16796821355819702,
603
+ "learning_rate": 1.659880239520958e-05,
604
+ "loss": 0.2928,
605
+ "step": 840
606
+ },
607
+ {
608
+ "epoch": 0.5919735353007748,
609
+ "grad_norm": 0.16556940972805023,
610
+ "learning_rate": 1.6550898203592814e-05,
611
+ "loss": 0.2882,
612
+ "step": 850
613
+ },
614
+ {
615
+ "epoch": 0.5989379298337251,
616
+ "grad_norm": 0.1716109961271286,
617
+ "learning_rate": 1.650299401197605e-05,
618
+ "loss": 0.2814,
619
+ "step": 860
620
+ },
621
+ {
622
+ "epoch": 0.6059023243666753,
623
+ "grad_norm": 0.1695253700017929,
624
+ "learning_rate": 1.645508982035928e-05,
625
+ "loss": 0.2909,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 0.6128667188996256,
630
+ "grad_norm": 0.1896178424358368,
631
+ "learning_rate": 1.6407185628742518e-05,
632
+ "loss": 0.2909,
633
+ "step": 880
634
+ },
635
+ {
636
+ "epoch": 0.6198311134325759,
637
+ "grad_norm": 0.1624131202697754,
638
+ "learning_rate": 1.6359281437125748e-05,
639
+ "loss": 0.2672,
640
+ "step": 890
641
+ },
642
+ {
643
+ "epoch": 0.6267955079655263,
644
+ "grad_norm": 0.16821731626987457,
645
+ "learning_rate": 1.6311377245508984e-05,
646
+ "loss": 0.2626,
647
+ "step": 900
648
+ },
649
+ {
650
+ "epoch": 0.6337599024984766,
651
+ "grad_norm": 0.17026785016059875,
652
+ "learning_rate": 1.6263473053892214e-05,
653
+ "loss": 0.2735,
654
+ "step": 910
655
+ },
656
+ {
657
+ "epoch": 0.6407242970314269,
658
+ "grad_norm": 0.19429761171340942,
659
+ "learning_rate": 1.621556886227545e-05,
660
+ "loss": 0.2605,
661
+ "step": 920
662
+ },
663
+ {
664
+ "epoch": 0.6476886915643771,
665
+ "grad_norm": 0.16931040585041046,
666
+ "learning_rate": 1.6167664670658684e-05,
667
+ "loss": 0.2722,
668
+ "step": 930
669
+ },
670
+ {
671
+ "epoch": 0.6546530860973274,
672
+ "grad_norm": 0.20588335394859314,
673
+ "learning_rate": 1.6119760479041918e-05,
674
+ "loss": 0.301,
675
+ "step": 940
676
+ },
677
+ {
678
+ "epoch": 0.6616174806302777,
679
+ "grad_norm": 0.16699948906898499,
680
+ "learning_rate": 1.607185628742515e-05,
681
+ "loss": 0.2646,
682
+ "step": 950
683
+ },
684
+ {
685
+ "epoch": 0.6657961173500478,
686
+ "eval_loss": 0.2465677410364151,
687
+ "eval_runtime": 356.1016,
688
+ "eval_samples_per_second": 3.395,
689
+ "eval_steps_per_second": 1.699,
690
+ "step": 956
691
+ },
692
+ {
693
+ "epoch": 0.668581875163228,
694
+ "grad_norm": 0.20734217762947083,
695
+ "learning_rate": 1.6023952095808384e-05,
696
+ "loss": 0.2476,
697
+ "step": 960
698
+ },
699
+ {
700
+ "epoch": 0.6755462696961783,
701
+ "grad_norm": 0.21317210793495178,
702
+ "learning_rate": 1.5976047904191618e-05,
703
+ "loss": 0.2434,
704
+ "step": 970
705
+ },
706
+ {
707
+ "epoch": 0.6825106642291285,
708
+ "grad_norm": 0.19125515222549438,
709
+ "learning_rate": 1.592814371257485e-05,
710
+ "loss": 0.2823,
711
+ "step": 980
712
+ },
713
+ {
714
+ "epoch": 0.6894750587620788,
715
+ "grad_norm": 0.1651381254196167,
716
+ "learning_rate": 1.5880239520958084e-05,
717
+ "loss": 0.2769,
718
+ "step": 990
719
+ },
720
+ {
721
+ "epoch": 0.6964394532950292,
722
+ "grad_norm": 0.18061324954032898,
723
+ "learning_rate": 1.583233532934132e-05,
724
+ "loss": 0.2788,
725
+ "step": 1000
726
+ },
727
+ {
728
+ "epoch": 0.7034038478279795,
729
+ "grad_norm": 0.17788180708885193,
730
+ "learning_rate": 1.578443113772455e-05,
731
+ "loss": 0.2596,
732
+ "step": 1010
733
+ },
734
+ {
735
+ "epoch": 0.7103682423609298,
736
+ "grad_norm": 0.19635842740535736,
737
+ "learning_rate": 1.5736526946107788e-05,
738
+ "loss": 0.2391,
739
+ "step": 1020
740
+ },
741
+ {
742
+ "epoch": 0.71733263689388,
743
+ "grad_norm": 0.18000468611717224,
744
+ "learning_rate": 1.5688622754491018e-05,
745
+ "loss": 0.2589,
746
+ "step": 1030
747
+ },
748
+ {
749
+ "epoch": 0.7242970314268303,
750
+ "grad_norm": 0.18016283214092255,
751
+ "learning_rate": 1.5640718562874254e-05,
752
+ "loss": 0.2673,
753
+ "step": 1040
754
+ },
755
+ {
756
+ "epoch": 0.7312614259597806,
757
+ "grad_norm": 0.2085445076227188,
758
+ "learning_rate": 1.5592814371257484e-05,
759
+ "loss": 0.295,
760
+ "step": 1050
761
+ },
762
+ {
763
+ "epoch": 0.7382258204927309,
764
+ "grad_norm": 0.1937682181596756,
765
+ "learning_rate": 1.554491017964072e-05,
766
+ "loss": 0.2741,
767
+ "step": 1060
768
+ },
769
+ {
770
+ "epoch": 0.7451902150256812,
771
+ "grad_norm": 0.18496188521385193,
772
+ "learning_rate": 1.5497005988023954e-05,
773
+ "loss": 0.2485,
774
+ "step": 1070
775
+ },
776
+ {
777
+ "epoch": 0.7521546095586314,
778
+ "grad_norm": 0.21024686098098755,
779
+ "learning_rate": 1.5449101796407188e-05,
780
+ "loss": 0.2569,
781
+ "step": 1080
782
+ },
783
+ {
784
+ "epoch": 0.7591190040915818,
785
+ "grad_norm": 0.1960270255804062,
786
+ "learning_rate": 1.540119760479042e-05,
787
+ "loss": 0.2335,
788
+ "step": 1090
789
+ },
790
+ {
791
+ "epoch": 0.7660833986245321,
792
+ "grad_norm": 0.19557876884937286,
793
+ "learning_rate": 1.5353293413173654e-05,
794
+ "loss": 0.279,
795
+ "step": 1100
796
+ },
797
+ {
798
+ "epoch": 0.7730477931574824,
799
+ "grad_norm": 0.17300210893154144,
800
+ "learning_rate": 1.5305389221556888e-05,
801
+ "loss": 0.249,
802
+ "step": 1110
803
+ },
804
+ {
805
+ "epoch": 0.7800121876904327,
806
+ "grad_norm": 0.1768539994955063,
807
+ "learning_rate": 1.5257485029940121e-05,
808
+ "loss": 0.2415,
809
+ "step": 1120
810
+ },
811
+ {
812
+ "epoch": 0.786976582223383,
813
+ "grad_norm": 0.20632915198802948,
814
+ "learning_rate": 1.5209580838323354e-05,
815
+ "loss": 0.2312,
816
+ "step": 1130
817
+ },
818
+ {
819
+ "epoch": 0.7939409767563332,
820
+ "grad_norm": 0.1948348730802536,
821
+ "learning_rate": 1.516167664670659e-05,
822
+ "loss": 0.2415,
823
+ "step": 1140
824
+ },
825
+ {
826
+ "epoch": 0.8009053712892835,
827
+ "grad_norm": 0.16288821399211884,
828
+ "learning_rate": 1.511377245508982e-05,
829
+ "loss": 0.2571,
830
+ "step": 1150
831
+ },
832
+ {
833
+ "epoch": 0.8078697658222338,
834
+ "grad_norm": 0.21365194022655487,
835
+ "learning_rate": 1.5065868263473056e-05,
836
+ "loss": 0.2594,
837
+ "step": 1160
838
+ },
839
+ {
840
+ "epoch": 0.8148341603551841,
841
+ "grad_norm": 0.20960865914821625,
842
+ "learning_rate": 1.5017964071856287e-05,
843
+ "loss": 0.2537,
844
+ "step": 1170
845
+ },
846
+ {
847
+ "epoch": 0.8217985548881344,
848
+ "grad_norm": 0.20458345115184784,
849
+ "learning_rate": 1.4970059880239522e-05,
850
+ "loss": 0.2675,
851
+ "step": 1180
852
+ },
853
+ {
854
+ "epoch": 0.8287629494210847,
855
+ "grad_norm": 0.22388549149036407,
856
+ "learning_rate": 1.4922155688622754e-05,
857
+ "loss": 0.2525,
858
+ "step": 1190
859
+ },
860
+ {
861
+ "epoch": 0.835727343954035,
862
+ "grad_norm": 0.2139691859483719,
863
+ "learning_rate": 1.4874251497005989e-05,
864
+ "loss": 0.251,
865
+ "step": 1200
866
+ },
867
+ {
868
+ "epoch": 0.8426917384869853,
869
+ "grad_norm": 0.18761980533599854,
870
+ "learning_rate": 1.4826347305389224e-05,
871
+ "loss": 0.2328,
872
+ "step": 1210
873
+ },
874
+ {
875
+ "epoch": 0.8496561330199356,
876
+ "grad_norm": 0.183973029255867,
877
+ "learning_rate": 1.4778443113772456e-05,
878
+ "loss": 0.2239,
879
+ "step": 1220
880
+ },
881
+ {
882
+ "epoch": 0.8566205275528859,
883
+ "grad_norm": 0.23718492686748505,
884
+ "learning_rate": 1.473053892215569e-05,
885
+ "loss": 0.2423,
886
+ "step": 1230
887
+ },
888
+ {
889
+ "epoch": 0.8635849220858361,
890
+ "grad_norm": 0.23875045776367188,
891
+ "learning_rate": 1.4682634730538922e-05,
892
+ "loss": 0.2365,
893
+ "step": 1240
894
+ },
895
+ {
896
+ "epoch": 0.8705493166187864,
897
+ "grad_norm": 0.21570812165737152,
898
+ "learning_rate": 1.4634730538922157e-05,
899
+ "loss": 0.268,
900
+ "step": 1250
901
+ },
902
+ {
903
+ "epoch": 0.8775137111517367,
904
+ "grad_norm": 0.20839928090572357,
905
+ "learning_rate": 1.4586826347305389e-05,
906
+ "loss": 0.2502,
907
+ "step": 1260
908
+ },
909
+ {
910
+ "epoch": 0.884478105684687,
911
+ "grad_norm": 0.21605758368968964,
912
+ "learning_rate": 1.4538922155688624e-05,
913
+ "loss": 0.2365,
914
+ "step": 1270
915
+ },
916
+ {
917
+ "epoch": 0.8914425002176374,
918
+ "grad_norm": 0.23300880193710327,
919
+ "learning_rate": 1.4491017964071859e-05,
920
+ "loss": 0.2404,
921
+ "step": 1280
922
+ },
923
+ {
924
+ "epoch": 0.8984068947505877,
925
+ "grad_norm": 0.21796733140945435,
926
+ "learning_rate": 1.444311377245509e-05,
927
+ "loss": 0.2704,
928
+ "step": 1290
929
+ },
930
+ {
931
+ "epoch": 0.9053712892835379,
932
+ "grad_norm": 0.25555598735809326,
933
+ "learning_rate": 1.4395209580838326e-05,
934
+ "loss": 0.2674,
935
+ "step": 1300
936
+ },
937
+ {
938
+ "epoch": 0.9123356838164882,
939
+ "grad_norm": 0.22284413874149323,
940
+ "learning_rate": 1.4347305389221557e-05,
941
+ "loss": 0.2471,
942
+ "step": 1310
943
+ },
944
+ {
945
+ "epoch": 0.9193000783494385,
946
+ "grad_norm": 0.22234566509723663,
947
+ "learning_rate": 1.4299401197604792e-05,
948
+ "loss": 0.2529,
949
+ "step": 1320
950
+ },
951
+ {
952
+ "epoch": 0.9262644728823888,
953
+ "grad_norm": 0.221883624792099,
954
+ "learning_rate": 1.4251497005988024e-05,
955
+ "loss": 0.1932,
956
+ "step": 1330
957
+ },
958
+ {
959
+ "epoch": 0.9332288674153391,
960
+ "grad_norm": 0.22873520851135254,
961
+ "learning_rate": 1.4203592814371259e-05,
962
+ "loss": 0.2412,
963
+ "step": 1340
964
+ },
965
+ {
966
+ "epoch": 0.9401932619482893,
967
+ "grad_norm": 0.21307863295078278,
968
+ "learning_rate": 1.4155688622754492e-05,
969
+ "loss": 0.2384,
970
+ "step": 1350
971
+ },
972
+ {
973
+ "epoch": 0.9471576564812396,
974
+ "grad_norm": 0.21296364068984985,
975
+ "learning_rate": 1.4107784431137726e-05,
976
+ "loss": 0.2318,
977
+ "step": 1360
978
+ },
979
+ {
980
+ "epoch": 0.95412205101419,
981
+ "grad_norm": 0.21060685813426971,
982
+ "learning_rate": 1.405988023952096e-05,
983
+ "loss": 0.2323,
984
+ "step": 1370
985
+ },
986
+ {
987
+ "epoch": 0.9610864455471403,
988
+ "grad_norm": 0.23930418491363525,
989
+ "learning_rate": 1.4011976047904192e-05,
990
+ "loss": 0.2398,
991
+ "step": 1380
992
+ },
993
+ {
994
+ "epoch": 0.9680508400800906,
995
+ "grad_norm": 0.22837185859680176,
996
+ "learning_rate": 1.3964071856287427e-05,
997
+ "loss": 0.233,
998
+ "step": 1390
999
+ },
1000
+ {
1001
+ "epoch": 0.9750152346130408,
1002
+ "grad_norm": 0.21798169612884521,
1003
+ "learning_rate": 1.3916167664670659e-05,
1004
+ "loss": 0.2161,
1005
+ "step": 1400
1006
+ },
1007
+ {
1008
+ "epoch": 0.9819796291459911,
1009
+ "grad_norm": 0.2846814692020416,
1010
+ "learning_rate": 1.3868263473053894e-05,
1011
+ "loss": 0.2301,
1012
+ "step": 1410
1013
+ },
1014
+ {
1015
+ "epoch": 0.9889440236789414,
1016
+ "grad_norm": 0.22030217945575714,
1017
+ "learning_rate": 1.3820359281437127e-05,
1018
+ "loss": 0.2017,
1019
+ "step": 1420
1020
+ },
1021
+ {
1022
+ "epoch": 0.9959084182118917,
1023
+ "grad_norm": 0.25554022192955017,
1024
+ "learning_rate": 1.377245508982036e-05,
1025
+ "loss": 0.2296,
1026
+ "step": 1430
1027
+ },
1028
+ {
1029
+ "epoch": 0.9986941760250718,
1030
+ "eval_loss": 0.21637588739395142,
1031
+ "eval_runtime": 356.0721,
1032
+ "eval_samples_per_second": 3.395,
1033
+ "eval_steps_per_second": 1.699,
1034
+ "step": 1434
1035
+ },
1036
+ {
1037
+ "epoch": 1.0030469226081657,
1038
+ "grad_norm": 0.2862723469734192,
1039
+ "learning_rate": 1.3724550898203594e-05,
1040
+ "loss": 0.2538,
1041
+ "step": 1440
1042
+ },
1043
+ {
1044
+ "epoch": 1.010011317141116,
1045
+ "grad_norm": 0.2589089870452881,
1046
+ "learning_rate": 1.3676646706586827e-05,
1047
+ "loss": 0.2196,
1048
+ "step": 1450
1049
+ },
1050
+ {
1051
+ "epoch": 1.0169757116740663,
1052
+ "grad_norm": 0.27893784642219543,
1053
+ "learning_rate": 1.362874251497006e-05,
1054
+ "loss": 0.1935,
1055
+ "step": 1460
1056
+ },
1057
+ {
1058
+ "epoch": 1.0239401062070166,
1059
+ "grad_norm": 0.2530602514743805,
1060
+ "learning_rate": 1.3580838323353294e-05,
1061
+ "loss": 0.2248,
1062
+ "step": 1470
1063
+ },
1064
+ {
1065
+ "epoch": 1.0309045007399669,
1066
+ "grad_norm": 0.24902446568012238,
1067
+ "learning_rate": 1.3532934131736529e-05,
1068
+ "loss": 0.224,
1069
+ "step": 1480
1070
+ },
1071
+ {
1072
+ "epoch": 1.0378688952729171,
1073
+ "grad_norm": 0.27132269740104675,
1074
+ "learning_rate": 1.3485029940119762e-05,
1075
+ "loss": 0.2208,
1076
+ "step": 1490
1077
+ },
1078
+ {
1079
+ "epoch": 1.0448332898058674,
1080
+ "grad_norm": 0.2570977807044983,
1081
+ "learning_rate": 1.3437125748502996e-05,
1082
+ "loss": 0.2103,
1083
+ "step": 1500
1084
+ },
1085
+ {
1086
+ "epoch": 1.0517976843388177,
1087
+ "grad_norm": 0.2994561493396759,
1088
+ "learning_rate": 1.3389221556886229e-05,
1089
+ "loss": 0.2243,
1090
+ "step": 1510
1091
+ },
1092
+ {
1093
+ "epoch": 1.058762078871768,
1094
+ "grad_norm": 0.22994251549243927,
1095
+ "learning_rate": 1.3341317365269462e-05,
1096
+ "loss": 0.2217,
1097
+ "step": 1520
1098
+ },
1099
+ {
1100
+ "epoch": 1.0657264734047183,
1101
+ "grad_norm": 0.22706159949302673,
1102
+ "learning_rate": 1.3293413173652696e-05,
1103
+ "loss": 0.2219,
1104
+ "step": 1530
1105
+ },
1106
+ {
1107
+ "epoch": 1.0726908679376688,
1108
+ "grad_norm": 0.26584964990615845,
1109
+ "learning_rate": 1.3245508982035929e-05,
1110
+ "loss": 0.2329,
1111
+ "step": 1540
1112
+ },
1113
+ {
1114
+ "epoch": 1.079655262470619,
1115
+ "grad_norm": 0.2549090087413788,
1116
+ "learning_rate": 1.3197604790419162e-05,
1117
+ "loss": 0.2235,
1118
+ "step": 1550
1119
+ },
1120
+ {
1121
+ "epoch": 1.0866196570035693,
1122
+ "grad_norm": 0.27169135212898254,
1123
+ "learning_rate": 1.3149700598802397e-05,
1124
+ "loss": 0.2332,
1125
+ "step": 1560
1126
+ },
1127
+ {
1128
+ "epoch": 1.0935840515365196,
1129
+ "grad_norm": 0.26733773946762085,
1130
+ "learning_rate": 1.3101796407185629e-05,
1131
+ "loss": 0.2062,
1132
+ "step": 1570
1133
+ },
1134
+ {
1135
+ "epoch": 1.1005484460694699,
1136
+ "grad_norm": 0.3222082853317261,
1137
+ "learning_rate": 1.3053892215568864e-05,
1138
+ "loss": 0.2111,
1139
+ "step": 1580
1140
+ },
1141
+ {
1142
+ "epoch": 1.1075128406024202,
1143
+ "grad_norm": 0.20403257012367249,
1144
+ "learning_rate": 1.3005988023952097e-05,
1145
+ "loss": 0.226,
1146
+ "step": 1590
1147
+ },
1148
+ {
1149
+ "epoch": 1.1144772351353704,
1150
+ "grad_norm": 0.2658451199531555,
1151
+ "learning_rate": 1.295808383233533e-05,
1152
+ "loss": 0.2013,
1153
+ "step": 1600
1154
+ },
1155
+ {
1156
+ "epoch": 1.1214416296683207,
1157
+ "grad_norm": 0.2719573676586151,
1158
+ "learning_rate": 1.2910179640718564e-05,
1159
+ "loss": 0.2036,
1160
+ "step": 1610
1161
+ },
1162
+ {
1163
+ "epoch": 1.128406024201271,
1164
+ "grad_norm": 0.29076331853866577,
1165
+ "learning_rate": 1.2862275449101797e-05,
1166
+ "loss": 0.2162,
1167
+ "step": 1620
1168
+ },
1169
+ {
1170
+ "epoch": 1.1353704187342213,
1171
+ "grad_norm": 0.2324078530073166,
1172
+ "learning_rate": 1.2814371257485032e-05,
1173
+ "loss": 0.2002,
1174
+ "step": 1630
1175
+ },
1176
+ {
1177
+ "epoch": 1.1423348132671716,
1178
+ "grad_norm": 0.27219468355178833,
1179
+ "learning_rate": 1.2766467065868264e-05,
1180
+ "loss": 0.2238,
1181
+ "step": 1640
1182
+ },
1183
+ {
1184
+ "epoch": 1.1492992078001218,
1185
+ "grad_norm": 0.22101271152496338,
1186
+ "learning_rate": 1.2718562874251499e-05,
1187
+ "loss": 0.1845,
1188
+ "step": 1650
1189
+ },
1190
+ {
1191
+ "epoch": 1.1562636023330721,
1192
+ "grad_norm": 0.25101369619369507,
1193
+ "learning_rate": 1.267065868263473e-05,
1194
+ "loss": 0.2296,
1195
+ "step": 1660
1196
+ },
1197
+ {
1198
+ "epoch": 1.1632279968660224,
1199
+ "grad_norm": 0.25895142555236816,
1200
+ "learning_rate": 1.2622754491017965e-05,
1201
+ "loss": 0.2271,
1202
+ "step": 1670
1203
+ },
1204
+ {
1205
+ "epoch": 1.1701923913989727,
1206
+ "grad_norm": 0.26405826210975647,
1207
+ "learning_rate": 1.2574850299401197e-05,
1208
+ "loss": 0.2159,
1209
+ "step": 1680
1210
+ },
1211
+ {
1212
+ "epoch": 1.177156785931923,
1213
+ "grad_norm": 0.23231615126132965,
1214
+ "learning_rate": 1.2526946107784432e-05,
1215
+ "loss": 0.1916,
1216
+ "step": 1690
1217
+ },
1218
+ {
1219
+ "epoch": 1.1841211804648732,
1220
+ "grad_norm": 0.3211207985877991,
1221
+ "learning_rate": 1.2479041916167665e-05,
1222
+ "loss": 0.2154,
1223
+ "step": 1700
1224
+ },
1225
+ {
1226
+ "epoch": 1.1910855749978237,
1227
+ "grad_norm": 0.2948794662952423,
1228
+ "learning_rate": 1.2431137724550899e-05,
1229
+ "loss": 0.1999,
1230
+ "step": 1710
1231
+ },
1232
+ {
1233
+ "epoch": 1.198049969530774,
1234
+ "grad_norm": 0.28081706166267395,
1235
+ "learning_rate": 1.2383233532934134e-05,
1236
+ "loss": 0.2301,
1237
+ "step": 1720
1238
+ },
1239
+ {
1240
+ "epoch": 1.2050143640637243,
1241
+ "grad_norm": 0.296283096075058,
1242
+ "learning_rate": 1.2335329341317365e-05,
1243
+ "loss": 0.2164,
1244
+ "step": 1730
1245
+ },
1246
+ {
1247
+ "epoch": 1.2119787585966746,
1248
+ "grad_norm": 0.29658278822898865,
1249
+ "learning_rate": 1.22874251497006e-05,
1250
+ "loss": 0.1916,
1251
+ "step": 1740
1252
+ },
1253
+ {
1254
+ "epoch": 1.2189431531296249,
1255
+ "grad_norm": 0.2917250990867615,
1256
+ "learning_rate": 1.2239520958083832e-05,
1257
+ "loss": 0.2045,
1258
+ "step": 1750
1259
+ },
1260
+ {
1261
+ "epoch": 1.2259075476625751,
1262
+ "grad_norm": 0.35840919613838196,
1263
+ "learning_rate": 1.2191616766467067e-05,
1264
+ "loss": 0.1965,
1265
+ "step": 1760
1266
+ },
1267
+ {
1268
+ "epoch": 1.2328719421955254,
1269
+ "grad_norm": 0.2819244861602783,
1270
+ "learning_rate": 1.2143712574850299e-05,
1271
+ "loss": 0.2032,
1272
+ "step": 1770
1273
+ },
1274
+ {
1275
+ "epoch": 1.2398363367284757,
1276
+ "grad_norm": 0.3518809974193573,
1277
+ "learning_rate": 1.2095808383233534e-05,
1278
+ "loss": 0.2043,
1279
+ "step": 1780
1280
+ },
1281
+ {
1282
+ "epoch": 1.246800731261426,
1283
+ "grad_norm": 0.3232385516166687,
1284
+ "learning_rate": 1.2047904191616769e-05,
1285
+ "loss": 0.23,
1286
+ "step": 1790
1287
+ },
1288
+ {
1289
+ "epoch": 1.2537651257943763,
1290
+ "grad_norm": 0.23131519556045532,
1291
+ "learning_rate": 1.2e-05,
1292
+ "loss": 0.2106,
1293
+ "step": 1800
1294
+ },
1295
+ {
1296
+ "epoch": 1.2607295203273265,
1297
+ "grad_norm": 0.30877017974853516,
1298
+ "learning_rate": 1.1952095808383235e-05,
1299
+ "loss": 0.2088,
1300
+ "step": 1810
1301
+ },
1302
+ {
1303
+ "epoch": 1.2676939148602768,
1304
+ "grad_norm": 0.36684754490852356,
1305
+ "learning_rate": 1.1904191616766467e-05,
1306
+ "loss": 0.2024,
1307
+ "step": 1820
1308
+ },
1309
+ {
1310
+ "epoch": 1.274658309393227,
1311
+ "grad_norm": 0.21124888956546783,
1312
+ "learning_rate": 1.1856287425149702e-05,
1313
+ "loss": 0.1929,
1314
+ "step": 1830
1315
+ },
1316
+ {
1317
+ "epoch": 1.2816227039261774,
1318
+ "grad_norm": 0.2815339267253876,
1319
+ "learning_rate": 1.1808383233532934e-05,
1320
+ "loss": 0.1973,
1321
+ "step": 1840
1322
+ },
1323
+ {
1324
+ "epoch": 1.2885870984591277,
1325
+ "grad_norm": 0.3017849922180176,
1326
+ "learning_rate": 1.1760479041916169e-05,
1327
+ "loss": 0.233,
1328
+ "step": 1850
1329
+ },
1330
+ {
1331
+ "epoch": 1.295551492992078,
1332
+ "grad_norm": 0.3456547260284424,
1333
+ "learning_rate": 1.1712574850299404e-05,
1334
+ "loss": 0.1769,
1335
+ "step": 1860
1336
+ },
1337
+ {
1338
+ "epoch": 1.3025158875250282,
1339
+ "grad_norm": 0.28210344910621643,
1340
+ "learning_rate": 1.1664670658682635e-05,
1341
+ "loss": 0.204,
1342
+ "step": 1870
1343
+ },
1344
+ {
1345
+ "epoch": 1.3094802820579785,
1346
+ "grad_norm": 0.3701432943344116,
1347
+ "learning_rate": 1.161676646706587e-05,
1348
+ "loss": 0.1923,
1349
+ "step": 1880
1350
+ },
1351
+ {
1352
+ "epoch": 1.3164446765909288,
1353
+ "grad_norm": 0.3656926453113556,
1354
+ "learning_rate": 1.1568862275449102e-05,
1355
+ "loss": 0.2136,
1356
+ "step": 1890
1357
+ },
1358
+ {
1359
+ "epoch": 1.323409071123879,
1360
+ "grad_norm": 0.29142090678215027,
1361
+ "learning_rate": 1.1520958083832337e-05,
1362
+ "loss": 0.1957,
1363
+ "step": 1900
1364
+ },
1365
+ {
1366
+ "epoch": 1.3303734656568293,
1367
+ "grad_norm": 0.2799660563468933,
1368
+ "learning_rate": 1.1473053892215569e-05,
1369
+ "loss": 0.2009,
1370
+ "step": 1910
1371
+ },
1372
+ {
1373
+ "epoch": 1.3317663445634196,
1374
+ "eval_loss": 0.19295775890350342,
1375
+ "eval_runtime": 356.0153,
1376
+ "eval_samples_per_second": 3.396,
1377
+ "eval_steps_per_second": 1.699,
1378
+ "step": 1912
1379
+ },
1380
+ {
1381
+ "epoch": 1.3373378601897796,
1382
+ "grad_norm": 0.38173311948776245,
1383
+ "learning_rate": 1.1425149700598804e-05,
1384
+ "loss": 0.2133,
1385
+ "step": 1920
1386
+ },
1387
+ {
1388
+ "epoch": 1.34430225472273,
1389
+ "grad_norm": 0.38059180974960327,
1390
+ "learning_rate": 1.1377245508982037e-05,
1391
+ "loss": 0.203,
1392
+ "step": 1930
1393
+ },
1394
+ {
1395
+ "epoch": 1.3512666492556804,
1396
+ "grad_norm": 0.2743132710456848,
1397
+ "learning_rate": 1.132934131736527e-05,
1398
+ "loss": 0.196,
1399
+ "step": 1940
1400
+ },
1401
+ {
1402
+ "epoch": 1.3582310437886307,
1403
+ "grad_norm": 0.3110567331314087,
1404
+ "learning_rate": 1.1281437125748505e-05,
1405
+ "loss": 0.1868,
1406
+ "step": 1950
1407
+ },
1408
+ {
1409
+ "epoch": 1.365195438321581,
1410
+ "grad_norm": 0.4261178970336914,
1411
+ "learning_rate": 1.1233532934131737e-05,
1412
+ "loss": 0.1952,
1413
+ "step": 1960
1414
+ },
1415
+ {
1416
+ "epoch": 1.3721598328545312,
1417
+ "grad_norm": 0.3890218436717987,
1418
+ "learning_rate": 1.1185628742514972e-05,
1419
+ "loss": 0.1964,
1420
+ "step": 1970
1421
+ },
1422
+ {
1423
+ "epoch": 1.3791242273874815,
1424
+ "grad_norm": 0.3712151050567627,
1425
+ "learning_rate": 1.1137724550898203e-05,
1426
+ "loss": 0.1933,
1427
+ "step": 1980
1428
+ },
1429
+ {
1430
+ "epoch": 1.3860886219204318,
1431
+ "grad_norm": 0.2833101153373718,
1432
+ "learning_rate": 1.1089820359281439e-05,
1433
+ "loss": 0.1862,
1434
+ "step": 1990
1435
+ },
1436
+ {
1437
+ "epoch": 1.393053016453382,
1438
+ "grad_norm": 0.3696097731590271,
1439
+ "learning_rate": 1.1041916167664672e-05,
1440
+ "loss": 0.2142,
1441
+ "step": 2000
1442
+ },
1443
+ {
1444
+ "epoch": 1.4000174109863324,
1445
+ "grad_norm": 0.4144258499145508,
1446
+ "learning_rate": 1.0994011976047905e-05,
1447
+ "loss": 0.1915,
1448
+ "step": 2010
1449
+ },
1450
+ {
1451
+ "epoch": 1.4069818055192826,
1452
+ "grad_norm": 0.28359559178352356,
1453
+ "learning_rate": 1.0946107784431138e-05,
1454
+ "loss": 0.1861,
1455
+ "step": 2020
1456
+ },
1457
+ {
1458
+ "epoch": 1.413946200052233,
1459
+ "grad_norm": 0.327510803937912,
1460
+ "learning_rate": 1.0898203592814372e-05,
1461
+ "loss": 0.1961,
1462
+ "step": 2030
1463
+ },
1464
+ {
1465
+ "epoch": 1.4209105945851832,
1466
+ "grad_norm": 0.44245216250419617,
1467
+ "learning_rate": 1.0850299401197605e-05,
1468
+ "loss": 0.2091,
1469
+ "step": 2040
1470
+ },
1471
+ {
1472
+ "epoch": 1.4278749891181335,
1473
+ "grad_norm": 0.3903510570526123,
1474
+ "learning_rate": 1.0802395209580838e-05,
1475
+ "loss": 0.2058,
1476
+ "step": 2050
1477
+ },
1478
+ {
1479
+ "epoch": 1.4348393836510838,
1480
+ "grad_norm": 0.32461392879486084,
1481
+ "learning_rate": 1.0754491017964073e-05,
1482
+ "loss": 0.1793,
1483
+ "step": 2060
1484
+ },
1485
+ {
1486
+ "epoch": 1.441803778184034,
1487
+ "grad_norm": 0.4412670433521271,
1488
+ "learning_rate": 1.0706586826347307e-05,
1489
+ "loss": 0.1856,
1490
+ "step": 2070
1491
+ },
1492
+ {
1493
+ "epoch": 1.4487681727169845,
1494
+ "grad_norm": 0.2217404693365097,
1495
+ "learning_rate": 1.065868263473054e-05,
1496
+ "loss": 0.1628,
1497
+ "step": 2080
1498
+ },
1499
+ {
1500
+ "epoch": 1.4557325672499348,
1501
+ "grad_norm": 0.3572748899459839,
1502
+ "learning_rate": 1.0610778443113773e-05,
1503
+ "loss": 0.1904,
1504
+ "step": 2090
1505
+ },
1506
+ {
1507
+ "epoch": 1.462696961782885,
1508
+ "grad_norm": 0.23523126542568207,
1509
+ "learning_rate": 1.0562874251497007e-05,
1510
+ "loss": 0.1833,
1511
+ "step": 2100
1512
+ },
1513
+ {
1514
+ "epoch": 1.4696613563158354,
1515
+ "grad_norm": 0.35970646142959595,
1516
+ "learning_rate": 1.051497005988024e-05,
1517
+ "loss": 0.2018,
1518
+ "step": 2110
1519
+ },
1520
+ {
1521
+ "epoch": 1.4766257508487857,
1522
+ "grad_norm": 0.3383442759513855,
1523
+ "learning_rate": 1.0467065868263473e-05,
1524
+ "loss": 0.1986,
1525
+ "step": 2120
1526
+ },
1527
+ {
1528
+ "epoch": 1.483590145381736,
1529
+ "grad_norm": 0.33182451128959656,
1530
+ "learning_rate": 1.0419161676646707e-05,
1531
+ "loss": 0.194,
1532
+ "step": 2130
1533
+ },
1534
+ {
1535
+ "epoch": 1.4905545399146862,
1536
+ "grad_norm": 0.3276427090167999,
1537
+ "learning_rate": 1.0371257485029942e-05,
1538
+ "loss": 0.1945,
1539
+ "step": 2140
1540
+ },
1541
+ {
1542
+ "epoch": 1.4975189344476365,
1543
+ "grad_norm": 0.30687084794044495,
1544
+ "learning_rate": 1.0323353293413173e-05,
1545
+ "loss": 0.1839,
1546
+ "step": 2150
1547
+ },
1548
+ {
1549
+ "epoch": 1.5044833289805868,
1550
+ "grad_norm": 0.3758613169193268,
1551
+ "learning_rate": 1.0275449101796408e-05,
1552
+ "loss": 0.2035,
1553
+ "step": 2160
1554
+ },
1555
+ {
1556
+ "epoch": 1.511447723513537,
1557
+ "grad_norm": 0.34436604380607605,
1558
+ "learning_rate": 1.0227544910179642e-05,
1559
+ "loss": 0.1946,
1560
+ "step": 2170
1561
+ },
1562
+ {
1563
+ "epoch": 1.5184121180464873,
1564
+ "grad_norm": 0.31638774275779724,
1565
+ "learning_rate": 1.0179640718562875e-05,
1566
+ "loss": 0.1761,
1567
+ "step": 2180
1568
+ },
1569
+ {
1570
+ "epoch": 1.5253765125794376,
1571
+ "grad_norm": 0.3935607969760895,
1572
+ "learning_rate": 1.0131736526946108e-05,
1573
+ "loss": 0.2008,
1574
+ "step": 2190
1575
+ },
1576
+ {
1577
+ "epoch": 1.532340907112388,
1578
+ "grad_norm": 0.3549712598323822,
1579
+ "learning_rate": 1.0083832335329342e-05,
1580
+ "loss": 0.1814,
1581
+ "step": 2200
1582
+ },
1583
+ {
1584
+ "epoch": 1.5393053016453382,
1585
+ "grad_norm": 0.32955431938171387,
1586
+ "learning_rate": 1.0035928143712577e-05,
1587
+ "loss": 0.1736,
1588
+ "step": 2210
1589
+ },
1590
+ {
1591
+ "epoch": 1.5462696961782885,
1592
+ "grad_norm": 0.3109774589538574,
1593
+ "learning_rate": 9.988023952095808e-06,
1594
+ "loss": 0.169,
1595
+ "step": 2220
1596
+ },
1597
+ {
1598
+ "epoch": 1.5532340907112387,
1599
+ "grad_norm": 0.4271303117275238,
1600
+ "learning_rate": 9.940119760479042e-06,
1601
+ "loss": 0.1761,
1602
+ "step": 2230
1603
+ },
1604
+ {
1605
+ "epoch": 1.560198485244189,
1606
+ "grad_norm": 0.3970726728439331,
1607
+ "learning_rate": 9.892215568862277e-06,
1608
+ "loss": 0.1897,
1609
+ "step": 2240
1610
+ },
1611
+ {
1612
+ "epoch": 1.5671628797771393,
1613
+ "grad_norm": 0.4251217544078827,
1614
+ "learning_rate": 9.84431137724551e-06,
1615
+ "loss": 0.1771,
1616
+ "step": 2250
1617
+ },
1618
+ {
1619
+ "epoch": 1.5741272743100896,
1620
+ "grad_norm": 0.28587886691093445,
1621
+ "learning_rate": 9.796407185628743e-06,
1622
+ "loss": 0.1699,
1623
+ "step": 2260
1624
+ },
1625
+ {
1626
+ "epoch": 1.5810916688430399,
1627
+ "grad_norm": 0.36768022179603577,
1628
+ "learning_rate": 9.748502994011977e-06,
1629
+ "loss": 0.1933,
1630
+ "step": 2270
1631
+ },
1632
+ {
1633
+ "epoch": 1.5880560633759901,
1634
+ "grad_norm": 0.31612274050712585,
1635
+ "learning_rate": 9.70059880239521e-06,
1636
+ "loss": 0.1944,
1637
+ "step": 2280
1638
+ },
1639
+ {
1640
+ "epoch": 1.5950204579089404,
1641
+ "grad_norm": 0.34787702560424805,
1642
+ "learning_rate": 9.652694610778443e-06,
1643
+ "loss": 0.1758,
1644
+ "step": 2290
1645
+ },
1646
+ {
1647
+ "epoch": 1.6019848524418907,
1648
+ "grad_norm": 0.4170469045639038,
1649
+ "learning_rate": 9.604790419161677e-06,
1650
+ "loss": 0.1749,
1651
+ "step": 2300
1652
+ },
1653
+ {
1654
+ "epoch": 1.608949246974841,
1655
+ "grad_norm": 0.3099391460418701,
1656
+ "learning_rate": 9.556886227544912e-06,
1657
+ "loss": 0.1752,
1658
+ "step": 2310
1659
+ },
1660
+ {
1661
+ "epoch": 1.6159136415077913,
1662
+ "grad_norm": 0.3781738877296448,
1663
+ "learning_rate": 9.508982035928145e-06,
1664
+ "loss": 0.2032,
1665
+ "step": 2320
1666
+ },
1667
+ {
1668
+ "epoch": 1.6228780360407415,
1669
+ "grad_norm": 0.29017606377601624,
1670
+ "learning_rate": 9.461077844311378e-06,
1671
+ "loss": 0.1733,
1672
+ "step": 2330
1673
+ },
1674
+ {
1675
+ "epoch": 1.629842430573692,
1676
+ "grad_norm": 0.45764532685279846,
1677
+ "learning_rate": 9.413173652694612e-06,
1678
+ "loss": 0.1765,
1679
+ "step": 2340
1680
+ },
1681
+ {
1682
+ "epoch": 1.6368068251066423,
1683
+ "grad_norm": 0.4349108040332794,
1684
+ "learning_rate": 9.365269461077845e-06,
1685
+ "loss": 0.178,
1686
+ "step": 2350
1687
+ },
1688
+ {
1689
+ "epoch": 1.6437712196395926,
1690
+ "grad_norm": 0.39599794149398804,
1691
+ "learning_rate": 9.317365269461078e-06,
1692
+ "loss": 0.1792,
1693
+ "step": 2360
1694
+ },
1695
+ {
1696
+ "epoch": 1.6507356141725429,
1697
+ "grad_norm": 0.1882832944393158,
1698
+ "learning_rate": 9.269461077844312e-06,
1699
+ "loss": 0.1871,
1700
+ "step": 2370
1701
+ },
1702
+ {
1703
+ "epoch": 1.6577000087054932,
1704
+ "grad_norm": 0.323946475982666,
1705
+ "learning_rate": 9.221556886227547e-06,
1706
+ "loss": 0.1783,
1707
+ "step": 2380
1708
+ },
1709
+ {
1710
+ "epoch": 1.6646644032384434,
1711
+ "grad_norm": 0.36734339594841003,
1712
+ "learning_rate": 9.17365269461078e-06,
1713
+ "loss": 0.1591,
1714
+ "step": 2390
1715
+ },
1716
+ {
1717
+ "epoch": 1.6646644032384434,
1718
+ "eval_loss": 0.17429304122924805,
1719
+ "eval_runtime": 356.3726,
1720
+ "eval_samples_per_second": 3.393,
1721
+ "eval_steps_per_second": 1.698,
1722
+ "step": 2390
1723
+ }
1724
+ ],
1725
+ "logging_steps": 10,
1726
+ "max_steps": 4305,
1727
+ "num_input_tokens_seen": 0,
1728
+ "num_train_epochs": 3,
1729
+ "save_steps": 478,
1730
+ "stateful_callbacks": {
1731
+ "TrainerControl": {
1732
+ "args": {
1733
+ "should_epoch_stop": false,
1734
+ "should_evaluate": false,
1735
+ "should_log": false,
1736
+ "should_save": true,
1737
+ "should_training_stop": false
1738
+ },
1739
+ "attributes": {}
1740
+ }
1741
+ },
1742
+ "total_flos": 1.1148432793606226e+18,
1743
+ "train_batch_size": 2,
1744
+ "trial_name": null,
1745
+ "trial_params": null
1746
+ }
checkpoint-2390/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:956bd3fe04334201160cff0d2e3faf62e32c6c8f709339d63e01a7d4b73b24d0
3
+ size 5560
checkpoint-2868/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
checkpoint-2868/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
checkpoint-2868/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec9f0d863dc3abd8902dd4ed843c361bd23b39006098c4932637619ebc429f7
3
+ size 671149168
checkpoint-2868/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a1cc3180b14b945a606fce84ee452c58beac7ee876f473622cc297c6b92e9b
3
+ size 1342555602
checkpoint-2868/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ff15f0e116f9cdd6bd365aee046c41884d38316eafb16ee442c6cfc286e73c
3
+ size 14244
checkpoint-2868/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2459b29e12012943fb8a735fbcda66f1d5e11ec6991497fe73fc6eb956e320d3
3
+ size 1064
checkpoint-2868/special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|eot_id|>",
4
+ "<|eom_id|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin_of_text|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|eot_id|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|eot_id|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
checkpoint-2868/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a55c1a4c5e3af7f2fb2bc0cd245a09dabd742dc24e7cb3741db1e11c7fe1a52
3
+ size 17210019
checkpoint-2868/tokenizer_config.json ADDED
@@ -0,0 +1,2070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "additional_special_tokens": [
2053
+ "<|eot_id|>",
2054
+ "<|eom_id|>"
2055
+ ],
2056
+ "bos_token": "<|begin_of_text|>",
2057
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- Extract system message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] | trim %}\n {%- set messages = messages[1:] %}\n {{- system_message + \"\\n\" }}\n{%- else %}\n {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. 
The available tools are:{% endset %}\n {{- system_message + \"\\n\" }}\n {%- if tools is not none %}\n {{- format_instruction + \"\\n\\n\" }}\n {%- endif %}\n{%- endif %}\n\n\n{%- if tools is not none %}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- \"<|eot_id|>\" }}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {%- if message['tool_calls'] %}\n {{- \"[\" }}\n {%- for tool_call_function in message.tool_calls %}\n {%- set tool_call = tool_call_function.function %}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message['content'] %}\n {{- message['content'] | trim + '<|eot_id|>' }}\n {%- else %}\n {{- \"[]\\n\" + '<|eot_id|>' }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>\" + \"ipython\" + \"<|end_header_id|>\\n\\n\" }}\n {%- set content = message[\"content\"] %}\n {%- if content is mapping or (content is iterable and content is not string) %}\n {{- content | tojson }}\n {%- else %}\n {{- content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}",
2058
+ "clean_up_tokenization_spaces": true,
2059
+ "eos_token": "<|eot_id|>",
2060
+ "extra_special_tokens": {},
2061
+ "model_input_names": [
2062
+ "input_ids",
2063
+ "attention_mask"
2064
+ ],
2065
+ "model_max_length": 16384,
2066
+ "pad_token": "<|eot_id|>",
2067
+ "padding_side": "right",
2068
+ "split_special_tokens": false,
2069
+ "tokenizer_class": "PreTrainedTokenizerFast"
2070
+ }
checkpoint-2868/trainer_state.json ADDED
@@ -0,0 +1,2083 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.15988580882549286,
3
+ "best_model_checkpoint": "./xlam_Qlora_out_eval/checkpoint-2868",
4
+ "epoch": 1.9975624619134673,
5
+ "eval_steps": 478,
6
+ "global_step": 2868,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006964394532950292,
13
+ "grad_norm": 0.29273906350135803,
14
+ "learning_rate": 1.5384615384615387e-06,
15
+ "loss": 1.8834,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.013928789065900584,
20
+ "grad_norm": 0.3107926845550537,
21
+ "learning_rate": 3.0769230769230774e-06,
22
+ "loss": 1.876,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.020893183598850874,
27
+ "grad_norm": 0.3098163902759552,
28
+ "learning_rate": 4.615384615384616e-06,
29
+ "loss": 1.9099,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.027857578131801168,
34
+ "grad_norm": 0.36715859174728394,
35
+ "learning_rate": 6.153846153846155e-06,
36
+ "loss": 1.8142,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.034821972664751455,
41
+ "grad_norm": 0.3511175215244293,
42
+ "learning_rate": 7.692307692307694e-06,
43
+ "loss": 1.7271,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.04178636719770175,
48
+ "grad_norm": 0.3458055853843689,
49
+ "learning_rate": 9.230769230769232e-06,
50
+ "loss": 1.6073,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.04875076173065204,
55
+ "grad_norm": 0.37719297409057617,
56
+ "learning_rate": 1.076923076923077e-05,
57
+ "loss": 1.4997,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.055715156263602336,
62
+ "grad_norm": 0.30273741483688354,
63
+ "learning_rate": 1.230769230769231e-05,
64
+ "loss": 1.2914,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.06267955079655263,
69
+ "grad_norm": 0.3690374195575714,
70
+ "learning_rate": 1.3846153846153847e-05,
71
+ "loss": 1.0086,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.06964394532950291,
76
+ "grad_norm": 0.3111755847930908,
77
+ "learning_rate": 1.5384615384615387e-05,
78
+ "loss": 0.7469,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0766083398624532,
83
+ "grad_norm": 0.13778822124004364,
84
+ "learning_rate": 1.6923076923076924e-05,
85
+ "loss": 0.5848,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.0835727343954035,
90
+ "grad_norm": 0.10629754513502121,
91
+ "learning_rate": 1.8461538461538465e-05,
92
+ "loss": 0.5366,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.09053712892835379,
97
+ "grad_norm": 0.09681010991334915,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5038,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.09750152346130408,
104
+ "grad_norm": 0.10756956040859222,
105
+ "learning_rate": 1.9952095808383235e-05,
106
+ "loss": 0.5002,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.10446591799425438,
111
+ "grad_norm": 0.10488647222518921,
112
+ "learning_rate": 1.9904191616766468e-05,
113
+ "loss": 0.5107,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.11143031252720467,
118
+ "grad_norm": 0.09359394758939743,
119
+ "learning_rate": 1.98562874251497e-05,
120
+ "loss": 0.4604,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.11839470706015495,
125
+ "grad_norm": 0.10392932593822479,
126
+ "learning_rate": 1.9808383233532935e-05,
127
+ "loss": 0.484,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.12535910159310526,
132
+ "grad_norm": 0.12986360490322113,
133
+ "learning_rate": 1.9760479041916168e-05,
134
+ "loss": 0.4527,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.13232349612605554,
139
+ "grad_norm": 0.14294420182704926,
140
+ "learning_rate": 1.97125748502994e-05,
141
+ "loss": 0.4661,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.13928789065900582,
146
+ "grad_norm": 0.18483737111091614,
147
+ "learning_rate": 1.9664670658682635e-05,
148
+ "loss": 0.4395,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.14625228519195613,
153
+ "grad_norm": 0.21114414930343628,
154
+ "learning_rate": 1.961676646706587e-05,
155
+ "loss": 0.4071,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.1532166797249064,
160
+ "grad_norm": 0.21779198944568634,
161
+ "learning_rate": 1.95688622754491e-05,
162
+ "loss": 0.3846,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.16018107425785671,
167
+ "grad_norm": 0.1294960230588913,
168
+ "learning_rate": 1.9520958083832338e-05,
169
+ "loss": 0.3952,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.167145468790807,
174
+ "grad_norm": 0.0927898958325386,
175
+ "learning_rate": 1.9473053892215568e-05,
176
+ "loss": 0.4112,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.1741098633237573,
181
+ "grad_norm": 0.10358591377735138,
182
+ "learning_rate": 1.9425149700598805e-05,
183
+ "loss": 0.406,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.18107425785670758,
188
+ "grad_norm": 0.13144026696681976,
189
+ "learning_rate": 1.9377245508982038e-05,
190
+ "loss": 0.4058,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.18803865238965786,
195
+ "grad_norm": 0.12745942175388336,
196
+ "learning_rate": 1.932934131736527e-05,
197
+ "loss": 0.3938,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.19500304692260817,
202
+ "grad_norm": 0.09832354635000229,
203
+ "learning_rate": 1.9281437125748505e-05,
204
+ "loss": 0.3819,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.20196744145555845,
209
+ "grad_norm": 0.0927552729845047,
210
+ "learning_rate": 1.9233532934131738e-05,
211
+ "loss": 0.3944,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.20893183598850876,
216
+ "grad_norm": 0.09627388417720795,
217
+ "learning_rate": 1.918562874251497e-05,
218
+ "loss": 0.348,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.21589623052145904,
223
+ "grad_norm": 0.09034324437379837,
224
+ "learning_rate": 1.9137724550898205e-05,
225
+ "loss": 0.3521,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.22286062505440934,
230
+ "grad_norm": 0.09156423062086105,
231
+ "learning_rate": 1.9089820359281438e-05,
232
+ "loss": 0.3663,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.22982501958735962,
237
+ "grad_norm": 0.0996803268790245,
238
+ "learning_rate": 1.904191616766467e-05,
239
+ "loss": 0.3347,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.2367894141203099,
244
+ "grad_norm": 0.09962292015552521,
245
+ "learning_rate": 1.8994011976047905e-05,
246
+ "loss": 0.373,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.2437538086532602,
251
+ "grad_norm": 0.08211114257574081,
252
+ "learning_rate": 1.894610778443114e-05,
253
+ "loss": 0.3641,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.2507182031862105,
258
+ "grad_norm": 0.10449781268835068,
259
+ "learning_rate": 1.889820359281437e-05,
260
+ "loss": 0.3358,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.2576825977191608,
265
+ "grad_norm": 0.11216866970062256,
266
+ "learning_rate": 1.8850299401197608e-05,
267
+ "loss": 0.372,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.2646469922521111,
272
+ "grad_norm": 0.11125332862138748,
273
+ "learning_rate": 1.8802395209580838e-05,
274
+ "loss": 0.3521,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.27161138678506136,
279
+ "grad_norm": 0.1620815545320511,
280
+ "learning_rate": 1.8754491017964075e-05,
281
+ "loss": 0.3726,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.27857578131801164,
286
+ "grad_norm": 0.11439554393291473,
287
+ "learning_rate": 1.8706586826347305e-05,
288
+ "loss": 0.3287,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.285540175850962,
293
+ "grad_norm": 0.10733096301555634,
294
+ "learning_rate": 1.865868263473054e-05,
295
+ "loss": 0.3459,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.29250457038391225,
300
+ "grad_norm": 0.12426561117172241,
301
+ "learning_rate": 1.8610778443113775e-05,
302
+ "loss": 0.3705,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.29946896491686253,
307
+ "grad_norm": 0.1148887649178505,
308
+ "learning_rate": 1.8562874251497008e-05,
309
+ "loss": 0.3216,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.3064333594498128,
314
+ "grad_norm": 0.10842527449131012,
315
+ "learning_rate": 1.851497005988024e-05,
316
+ "loss": 0.3502,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.31339775398276315,
321
+ "grad_norm": 0.11736515164375305,
322
+ "learning_rate": 1.8467065868263475e-05,
323
+ "loss": 0.3394,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.32036214851571343,
328
+ "grad_norm": 0.1381833553314209,
329
+ "learning_rate": 1.8419161676646708e-05,
330
+ "loss": 0.3166,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.3273265430486637,
335
+ "grad_norm": 0.10973569750785828,
336
+ "learning_rate": 1.837125748502994e-05,
337
+ "loss": 0.3245,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.3328980586750239,
342
+ "eval_loss": 0.3075329661369324,
343
+ "eval_runtime": 356.0276,
344
+ "eval_samples_per_second": 3.396,
345
+ "eval_steps_per_second": 1.699,
346
+ "step": 478
347
+ },
348
+ {
349
+ "epoch": 0.334290937581614,
350
+ "grad_norm": 0.13474580645561218,
351
+ "learning_rate": 1.8323353293413175e-05,
352
+ "loss": 0.2914,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.34125533211456427,
357
+ "grad_norm": 0.12812362611293793,
358
+ "learning_rate": 1.827544910179641e-05,
359
+ "loss": 0.3327,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.3482197266475146,
364
+ "grad_norm": 0.11661785840988159,
365
+ "learning_rate": 1.822754491017964e-05,
366
+ "loss": 0.3468,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.3551841211804649,
371
+ "grad_norm": 0.12990142405033112,
372
+ "learning_rate": 1.8179640718562878e-05,
373
+ "loss": 0.3318,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.36214851571341516,
378
+ "grad_norm": 0.1309327632188797,
379
+ "learning_rate": 1.8131736526946108e-05,
380
+ "loss": 0.2942,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.36911291024636544,
385
+ "grad_norm": 0.12246479839086533,
386
+ "learning_rate": 1.8083832335329345e-05,
387
+ "loss": 0.3751,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.3760773047793157,
392
+ "grad_norm": 0.14134123921394348,
393
+ "learning_rate": 1.8035928143712575e-05,
394
+ "loss": 0.3248,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.38304169931226606,
399
+ "grad_norm": 0.1433170884847641,
400
+ "learning_rate": 1.798802395209581e-05,
401
+ "loss": 0.3344,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.39000609384521634,
406
+ "grad_norm": 0.13364745676517487,
407
+ "learning_rate": 1.7940119760479045e-05,
408
+ "loss": 0.2928,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.3969704883781666,
413
+ "grad_norm": 0.13446496427059174,
414
+ "learning_rate": 1.7892215568862278e-05,
415
+ "loss": 0.3421,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.4039348829111169,
420
+ "grad_norm": 0.1586250364780426,
421
+ "learning_rate": 1.784431137724551e-05,
422
+ "loss": 0.3099,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.4108992774440672,
427
+ "grad_norm": 0.1283930391073227,
428
+ "learning_rate": 1.7796407185628745e-05,
429
+ "loss": 0.3123,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.4178636719770175,
434
+ "grad_norm": 0.14261852204799652,
435
+ "learning_rate": 1.7748502994011978e-05,
436
+ "loss": 0.2886,
437
+ "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.4248280665099678,
441
+ "grad_norm": 0.13329675793647766,
442
+ "learning_rate": 1.770059880239521e-05,
443
+ "loss": 0.3255,
444
+ "step": 610
445
+ },
446
+ {
447
+ "epoch": 0.4317924610429181,
448
+ "grad_norm": 0.13880160450935364,
449
+ "learning_rate": 1.7652694610778445e-05,
450
+ "loss": 0.326,
451
+ "step": 620
452
+ },
453
+ {
454
+ "epoch": 0.43875685557586835,
455
+ "grad_norm": 0.14388258755207062,
456
+ "learning_rate": 1.7604790419161678e-05,
457
+ "loss": 0.3269,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 0.4457212501088187,
462
+ "grad_norm": 0.1571163535118103,
463
+ "learning_rate": 1.755688622754491e-05,
464
+ "loss": 0.2919,
465
+ "step": 640
466
+ },
467
+ {
468
+ "epoch": 0.45268564464176897,
469
+ "grad_norm": 0.15307635068893433,
470
+ "learning_rate": 1.7508982035928145e-05,
471
+ "loss": 0.2773,
472
+ "step": 650
473
+ },
474
+ {
475
+ "epoch": 0.45965003917471925,
476
+ "grad_norm": 0.1438165158033371,
477
+ "learning_rate": 1.7461077844311378e-05,
478
+ "loss": 0.2847,
479
+ "step": 660
480
+ },
481
+ {
482
+ "epoch": 0.46661443370766953,
483
+ "grad_norm": 0.15620216727256775,
484
+ "learning_rate": 1.741317365269461e-05,
485
+ "loss": 0.29,
486
+ "step": 670
487
+ },
488
+ {
489
+ "epoch": 0.4735788282406198,
490
+ "grad_norm": 0.1414933055639267,
491
+ "learning_rate": 1.7365269461077845e-05,
492
+ "loss": 0.2754,
493
+ "step": 680
494
+ },
495
+ {
496
+ "epoch": 0.48054322277357014,
497
+ "grad_norm": 0.15853798389434814,
498
+ "learning_rate": 1.7317365269461078e-05,
499
+ "loss": 0.3062,
500
+ "step": 690
501
+ },
502
+ {
503
+ "epoch": 0.4875076173065204,
504
+ "grad_norm": 0.17159980535507202,
505
+ "learning_rate": 1.726946107784431e-05,
506
+ "loss": 0.2953,
507
+ "step": 700
508
+ },
509
+ {
510
+ "epoch": 0.4944720118394707,
511
+ "grad_norm": 0.16937299072742462,
512
+ "learning_rate": 1.7221556886227548e-05,
513
+ "loss": 0.2895,
514
+ "step": 710
515
+ },
516
+ {
517
+ "epoch": 0.501436406372421,
518
+ "grad_norm": 0.1455833464860916,
519
+ "learning_rate": 1.717365269461078e-05,
520
+ "loss": 0.2912,
521
+ "step": 720
522
+ },
523
+ {
524
+ "epoch": 0.5084008009053713,
525
+ "grad_norm": 0.15824872255325317,
526
+ "learning_rate": 1.7125748502994015e-05,
527
+ "loss": 0.3012,
528
+ "step": 730
529
+ },
530
+ {
531
+ "epoch": 0.5153651954383216,
532
+ "grad_norm": 0.1499851644039154,
533
+ "learning_rate": 1.7077844311377248e-05,
534
+ "loss": 0.263,
535
+ "step": 740
536
+ },
537
+ {
538
+ "epoch": 0.5223295899712719,
539
+ "grad_norm": 0.1611323356628418,
540
+ "learning_rate": 1.702994011976048e-05,
541
+ "loss": 0.2595,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 0.5292939845042222,
546
+ "grad_norm": 0.16144199669361115,
547
+ "learning_rate": 1.6982035928143714e-05,
548
+ "loss": 0.2939,
549
+ "step": 760
550
+ },
551
+ {
552
+ "epoch": 0.5362583790371724,
553
+ "grad_norm": 0.13433364033699036,
554
+ "learning_rate": 1.6934131736526948e-05,
555
+ "loss": 0.2817,
556
+ "step": 770
557
+ },
558
+ {
559
+ "epoch": 0.5432227735701227,
560
+ "grad_norm": 0.15188777446746826,
561
+ "learning_rate": 1.688622754491018e-05,
562
+ "loss": 0.2804,
563
+ "step": 780
564
+ },
565
+ {
566
+ "epoch": 0.550187168103073,
567
+ "grad_norm": 0.1530751883983612,
568
+ "learning_rate": 1.6838323353293414e-05,
569
+ "loss": 0.298,
570
+ "step": 790
571
+ },
572
+ {
573
+ "epoch": 0.5571515626360233,
574
+ "grad_norm": 0.14582635462284088,
575
+ "learning_rate": 1.6790419161676648e-05,
576
+ "loss": 0.3177,
577
+ "step": 800
578
+ },
579
+ {
580
+ "epoch": 0.5641159571689737,
581
+ "grad_norm": 0.174430251121521,
582
+ "learning_rate": 1.674251497005988e-05,
583
+ "loss": 0.2624,
584
+ "step": 810
585
+ },
586
+ {
587
+ "epoch": 0.571080351701924,
588
+ "grad_norm": 0.1558169573545456,
589
+ "learning_rate": 1.6694610778443114e-05,
590
+ "loss": 0.2631,
591
+ "step": 820
592
+ },
593
+ {
594
+ "epoch": 0.5780447462348742,
595
+ "grad_norm": 0.17108450829982758,
596
+ "learning_rate": 1.6646706586826348e-05,
597
+ "loss": 0.2903,
598
+ "step": 830
599
+ },
600
+ {
601
+ "epoch": 0.5850091407678245,
602
+ "grad_norm": 0.16796821355819702,
603
+ "learning_rate": 1.659880239520958e-05,
604
+ "loss": 0.2928,
605
+ "step": 840
606
+ },
607
+ {
608
+ "epoch": 0.5919735353007748,
609
+ "grad_norm": 0.16556940972805023,
610
+ "learning_rate": 1.6550898203592814e-05,
611
+ "loss": 0.2882,
612
+ "step": 850
613
+ },
614
+ {
615
+ "epoch": 0.5989379298337251,
616
+ "grad_norm": 0.1716109961271286,
617
+ "learning_rate": 1.650299401197605e-05,
618
+ "loss": 0.2814,
619
+ "step": 860
620
+ },
621
+ {
622
+ "epoch": 0.6059023243666753,
623
+ "grad_norm": 0.1695253700017929,
624
+ "learning_rate": 1.645508982035928e-05,
625
+ "loss": 0.2909,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 0.6128667188996256,
630
+ "grad_norm": 0.1896178424358368,
631
+ "learning_rate": 1.6407185628742518e-05,
632
+ "loss": 0.2909,
633
+ "step": 880
634
+ },
635
+ {
636
+ "epoch": 0.6198311134325759,
637
+ "grad_norm": 0.1624131202697754,
638
+ "learning_rate": 1.6359281437125748e-05,
639
+ "loss": 0.2672,
640
+ "step": 890
641
+ },
642
+ {
643
+ "epoch": 0.6267955079655263,
644
+ "grad_norm": 0.16821731626987457,
645
+ "learning_rate": 1.6311377245508984e-05,
646
+ "loss": 0.2626,
647
+ "step": 900
648
+ },
649
+ {
650
+ "epoch": 0.6337599024984766,
651
+ "grad_norm": 0.17026785016059875,
652
+ "learning_rate": 1.6263473053892214e-05,
653
+ "loss": 0.2735,
654
+ "step": 910
655
+ },
656
+ {
657
+ "epoch": 0.6407242970314269,
658
+ "grad_norm": 0.19429761171340942,
659
+ "learning_rate": 1.621556886227545e-05,
660
+ "loss": 0.2605,
661
+ "step": 920
662
+ },
663
+ {
664
+ "epoch": 0.6476886915643771,
665
+ "grad_norm": 0.16931040585041046,
666
+ "learning_rate": 1.6167664670658684e-05,
667
+ "loss": 0.2722,
668
+ "step": 930
669
+ },
670
+ {
671
+ "epoch": 0.6546530860973274,
672
+ "grad_norm": 0.20588335394859314,
673
+ "learning_rate": 1.6119760479041918e-05,
674
+ "loss": 0.301,
675
+ "step": 940
676
+ },
677
+ {
678
+ "epoch": 0.6616174806302777,
679
+ "grad_norm": 0.16699948906898499,
680
+ "learning_rate": 1.607185628742515e-05,
681
+ "loss": 0.2646,
682
+ "step": 950
683
+ },
684
+ {
685
+ "epoch": 0.6657961173500478,
686
+ "eval_loss": 0.2465677410364151,
687
+ "eval_runtime": 356.1016,
688
+ "eval_samples_per_second": 3.395,
689
+ "eval_steps_per_second": 1.699,
690
+ "step": 956
691
+ },
692
+ {
693
+ "epoch": 0.668581875163228,
694
+ "grad_norm": 0.20734217762947083,
695
+ "learning_rate": 1.6023952095808384e-05,
696
+ "loss": 0.2476,
697
+ "step": 960
698
+ },
699
+ {
700
+ "epoch": 0.6755462696961783,
701
+ "grad_norm": 0.21317210793495178,
702
+ "learning_rate": 1.5976047904191618e-05,
703
+ "loss": 0.2434,
704
+ "step": 970
705
+ },
706
+ {
707
+ "epoch": 0.6825106642291285,
708
+ "grad_norm": 0.19125515222549438,
709
+ "learning_rate": 1.592814371257485e-05,
710
+ "loss": 0.2823,
711
+ "step": 980
712
+ },
713
+ {
714
+ "epoch": 0.6894750587620788,
715
+ "grad_norm": 0.1651381254196167,
716
+ "learning_rate": 1.5880239520958084e-05,
717
+ "loss": 0.2769,
718
+ "step": 990
719
+ },
720
+ {
721
+ "epoch": 0.6964394532950292,
722
+ "grad_norm": 0.18061324954032898,
723
+ "learning_rate": 1.583233532934132e-05,
724
+ "loss": 0.2788,
725
+ "step": 1000
726
+ },
727
+ {
728
+ "epoch": 0.7034038478279795,
729
+ "grad_norm": 0.17788180708885193,
730
+ "learning_rate": 1.578443113772455e-05,
731
+ "loss": 0.2596,
732
+ "step": 1010
733
+ },
734
+ {
735
+ "epoch": 0.7103682423609298,
736
+ "grad_norm": 0.19635842740535736,
737
+ "learning_rate": 1.5736526946107788e-05,
738
+ "loss": 0.2391,
739
+ "step": 1020
740
+ },
741
+ {
742
+ "epoch": 0.71733263689388,
743
+ "grad_norm": 0.18000468611717224,
744
+ "learning_rate": 1.5688622754491018e-05,
745
+ "loss": 0.2589,
746
+ "step": 1030
747
+ },
748
+ {
749
+ "epoch": 0.7242970314268303,
750
+ "grad_norm": 0.18016283214092255,
751
+ "learning_rate": 1.5640718562874254e-05,
752
+ "loss": 0.2673,
753
+ "step": 1040
754
+ },
755
+ {
756
+ "epoch": 0.7312614259597806,
757
+ "grad_norm": 0.2085445076227188,
758
+ "learning_rate": 1.5592814371257484e-05,
759
+ "loss": 0.295,
760
+ "step": 1050
761
+ },
762
+ {
763
+ "epoch": 0.7382258204927309,
764
+ "grad_norm": 0.1937682181596756,
765
+ "learning_rate": 1.554491017964072e-05,
766
+ "loss": 0.2741,
767
+ "step": 1060
768
+ },
769
+ {
770
+ "epoch": 0.7451902150256812,
771
+ "grad_norm": 0.18496188521385193,
772
+ "learning_rate": 1.5497005988023954e-05,
773
+ "loss": 0.2485,
774
+ "step": 1070
775
+ },
776
+ {
777
+ "epoch": 0.7521546095586314,
778
+ "grad_norm": 0.21024686098098755,
779
+ "learning_rate": 1.5449101796407188e-05,
780
+ "loss": 0.2569,
781
+ "step": 1080
782
+ },
783
+ {
784
+ "epoch": 0.7591190040915818,
785
+ "grad_norm": 0.1960270255804062,
786
+ "learning_rate": 1.540119760479042e-05,
787
+ "loss": 0.2335,
788
+ "step": 1090
789
+ },
790
+ {
791
+ "epoch": 0.7660833986245321,
792
+ "grad_norm": 0.19557876884937286,
793
+ "learning_rate": 1.5353293413173654e-05,
794
+ "loss": 0.279,
795
+ "step": 1100
796
+ },
797
+ {
798
+ "epoch": 0.7730477931574824,
799
+ "grad_norm": 0.17300210893154144,
800
+ "learning_rate": 1.5305389221556888e-05,
801
+ "loss": 0.249,
802
+ "step": 1110
803
+ },
804
+ {
805
+ "epoch": 0.7800121876904327,
806
+ "grad_norm": 0.1768539994955063,
807
+ "learning_rate": 1.5257485029940121e-05,
808
+ "loss": 0.2415,
809
+ "step": 1120
810
+ },
811
+ {
812
+ "epoch": 0.786976582223383,
813
+ "grad_norm": 0.20632915198802948,
814
+ "learning_rate": 1.5209580838323354e-05,
815
+ "loss": 0.2312,
816
+ "step": 1130
817
+ },
818
+ {
819
+ "epoch": 0.7939409767563332,
820
+ "grad_norm": 0.1948348730802536,
821
+ "learning_rate": 1.516167664670659e-05,
822
+ "loss": 0.2415,
823
+ "step": 1140
824
+ },
825
+ {
826
+ "epoch": 0.8009053712892835,
827
+ "grad_norm": 0.16288821399211884,
828
+ "learning_rate": 1.511377245508982e-05,
829
+ "loss": 0.2571,
830
+ "step": 1150
831
+ },
832
+ {
833
+ "epoch": 0.8078697658222338,
834
+ "grad_norm": 0.21365194022655487,
835
+ "learning_rate": 1.5065868263473056e-05,
836
+ "loss": 0.2594,
837
+ "step": 1160
838
+ },
839
+ {
840
+ "epoch": 0.8148341603551841,
841
+ "grad_norm": 0.20960865914821625,
842
+ "learning_rate": 1.5017964071856287e-05,
843
+ "loss": 0.2537,
844
+ "step": 1170
845
+ },
846
+ {
847
+ "epoch": 0.8217985548881344,
848
+ "grad_norm": 0.20458345115184784,
849
+ "learning_rate": 1.4970059880239522e-05,
850
+ "loss": 0.2675,
851
+ "step": 1180
852
+ },
853
+ {
854
+ "epoch": 0.8287629494210847,
855
+ "grad_norm": 0.22388549149036407,
856
+ "learning_rate": 1.4922155688622754e-05,
857
+ "loss": 0.2525,
858
+ "step": 1190
859
+ },
860
+ {
861
+ "epoch": 0.835727343954035,
862
+ "grad_norm": 0.2139691859483719,
863
+ "learning_rate": 1.4874251497005989e-05,
864
+ "loss": 0.251,
865
+ "step": 1200
866
+ },
867
+ {
868
+ "epoch": 0.8426917384869853,
869
+ "grad_norm": 0.18761980533599854,
870
+ "learning_rate": 1.4826347305389224e-05,
871
+ "loss": 0.2328,
872
+ "step": 1210
873
+ },
874
+ {
875
+ "epoch": 0.8496561330199356,
876
+ "grad_norm": 0.183973029255867,
877
+ "learning_rate": 1.4778443113772456e-05,
878
+ "loss": 0.2239,
879
+ "step": 1220
880
+ },
881
+ {
882
+ "epoch": 0.8566205275528859,
883
+ "grad_norm": 0.23718492686748505,
884
+ "learning_rate": 1.473053892215569e-05,
885
+ "loss": 0.2423,
886
+ "step": 1230
887
+ },
888
+ {
889
+ "epoch": 0.8635849220858361,
890
+ "grad_norm": 0.23875045776367188,
891
+ "learning_rate": 1.4682634730538922e-05,
892
+ "loss": 0.2365,
893
+ "step": 1240
894
+ },
895
+ {
896
+ "epoch": 0.8705493166187864,
897
+ "grad_norm": 0.21570812165737152,
898
+ "learning_rate": 1.4634730538922157e-05,
899
+ "loss": 0.268,
900
+ "step": 1250
901
+ },
902
+ {
903
+ "epoch": 0.8775137111517367,
904
+ "grad_norm": 0.20839928090572357,
905
+ "learning_rate": 1.4586826347305389e-05,
906
+ "loss": 0.2502,
907
+ "step": 1260
908
+ },
909
+ {
910
+ "epoch": 0.884478105684687,
911
+ "grad_norm": 0.21605758368968964,
912
+ "learning_rate": 1.4538922155688624e-05,
913
+ "loss": 0.2365,
914
+ "step": 1270
915
+ },
916
+ {
917
+ "epoch": 0.8914425002176374,
918
+ "grad_norm": 0.23300880193710327,
919
+ "learning_rate": 1.4491017964071859e-05,
920
+ "loss": 0.2404,
921
+ "step": 1280
922
+ },
923
+ {
924
+ "epoch": 0.8984068947505877,
925
+ "grad_norm": 0.21796733140945435,
926
+ "learning_rate": 1.444311377245509e-05,
927
+ "loss": 0.2704,
928
+ "step": 1290
929
+ },
930
+ {
931
+ "epoch": 0.9053712892835379,
932
+ "grad_norm": 0.25555598735809326,
933
+ "learning_rate": 1.4395209580838326e-05,
934
+ "loss": 0.2674,
935
+ "step": 1300
936
+ },
937
+ {
938
+ "epoch": 0.9123356838164882,
939
+ "grad_norm": 0.22284413874149323,
940
+ "learning_rate": 1.4347305389221557e-05,
941
+ "loss": 0.2471,
942
+ "step": 1310
943
+ },
944
+ {
945
+ "epoch": 0.9193000783494385,
946
+ "grad_norm": 0.22234566509723663,
947
+ "learning_rate": 1.4299401197604792e-05,
948
+ "loss": 0.2529,
949
+ "step": 1320
950
+ },
951
+ {
952
+ "epoch": 0.9262644728823888,
953
+ "grad_norm": 0.221883624792099,
954
+ "learning_rate": 1.4251497005988024e-05,
955
+ "loss": 0.1932,
956
+ "step": 1330
957
+ },
958
+ {
959
+ "epoch": 0.9332288674153391,
960
+ "grad_norm": 0.22873520851135254,
961
+ "learning_rate": 1.4203592814371259e-05,
962
+ "loss": 0.2412,
963
+ "step": 1340
964
+ },
965
+ {
966
+ "epoch": 0.9401932619482893,
967
+ "grad_norm": 0.21307863295078278,
968
+ "learning_rate": 1.4155688622754492e-05,
969
+ "loss": 0.2384,
970
+ "step": 1350
971
+ },
972
+ {
973
+ "epoch": 0.9471576564812396,
974
+ "grad_norm": 0.21296364068984985,
975
+ "learning_rate": 1.4107784431137726e-05,
976
+ "loss": 0.2318,
977
+ "step": 1360
978
+ },
979
+ {
980
+ "epoch": 0.95412205101419,
981
+ "grad_norm": 0.21060685813426971,
982
+ "learning_rate": 1.405988023952096e-05,
983
+ "loss": 0.2323,
984
+ "step": 1370
985
+ },
986
+ {
987
+ "epoch": 0.9610864455471403,
988
+ "grad_norm": 0.23930418491363525,
989
+ "learning_rate": 1.4011976047904192e-05,
990
+ "loss": 0.2398,
991
+ "step": 1380
992
+ },
993
+ {
994
+ "epoch": 0.9680508400800906,
995
+ "grad_norm": 0.22837185859680176,
996
+ "learning_rate": 1.3964071856287427e-05,
997
+ "loss": 0.233,
998
+ "step": 1390
999
+ },
1000
+ {
1001
+ "epoch": 0.9750152346130408,
1002
+ "grad_norm": 0.21798169612884521,
1003
+ "learning_rate": 1.3916167664670659e-05,
1004
+ "loss": 0.2161,
1005
+ "step": 1400
1006
+ },
1007
+ {
1008
+ "epoch": 0.9819796291459911,
1009
+ "grad_norm": 0.2846814692020416,
1010
+ "learning_rate": 1.3868263473053894e-05,
1011
+ "loss": 0.2301,
1012
+ "step": 1410
1013
+ },
1014
+ {
1015
+ "epoch": 0.9889440236789414,
1016
+ "grad_norm": 0.22030217945575714,
1017
+ "learning_rate": 1.3820359281437127e-05,
1018
+ "loss": 0.2017,
1019
+ "step": 1420
1020
+ },
1021
+ {
1022
+ "epoch": 0.9959084182118917,
1023
+ "grad_norm": 0.25554022192955017,
1024
+ "learning_rate": 1.377245508982036e-05,
1025
+ "loss": 0.2296,
1026
+ "step": 1430
1027
+ },
1028
+ {
1029
+ "epoch": 0.9986941760250718,
1030
+ "eval_loss": 0.21637588739395142,
1031
+ "eval_runtime": 356.0721,
1032
+ "eval_samples_per_second": 3.395,
1033
+ "eval_steps_per_second": 1.699,
1034
+ "step": 1434
1035
+ },
1036
+ {
1037
+ "epoch": 1.0030469226081657,
1038
+ "grad_norm": 0.2862723469734192,
1039
+ "learning_rate": 1.3724550898203594e-05,
1040
+ "loss": 0.2538,
1041
+ "step": 1440
1042
+ },
1043
+ {
1044
+ "epoch": 1.010011317141116,
1045
+ "grad_norm": 0.2589089870452881,
1046
+ "learning_rate": 1.3676646706586827e-05,
1047
+ "loss": 0.2196,
1048
+ "step": 1450
1049
+ },
1050
+ {
1051
+ "epoch": 1.0169757116740663,
1052
+ "grad_norm": 0.27893784642219543,
1053
+ "learning_rate": 1.362874251497006e-05,
1054
+ "loss": 0.1935,
1055
+ "step": 1460
1056
+ },
1057
+ {
1058
+ "epoch": 1.0239401062070166,
1059
+ "grad_norm": 0.2530602514743805,
1060
+ "learning_rate": 1.3580838323353294e-05,
1061
+ "loss": 0.2248,
1062
+ "step": 1470
1063
+ },
1064
+ {
1065
+ "epoch": 1.0309045007399669,
1066
+ "grad_norm": 0.24902446568012238,
1067
+ "learning_rate": 1.3532934131736529e-05,
1068
+ "loss": 0.224,
1069
+ "step": 1480
1070
+ },
1071
+ {
1072
+ "epoch": 1.0378688952729171,
1073
+ "grad_norm": 0.27132269740104675,
1074
+ "learning_rate": 1.3485029940119762e-05,
1075
+ "loss": 0.2208,
1076
+ "step": 1490
1077
+ },
1078
+ {
1079
+ "epoch": 1.0448332898058674,
1080
+ "grad_norm": 0.2570977807044983,
1081
+ "learning_rate": 1.3437125748502996e-05,
1082
+ "loss": 0.2103,
1083
+ "step": 1500
1084
+ },
1085
+ {
1086
+ "epoch": 1.0517976843388177,
1087
+ "grad_norm": 0.2994561493396759,
1088
+ "learning_rate": 1.3389221556886229e-05,
1089
+ "loss": 0.2243,
1090
+ "step": 1510
1091
+ },
1092
+ {
1093
+ "epoch": 1.058762078871768,
1094
+ "grad_norm": 0.22994251549243927,
1095
+ "learning_rate": 1.3341317365269462e-05,
1096
+ "loss": 0.2217,
1097
+ "step": 1520
1098
+ },
1099
+ {
1100
+ "epoch": 1.0657264734047183,
1101
+ "grad_norm": 0.22706159949302673,
1102
+ "learning_rate": 1.3293413173652696e-05,
1103
+ "loss": 0.2219,
1104
+ "step": 1530
1105
+ },
1106
+ {
1107
+ "epoch": 1.0726908679376688,
1108
+ "grad_norm": 0.26584964990615845,
1109
+ "learning_rate": 1.3245508982035929e-05,
1110
+ "loss": 0.2329,
1111
+ "step": 1540
1112
+ },
1113
+ {
1114
+ "epoch": 1.079655262470619,
1115
+ "grad_norm": 0.2549090087413788,
1116
+ "learning_rate": 1.3197604790419162e-05,
1117
+ "loss": 0.2235,
1118
+ "step": 1550
1119
+ },
1120
+ {
1121
+ "epoch": 1.0866196570035693,
1122
+ "grad_norm": 0.27169135212898254,
1123
+ "learning_rate": 1.3149700598802397e-05,
1124
+ "loss": 0.2332,
1125
+ "step": 1560
1126
+ },
1127
+ {
1128
+ "epoch": 1.0935840515365196,
1129
+ "grad_norm": 0.26733773946762085,
1130
+ "learning_rate": 1.3101796407185629e-05,
1131
+ "loss": 0.2062,
1132
+ "step": 1570
1133
+ },
1134
+ {
1135
+ "epoch": 1.1005484460694699,
1136
+ "grad_norm": 0.3222082853317261,
1137
+ "learning_rate": 1.3053892215568864e-05,
1138
+ "loss": 0.2111,
1139
+ "step": 1580
1140
+ },
1141
+ {
1142
+ "epoch": 1.1075128406024202,
1143
+ "grad_norm": 0.20403257012367249,
1144
+ "learning_rate": 1.3005988023952097e-05,
1145
+ "loss": 0.226,
1146
+ "step": 1590
1147
+ },
1148
+ {
1149
+ "epoch": 1.1144772351353704,
1150
+ "grad_norm": 0.2658451199531555,
1151
+ "learning_rate": 1.295808383233533e-05,
1152
+ "loss": 0.2013,
1153
+ "step": 1600
1154
+ },
1155
+ {
1156
+ "epoch": 1.1214416296683207,
1157
+ "grad_norm": 0.2719573676586151,
1158
+ "learning_rate": 1.2910179640718564e-05,
1159
+ "loss": 0.2036,
1160
+ "step": 1610
1161
+ },
1162
+ {
1163
+ "epoch": 1.128406024201271,
1164
+ "grad_norm": 0.29076331853866577,
1165
+ "learning_rate": 1.2862275449101797e-05,
1166
+ "loss": 0.2162,
1167
+ "step": 1620
1168
+ },
1169
+ {
1170
+ "epoch": 1.1353704187342213,
1171
+ "grad_norm": 0.2324078530073166,
1172
+ "learning_rate": 1.2814371257485032e-05,
1173
+ "loss": 0.2002,
1174
+ "step": 1630
1175
+ },
1176
+ {
1177
+ "epoch": 1.1423348132671716,
1178
+ "grad_norm": 0.27219468355178833,
1179
+ "learning_rate": 1.2766467065868264e-05,
1180
+ "loss": 0.2238,
1181
+ "step": 1640
1182
+ },
1183
+ {
1184
+ "epoch": 1.1492992078001218,
1185
+ "grad_norm": 0.22101271152496338,
1186
+ "learning_rate": 1.2718562874251499e-05,
1187
+ "loss": 0.1845,
1188
+ "step": 1650
1189
+ },
1190
+ {
1191
+ "epoch": 1.1562636023330721,
1192
+ "grad_norm": 0.25101369619369507,
1193
+ "learning_rate": 1.267065868263473e-05,
1194
+ "loss": 0.2296,
1195
+ "step": 1660
1196
+ },
1197
+ {
1198
+ "epoch": 1.1632279968660224,
1199
+ "grad_norm": 0.25895142555236816,
1200
+ "learning_rate": 1.2622754491017965e-05,
1201
+ "loss": 0.2271,
1202
+ "step": 1670
1203
+ },
1204
+ {
1205
+ "epoch": 1.1701923913989727,
1206
+ "grad_norm": 0.26405826210975647,
1207
+ "learning_rate": 1.2574850299401197e-05,
1208
+ "loss": 0.2159,
1209
+ "step": 1680
1210
+ },
1211
+ {
1212
+ "epoch": 1.177156785931923,
1213
+ "grad_norm": 0.23231615126132965,
1214
+ "learning_rate": 1.2526946107784432e-05,
1215
+ "loss": 0.1916,
1216
+ "step": 1690
1217
+ },
1218
+ {
1219
+ "epoch": 1.1841211804648732,
1220
+ "grad_norm": 0.3211207985877991,
1221
+ "learning_rate": 1.2479041916167665e-05,
1222
+ "loss": 0.2154,
1223
+ "step": 1700
1224
+ },
1225
+ {
1226
+ "epoch": 1.1910855749978237,
1227
+ "grad_norm": 0.2948794662952423,
1228
+ "learning_rate": 1.2431137724550899e-05,
1229
+ "loss": 0.1999,
1230
+ "step": 1710
1231
+ },
1232
+ {
1233
+ "epoch": 1.198049969530774,
1234
+ "grad_norm": 0.28081706166267395,
1235
+ "learning_rate": 1.2383233532934134e-05,
1236
+ "loss": 0.2301,
1237
+ "step": 1720
1238
+ },
1239
+ {
1240
+ "epoch": 1.2050143640637243,
1241
+ "grad_norm": 0.296283096075058,
1242
+ "learning_rate": 1.2335329341317365e-05,
1243
+ "loss": 0.2164,
1244
+ "step": 1730
1245
+ },
1246
+ {
1247
+ "epoch": 1.2119787585966746,
1248
+ "grad_norm": 0.29658278822898865,
1249
+ "learning_rate": 1.22874251497006e-05,
1250
+ "loss": 0.1916,
1251
+ "step": 1740
1252
+ },
1253
+ {
1254
+ "epoch": 1.2189431531296249,
1255
+ "grad_norm": 0.2917250990867615,
1256
+ "learning_rate": 1.2239520958083832e-05,
1257
+ "loss": 0.2045,
1258
+ "step": 1750
1259
+ },
1260
+ {
1261
+ "epoch": 1.2259075476625751,
1262
+ "grad_norm": 0.35840919613838196,
1263
+ "learning_rate": 1.2191616766467067e-05,
1264
+ "loss": 0.1965,
1265
+ "step": 1760
1266
+ },
1267
+ {
1268
+ "epoch": 1.2328719421955254,
1269
+ "grad_norm": 0.2819244861602783,
1270
+ "learning_rate": 1.2143712574850299e-05,
1271
+ "loss": 0.2032,
1272
+ "step": 1770
1273
+ },
1274
+ {
1275
+ "epoch": 1.2398363367284757,
1276
+ "grad_norm": 0.3518809974193573,
1277
+ "learning_rate": 1.2095808383233534e-05,
1278
+ "loss": 0.2043,
1279
+ "step": 1780
1280
+ },
1281
+ {
1282
+ "epoch": 1.246800731261426,
1283
+ "grad_norm": 0.3232385516166687,
1284
+ "learning_rate": 1.2047904191616769e-05,
1285
+ "loss": 0.23,
1286
+ "step": 1790
1287
+ },
1288
+ {
1289
+ "epoch": 1.2537651257943763,
1290
+ "grad_norm": 0.23131519556045532,
1291
+ "learning_rate": 1.2e-05,
1292
+ "loss": 0.2106,
1293
+ "step": 1800
1294
+ },
1295
+ {
1296
+ "epoch": 1.2607295203273265,
1297
+ "grad_norm": 0.30877017974853516,
1298
+ "learning_rate": 1.1952095808383235e-05,
1299
+ "loss": 0.2088,
1300
+ "step": 1810
1301
+ },
1302
+ {
1303
+ "epoch": 1.2676939148602768,
1304
+ "grad_norm": 0.36684754490852356,
1305
+ "learning_rate": 1.1904191616766467e-05,
1306
+ "loss": 0.2024,
1307
+ "step": 1820
1308
+ },
1309
+ {
1310
+ "epoch": 1.274658309393227,
1311
+ "grad_norm": 0.21124888956546783,
1312
+ "learning_rate": 1.1856287425149702e-05,
1313
+ "loss": 0.1929,
1314
+ "step": 1830
1315
+ },
1316
+ {
1317
+ "epoch": 1.2816227039261774,
1318
+ "grad_norm": 0.2815339267253876,
1319
+ "learning_rate": 1.1808383233532934e-05,
1320
+ "loss": 0.1973,
1321
+ "step": 1840
1322
+ },
1323
+ {
1324
+ "epoch": 1.2885870984591277,
1325
+ "grad_norm": 0.3017849922180176,
1326
+ "learning_rate": 1.1760479041916169e-05,
1327
+ "loss": 0.233,
1328
+ "step": 1850
1329
+ },
1330
+ {
1331
+ "epoch": 1.295551492992078,
1332
+ "grad_norm": 0.3456547260284424,
1333
+ "learning_rate": 1.1712574850299404e-05,
1334
+ "loss": 0.1769,
1335
+ "step": 1860
1336
+ },
1337
+ {
1338
+ "epoch": 1.3025158875250282,
1339
+ "grad_norm": 0.28210344910621643,
1340
+ "learning_rate": 1.1664670658682635e-05,
1341
+ "loss": 0.204,
1342
+ "step": 1870
1343
+ },
1344
+ {
1345
+ "epoch": 1.3094802820579785,
1346
+ "grad_norm": 0.3701432943344116,
1347
+ "learning_rate": 1.161676646706587e-05,
1348
+ "loss": 0.1923,
1349
+ "step": 1880
1350
+ },
1351
+ {
1352
+ "epoch": 1.3164446765909288,
1353
+ "grad_norm": 0.3656926453113556,
1354
+ "learning_rate": 1.1568862275449102e-05,
1355
+ "loss": 0.2136,
1356
+ "step": 1890
1357
+ },
1358
+ {
1359
+ "epoch": 1.323409071123879,
1360
+ "grad_norm": 0.29142090678215027,
1361
+ "learning_rate": 1.1520958083832337e-05,
1362
+ "loss": 0.1957,
1363
+ "step": 1900
1364
+ },
1365
+ {
1366
+ "epoch": 1.3303734656568293,
1367
+ "grad_norm": 0.2799660563468933,
1368
+ "learning_rate": 1.1473053892215569e-05,
1369
+ "loss": 0.2009,
1370
+ "step": 1910
1371
+ },
1372
+ {
1373
+ "epoch": 1.3317663445634196,
1374
+ "eval_loss": 0.19295775890350342,
1375
+ "eval_runtime": 356.0153,
1376
+ "eval_samples_per_second": 3.396,
1377
+ "eval_steps_per_second": 1.699,
1378
+ "step": 1912
1379
+ },
1380
+ {
1381
+ "epoch": 1.3373378601897796,
1382
+ "grad_norm": 0.38173311948776245,
1383
+ "learning_rate": 1.1425149700598804e-05,
1384
+ "loss": 0.2133,
1385
+ "step": 1920
1386
+ },
1387
+ {
1388
+ "epoch": 1.34430225472273,
1389
+ "grad_norm": 0.38059180974960327,
1390
+ "learning_rate": 1.1377245508982037e-05,
1391
+ "loss": 0.203,
1392
+ "step": 1930
1393
+ },
1394
+ {
1395
+ "epoch": 1.3512666492556804,
1396
+ "grad_norm": 0.2743132710456848,
1397
+ "learning_rate": 1.132934131736527e-05,
1398
+ "loss": 0.196,
1399
+ "step": 1940
1400
+ },
1401
+ {
1402
+ "epoch": 1.3582310437886307,
1403
+ "grad_norm": 0.3110567331314087,
1404
+ "learning_rate": 1.1281437125748505e-05,
1405
+ "loss": 0.1868,
1406
+ "step": 1950
1407
+ },
1408
+ {
1409
+ "epoch": 1.365195438321581,
1410
+ "grad_norm": 0.4261178970336914,
1411
+ "learning_rate": 1.1233532934131737e-05,
1412
+ "loss": 0.1952,
1413
+ "step": 1960
1414
+ },
1415
+ {
1416
+ "epoch": 1.3721598328545312,
1417
+ "grad_norm": 0.3890218436717987,
1418
+ "learning_rate": 1.1185628742514972e-05,
1419
+ "loss": 0.1964,
1420
+ "step": 1970
1421
+ },
1422
+ {
1423
+ "epoch": 1.3791242273874815,
1424
+ "grad_norm": 0.3712151050567627,
1425
+ "learning_rate": 1.1137724550898203e-05,
1426
+ "loss": 0.1933,
1427
+ "step": 1980
1428
+ },
1429
+ {
1430
+ "epoch": 1.3860886219204318,
1431
+ "grad_norm": 0.2833101153373718,
1432
+ "learning_rate": 1.1089820359281439e-05,
1433
+ "loss": 0.1862,
1434
+ "step": 1990
1435
+ },
1436
+ {
1437
+ "epoch": 1.393053016453382,
1438
+ "grad_norm": 0.3696097731590271,
1439
+ "learning_rate": 1.1041916167664672e-05,
1440
+ "loss": 0.2142,
1441
+ "step": 2000
1442
+ },
1443
+ {
1444
+ "epoch": 1.4000174109863324,
1445
+ "grad_norm": 0.4144258499145508,
1446
+ "learning_rate": 1.0994011976047905e-05,
1447
+ "loss": 0.1915,
1448
+ "step": 2010
1449
+ },
1450
+ {
1451
+ "epoch": 1.4069818055192826,
1452
+ "grad_norm": 0.28359559178352356,
1453
+ "learning_rate": 1.0946107784431138e-05,
1454
+ "loss": 0.1861,
1455
+ "step": 2020
1456
+ },
1457
+ {
1458
+ "epoch": 1.413946200052233,
1459
+ "grad_norm": 0.327510803937912,
1460
+ "learning_rate": 1.0898203592814372e-05,
1461
+ "loss": 0.1961,
1462
+ "step": 2030
1463
+ },
1464
+ {
1465
+ "epoch": 1.4209105945851832,
1466
+ "grad_norm": 0.44245216250419617,
1467
+ "learning_rate": 1.0850299401197605e-05,
1468
+ "loss": 0.2091,
1469
+ "step": 2040
1470
+ },
1471
+ {
1472
+ "epoch": 1.4278749891181335,
1473
+ "grad_norm": 0.3903510570526123,
1474
+ "learning_rate": 1.0802395209580838e-05,
1475
+ "loss": 0.2058,
1476
+ "step": 2050
1477
+ },
1478
+ {
1479
+ "epoch": 1.4348393836510838,
1480
+ "grad_norm": 0.32461392879486084,
1481
+ "learning_rate": 1.0754491017964073e-05,
1482
+ "loss": 0.1793,
1483
+ "step": 2060
1484
+ },
1485
+ {
1486
+ "epoch": 1.441803778184034,
1487
+ "grad_norm": 0.4412670433521271,
1488
+ "learning_rate": 1.0706586826347307e-05,
1489
+ "loss": 0.1856,
1490
+ "step": 2070
1491
+ },
1492
+ {
1493
+ "epoch": 1.4487681727169845,
1494
+ "grad_norm": 0.2217404693365097,
1495
+ "learning_rate": 1.065868263473054e-05,
1496
+ "loss": 0.1628,
1497
+ "step": 2080
1498
+ },
1499
+ {
1500
+ "epoch": 1.4557325672499348,
1501
+ "grad_norm": 0.3572748899459839,
1502
+ "learning_rate": 1.0610778443113773e-05,
1503
+ "loss": 0.1904,
1504
+ "step": 2090
1505
+ },
1506
+ {
1507
+ "epoch": 1.462696961782885,
1508
+ "grad_norm": 0.23523126542568207,
1509
+ "learning_rate": 1.0562874251497007e-05,
1510
+ "loss": 0.1833,
1511
+ "step": 2100
1512
+ },
1513
+ {
1514
+ "epoch": 1.4696613563158354,
1515
+ "grad_norm": 0.35970646142959595,
1516
+ "learning_rate": 1.051497005988024e-05,
1517
+ "loss": 0.2018,
1518
+ "step": 2110
1519
+ },
1520
+ {
1521
+ "epoch": 1.4766257508487857,
1522
+ "grad_norm": 0.3383442759513855,
1523
+ "learning_rate": 1.0467065868263473e-05,
1524
+ "loss": 0.1986,
1525
+ "step": 2120
1526
+ },
1527
+ {
1528
+ "epoch": 1.483590145381736,
1529
+ "grad_norm": 0.33182451128959656,
1530
+ "learning_rate": 1.0419161676646707e-05,
1531
+ "loss": 0.194,
1532
+ "step": 2130
1533
+ },
1534
+ {
1535
+ "epoch": 1.4905545399146862,
1536
+ "grad_norm": 0.3276427090167999,
1537
+ "learning_rate": 1.0371257485029942e-05,
1538
+ "loss": 0.1945,
1539
+ "step": 2140
1540
+ },
1541
+ {
1542
+ "epoch": 1.4975189344476365,
1543
+ "grad_norm": 0.30687084794044495,
1544
+ "learning_rate": 1.0323353293413173e-05,
1545
+ "loss": 0.1839,
1546
+ "step": 2150
1547
+ },
1548
+ {
1549
+ "epoch": 1.5044833289805868,
1550
+ "grad_norm": 0.3758613169193268,
1551
+ "learning_rate": 1.0275449101796408e-05,
1552
+ "loss": 0.2035,
1553
+ "step": 2160
1554
+ },
1555
+ {
1556
+ "epoch": 1.511447723513537,
1557
+ "grad_norm": 0.34436604380607605,
1558
+ "learning_rate": 1.0227544910179642e-05,
1559
+ "loss": 0.1946,
1560
+ "step": 2170
1561
+ },
1562
+ {
1563
+ "epoch": 1.5184121180464873,
1564
+ "grad_norm": 0.31638774275779724,
1565
+ "learning_rate": 1.0179640718562875e-05,
1566
+ "loss": 0.1761,
1567
+ "step": 2180
1568
+ },
1569
+ {
1570
+ "epoch": 1.5253765125794376,
1571
+ "grad_norm": 0.3935607969760895,
1572
+ "learning_rate": 1.0131736526946108e-05,
1573
+ "loss": 0.2008,
1574
+ "step": 2190
1575
+ },
1576
+ {
1577
+ "epoch": 1.532340907112388,
1578
+ "grad_norm": 0.3549712598323822,
1579
+ "learning_rate": 1.0083832335329342e-05,
1580
+ "loss": 0.1814,
1581
+ "step": 2200
1582
+ },
1583
+ {
1584
+ "epoch": 1.5393053016453382,
1585
+ "grad_norm": 0.32955431938171387,
1586
+ "learning_rate": 1.0035928143712577e-05,
1587
+ "loss": 0.1736,
1588
+ "step": 2210
1589
+ },
1590
+ {
1591
+ "epoch": 1.5462696961782885,
1592
+ "grad_norm": 0.3109774589538574,
1593
+ "learning_rate": 9.988023952095808e-06,
1594
+ "loss": 0.169,
1595
+ "step": 2220
1596
+ },
1597
+ {
1598
+ "epoch": 1.5532340907112387,
1599
+ "grad_norm": 0.4271303117275238,
1600
+ "learning_rate": 9.940119760479042e-06,
1601
+ "loss": 0.1761,
1602
+ "step": 2230
1603
+ },
1604
+ {
1605
+ "epoch": 1.560198485244189,
1606
+ "grad_norm": 0.3970726728439331,
1607
+ "learning_rate": 9.892215568862277e-06,
1608
+ "loss": 0.1897,
1609
+ "step": 2240
1610
+ },
1611
+ {
1612
+ "epoch": 1.5671628797771393,
1613
+ "grad_norm": 0.4251217544078827,
1614
+ "learning_rate": 9.84431137724551e-06,
1615
+ "loss": 0.1771,
1616
+ "step": 2250
1617
+ },
1618
+ {
1619
+ "epoch": 1.5741272743100896,
1620
+ "grad_norm": 0.28587886691093445,
1621
+ "learning_rate": 9.796407185628743e-06,
1622
+ "loss": 0.1699,
1623
+ "step": 2260
1624
+ },
1625
+ {
1626
+ "epoch": 1.5810916688430399,
1627
+ "grad_norm": 0.36768022179603577,
1628
+ "learning_rate": 9.748502994011977e-06,
1629
+ "loss": 0.1933,
1630
+ "step": 2270
1631
+ },
1632
+ {
1633
+ "epoch": 1.5880560633759901,
1634
+ "grad_norm": 0.31612274050712585,
1635
+ "learning_rate": 9.70059880239521e-06,
1636
+ "loss": 0.1944,
1637
+ "step": 2280
1638
+ },
1639
+ {
1640
+ "epoch": 1.5950204579089404,
1641
+ "grad_norm": 0.34787702560424805,
1642
+ "learning_rate": 9.652694610778443e-06,
1643
+ "loss": 0.1758,
1644
+ "step": 2290
1645
+ },
1646
+ {
1647
+ "epoch": 1.6019848524418907,
1648
+ "grad_norm": 0.4170469045639038,
1649
+ "learning_rate": 9.604790419161677e-06,
1650
+ "loss": 0.1749,
1651
+ "step": 2300
1652
+ },
1653
+ {
1654
+ "epoch": 1.608949246974841,
1655
+ "grad_norm": 0.3099391460418701,
1656
+ "learning_rate": 9.556886227544912e-06,
1657
+ "loss": 0.1752,
1658
+ "step": 2310
1659
+ },
1660
+ {
1661
+ "epoch": 1.6159136415077913,
1662
+ "grad_norm": 0.3781738877296448,
1663
+ "learning_rate": 9.508982035928145e-06,
1664
+ "loss": 0.2032,
1665
+ "step": 2320
1666
+ },
1667
+ {
1668
+ "epoch": 1.6228780360407415,
1669
+ "grad_norm": 0.29017606377601624,
1670
+ "learning_rate": 9.461077844311378e-06,
1671
+ "loss": 0.1733,
1672
+ "step": 2330
1673
+ },
1674
+ {
1675
+ "epoch": 1.629842430573692,
1676
+ "grad_norm": 0.45764532685279846,
1677
+ "learning_rate": 9.413173652694612e-06,
1678
+ "loss": 0.1765,
1679
+ "step": 2340
1680
+ },
1681
+ {
1682
+ "epoch": 1.6368068251066423,
1683
+ "grad_norm": 0.4349108040332794,
1684
+ "learning_rate": 9.365269461077845e-06,
1685
+ "loss": 0.178,
1686
+ "step": 2350
1687
+ },
1688
+ {
1689
+ "epoch": 1.6437712196395926,
1690
+ "grad_norm": 0.39599794149398804,
1691
+ "learning_rate": 9.317365269461078e-06,
1692
+ "loss": 0.1792,
1693
+ "step": 2360
1694
+ },
1695
+ {
1696
+ "epoch": 1.6507356141725429,
1697
+ "grad_norm": 0.1882832944393158,
1698
+ "learning_rate": 9.269461077844312e-06,
1699
+ "loss": 0.1871,
1700
+ "step": 2370
1701
+ },
1702
+ {
1703
+ "epoch": 1.6577000087054932,
1704
+ "grad_norm": 0.323946475982666,
1705
+ "learning_rate": 9.221556886227547e-06,
1706
+ "loss": 0.1783,
1707
+ "step": 2380
1708
+ },
1709
+ {
1710
+ "epoch": 1.6646644032384434,
1711
+ "grad_norm": 0.36734339594841003,
1712
+ "learning_rate": 9.17365269461078e-06,
1713
+ "loss": 0.1591,
1714
+ "step": 2390
1715
+ },
1716
+ {
1717
+ "epoch": 1.6646644032384434,
1718
+ "eval_loss": 0.17429304122924805,
1719
+ "eval_runtime": 356.3726,
1720
+ "eval_samples_per_second": 3.393,
1721
+ "eval_steps_per_second": 1.698,
1722
+ "step": 2390
1723
+ },
1724
+ {
1725
+ "epoch": 1.6716287977713937,
1726
+ "grad_norm": 0.5177730321884155,
1727
+ "learning_rate": 9.125748502994013e-06,
1728
+ "loss": 0.1807,
1729
+ "step": 2400
1730
+ },
1731
+ {
1732
+ "epoch": 1.678593192304344,
1733
+ "grad_norm": 0.36497658491134644,
1734
+ "learning_rate": 9.077844311377247e-06,
1735
+ "loss": 0.1898,
1736
+ "step": 2410
1737
+ },
1738
+ {
1739
+ "epoch": 1.6855575868372943,
1740
+ "grad_norm": 0.38655078411102295,
1741
+ "learning_rate": 9.02994011976048e-06,
1742
+ "loss": 0.1973,
1743
+ "step": 2420
1744
+ },
1745
+ {
1746
+ "epoch": 1.6925219813702448,
1747
+ "grad_norm": 0.264782577753067,
1748
+ "learning_rate": 8.982035928143713e-06,
1749
+ "loss": 0.1733,
1750
+ "step": 2430
1751
+ },
1752
+ {
1753
+ "epoch": 1.699486375903195,
1754
+ "grad_norm": 0.38668733835220337,
1755
+ "learning_rate": 8.934131736526946e-06,
1756
+ "loss": 0.1716,
1757
+ "step": 2440
1758
+ },
1759
+ {
1760
+ "epoch": 1.7064507704361453,
1761
+ "grad_norm": 0.32163408398628235,
1762
+ "learning_rate": 8.886227544910181e-06,
1763
+ "loss": 0.1682,
1764
+ "step": 2450
1765
+ },
1766
+ {
1767
+ "epoch": 1.7134151649690956,
1768
+ "grad_norm": 0.41438040137290955,
1769
+ "learning_rate": 8.838323353293415e-06,
1770
+ "loss": 0.1839,
1771
+ "step": 2460
1772
+ },
1773
+ {
1774
+ "epoch": 1.720379559502046,
1775
+ "grad_norm": 0.3748877942562103,
1776
+ "learning_rate": 8.790419161676648e-06,
1777
+ "loss": 0.1948,
1778
+ "step": 2470
1779
+ },
1780
+ {
1781
+ "epoch": 1.7273439540349962,
1782
+ "grad_norm": 0.3265992999076843,
1783
+ "learning_rate": 8.742514970059881e-06,
1784
+ "loss": 0.1766,
1785
+ "step": 2480
1786
+ },
1787
+ {
1788
+ "epoch": 1.7343083485679465,
1789
+ "grad_norm": 0.34820470213890076,
1790
+ "learning_rate": 8.694610778443115e-06,
1791
+ "loss": 0.1854,
1792
+ "step": 2490
1793
+ },
1794
+ {
1795
+ "epoch": 1.7412727431008967,
1796
+ "grad_norm": 0.34207549691200256,
1797
+ "learning_rate": 8.646706586826348e-06,
1798
+ "loss": 0.1728,
1799
+ "step": 2500
1800
+ },
1801
+ {
1802
+ "epoch": 1.748237137633847,
1803
+ "grad_norm": 0.24247221648693085,
1804
+ "learning_rate": 8.598802395209581e-06,
1805
+ "loss": 0.1747,
1806
+ "step": 2510
1807
+ },
1808
+ {
1809
+ "epoch": 1.7552015321667973,
1810
+ "grad_norm": 0.3863085210323334,
1811
+ "learning_rate": 8.550898203592815e-06,
1812
+ "loss": 0.1744,
1813
+ "step": 2520
1814
+ },
1815
+ {
1816
+ "epoch": 1.7621659266997476,
1817
+ "grad_norm": 0.38425007462501526,
1818
+ "learning_rate": 8.50299401197605e-06,
1819
+ "loss": 0.1675,
1820
+ "step": 2530
1821
+ },
1822
+ {
1823
+ "epoch": 1.7691303212326979,
1824
+ "grad_norm": 0.4716668725013733,
1825
+ "learning_rate": 8.455089820359283e-06,
1826
+ "loss": 0.1812,
1827
+ "step": 2540
1828
+ },
1829
+ {
1830
+ "epoch": 1.7760947157656481,
1831
+ "grad_norm": 0.4646649956703186,
1832
+ "learning_rate": 8.407185628742516e-06,
1833
+ "loss": 0.1628,
1834
+ "step": 2550
1835
+ },
1836
+ {
1837
+ "epoch": 1.7830591102985984,
1838
+ "grad_norm": 0.37651118636131287,
1839
+ "learning_rate": 8.35928143712575e-06,
1840
+ "loss": 0.1615,
1841
+ "step": 2560
1842
+ },
1843
+ {
1844
+ "epoch": 1.7900235048315487,
1845
+ "grad_norm": 0.4965188801288605,
1846
+ "learning_rate": 8.311377245508983e-06,
1847
+ "loss": 0.1807,
1848
+ "step": 2570
1849
+ },
1850
+ {
1851
+ "epoch": 1.796987899364499,
1852
+ "grad_norm": 0.4462849497795105,
1853
+ "learning_rate": 8.263473053892216e-06,
1854
+ "loss": 0.1783,
1855
+ "step": 2580
1856
+ },
1857
+ {
1858
+ "epoch": 1.8039522938974493,
1859
+ "grad_norm": 0.3630557358264923,
1860
+ "learning_rate": 8.21556886227545e-06,
1861
+ "loss": 0.1763,
1862
+ "step": 2590
1863
+ },
1864
+ {
1865
+ "epoch": 1.8109166884303995,
1866
+ "grad_norm": 0.28600937128067017,
1867
+ "learning_rate": 8.167664670658683e-06,
1868
+ "loss": 0.168,
1869
+ "step": 2600
1870
+ },
1871
+ {
1872
+ "epoch": 1.8178810829633498,
1873
+ "grad_norm": 0.32010987401008606,
1874
+ "learning_rate": 8.119760479041916e-06,
1875
+ "loss": 0.1733,
1876
+ "step": 2610
1877
+ },
1878
+ {
1879
+ "epoch": 1.8248454774963,
1880
+ "grad_norm": 0.4362265467643738,
1881
+ "learning_rate": 8.07185628742515e-06,
1882
+ "loss": 0.1693,
1883
+ "step": 2620
1884
+ },
1885
+ {
1886
+ "epoch": 1.8318098720292504,
1887
+ "grad_norm": 0.3645702600479126,
1888
+ "learning_rate": 8.023952095808385e-06,
1889
+ "loss": 0.1715,
1890
+ "step": 2630
1891
+ },
1892
+ {
1893
+ "epoch": 1.8387742665622007,
1894
+ "grad_norm": 0.40849506855010986,
1895
+ "learning_rate": 7.976047904191618e-06,
1896
+ "loss": 0.1637,
1897
+ "step": 2640
1898
+ },
1899
+ {
1900
+ "epoch": 1.845738661095151,
1901
+ "grad_norm": 0.4219589829444885,
1902
+ "learning_rate": 7.928143712574851e-06,
1903
+ "loss": 0.1679,
1904
+ "step": 2650
1905
+ },
1906
+ {
1907
+ "epoch": 1.8527030556281012,
1908
+ "grad_norm": 0.3615240156650543,
1909
+ "learning_rate": 7.880239520958085e-06,
1910
+ "loss": 0.1757,
1911
+ "step": 2660
1912
+ },
1913
+ {
1914
+ "epoch": 1.8596674501610515,
1915
+ "grad_norm": 0.4644199311733246,
1916
+ "learning_rate": 7.832335329341318e-06,
1917
+ "loss": 0.1819,
1918
+ "step": 2670
1919
+ },
1920
+ {
1921
+ "epoch": 1.8666318446940018,
1922
+ "grad_norm": 0.3925911784172058,
1923
+ "learning_rate": 7.784431137724551e-06,
1924
+ "loss": 0.1722,
1925
+ "step": 2680
1926
+ },
1927
+ {
1928
+ "epoch": 1.873596239226952,
1929
+ "grad_norm": 0.4104118347167969,
1930
+ "learning_rate": 7.736526946107785e-06,
1931
+ "loss": 0.1738,
1932
+ "step": 2690
1933
+ },
1934
+ {
1935
+ "epoch": 1.8805606337599023,
1936
+ "grad_norm": 0.43617820739746094,
1937
+ "learning_rate": 7.688622754491018e-06,
1938
+ "loss": 0.1787,
1939
+ "step": 2700
1940
+ },
1941
+ {
1942
+ "epoch": 1.8875250282928526,
1943
+ "grad_norm": 0.3555070757865906,
1944
+ "learning_rate": 7.640718562874251e-06,
1945
+ "loss": 0.1682,
1946
+ "step": 2710
1947
+ },
1948
+ {
1949
+ "epoch": 1.8944894228258031,
1950
+ "grad_norm": 0.3605625629425049,
1951
+ "learning_rate": 7.592814371257485e-06,
1952
+ "loss": 0.172,
1953
+ "step": 2720
1954
+ },
1955
+ {
1956
+ "epoch": 1.9014538173587534,
1957
+ "grad_norm": 0.40475067496299744,
1958
+ "learning_rate": 7.544910179640719e-06,
1959
+ "loss": 0.135,
1960
+ "step": 2730
1961
+ },
1962
+ {
1963
+ "epoch": 1.9084182118917037,
1964
+ "grad_norm": 0.33572956919670105,
1965
+ "learning_rate": 7.497005988023953e-06,
1966
+ "loss": 0.154,
1967
+ "step": 2740
1968
+ },
1969
+ {
1970
+ "epoch": 1.915382606424654,
1971
+ "grad_norm": 0.4499852657318115,
1972
+ "learning_rate": 7.449101796407186e-06,
1973
+ "loss": 0.1675,
1974
+ "step": 2750
1975
+ },
1976
+ {
1977
+ "epoch": 1.9223470009576042,
1978
+ "grad_norm": 0.4837128221988678,
1979
+ "learning_rate": 7.4011976047904196e-06,
1980
+ "loss": 0.1795,
1981
+ "step": 2760
1982
+ },
1983
+ {
1984
+ "epoch": 1.9293113954905545,
1985
+ "grad_norm": 0.36540481448173523,
1986
+ "learning_rate": 7.353293413173654e-06,
1987
+ "loss": 0.1758,
1988
+ "step": 2770
1989
+ },
1990
+ {
1991
+ "epoch": 1.9362757900235048,
1992
+ "grad_norm": 0.6061664819717407,
1993
+ "learning_rate": 7.305389221556887e-06,
1994
+ "loss": 0.1699,
1995
+ "step": 2780
1996
+ },
1997
+ {
1998
+ "epoch": 1.943240184556455,
1999
+ "grad_norm": 0.4293859899044037,
2000
+ "learning_rate": 7.25748502994012e-06,
2001
+ "loss": 0.1487,
2002
+ "step": 2790
2003
+ },
2004
+ {
2005
+ "epoch": 1.9502045790894054,
2006
+ "grad_norm": 0.3456893563270569,
2007
+ "learning_rate": 7.209580838323354e-06,
2008
+ "loss": 0.167,
2009
+ "step": 2800
2010
+ },
2011
+ {
2012
+ "epoch": 1.9571689736223559,
2013
+ "grad_norm": 0.36705002188682556,
2014
+ "learning_rate": 7.161676646706588e-06,
2015
+ "loss": 0.1544,
2016
+ "step": 2810
2017
+ },
2018
+ {
2019
+ "epoch": 1.9641333681553061,
2020
+ "grad_norm": 0.4752703011035919,
2021
+ "learning_rate": 7.113772455089821e-06,
2022
+ "loss": 0.163,
2023
+ "step": 2820
2024
+ },
2025
+ {
2026
+ "epoch": 1.9710977626882564,
2027
+ "grad_norm": 0.2813783288002014,
2028
+ "learning_rate": 7.0658682634730545e-06,
2029
+ "loss": 0.1536,
2030
+ "step": 2830
2031
+ },
2032
+ {
2033
+ "epoch": 1.9780621572212067,
2034
+ "grad_norm": 0.39708635210990906,
2035
+ "learning_rate": 7.017964071856288e-06,
2036
+ "loss": 0.1596,
2037
+ "step": 2840
2038
+ },
2039
+ {
2040
+ "epoch": 1.985026551754157,
2041
+ "grad_norm": 0.37225341796875,
2042
+ "learning_rate": 6.970059880239521e-06,
2043
+ "loss": 0.1551,
2044
+ "step": 2850
2045
+ },
2046
+ {
2047
+ "epoch": 1.9919909462871073,
2048
+ "grad_norm": 0.4137498438358307,
2049
+ "learning_rate": 6.9221556886227545e-06,
2050
+ "loss": 0.1542,
2051
+ "step": 2860
2052
+ },
2053
+ {
2054
+ "epoch": 1.9975624619134673,
2055
+ "eval_loss": 0.15988580882549286,
2056
+ "eval_runtime": 355.9958,
2057
+ "eval_samples_per_second": 3.396,
2058
+ "eval_steps_per_second": 1.699,
2059
+ "step": 2868
2060
+ }
2061
+ ],
2062
+ "logging_steps": 10,
2063
+ "max_steps": 4305,
2064
+ "num_input_tokens_seen": 0,
2065
+ "num_train_epochs": 3,
2066
+ "save_steps": 478,
2067
+ "stateful_callbacks": {
2068
+ "TrainerControl": {
2069
+ "args": {
2070
+ "should_epoch_stop": false,
2071
+ "should_evaluate": false,
2072
+ "should_log": false,
2073
+ "should_save": true,
2074
+ "should_training_stop": false
2075
+ },
2076
+ "attributes": {}
2077
+ }
2078
+ },
2079
+ "total_flos": 1.338691815552811e+18,
2080
+ "train_batch_size": 2,
2081
+ "trial_name": null,
2082
+ "trial_params": null
2083
+ }
checkpoint-2868/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:956bd3fe04334201160cff0d2e3faf62e32c6c8f709339d63e01a7d4b73b24d0
3
+ size 5560
checkpoint-3346/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Salesforce/Llama-xLAM-2-8b-fc-r
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Salesforce/Llama-xLAM-2-8b-fc-r
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
checkpoint-3346/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Salesforce/Llama-xLAM-2-8b-fc-r",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 64,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "k_proj",
32
+ "up_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }