aklein4 committed on
Commit
006f4cd
·
verified ·
1 Parent(s): a9f8b74

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. 000000000827/config.json +66 -0
  2. 000000000827/model.pt +3 -0
000000000827/config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "oloop.OLoopModel",
3
+ "pretrained_url": "aklein4/Llama-3.2-1B-TPU",
4
+ "pretrained_step": 0,
5
+ "pretrained_strict": false,
6
+ "torch_dtype": "float32",
7
+ "vocab_size": 128256,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": 128001,
10
+ "pad_token_id": -100,
11
+ "hidden_size": 2048,
12
+ "num_hidden_layers": 16,
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "intermediate_size": 8192,
16
+ "hidden_act": "silu",
17
+ "max_position_embeddings": 131072,
18
+ "rope_theta": 500000.0,
19
+ "rope_scaling": {
20
+ "factor": 32.0,
21
+ "high_freq_factor": 4.0,
22
+ "low_freq_factor": 1.0,
23
+ "original_context_len": 8192
24
+ },
25
+ "initializer_range": 0.02,
26
+ "attention_dropout": false,
27
+ "attention_bias": false,
28
+ "rms_norm_eps": 1e-05,
29
+ "attention_kernel": "flash_attention",
30
+ "pure_modules": [],
31
+ "fast_weight_size": 2048,
32
+ "base_lr": 0.0,
33
+ "momentum_beta": 0.9,
34
+ "momentum_dtype": "bfloat16",
35
+ "state_dtype": "float32",
36
+ "sharding": {
37
+ "model.layers.*": [
38
+ [
39
+ "data",
40
+ "fsdp"
41
+ ],
42
+ null,
43
+ null
44
+ ],
45
+ "lm_head": [
46
+ [
47
+ "data",
48
+ "fsdp"
49
+ ],
50
+ null,
51
+ null
52
+ ]
53
+ },
54
+ "remat": {
55
+ "activation_checkpoint_layers": [
56
+ "LlamaDecoderLayer"
57
+ ],
58
+ "optimization_barrier_layers": [
59
+ "LlamaDecoderLayer"
60
+ ],
61
+ "scan_layers": "model.layers",
62
+ "offload_tensors": [
63
+ "decoder_input"
64
+ ]
65
+ }
66
+ }
000000000827/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f82194570066f1c9d2844ae56d2fbb9b35b3f1566bb6991dda03aff5e1a106
3
+ size 7067733759