File size: 1,718 Bytes
7560d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{
  "config": {
    "epochs": 10,
    "lr": 0.0003,
    "warmup_steps": 300,
    "grad_accum_steps": 8,
    "grad_clip": 1.0
  },
  "train": [
    {
      "epoch": 1,
      "avg_loss": 7.070409,
      "lr": 0.00027,
      "step": 270,
      "time_s": 971.0
    },
    {
      "epoch": 2,
      "avg_loss": 6.138723,
      "lr": 0.0002986039232296601,
      "step": 540,
      "time_s": 969.5
    },
    {
      "epoch": 3,
      "avg_loss": 5.511719,
      "lr": 0.00029373201732051774,
      "step": 810,
      "time_s": 970.5
    },
    {
      "epoch": 4,
      "avg_loss": 5.033876,
      "lr": 0.00028548319801432657,
      "step": 1080,
      "time_s": 966.4
    },
    {
      "epoch": 5,
      "avg_loss": 4.711938,
      "lr": 0.00027406193668993577,
      "step": 1350,
      "time_s": 977.1
    },
    {
      "epoch": 6,
      "avg_loss": 4.464801,
      "lr": 0.00027245653264858043,
      "step": 1620,
      "time_s": 973.7
    },
    {
      "epoch": 7,
      "avg_loss": 4.292098,
      "lr": 0.0002676801152109725,
      "step": 1890,
      "time_s": 977.7
    },
    {
      "epoch": 8,
      "avg_loss": 4.150779,
      "lr": 0.00025985108200750904,
      "step": 2160,
      "time_s": 975.7
    }
  ],
  "val": [
    {
      "epoch": 1,
      "avg_loss": 6.459693
    },
    {
      "epoch": 2,
      "avg_loss": 5.804065
    },
    {
      "epoch": 3,
      "avg_loss": 5.179338
    },
    {
      "epoch": 4,
      "avg_loss": 4.774045
    },
    {
      "epoch": 5,
      "avg_loss": 4.511194
    },
    {
      "epoch": 6,
      "avg_loss": 4.305547
    },
    {
      "epoch": 7,
      "avg_loss": 4.154681
    },
    {
      "epoch": 8,
      "avg_loss": 4.037178
    }
  ],
  "sanity": []
}