priyankrathore committed on
Commit badbb1e · verified · 1 Parent(s): 2fa7665

Training in progress, step 500

config.json CHANGED
@@ -1,26 +1,26 @@
 {
-  "_name_or_path": "google/flan-t5-base",
+  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
   "d_ff": 2048,
   "d_kv": 64,
-  "d_model": 768,
+  "d_model": 512,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
+  "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
+  "feed_forward_proj": "relu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
-  "is_gated_act": true,
+  "is_gated_act": false,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
@@ -54,9 +54,8 @@
       "prefix": "translate English to Romanian: "
     }
   },
-  "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.46.3",
+  "transformers_version": "4.45.1",
   "use_cache": true,
   "vocab_size": 32128
 }
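The config diff above amounts to swapping the base checkpoint from google/flan-t5-base (d_model 768, 12 layers, 12 heads, gated-GELU) to t5-small (d_model 512, 6 layers, 8 heads, plain ReLU). A minimal sketch, assuming the transformers library is installed, that loads the new base model and checks the values that changed:

    # Minimal sketch, assuming `transformers` is installed; verifies the key
    # architecture values that changed in this diff.
    from transformers import AutoConfig, AutoModelForSeq2SeqLM

    config = AutoConfig.from_pretrained("t5-small")
    assert config.d_model == 512               # was 768 in google/flan-t5-base
    assert config.num_layers == 6              # was 12
    assert config.num_heads == 8               # was 12
    assert config.feed_forward_proj == "relu"  # was "gated-gelu"

    model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")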
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7009e4ea5f57e09c9ab1675519b8d258422383d881371160193b4c31b57cb49e
-size 990345064
+oid sha256:39a1806ec539e9a6eb463bb754b06d4f633704b985bf6df4e302916177837a94
+size 242041896
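The new weights file is roughly a quarter the size of the old one, which lines up with the parameter counts of the two base models. A back-of-the-envelope check (float32 weights at 4 bytes each; the small safetensors header is ignored):

    # Rough parameter-count check: float32 weights are 4 bytes each.
    old_params = 990_345_064 / 4   # ~247.6M, consistent with flan-t5-base (~248M)
    new_params = 242_041_896 / 4   # ~60.5M, consistent with t5-small (~60M)
    print(f"{old_params / 1e6:.1f}M -> {new_params / 1e6:.1f}M parameters")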
runs/Dec08_10-06-25_3875563cc298/events.out.tfevents.1733652523.3875563cc298.30.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd997614c47b329cb1f26793f393d924e0125f1f98e712f6ed3f1f44a009e162
+size 5905
runs/Dec08_10-09-35_3875563cc298/events.out.tfevents.1733652590.3875563cc298.30.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70fa7990c2e697dd08bada18814e4117e1fd045ab0c8987ae9c3ee977e5ad814
+size 6641
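The events.out.tfevents.* files under runs/ are TensorBoard logs written by the Trainer's TensorBoard callback. A sketch, assuming the tensorboard package is installed, of reading the logged scalar tags offline:

    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    # Point at one run directory from this commit; Reload() parses the event file.
    acc = EventAccumulator("runs/Dec08_10-09-35_3875563cc298")
    acc.Reload()
    print(acc.Tags())  # scalar tags such as a training-loss series, if logged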
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<pad>",
@@ -928,13 +927,11 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "clean_up_tokenization_spaces": false,
+  "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
-  "legacy": true,
   "model_max_length": 512,
   "pad_token": "<pad>",
-  "sp_model_kwargs": {},
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }
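The tokenizer is a standard T5Tokenizer with model_max_length 512; the task prefix comes from task_specific_params in config.json above. A usage sketch, assuming transformers and sentencepiece are installed (loading from the t5-small base here; loading from this repo would pick up the retrained files instead):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("t5-small")
    inputs = tokenizer(
        "translate English to Romanian: The weather is nice today.",
        return_tensors="pt",
        max_length=512,   # matches model_max_length in tokenizer_config.json
        truncation=True,
    )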
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7d6e457c0a55e0213a861f8f02789df63a11ef50d618c97141fc9a05d83c636
-size 5432
+oid sha256:70f786aca059a4da0efee4b01f25db05e7faca7c815713ddf07050b319c48856
+size 5368
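training_args.bin is the pickled TrainingArguments object that Trainer stores next to each checkpoint. A hedged sketch of inspecting it, assuming PyTorch is installed (the printed attribute names are standard TrainingArguments fields, not values confirmed by this commit):

    # Trainer saves training_args.bin with torch.save(), so it can be
    # unpickled for inspection. weights_only=False is required on newer
    # PyTorch versions because this is an arbitrary Python object.
    import torch

    args = torch.load("training_args.bin", weights_only=False)
    print(args.learning_rate, args.per_device_train_batch_size, args.save_steps)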