mikeliou committed on
Commit
9bedf79
verified
1 Parent(s): 9d20ac6

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: distilbert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # sample_qa_model
15
 
16
- This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.4025
19
 
20
  ## Model description
21
 
@@ -35,8 +35,8 @@ More information needed
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 2e-05
38
- - train_batch_size: 16
39
- - eval_batch_size: 16
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
@@ -46,9 +46,9 @@ The following hyperparameters were used during training:
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
- | 2.8874 | 1.0 | 500 | 1.7917 |
50
- | 1.5573 | 2.0 | 1000 | 1.4240 |
51
- | 1.1898 | 3.0 | 1500 | 1.4025 |
52
 
53
 
54
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: bert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # sample_qa_model
15
 
16
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.5846
19
 
20
  ## Model description
21
 
 
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 2e-05
38
+ - train_batch_size: 64
39
+ - eval_batch_size: 64
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
 
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
+ | No log | 1.0 | 125 | 2.1583 |
50
+ | No log | 2.0 | 250 | 1.6478 |
51
+ | No log | 3.0 | 375 | 1.5846 |
52
 
53
 
54
  ### Framework versions
config.json CHANGED
@@ -1,24 +1,26 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
- "activation": "gelu",
4
  "architectures": [
5
- "DistilBertForQuestionAnswering"
6
  ],
7
- "attention_dropout": 0.1,
8
- "dim": 768,
9
- "dropout": 0.1,
10
- "hidden_dim": 3072,
 
 
11
  "initializer_range": 0.02,
 
 
12
  "max_position_embeddings": 512,
13
- "model_type": "distilbert",
14
- "n_heads": 12,
15
- "n_layers": 6,
16
  "pad_token_id": 0,
17
- "qa_dropout": 0.1,
18
- "seq_classif_dropout": 0.2,
19
- "sinusoidal_pos_embds": false,
20
- "tie_weights_": true,
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.41.2",
 
 
23
  "vocab_size": 30522
24
  }
 
1
  {
2
+ "_name_or_path": "bert-base-uncased",
 
3
  "architectures": [
4
+ "BertForQuestionAnswering"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
  "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
 
 
 
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.41.2",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
  "vocab_size": 30522
26
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02378736afb0d2f9e704504654aef5708d7eab8f8f69139b0fa1742a2d161cc3
3
- size 265470032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b33b97fd97b587807f72446343918427f9225c7bc24b8999398dae24c349a6
3
+ size 435596088
runs/Jul12_09-13-53_612fd39bb34e/events.out.tfevents.1720775633.612fd39bb34e.536.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba511891c1dc967707495c0eee316ebe51406f539b374d7a676df413abbaf6d2
3
+ size 4184
runs/Jul12_09-14-07_612fd39bb34e/events.out.tfevents.1720775647.612fd39bb34e.536.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa37760b5db3e946f674c4e641a7805e58c676280f620ba372363481a33540a
3
+ size 4184
runs/Jul12_09-15-47_612fd39bb34e/events.out.tfevents.1720775748.612fd39bb34e.6543.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f912414462a722a7c02c951c88869df2c2ba74248005290f5f0cd5ee1a265cbf
3
+ size 5957
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f461eb11360de3dcad212018c04313edb4de58e478c102e2b8c1709f5ce2be
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd35cab08bc8922143b44935d627ae7ce908d82a79d9c2272d62fd7f82e790ca
3
  size 5112