schuler committed
Commit dee4646 · verified · 1 Parent(s): 929124e

Update README.md

Files changed (1):
  1. README.md (+9 -3)
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
 license: mit
 datasets:
 - MBZUAI/LaMini-instruction
+language:
+- en
 ---
 # Saving 77% of the Parameters in Large Language Models Technical Report
 This repository contains experiment results for the [Saving 77% of the Parameters in Large Language Models Technical Report (PDF)](https://www.researchgate.net/publication/388835829_SAVING_77_OF_THE_PARAMETERS_IN_LARGE_LANGUAGE_MODELS_TECHNICAL_REPORT).
@@ -36,6 +38,10 @@ The following table shows LaMini training results with the baseline and the opti
 
 ## Usage:
 ```
+!pip install -q -U transformers
+!pip install -q -U accelerate
+!pip install -q -U flash-attn --no-build-isolation
+
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
 from transformers import LlamaTokenizer
 import torch
@@ -45,8 +51,8 @@ REPO_NAME = 'schuler/experimental-JP47D56C'
 def load_model(local_repo_name):
   tokenizer = LlamaTokenizer.from_pretrained(local_repo_name, trust_remote_code=True)
   generator_conf = GenerationConfig.from_pretrained(local_repo_name)
-  model = AutoModelForCausalLM.from_pretrained(local_repo_name, trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="eager")
-  # model.to('cuda')
+  model = AutoModelForCausalLM.from_pretrained(local_repo_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16)
+  model.to('cuda')
   return tokenizer, generator_conf, model
 
 tokenizer, generator_conf, model = load_model(REPO_NAME)
@@ -57,7 +63,7 @@ except Exception as e:
   global_error = f"Failed to load model: {str(e)}"
 
 def PrintTest(str):
-  print(generator(str, max_new_tokens=256, do_sample=True, top_p=0.25, repetition_penalty=1.2))
+  print(generator(str, max_new_tokens=256, do_sample=True, top_p=0.5, repetition_penalty=1.2))
 
 PrintTest(f"<|user|>\nHello\n<|end|>\n<|assistant|>\n")
 PrintTest(f"<|user|>Hello\n<|end|><|assistant|>")
 