Update README.md
README.md CHANGED
@@ -170,8 +170,8 @@ Install the latest transformers (>4.40)
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 # use bfloat16 to ensure the best performance.
-model = AutoModelForCausalLM.from_pretrained("
-tokenizer = AutoTokenizer.from_pretrained("
+model = AutoModelForCausalLM.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ", torch_dtype=torch.bfloat16, device_map=device)
+tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")
 messages = [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello world"},
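Taken on its own, the hunk above shows only two changed lines plus context. Below is a minimal runnable sketch of the updated transformers path. It adds the `import torch` that the `torch_dtype=torch.bfloat16` argument requires but the visible context omits, and a hypothetical generation step via `apply_chat_template`, since the README's own generation code sits outside this hunk.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

# The diff loads the AWQ checkpoint in bfloat16; some AWQ kernels only
# support float16, so torch.float16 is the usual fallback if this fails.
model = AutoModelForCausalLM.from_pretrained(
    "SorawitChok/SeaLLM-7B-v2.5-AWQ",
    torch_dtype=torch.bfloat16,
    device_map=device,
)
tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello world"},
]

# Assumption: the checkpoint ships a chat template that accepts a system
# turn; the README's own generation code is not part of this hunk.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```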
@@ -207,7 +207,10 @@ def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_p
 sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
 llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="AWQ")
 
-message =
+message = [
+    {"role": "user", "content": "Explain general relativity in details."}
+]
+
 prompt = seallm_chat_convo_format(message, True)
 gen = llm.generate(prompt, sampling_params)
 
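The body of `seallm_chat_convo_format` is not part of this diff; only its truncated signature appears in the hunk header. The sketch below therefore substitutes an assumed ChatML-style formatter consistent with the `<|im_start|>` and `<eos>` stop tokens, not the README's actual implementation. It also uses lowercase `"awq"`, the value vLLM documents for the `quantization` argument, and passes `sparams` to `llm.generate`, since the context line's `sampling_params` does not match the variable actually defined above it.

```python
from vllm import LLM, SamplingParams

# Assumed stand-in for the README's seallm_chat_convo_format (its body is
# outside this diff): a ChatML-style formatter consistent with the
# '<|im_start|>' and '<eos>' stop tokens used below.
def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_prompt=None):
    text = ""
    if system_prompt is not None:
        text += f"<|im_start|>system\n{system_prompt}<eos>\n"
    for turn in conversations:
        text += f"<|im_start|>{turn['role']}\n{turn['content']}<eos>\n"
    if add_assistant_prefix:
        text += "<|im_start|>assistant\n"
    return text

sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
# vLLM's documented quantization value is lowercase "awq"; some versions
# may reject the diff's uppercase "AWQ".
llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="awq")

message = [
    {"role": "user", "content": "Explain general relativity in details."}
]

prompt = seallm_chat_convo_format(message, True)
# Use the sampling-params object actually defined above (`sparams`).
gen = llm.generate(prompt, sparams)
print(gen[0].outputs[0].text)
```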