run

#5 opened by rakmik

from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForCausalLM

model_id = "microsoft/Phi-3.5-mini-instruct-onnx"

# Point file_name at the .onnx graph file, not the .onnx.data weights file.
model = ORTModelForCausalLM.from_pretrained(
    model_id,
    subfolder="cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4",
    file_name="phi-3.5-mini-instruct-cpu-int4-awq-block-128-acc-level-4.onnx",
)
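
If you are not sure which file name to pass, you can list the repository contents first. A small sketch using huggingface_hub (already a dependency of transformers):

from huggingface_hub import list_repo_files

# List every file in the ONNX repo, then filter to the CPU subfolder
# to find the exact .onnx file name to pass as file_name.
files = list_repo_files("microsoft/Phi-3.5-mini-instruct-onnx")
print([f for f in files if f.startswith("cpu_and_mobile/")])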

# Load the tokenizer from the original model id (without the -onnx suffix).
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
result = pipe("Who is Napoleon Bonaparte?")
print(result)

[{'generated_text': 'Who is Napoleon Bonaparte?\n\nNapoleon Bonaparte was a French military and political leader who rose to prom'}]
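
Two optional tweaks, both standard transformers generation options rather than anything specific to this repo: the answer above is cut off at the default generation length, and Phi-3.5-mini-instruct is a chat-tuned model, so applying the chat template usually improves responses. A hedged sketch:

# Phi-3.5 is chat-tuned: wrap the question in the model's chat template.
messages = [{"role": "user", "content": "Who is Napoleon Bonaparte?"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# max_new_tokens avoids the default-length truncation seen above;
# return_full_text=False drops the prompt from the output.
result = pipe(prompt, max_new_tokens=200, return_full_text=False)
print(result[0]["generated_text"])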

kvaishnavi changed discussion status to closed
