# from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import datetime

import streamlit as st

# Candidate questions (only the last assignment survives; in Portuguese:
# "What are the planets of the solar system?", "What is the largest planet
# in the solar system?").
question = "Name the planets in the solar system? A: "
question = "Quais são os planetas do sistema solar?"
question = "Qual é o maior planeta do sistema solar?"

# Start timing model load + generation.
before = datetime.datetime.now()

# Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("01-ai/Yi-1.5-6B-Chat")
# model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-1.5-6B-Chat")

# Load model directly
from transformers import AutoTokenizer, Phi3ForCausalLM

model = Phi3ForCausalLM.from_pretrained("microsoft/phi-3-mini-4k-instruct")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")

# Prompt (Portuguese): "Question: What is the largest planet in the solar system?"
prompt = "Question: Qual é o maior planeta do sistema solar ?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = model.generate(inputs.input_ids, max_length=100)
output = tokenizer.batch_decode(
    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]

# Show the decoded answer in the Streamlit page.
with st.container():
    st.write('\n\n')
    st.write('LLM-LANAChat')
    st.write('\n\n' + output)

# --- Earlier experiments, kept commented out for reference ------------------

# RoBERTa encoder (returns hidden states, not generated text):
# tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
# model = TFRobertaModel.from_pretrained("FacebookAI/roberta-base")
# st.write('tokenizando...')
# inputs = tokenizer(question, return_tensors="tf")
# st.write('gerando a saida...')
# outputs = model(inputs)
# last_hidden_states = outputs.last_hidden_state
# output = last_hidden_states
# st.write(output)

# Same prompt with a shorter generation budget:
# st.write('tokenizando...')
# prompt = "Qual é o maior planeta do sistema solar ?"
# inputs = tokenizer(prompt, return_tensors="pt")
# # Generate
# st.write('gerando a saida...')
# generate_ids = model.generate(inputs.input_ids, max_length=30)
# output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
# st.write('saída gerada')
# st.write(output)

# Use a pipeline as a high-level helper
# from transformers import pipeline
# messages = [
#     {"role": "user", "content": question},
# ]
# print('gerando a saida...')
# st.write('gerando a saida...')
# pipe = pipeline("text-generation", model="01-ai/Yi-1.5-34B-Chat")
# st.write('pipeline...')
# output = pipe(messages)
# st.write('saída gerada...')
# st.write(output)

# Chat-template flow for a GPT-Q/AWQ checkpoint (model_path is not defined in this file):
# print('tokenizando...')
# tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
# print('tokenizado.')
# print('carregando o modelo...')
# # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     device_map="auto",
#     torch_dtype='auto'
# ).eval()
# print('modelo carregado.')

# # Prompt content: "hi"
# messages = [
#     {"role": "user", "content": question}
# ]
# print('tokenizando o prompt...')
# input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, return_tensors='pt')
# print('prompt tokenizado.')
# print('gerando a saida...')
# output_ids = model.generate(input_ids, eos_token_id=tokenizer.eos_token_id,
#                             max_new_tokens=10)  # 10 # 45 # max_new_tokens=22
# print('saida gerada.')
# print('Decodificando a saida...')
# response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
# print('saida decodificada.')
# # Model response: "Hello! How can I assist you today?"
# print(response)

# question = output['choices'][0]['text'].split('A:')[0]
# answer = output['choices'][0]['text'].split('A:')[1]
# answer = 'A: ' + answer

print('\n\n')
# print(question)
# print(response)

# Report elapsed wall-clock time for load + generation.
after = datetime.datetime.now()
current_time = (after - before)  # .strftime("%H:%M:%S")
print("\nTime Elapsed: ", current_time)
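
# --- Optional: chat-template prompting (a minimal sketch) -------------------
# The phi-3 checkpoint loaded above is instruction-tuned, and the commented-out
# experiment earlier in this file builds its prompt with apply_chat_template
# instead of raw text. The sketch below redoes the generation that way, reusing
# the model, tokenizer, question and st objects already defined in this script.
# It assumes the tokenizer ships a chat template (the case for
# "microsoft/phi-3-mini-4k-instruct"); max_new_tokens=100 is an arbitrary
# choice, not a value taken from the original script.
messages = [{"role": "user", "content": question}]
chat_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
chat_output_ids = model.generate(chat_ids, max_new_tokens=100)
# Decode only the newly generated tokens, skipping the prompt portion.
chat_answer = tokenizer.decode(
    chat_output_ids[0][chat_ids.shape[-1]:], skip_special_tokens=True
)
st.write('\n\n' + chat_answer)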