Spaces: Runtime error
```python
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Page config
st.set_page_config(
    page_title="Zephyr Chat",
    page_icon="🤖",
    layout="wide"
)

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# Load model and tokenizer, cached so the 7B model is not reloaded on every rerun
@st.cache_resource
def load_model():
    model_name = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # roughly halves memory vs. the float32 default
    )
    return model, tokenizer

# Main chat interface
st.title("Zephyr Chatbot 🤖")

try:
    model, tokenizer = load_model()

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if prompt := st.chat_input("What's on your mind?"):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                # Prepare input
                input_text = f"User: {prompt}\nAssistant:"
                inputs = tokenizer(input_text, return_tensors="pt")

                # Generate response (do_sample=True so temperature actually takes effect)
                outputs = model.generate(
                    inputs.input_ids,
                    max_new_tokens=200,  # cap new tokens rather than total length
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=tokenizer.eos_token_id
                )

                # Decode and display response
                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                response = response.split("Assistant:")[-1].strip()
                st.markdown(response)
                st.session_state.messages.append({"role": "assistant", "content": response})

except Exception as e:
    st.error(f"Error: {str(e)}")
    st.info("Note: This app requires significant computational resources. Consider using a smaller model or upgrading your Space's resources.")
```
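
Two further points worth considering. First, Zephyr-7b-beta was fine-tuned on a specific chat format, so the hand-rolled `User:/Assistant:` prompt above likely degrades output quality. Below is a minimal sketch of the generation step rewritten to use the tokenizer's built-in `apply_chat_template` (a standard transformers API; Zephyr's tokenizer ships with a matching template). The sampling parameters are carried over from the original code.

```python
# Sketch: build the prompt with the tokenizer's chat template instead of a
# hand-rolled "User:/Assistant:" string.
chat = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,  # append the assistant-turn marker
    return_tensors="pt",
)
outputs = model.generate(
    input_ids,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)
# Decode only the newly generated tokens, skipping the prompt
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
```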
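
Second, the runtime error itself is most plausibly memory: a 7B model needs roughly 14 GB just for float16 weights, more than a free CPU Space provides. Rather than loading the model into the Space at all, one option is to call it through the hosted Inference API with `huggingface_hub.InferenceClient`. This is a sketch, not a drop-in fix: it assumes the serverless API serves this model, and the `HF_TOKEN` secret name is a hypothetical placeholder you would configure in the Space settings.

```python
# Sketch: query the model remotely instead of loading it into the Space.
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",
    token=os.environ.get("HF_TOKEN"),  # hypothetical Space secret
)

def generate_remote(prompt: str) -> str:
    # text_generation mirrors the generate() parameters used above
    return client.text_generation(
        prompt,
        max_new_tokens=200,
        temperature=0.7,
    )
```

With this approach the Space only runs the lightweight Streamlit UI, so it fits comfortably in free-tier hardware.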