import os
import tempfile

import gradio as gr
import edge_tts
from huggingface_hub import InferenceClient
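# NOTE (assumption, not in the original app): the Meta-Llama-3 repo used below
# is gated on the Hugging Face Hub, so anonymous InferenceClient calls may be
# rejected. A minimal sketch that reads a token from the environment; if
# needed, pass it along as InferenceClient(model_id, token=HF_TOKEN).
HF_TOKEN = os.environ.get("HF_TOKEN")  # None falls back to anonymous access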
DESCRIPTION = """ # <center><b>ZARVIS⚡</b></center>
### <center>A personal voice assistant for YOU
### <center>I'm your ZEN Voice Assistant.</center>
"""
MORE = """ ## TRY Other Models
### https://zenai.biz
"""
Fast = """## Fastest Model"""
Complex = """## Best in Complex Question"""
Detail = """## Best for Detailed Generation or Long Answers"""
# ----------------- MODEL 1 ----------------- #
# Mistral-based model: fastest responses.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

system_instructions1 = (
    "[SYSTEM] Answer as Real ZARVIS, made by 'ZEN'. "
    "Keep the conversation very short, clear, friendly, and concise. "
    "The text provided is a request for a specific type of response from you, the virtual assistant. "
    "The request asks you to provide friendly responses as if you are the character ZARVIS, made by Tony Stark. "
    "Avoid introductions and start answering the query directly, elaborating on all aspects. "
    "As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user, "
    "not an AI-powered assistant. [USER]"
)
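# The handlers below build completion-style prompts by plain concatenation:
# "[SYSTEM] <instructions> [USER] <user prompt> [ZARVIS]" (or "[ASSISTANT]"),
# leaving the model to continue the text after the final role tag.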
async def generate1(prompt):
    """Fast model: stream a short answer, then synthesize it with edge-tts."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=756,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = system_instructions1 + prompt + "[ZARVIS]"
    # return_full_text=False keeps the echoed prompt out of the spoken answer.
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    # edge-tts emits MP3 audio by default, so name the temp file accordingly.
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
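# generate2 and generate3 below repeat the same text -> edge-tts -> tempfile
# steps verbatim. A minimal consolidation sketch (hypothetical helper, not
# called by the original handlers), should the duplication be factored out:
async def _speak(text: str) -> str:
    """Synthesize `text` with edge-tts and return the temp MP3 file's path."""
    communicate = edge_tts.Communicate(text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path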
# ----------------- MODEL 2 ----------------- #
# Llama-based model: best for complex questions.
client2 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")

system_instructions2 = (
    "[SYSTEM] Answer as Real ZARVIS, made by 'ZEN'. "
    "You must answer in a friendly style and an easy manner. "
    "You can answer complex questions. "
    "Do not say who you are or greet; simply start answering. "
    "Stop as soon as you have given the complete answer. [USER]"
)
async def generate2(prompt):
    """Complex-question model: stream a medium-length answer, then speak it."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
    )
    formatted_prompt = system_instructions2 + prompt + "[ASSISTANT]"
    stream = client2.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
# ----------------- MODEL 3 ----------------- #
# Same Llama-based model, prompted for longer, more detailed answers.
client3 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")

system_instructions3 = (
    "[SYSTEM] The text provided is a request for a specific type of response from me, the virtual assistant. "
    "I should provide detailed and friendly responses as if I am the character ZARVIS, inspired by Tony Stark. "
    "Avoid introductions and start answering the query directly, elaborating on all aspects of the request. "
    "As an AI-powered assistant, my task is to generate responses that appear as if they are created by the user, "
    "not an AI-powered assistant. [USER]"
)
async def generate3(prompt):
    """Detail model: stream a long-form answer, then synthesize it."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=2048,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
    )
    formatted_prompt = system_instructions3 + prompt + "[ASSISTANT]"
    stream = client3.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
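# NOTE (assumption): edge_tts.Communicate falls back to a default voice when
# none is given. A specific voice can be requested explicitly, e.g.
#   edge_tts.Communicate(output, voice="en-US-GuyNeural")
# with voice names discoverable via the async edge_tts.list_voices().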
# ----------------- Gradio Interface ----------------- #
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
        # Not wired to any handler yet; kept as-is.
        input_text = gr.Textbox(label="(Optional) Additional Input", elem_id="important")
        output_audio = gr.Audio(
            label="ZARVIS",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
    with gr.Row():
        translate_btn = gr.Button("Response")
        translate_btn.click(
            fn=generate1,
            inputs=user_input,
            outputs=output_audio,
            api_name="translate",
        )
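    # generate2 and generate3 are defined above but never reachable from the UI.
    # A minimal wiring sketch (assumption: one button per model was intended),
    # reusing the otherwise-unused Complex/Detail blurbs as captions:
    with gr.Row():
        gr.Markdown(Complex)
        complex_btn = gr.Button("Complex Response")
        complex_btn.click(fn=generate2, inputs=user_input, outputs=output_audio)
    with gr.Row():
        gr.Markdown(Detail)
        detail_btn = gr.Button("Detailed Response")
        detail_btn.click(fn=generate3, inputs=user_input, outputs=output_audio)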
    gr.Markdown(MORE)

if __name__ == "__main__":
    demo.queue(max_size=200).launch()