import os
import tempfile

import gradio as gr
import edge_tts
from huggingface_hub import InferenceClient
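# NOTE (assumption, not in the original app): the Meta-Llama-3 repo used below
# is gated on the Hugging Face Hub, so anonymous InferenceClient calls may be
# rejected. A minimal sketch that reads a token from the environment; if
# needed, pass it along as InferenceClient(model_id, token=HF_TOKEN).
HF_TOKEN = os.environ.get("HF_TOKEN")  # None falls back to anonymous access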
DESCRIPTION = """ # <center><b>ZARVIS⚡</b></center>
### <center>A personal voice assistant for YOU
### <center>I'm your ZEN Voice Assistant.</center>
"""
MORE = """ ## TRY Other Models
### https://zenai.biz
"""
Fast = """## Fastest Model"""
Complex = """## Best in Complex Question"""
Detail = """## Best for Detailed Generation or Long Answers"""
# ----------------- MODEL 1 ----------------- #
# Mistral-based model: fastest responses.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

system_instructions1 = (
    "[SYSTEM] Answer as Real ZARVIS, made by 'ZEN'. "
    "Keep the conversation very short, clear, friendly, and concise. "
    "The text provided is a request for a specific type of response from you, the virtual assistant. "
    "The request asks you to provide friendly responses as if you are the character ZARVIS, made by Tony Stark. "
    "Avoid introductions and start answering the query directly, elaborating on all aspects. "
    "As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user, "
    "not an AI-powered assistant. [USER]"
)
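# The handlers below build completion-style prompts by plain concatenation:
# "[SYSTEM] <instructions> [USER] <user prompt> [ZARVIS]" (or "[ASSISTANT]"),
# leaving the model to continue the text after the final role tag.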
async def generate1(prompt):
    """Fast model: stream a short answer, then synthesize it with edge-tts."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=756,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = system_instructions1 + prompt + "[ZARVIS]"
    # return_full_text=False keeps the echoed prompt out of the spoken answer.
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    # edge-tts emits MP3 audio by default, so name the temp file accordingly.
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
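# generate2 and generate3 below repeat the same text -> edge-tts -> tempfile
# steps verbatim. A minimal consolidation sketch (hypothetical helper, not
# called by the original handlers), should the duplication be factored out:
async def _speak(text: str) -> str:
    """Synthesize `text` with edge-tts and return the temp MP3 file's path."""
    communicate = edge_tts.Communicate(text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path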
# ----------------- MODEL 2 ----------------- #
# Llama-based model: best for complex questions.
client2 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")

system_instructions2 = (
    "[SYSTEM] Answer as Real ZARVIS, made by 'ZEN'. "
    "You must answer in a friendly style and an easy manner. "
    "You can answer complex questions. "
    "Do not say who you are or greet; simply start answering. "
    "Stop as soon as you have given the complete answer. [USER]"
)
async def generate2(prompt):
    """Complex-question model: stream a medium-length answer, then speak it."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
    )
    formatted_prompt = system_instructions2 + prompt + "[ASSISTANT]"
    stream = client2.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
# ----------------- MODEL 3 ----------------- #
# Same Llama-based model, prompted for longer, more detailed answers.
client3 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")

system_instructions3 = (
    "[SYSTEM] The text provided is a request for a specific type of response from me, the virtual assistant. "
    "I should provide detailed and friendly responses as if I am the character ZARVIS, inspired by Tony Stark. "
    "Avoid introductions and start answering the query directly, elaborating on all aspects of the request. "
    "As an AI-powered assistant, my task is to generate responses that appear as if they are created by the user, "
    "not an AI-powered assistant. [USER]"
)
async def generate3(prompt):
    """Detail model: stream a long-form answer, then synthesize it."""
    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=2048,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
    )
    formatted_prompt = system_instructions3 + prompt + "[ASSISTANT]"
    stream = client3.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    communicate = edge_tts.Communicate(output)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
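# NOTE (assumption): edge_tts.Communicate falls back to a default voice when
# none is given. A specific voice can be requested explicitly, e.g.
#   edge_tts.Communicate(output, voice="en-US-GuyNeural")
# with voice names discoverable via the async edge_tts.list_voices().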
# ----------------- Gradio Interface ----------------- #
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
        # Not wired to any handler yet; kept as-is.
        input_text = gr.Textbox(label="(Optional) Additional Input", elem_id="important")
        output_audio = gr.Audio(
            label="ZARVIS",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
    with gr.Row():
        translate_btn = gr.Button("Response")
        translate_btn.click(
            fn=generate1,
            inputs=user_input,
            outputs=output_audio,
            api_name="translate",
        )
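    # generate2 and generate3 are defined above but never reachable from the UI.
    # A minimal wiring sketch (assumption: one button per model was intended),
    # reusing the otherwise-unused Complex/Detail blurbs as captions:
    with gr.Row():
        gr.Markdown(Complex)
        complex_btn = gr.Button("Complex Response")
        complex_btn.click(fn=generate2, inputs=user_input, outputs=output_audio)
    with gr.Row():
        gr.Markdown(Detail)
        detail_btn = gr.Button("Detailed Response")
        detail_btn.click(fn=generate3, inputs=user_input, outputs=output_audio)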
    gr.Markdown(MORE)

if __name__ == "__main__":
    demo.queue(max_size=200).launch()