"""Gradio front-end aggregating several OpenAI-compatible chat APIs (DeepSeek, Kimi)."""

import os

import gradio as gr
from openai import OpenAI


def predict(
    message,
    history,
    system_prompt,
    model,
    api_url,
    api_key,
    max_tk,
    temp,
    top_p,
):
    """Send one chat turn to an OpenAI-compatible endpoint and return the reply.

    Parameters
    ----------
    message : str
        Latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) exchange pairs as supplied by gr.ChatInterface.
    system_prompt : str
        System instruction prepended to the conversation.
    model : str
        Provider model identifier.
    api_url : str
        Base URL of the OpenAI-compatible endpoint.
    api_key : str
        Credential for the provider; an empty value short-circuits with a hint.
    max_tk : int
        Passed through as ``max_tokens``.
    temp, top_p : float
        Sampling controls.

    Returns
    -------
    str
        The assistant reply, or an error description string on failure.
    """
    if not api_key:
        return "Please set valid api keys in settings first."

    # Rebuild the conversation in OpenAI chat-completions message format.
    msgs = [{"role": "system", "content": system_prompt}]
    for user, assistant in history:
        msgs.append({"role": "user", "content": user})
        # BUGFIX: prior assistant turns must carry the "assistant" role,
        # not "system", or the model loses the conversational context.
        msgs.append({"role": "assistant", "content": assistant})
    msgs.append({"role": "user", "content": message})

    try:
        client = OpenAI(api_key=api_key, base_url=api_url)
        completion = client.chat.completions.create(
            model=model,
            messages=msgs,
            max_tokens=max_tk,
            temperature=temp,
            top_p=top_p,
            stream=False,
        )
        response = completion.to_dict()["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface provider/connection errors in the chat window instead of crashing.
        response = f"{e}"
    return response


def _fake_stream(response):
    """Yield growing prefixes of *response* so the UI renders it incrementally.

    The upstream call is non-streaming; this simulates token streaming for
    gr.ChatInterface in O(n) instead of the previous append-and-join loop.
    """
    for i in range(1, len(response) + 1):
        yield response[:i]


def deepseek(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """gr.ChatInterface callback targeting the DeepSeek API."""
    response = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.deepseek.com",
        api_key,
        max_tk,
        temp,
        top_p,
    )
    yield from _fake_stream(response)


def kimi(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """gr.ChatInterface callback targeting the Moonshot (Kimi) API."""
    response = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.moonshot.cn/v1",
        api_key,
        max_tk,
        temp,
        top_p,
    )
    yield from _fake_stream(response)


def _build_tab(title, chat_fn, models, default_model, key_env):
    """Create one provider tab: a settings accordion plus a ChatInterface.

    Widget creation order matches the ``additional_inputs`` order expected by
    the provider callbacks (model, key, system prompt, max tokens, temp, top_p).
    """
    with gr.Tab(title):
        with gr.Accordion(label="⚙️ Settings", open=False):
            model = gr.Dropdown(
                choices=models,
                value=default_model,
                label="Select a model",
            )
            # API key is pre-filled from the environment when available.
            key = gr.Textbox(
                os.getenv(key_env),
                type="password",
                label="API key",
            )
            # BUGFIX: this default prompt was previously split across a physical
            # line inside a plain string literal (a SyntaxError); it is now one
            # line, consistent across tabs, with "carfuly" corrected.
            sys_prompt = gr.Textbox(
                "You are a useful assistant. first recognize user request and then reply carefully and thinking",
                label="System prompt",
            )
            max_tk = gr.Slider(0, 32000, 10000, label="Max new tokens")
            temp = gr.Slider(0, 1, 0.3, label="Temperature")
            top_p = gr.Slider(0, 1, 0.95, label="Top P sampling")
        gr.ChatInterface(
            chat_fn,
            additional_inputs=[model, key, sys_prompt, max_tk, temp, top_p],
        )


if __name__ == "__main__":
    # Assemble the Gradio UI: one tab per provider, sharing the same layout.
    with gr.Blocks() as demo:
        gr.Markdown("# LLM API Aggregation Deployment")
        _build_tab(
            "DeepSeek",
            deepseek,
            ["deepseek-chat", "deepseek-reasoner"],
            "deepseek-chat",
            "ds_api_key",
        )
        _build_tab(
            "Kimi",
            kimi,
            ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"],
            "moonshot-v1-32k",
            "kimi_api_key",
        )
    demo.queue().launch()