import gradio as gr
import torch
from wenet.cli.model import load_model
from huggingface_hub import hf_hub_download
#import spaces

REPO_ID = "Revai/reverb-asr"
files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
model = load_model(downloaded_files[0], downloaded_files[1])
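# hf_hub_download returns local cache paths: downloaded_files[0] is the
# TorchScript model archive and downloaded_files[1] the token-units file.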

def process_cat_embs(style):
    # Convert a comma-separated weight string (e.g. "0.3,0.7") into a
    # category-embedding tensor on the CPU.
    device = torch.device("cpu")
    cat_embs = torch.tensor([float(c) for c in style.split(',')]).to(device)
    return cat_embs
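
# Illustrative example (not part of the app flow): a style of 0.3 produces the
# weight string "0.3,0.7", which process_cat_embs maps to tensor([0.3, 0.7]).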

#@spaces.GPU
def transcribe_audio(audio, style=0):
    if not audio:
        return "Input error: please provide an audio file."
    # Build the two-category weight string [style, 1 - style] for the model.
    cat_embs = process_cat_embs(f'{style},{1-style}')
    result = model.transcribe(audio, cat_embs=cat_embs)
    if not result or 'text' not in result:
        return "Error: the model returned no text. Please try again."
    # Replace the separator token in the raw output with spaces.
    text_output = result['text'].replace('β', ' ')
    return text_output
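
# Illustrative usage outside Gradio (the path below is hypothetical):
#   transcribe_audio("sample.wav", style=0.5)
# returns the transcript as a string, or an error message if no text is produced.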

audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
style_slider = gr.Slider(0, 1, value=0, step=0.1, label="Transcription Style",
                         info="Adjust the transcription style: 0 (casual) to 1 (formal).")
output_textbox = gr.Textbox(label="Transcription Output")

description = "This tool transcribes audio using a customizable transcription style ranging from casual to formal. Upload or record an audio file to begin."

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[audio_input, style_slider],
    outputs=output_textbox,
    title="Audio Transcription",
    description=description,
    theme="default"
)

iface.launch()
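
# Note: launch() starts a local server by default; from a notebook or a
# restricted environment, iface.launch(share=True) can be used to get a
# temporary public link (a standard Gradio option, not specific to this app).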