#https://huggingface.co/spaces/Xuratron/abstract-speech-summarizer
# Here are the imports
import PyPDF2
import re
import torch
from transformers import pipeline
import soundfile as sf
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr
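# Assumed dependencies (inferred from the imports above, not pinned in the original):
# PyPDF2, torch, transformers, soundfile, fairseq, gradio.
# Pin exact versions in requirements.txt as needed.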
# Here is the code
def extract_and_clean_abstract(uploaded_file):
    """
    Extracts and cleans the abstract from the uploaded PDF file (given as a file path).
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""
    # Find the abstract: everything between "Abstract" and the next
    # "Introduction"/"Keywords" heading (case-insensitive).
    pattern = r"(Abstract)(.*?)(Introduction|Keywords)"
    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
    if match:
        abstract = match.group(2).strip()
    else:
        abstract = "Abstract not found."
    # Clean the abstract text: drop line breaks and hyphenation artifacts
    cleaned_abstract = abstract.replace('\n', ' ').replace('- ', '')
    return cleaned_abstract
def summarize_text(hf_model_name, text):
    """
    Summarizes the given text using a Hugging Face model.
    """
    summarizer = pipeline("summarization", model=hf_model_name)
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
    return summary
def text_to_speech(text):
    """
    Converts text to speech using a Hugging Face model.
    """
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",
        arg_overrides={"vocoder": "hifigan", "fp16": False}
    )
    model = models[0]
    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
    generator = task.build_generator([model], cfg)
    sample = TTSHubInterface.get_model_input(task, text)
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
    return wav, rate
def process_pdf(uploaded_file, hf_model_name):
    """
    Processes the uploaded PDF file to extract and summarize the abstract, then convert it to speech.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    summary = summarize_text(hf_model_name, abstract)
    wav, rate = text_to_speech(summary)
    # get_prediction returns a torch tensor; convert to NumPy before writing the WAV file
    sf.write('/tmp/speech_output.wav', wav.cpu().numpy(), rate)
    return '/tmp/speech_output.wav'
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath"),
        gr.Textbox(label="Hugging Face Model Name for Summarization")
    ],
    outputs=gr.Audio(label="Audio Summary", type="filepath"),
    title="PDF Abstract to Speech",
    description="Extracts and summarizes the abstract from a PDF file and converts it to speech."
)
if __name__ == "__main__":
    iface.launch()
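# Example usage (illustrative, assumed workflow): run `python app.py`, open the Gradio URL,
# upload a PDF containing an "Abstract" section, and enter a summarization model name
# such as "facebook/bart-large-cnn".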