# https://huggingface.co/spaces/Xuratron/abstract-speech-summarizer

# Here are the imports
import re

import gradio as gr
import PyPDF2
import soundfile as sf
import torch
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
from transformers import pipeline


# Here is the code
def extract_and_clean_abstract(pdf_path):
    """
    Extracts and cleans the abstract from the uploaded PDF file.
    """
    reader = PyPDF2.PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""

    # Capture the text between "Abstract" and the next section marker
    # (Introduction, Keywords, or a "1"-numbered heading on its own line).
    pattern = r"abstract(.*?)(?:introduction|keywords|\n1[\s.])"
    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
    abstract = match.group(1).strip() if match else "Abstract not found."

    # Clean the abstract text: join lines and undo hyphenation at line breaks
    cleaned_abstract = abstract.replace("\n", " ").replace("- ", "")
    return cleaned_abstract


def summarize_text(hf_model_name, text):
    """
    Summarizes the given text using a Hugging Face summarization model.
    """
    summarizer = pipeline("summarization", model=hf_model_name)
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
    return summary


def text_to_speech(text):
    """
    Converts text to speech with the FastSpeech 2 model from fairseq.
    """
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",
        arg_overrides={"vocoder": "hifigan", "fp16": False},
    )
    model = models[0]
    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
    generator = task.build_generator([model], cfg)

    sample = TTSHubInterface.get_model_input(task, text)
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
    return wav, rate


def process_pdf(pdf_path, hf_model_name):
    """
    Extracts and summarizes the abstract from the uploaded PDF,
    then converts the summary to speech.
    """
    abstract = extract_and_clean_abstract(pdf_path)
    summary = summarize_text(hf_model_name, abstract)
    wav, rate = text_to_speech(summary)

    # fairseq returns a torch tensor; soundfile expects a NumPy array
    output_path = "/tmp/speech_output.wav"
    sf.write(output_path, wav.cpu().numpy(), rate)
    return output_path


iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath"),
        gr.Textbox(label="Hugging Face Model Name for Summarization"),
    ],
    outputs=gr.Audio(label="Audio Summary", type="filepath"),
    title="PDF Abstract to Speech",
    description="Extracts and summarizes the abstract from a PDF file and converts it to speech.",
)

if __name__ == "__main__":
    iface.launch()
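
# A rough sketch of the requirements.txt this Space would need, derived from the
# imports above (package names only; exact version pins are assumptions and
# depend on which fairseq/PyPDF2/gradio releases the Space was built against):
#
#   PyPDF2
#   torch
#   transformers
#   soundfile
#   fairseq
#   gradio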