Spaces:

mrsk1883
/

AAIapp

Sleeping

App Files Files Community

mrsk1883 committed on Dec 8, 2023

Commit

97b49ea

•

1 Parent(s): 1636cb6

Create utils.py

Browse files

Files changed (1) hide show

utils.py +58 -0

utils.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from PyPDF2 import PdfReader
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from gtts import gTTS
+import os
+# Load the summarization checkpoint and its matching tokenizer once, when this
+# module is imported, and keep them as the module-level names `model` and
+# `tokenizer`.
+# NOTE(review): from_pretrained presumably downloads the checkpoint on first
+# use, so importing this module may be slow — confirm for the deployment.
+model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+def summarize_and_speak_pdf_abstract(pdf_path):
+    """
+    Reads a PDF file, extracts the abstract, summarizes it in one sentence, and generates an audio file of the summary.
+    Args:
+        pdf_path: Path to the PDF file.
+    """
+    # Summarize the abstract
+    summary = summarize_pdf_abstract(pdf_path)
+    # Define language and audio format
+    language = "en"  # Change this to your desired language
+    audio_format = "mp3"
+    # Create the text-to-speech object
+    tts = gTTS(text=summary, lang=language)
+    # Generate the audio file
+    audio_file_name = f"summary.{audio_format}"
+    tts.save(audio_file_name)
+    print(f"Audio file created: {audio_file_name}")
+    # Play the audio file (optional)
+    # os.system(f"play {audio_file_name}")
+def summarize_pdf_abstract(pdf_path):
+    """
+    Reads a PDF file, extracts the abstract, and summarizes it in one sentence.
+    Args:
+        pdf_path: Path to the PDF file.
+    Returns:
+        A string containing the one-sentence summary of the abstract.
+    """
+    # Read the PDF file
+    reader = PdfReader(open(pdf_path, "rb"))
+    # Extract the abstract
+    abstract_text = ""
+    for page in reader.pages:
+        # Search for keywords like "Abstract" or "Introduction"
+        if (
+            "Abstract" in page.extract_text