Spaces:

kumar989
/

Abstractive_Summarizer

Runtime error

kumar989 committed on Oct 25, 2023

Commit

4cc1f8a

1 Parent(s): f575c3b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,28 @@
 import os
 import PyPDF2
 from PIL import Image
-import pytesseract
-# Directory for storing PDF resumes and job applications
-pdf_directory = '/content/pdf_files'
-# Directory for storing extracted text from PDFs
-text_directory = '/content/extracted_text'
-# OCR output directory for scanned PDFs
-ocr_directory = '/content/ocr_output'
-# Create directories if they don't exist
-os.makedirs(pdf_directory, exist_ok=True)
-os.makedirs(text_directory, exist_ok=True)
-os.makedirs(ocr_directory, exist_ok=True)

 import os
 import PyPDF2
 from PIL import Image
+import streamlit as st
+def read_pdf(pdf):
+    reader=PyPDF2.PdfReader(pdf)
+    text=''
+    for page in reader.pages:
+        text+=page.extract_text()
+    # text_file_name = 'text.txt'
+    # text_file_path = '/content/text.txt'
+    # with open(text_file_path, 'w') as text_file:
+    #     text_file.write(text)
+    return text
+def summarizer(text):
+    model = T5ForConditionalGeneration.from_pretrained("t5-base")
+    tokenizer = T5TokenizerFast.from_pretrained("t5-base")
+    inputs = tokenizer.encode("summarize: " + text,return_tensors="pt", max_length=1000,truncation=True)
+    outputs = model.generate(inputs,max_length=1000, min_length=100,length_penalty=2.0, num_beams=4,early_stopping=True)
+    summary = tokenizer.decode(outputs[0])
+    return summary
+st.title(':blue[Abstractive Summarizer]')
+st.header('by: _Team_ _Rare_ _species_')