kumar989 committed on
Commit
4cc1f8a
1 Parent(s): f575c3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -11
app.py CHANGED
@@ -1,18 +1,28 @@
1
  import os
2
  import PyPDF2
3
  from PIL import Image
4
- import pytesseract
5
 
6
- # Directory for storing PDF resumes and job applications
7
- pdf_directory = '/content/pdf_files'
 
 
 
 
 
 
 
 
8
 
9
- # Directory for storing extracted text from PDFs
10
- text_directory = '/content/extracted_text'
11
 
12
- # OCR output directory for scanned PDFs
13
- ocr_directory = '/content/ocr_output'
 
 
 
 
 
14
 
15
- # Create directories if they don't exist
16
- os.makedirs(pdf_directory, exist_ok=True)
17
- os.makedirs(text_directory, exist_ok=True)
18
- os.makedirs(ocr_directory, exist_ok=True)
 
1
  import os
2
  import PyPDF2
3
  from PIL import Image
4
+ import streamlit as st
5
 
6
def read_pdf(pdf):
    """Extract the text of every page in a PDF and return it as one string.

    Args:
        pdf: Whatever ``PyPDF2.PdfReader`` accepts as its source (typically a
            filesystem path or a binary file-like object).

    Returns:
        The extracted text of all pages, concatenated in page order with no
        separator inserted between pages. Pages that yield no text contribute
        an empty string.
    """
    reader = PyPDF2.PdfReader(pdf)
    # A page without extractable text (e.g. a scanned image) may yield an
    # empty or missing result; `or ""` keeps that from breaking the join.
    return "".join(page.extract_text() or "" for page in reader.pages)
16
 
 
 
17
 
18
def summarizer(text):
    """Return an abstractive summary of *text* generated by the ``t5-base`` model.

    Args:
        text: The document text to summarise. The tokenised input (including
            the ``"summarize: "`` task prefix) is truncated to 1000 tokens.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when *text* is empty or contains only whitespace.
    """
    # Nothing to summarise: skip loading the model, and avoid forcing it to
    # produce at least `min_length` tokens from no source text.
    if not text or not text.strip():
        return ""

    # Imported locally because the module's import block does not bring these
    # names into scope; without this the calls below raise NameError.
    from transformers import T5ForConditionalGeneration, T5TokenizerFast

    model = T5ForConditionalGeneration.from_pretrained("t5-base")
    tokenizer = T5TokenizerFast.from_pretrained("t5-base")

    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=1000,
        truncation=True,
    )
    outputs = model.generate(
        inputs,
        max_length=1000,
        min_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Strip markers such as <pad> and </s> so only the summary text is returned.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
25
 
26
+
27
# Page heading; the ':blue[...]' wrapper is Streamlit's colored-text markdown.
st.title(':blue[Abstractive Summarizer]')
# Attribution line; the underscores are markdown emphasis markers.
st.header('by: _Team_ _Rare_ _species_')