harish199 committed on
Commit
52cdc58
1 Parent(s): f8dbc1d
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain.chains.summarize import load_summarize_chain
5
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
6
+ from transformers import pipeline
7
+ import torch
8
+ import base64
9
+
10
# Load the tokenizer and the T5 model once, when the module is imported, so
# the functions below can share them.
# NOTE(review): the checkpoint name has no organisation prefix, so it is
# presumably a local directory next to this script -- confirm.
checkpoint = "LaMini-Flan-T5-248M"
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
base_model = T5ForConditionalGeneration.from_pretrained(
    checkpoint,
    device_map='auto',
    torch_dtype=torch.float32,
)
14
+
15
+ #file loader and preprocessing
16
def file_preprocessing(file):
    """Load a PDF and return its text as a single string.

    The PDF at ``file`` is loaded with ``PyPDFLoader``, re-split into chunks
    of at most 200 characters (with a 50-character overlap) by
    ``RecursiveCharacterTextSplitter``, and the chunk texts are concatenated
    in order with no separator.

    Args:
        file: Path of the PDF file to read.

    Returns:
        str: The concatenated ``page_content`` of every chunk, or an empty
        string when the splitter yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): the chunks are created with a 50-character overlap and
    # then glued back together, so text near chunk boundaries may appear twice
    # in the result -- confirm this is intended before changing the splitter.
    # A single join avoids rebuilding the growing string once per chunk.
    return "".join(chunk.page_content for chunk in chunks)
26
+
27
+ #LLM pipeline
28
def llm_pipeline(filepath):
    """Summarize the PDF at ``filepath`` with the module-level model.

    A ``'summarization'`` pipeline is built from ``base_model`` and
    ``tokenizer`` (summary length bounded to 50-500), the document text is
    obtained from ``file_preprocessing``, and the first result's
    ``'summary_text'`` is returned.

    Args:
        filepath: Path of the PDF file to summarize.

    Returns:
        The ``'summary_text'`` entry of the first pipeline result.
    """
    summarizer = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    document_text = file_preprocessing(filepath)
    outputs = summarizer(document_text)
    return outputs[0]['summary_text']
39
+
40
# Function to display the PDF of a given file.
def displayPDF(file):
    """Render the PDF stored at ``file`` inline in the Streamlit page.

    The file is read as bytes, base64-encoded, and embedded in an ``<iframe>``
    through a ``data:application/pdf`` URL that is emitted with
    ``st.markdown``.

    This function is deliberately not wrapped in ``st.cache_data``: a cache
    entry would be keyed on the path string only, so a different PDF written
    to the same path would keep replaying the first document rendered.

    Args:
        file: Path of the PDF file to display.

    Returns:
        None. The iframe is written to the page as a side effect.
    """
    # Read the file from its path and encode it for a data URL.
    with open(file, "rb") as pdf_file:
        encoded_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')

    # Embed the PDF in HTML.
    pdf_display = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # Raw HTML must be explicitly allowed for the iframe to be rendered.
    st.markdown(pdf_display, unsafe_allow_html=True)
52
+
53
# Streamlit code.
# The page layout is configured at import time, before main() issues any
# other Streamlit command.
st.set_page_config(layout="wide")


def main():
    """Run the Streamlit UI: upload a PDF, preview it, and show its summary.

    Once a PDF has been uploaded and the "Summarize" button is pressed, the
    upload is written to disk, shown in the left column with ``displayPDF``,
    and summarized in the right column with ``llm_pipeline``.

    The upload is stored in the system temp directory under a name derived
    from the SHA-256 of its contents, so different documents never share a
    path and the same document reuses the same file.
    """
    import hashlib
    import os
    import tempfile

    st.title("Document Summarization App using Language Model")

    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    # Guard clauses: nothing to do until a file is uploaded and the button is
    # pressed.
    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    col1, col2 = st.columns(2)

    # getvalue() returns the whole upload regardless of the stream position.
    pdf_bytes = uploaded_file.getvalue()
    digest = hashlib.sha256(pdf_bytes).hexdigest()
    filepath = os.path.join(tempfile.gettempdir(), f"{digest}.pdf")
    with open(filepath, "wb") as temp_file:
        temp_file.write(pdf_bytes)

    with col1:
        st.info("Uploaded File")
        displayPDF(filepath)

    with col2:
        summary = llm_pipeline(filepath)
        st.info("Summarization Complete")
        st.success(summary)


if __name__ == "__main__":
    main()