aps19 committed on
Commit
d239c1e
1 Parent(s): f32ed8b

added application file

Browse files
Files changed (2) hide show
  1. app.py +60 -0
  2. requirements.txt +18 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
5
+ from transformers import pipeline
6
+ import base64
7
+ from huggingface_hub import login
8
+ import torch
9
+ import fitz # PyMuPDF
10
+
11
+
12
# Model and tokenizer loading.
# Alternative checkpoint (disabled): "model/google-flan-t5-base"
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"

# Both objects are created once at import time and reused by llm_pipeline().
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
base_model = T5ForConditionalGeneration.from_pretrained(
    checkpoint,
    device_map='auto',
    torch_dtype=torch.float32,
)
20
+
21
+ # LLM pipeline
22
def llm_pipeline(pdf_contents):
    """Summarize the text of a PDF supplied as in-memory bytes.

    Args:
        pdf_contents: Raw contents of a PDF file, handed to PyMuPDF through
            its ``stream=`` argument.

    Returns:
        The ``summary_text`` field of the first result returned by the
        summarization pipeline.
    """
    # Open the document as a context manager so it is closed even when text
    # extraction raises; the original never closed it.
    with fitz.open(stream=pdf_contents, filetype="pdf") as pdf_document:
        # Join once instead of growing a string with += on every page.
        pdf_text = "".join(page.get_text() for page in pdf_document)

    # Build the summarization pipeline around the module-level model/tokenizer.
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )

    result = pipe_sum(pdf_text)
    return result[0]['summary_text']
42
+
43
# Streamlit code
st.set_page_config(layout="wide")


def main():
    """Render the upload form and show a summary of the uploaded PDF.

    Shows a title and a PDF uploader; once a file is present and the
    "Summarize" button is pressed, the file's bytes are passed to
    ``llm_pipeline`` and the resulting summary is displayed.
    """
    st.title("Document Summarization App using Language Model")

    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    # Guard clauses: nothing to do until a file is uploaded and the button
    # has been pressed on this run.
    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    # getvalue() returns the whole buffer regardless of the current stream
    # position, whereas read() yields nothing once the stream was consumed.
    pdf_bytes = uploaded_file.getvalue()

    # Summarization can take a while; show a spinner while it runs.
    with st.spinner("Summarizing..."):
        summary = llm_pipeline(pdf_bytes)

    # Display the summary
    st.info("Summarization Complete")
    st.success(summary)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
+ langchain
4
+ sentence_transformers
5
+ torch
6
+ sentencepiece
7
+ transformers
8
+ accelerate
9
+ chromadb
10
+ pypdf
11
+ tiktoken
12
+ streamlit
13
+ fastapi
14
+ uvicorn
15
+ python-multipart
16
+ aiofiles
17
+ pdfminer.six
18
+ pymupdf