iohanngrig commited on
Commit
f39f2e1
β€’
1 Parent(s): 01ccd2f

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitignore +14 -0
  2. app.py +51 -0
  3. pages/1_using_LLM.py +41 -0
  4. pages/2_using_LLM_QA.py +50 -0
  5. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ .mypy_cache
3
+
4
+ data/
5
+ credential/
6
+ artifacts/
7
+ model/
8
+ .streamlit/
9
+ .streamlit/secrets.toml
10
+ *.toml
11
+
12
+ # ignore cache
13
+ *.pyc
14
+
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import torch
from transformers import pipeline
from utils.process_data import generate_chunks, pdf_to_text

# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

st.set_page_config(page_title="Summarizer", page_icon="βš–οΈ")
st.title("Summarize Text")
st.subheader("πŸš— πŸ”— Transformers Summarization Pipeline")

# Generation-length bounds for the summary. Renamed from `max`/`min`
# so the builtins of the same name are not shadowed.
max_len = st.slider('Select max', 50, 500, step=10, value=150)
min_len = st.slider('Select min', 10, 450, step=10, value=50)
do_sample = st.checkbox("Do sample", value=False)

sentence = st.text_area('Please paste your article:', height=50)
button = st.button("Summarize")


# cache_resource (not cache_data): the pipeline wraps a loaded model, a
# global resource that should be created once and shared, not pickled/copied.
@st.cache_resource
def load_summarizer():
    """Load the HF summarization pipeline named in st.secrets["SUM_MODEL"]."""
    return pipeline("summarization", model=st.secrets["SUM_MODEL"], device=device)


def _summarize(raw_text):
    """Chunk *raw_text*, summarize each chunk, and return the joined summary."""
    chunks = generate_chunks(raw_text)
    summarizer = load_summarizer()
    res = summarizer(chunks,
                     max_length=max_len,
                     min_length=min_len,
                     do_sample=do_sample)
    return ' '.join(summ['summary_text'] for summ in res)


with st.spinner("Generating Summary.."):
    if button and sentence:
        st.write(_summarize(sentence))

st.divider()

st.subheader('πŸš™πŸ”— Summarize PDF')
pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
button2 = st.button("Summarize PDF")

if pdf_path is not None and button2:
    text = pdf_to_text(pdf_path)
    with st.spinner("Generating PDF Summary.."):
        st.write(_summarize(text))
pages/1_using_LLM.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
# langchain_community path, matching the sibling QA page; the bare
# `from langchain import OpenAI` import was deprecated and removed upstream.
from langchain_community.llms import OpenAI
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from utils.process_data import pdf_to_text

# Model name read from Streamlit secrets, kept for parity with the other pages.
MODEL = st.secrets["MODEL4"]


def generate_response(txt):
    """Split *txt* into documents and summarize them with a map-reduce chain.

    Returns the chain's summary string.
    """
    llm = OpenAI(temperature=0.1, openai_api_key=st.secrets["OPENAI_API_KEY"])
    text_splitter = CharacterTextSplitter()
    texts = text_splitter.split_text(txt)
    docs = [Document(page_content=t) for t in texts]
    chain = load_summarize_chain(llm, chain_type='map_reduce')
    return chain.run(docs)


st.set_page_config(page_title="Summarizer with LLM", page_icon="βš–οΈ")
st.title("Summarize Text")
st.subheader('πŸš•πŸ”— LLM/LoadSummarizeChain')
sentence = st.text_area('Please paste your article:', height=100)
button = st.button("Summarize")

with st.spinner("Generating Summary.."):
    if button and sentence:
        response = generate_response(sentence)
        st.write(response)

st.divider()

st.subheader('πŸš™πŸ”— Summarize PDF')
pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
button2 = st.button("Summarize PDF")

if pdf_path is not None and button2:
    text = pdf_to_text(pdf_path)
    with st.spinner("Generating PDF Summary.."):
        response2 = generate_response(text)
        st.subheader('Summary Results:')
        st.write(response2)
pages/2_using_LLM_QA.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from langchain_community.chat_models import ChatOpenAI
from langchain_community.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain

from utils.process_data import process_text, pdf_to_text

# Chat model name, read from Streamlit secrets.
MODEL = st.secrets["MODEL4"]

st.set_page_config(page_title="Summarizer with LLM QA", page_icon="βš–οΈ")
st.title("Summarize Text")
st.subheader("πŸš— πŸ”— LLM/Question Answering")

# Word-count bounds interpolated into the summarization prompt below.
maxw = st.slider('MAX words', 50, 1000, step=10, value=200)
minw = st.slider('MIN words', 10, 500, step=10, value=50)

sentence = st.text_area('Please paste your article:', height=50)
button = st.button("Summarize")
# Prompt typo fixed: "more that" -> "more than".
query = f"Summarize the content of the uploaded PDF file in more than {minw} words and less than {maxw} words. Focus on capturing the main ideas and key points discussed in the document. Use your own words and ensure clarity and coherence in the summary."


def _run_qa_summary(knowledge_base):
    """Retrieve chunks relevant to *query* and answer it with a 'stuff' QA chain.

    Returns the chain's answer string; prints the OpenAI cost callback to
    the server log as a side effect.
    """
    docs = knowledge_base.similarity_search(query)
    llm = ChatOpenAI(model=MODEL, temperature=0.1, openai_api_key=st.secrets["OPENAI_API_KEY"])
    chain = load_qa_chain(llm, chain_type='stuff')
    with get_openai_callback() as cost:
        answer = chain.run(input_documents=docs, question=query)
        print(cost)  # token/cost accounting, server-side only
    return answer


with st.spinner("Generating Summary.."):
    if button and sentence:
        knowledgeBase = process_text(sentence)
        response = _run_qa_summary(knowledgeBase)
        st.subheader('Summary Results:')
        st.write(response)

st.divider()

st.subheader('πŸš™πŸ”— Summarize PDF')
pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
button2 = st.button("Summarize PDF")

if pdf_path is not None and button2:
    text = pdf_to_text(pdf_path)
    knowledgeBase = process_text(text)
    with st.spinner("Generating PDF Summary.."):
        response2 = _run_qa_summary(knowledgeBase)
        st.subheader('Summary Results:')
        st.write(response2)
requirements.txt ADDED
Binary file (23.6 kB). View file