Spaces:
Build error
Build error
llamazookeeper
committed on
Commit
β’
3d803ed
1
Parent(s):
d36b7c3
article
Browse files- app.py +68 -115
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,116 +1,69 @@
|
|
1 |
-
#
|
|
|
2 |
import streamlit as st
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
#
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
#
|
20 |
-
|
21 |
-
from
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
model=model,
|
70 |
-
tokenizer=tokenizer)
|
71 |
-
|
72 |
-
# Create and dl embeddings instance
|
73 |
-
embeddings=LangchainEmbedding(
|
74 |
-
HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
75 |
-
)
|
76 |
-
|
77 |
-
# Create new service context instance
|
78 |
-
service_context = ServiceContext.from_defaults(
|
79 |
-
chunk_size=1024,
|
80 |
-
llm=llm,
|
81 |
-
embed_model=embeddings
|
82 |
-
)
|
83 |
-
# And set the service context
|
84 |
-
set_global_service_context(service_context)
|
85 |
-
|
86 |
-
# Download PDF Loader
|
87 |
-
PyMuPDFReader = download_loader("PyMuPDFReader")
|
88 |
-
# Create PDF Loader
|
89 |
-
loader = PyMuPDFReader()
|
90 |
-
# Load documents
|
91 |
-
documents = loader.load(file_path=Path('/content/*.pdf'), metadata=True)
|
92 |
-
|
93 |
-
# Create an index - we'll be able to query this in a sec
|
94 |
-
index = VectorStoreIndex.from_documents(documents)
|
95 |
-
# Setup index query engine using LLM
|
96 |
-
query_engine = index.as_query_engine()
|
97 |
-
|
98 |
-
|
99 |
-
# Create centered main title
|
100 |
-
#st.title('π¦ Llama Banker')
|
101 |
-
# Create a text input box for the user
|
102 |
-
prompt = st.text_input('Input your prompt here')
|
103 |
-
|
104 |
-
# If the user hits enter
|
105 |
-
if prompt:
|
106 |
-
response = query_engine.query(prompt)
|
107 |
-
# ...and write it out to the screen
|
108 |
-
st.write(response)
|
109 |
-
|
110 |
-
# Display raw response object
|
111 |
-
with st.expander('Response Object'):
|
112 |
-
st.write(response)
|
113 |
-
# Display source text
|
114 |
-
with st.expander('Source Text'):
|
115 |
-
st.write(response.get_formatted_sources())
|
116 |
-
|
|
|
1 |
+
# import
|
2 |
+
from tensorflow.python.keras.utils.generic_utils import default
|
3 |
import streamlit as st
|
4 |
+
from newspaper import Article
|
5 |
+
from transformers import pipeline
|
6 |
+
|
7 |
+
# set config
|
8 |
+
st.set_page_config(layout="wide", page_title="SummarizeLink")
|
9 |
+
|
10 |
+
# load the summarization model (cache for faster loading)
|
11 |
+
@st.cache(allow_output_mutation=True)
|
12 |
+
def load_summarize_model():
|
13 |
+
# model = pipeline("summarization", model='sshleifer/distilbart-cnn-12-6')
|
14 |
+
model = pipeline("summarization")
|
15 |
+
return model
|
16 |
+
|
17 |
+
# loading the model
|
18 |
+
summ = load_summarize_model()
|
19 |
+
|
20 |
+
# define the down functions
|
21 |
+
def download_and_parse_article(url):
|
22 |
+
"""Downloads and parses an article from a URL.
|
23 |
+
Parameters
|
24 |
+
----------
|
25 |
+
url : str
|
26 |
+
The URL of the article to download and parse.
|
27 |
+
Returns
|
28 |
+
-------
|
29 |
+
article : newspaper.Article
|
30 |
+
The article downloaded and parsed.
|
31 |
+
"""
|
32 |
+
# define the article
|
33 |
+
article = Article(url)
|
34 |
+
# download and parse the article
|
35 |
+
article.download()
|
36 |
+
article.parse()
|
37 |
+
# return the article
|
38 |
+
return article.text
|
39 |
+
|
40 |
+
# APP
|
41 |
+
# set title and subtitle
|
42 |
+
st.title("SummarizeLink")
|
43 |
+
st.markdown("Paste any article link below and click on the 'Summarize' button.")
|
44 |
+
st.markdown("*Note:* We truncate the text incase the article is lengthy! π")
|
45 |
+
# create the input text box and setting panel
|
46 |
+
link = st.text_area('Paste your link here...', "https://towardsdatascience.com/a-guide-to-the-knowledge-graphs-bfb5c40272f1", height=50)
|
47 |
+
button = st.button("Summarize")
|
48 |
+
min_length = st.sidebar.slider('Min summary length', min_value=10, max_value=100, value=50, step=10)
|
49 |
+
max_length = st.sidebar.slider('Max summary length', min_value=30, max_value=700, value=100, step=10)
|
50 |
+
num_beams = st.sidebar.slider('Beam length', min_value=1, max_value=10, value=5, step=1)
|
51 |
+
|
52 |
+
# if button is clicked
|
53 |
+
with st.spinner("Parsing article and Summarizing..."):
|
54 |
+
if button and link:
|
55 |
+
# get the text
|
56 |
+
text = download_and_parse_article(link)
|
57 |
+
# summarize the text
|
58 |
+
summary = summ(text,
|
59 |
+
truncation=True,
|
60 |
+
max_length = max_length,
|
61 |
+
min_length = min_length,
|
62 |
+
num_beams=num_beams,
|
63 |
+
do_sample=True,
|
64 |
+
early_stopping=True,
|
65 |
+
repetition_penalty=1.5,
|
66 |
+
length_penalty=1.5)[0]
|
67 |
+
# display the summary
|
68 |
+
st.markdown("**Summary:**")
|
69 |
+
st.write(summary['summary_text'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ transformers
|
|
5 |
accelerate
|
6 |
bitsandbytes
|
7 |
requests
|
|
|
|
5 |
accelerate
|
6 |
bitsandbytes
|
7 |
requests
|
8 |
+
newspaper3k
|