Tuana committed
Commit 4de8fd3
1 Parent(s): 3645e70

first attempt

Files changed (2)
  1. app.py +32 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,32 @@
+ import streamlit as st
+ from haystack.nodes.connector import Crawler
+ from haystack.nodes import TransformersSummarizer
+ from haystack.schema import Document
+ import validators
+ import json
+
+ output_dir = "crawled_files"
+ crawler = Crawler(output_dir=output_dir)
+
+ summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
+
+ documents = []
+
+ def crawl_url_and_write_content(url):
+     # Crawl only the requested page (crawler_depth=0); the crawler writes each page to output_dir as a JSON file
+     docs = crawler.crawl(urls=[url], crawler_depth=0, overwrite_existing_files=True)
+     documents.clear()
+     for doc in docs:
+         # Load each crawled JSON file and wrap it in a Haystack Document so the summarizer can consume it
+         with doc.open() as f:
+             documents.append(Document.from_dict(json.load(f)))
+
+ # Streamlit App
+
+ st.title('Summarizer Demo with Haystack Summarizer')
+
+ url_text = st.text_input("Please enter a URL here", value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html")
+
+ if validators.url(url_text):
+     crawl_url_and_write_content(url_text)
+
+ summarize = st.button('Summarize')
+
+ if summarize:
+     summary = summarizer.predict(documents=documents)
+     st.write(summary)
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ farm-haystack==1.4.0
+ validators==0.18.2