Subhang Bhatti committed on
Commit
659741a
1 Parent(s): 2ff1f41

Add Gradio implementation

Browse files
Files changed (2) hide show
  1. app.py +117 -0
  2. requirements.txt +133 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nltk
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from fake_headers import Headers
6
+ from newspaper import Article
7
+ from nltk.tokenize import sent_tokenize
8
+ from summa.summarizer import summarize
9
+ from transformers import pipeline
10
+
11
+ nltk.download("punkt")
12
+
13
+
14
def get_news(stock, n_results):
    """Scrape Google News for a stock and return the text of matching articles.

    Args:
        stock: Company name or ticker; the search query is
            ``"<stock> stock news"``.
        n_results: Maximum number of articles to collect. It may be
            fractional; collection stops as soon as the number of kept
            articles reaches or exceeds it.

    Returns:
        A list of article body strings, each longer than 50 words. The list
        is shorter than requested (possibly empty) when too few usable
        articles are found.

    Raises:
        requests.RequestException: If the search request itself fails or
            times out. Failures on individual articles are skipped.
    """
    # Randomised browser-like headers from fake_headers.
    headers = Headers(headers=True).generate()
    params = {"q": stock + " stock news"}

    response = requests.get(
        "https://news.google.com/search",
        headers=headers,
        params=params,
        timeout=10,  # never let a stalled connection hang the request forever
    )
    soup = BeautifulSoup(response.text, "lxml")

    # NOTE(review): "VDXfz" is a CSS class taken from Google News markup and
    # the hrefs are presumably relative ("./articles/..."), hence dropping the
    # first character before prefixing the host -- confirm against live HTML.
    urls = [
        "https://news.google.com" + elem["href"][1:]
        for elem in soup.find_all(class_="VDXfz")
        if elem.get("href")
    ]

    articles = []
    for url in urls:
        # ">=" rather than "==": a fractional limit such as 2.4 would never
        # compare equal to an integer count and every URL would be fetched.
        if len(articles) >= n_results:
            break
        try:
            article = Article(url)
            article.download()
            article.parse()
        except Exception:
            # Best effort: one unreachable or unparsable article must not
            # abort the whole search. KeyboardInterrupt/SystemExit still
            # propagate, unlike with a bare ``except``.
            continue

        text = article.text
        # Skip stubs and paywall teasers that carry too little content.
        if len(text.split()) > 50:
            articles.append(text)

    return articles
50
+
51
+
52
def gen_corpus(articles, stock, length):
    """Condense the sentences that mention the stock into a short corpus.

    Args:
        articles: Iterable of article body strings.
        stock: Name to look for; matching is a case-insensitive substring
            test on each sentence.
        length: Word budget forwarded to ``summarize`` as ``words``.

    Returns:
        The string produced by ``summarize`` over the selected sentences.
    """
    needle = stock.lower()

    # Keep only sentences of more than five words that mention the stock.
    relevant = [
        sent
        for article in articles
        for sent in sent_tokenize(article)
        if len(sent.split()) > 5 and needle in sent.lower()
    ]

    # Join with a space: gluing sentences together directly ("...end.Next...")
    # fuses the last and first words and hides the sentence boundary from the
    # summariser.
    raw_corpus = " ".join(relevant)

    return summarize(raw_corpus, words=length)
64
+
65
+
66
# Holds the summarization pipeline so it is constructed once per process.
_SUMMARIZER_CACHE = {}


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    if "pipeline" not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE["pipeline"] = pipeline(
            "summarization", model="sshleifer/distilbart-cnn-12-6"
        )
    return _SUMMARIZER_CACHE["pipeline"]


def gen_summary(corpus, length):
    """Summarise the corpus and format the result as a dashed bullet list.

    Args:
        corpus: Text to summarise; only the first 1024 characters are used.
        length: Target summary length. The model is asked for between 80%
            and 120% of this value (both truncated to integers).

    Returns:
        A string with one ``-<sentence>`` line per summary sentence, each
        terminated by a newline.
    """
    # Reuse the pipeline instead of rebuilding it on every request.
    summarizer = _get_summarizer()

    min_length = int(0.8 * length)
    max_length = int(1.2 * length)

    # NOTE(review): the 1024 cap is in characters; presumably it is meant to
    # keep the input within the model's limit -- confirm the intended unit.
    raw_summary = summarizer(
        corpus[:1024], min_length=min_length, max_length=max_length
    )[0]["summary_text"]

    return "".join(f"-{sent}\n" for sent in sent_tokenize(raw_summary))
82
+
83
+
84
# Holds the sentiment pipeline so it is constructed once per process.
_SENTIMENT_CACHE = {}

# Model labels mapped to the market vocabulary shown to the user; any label
# not listed here is reported as "neutral".
_SENTIMENT_NAMES = {"positive": "bull", "negative": "bear"}


def _get_sentiment_analyzer():
    """Return the shared sentiment pipeline, building it on first use."""
    if "pipeline" not in _SENTIMENT_CACHE:
        _SENTIMENT_CACHE["pipeline"] = pipeline(
            "sentiment-analysis", model="ProsusAI/finbert"
        )
    return _SENTIMENT_CACHE["pipeline"]


def gen_sentiment(corpus):
    """Score the corpus and return bull/bear/neutral confidences.

    Args:
        corpus: Text to classify; only the first 512 characters are used.

    Returns:
        A dict mapping ``"bull"``, ``"bear"`` and/or ``"neutral"`` to the
        score the model reported for the corresponding label.
    """
    # Reuse the pipeline instead of rebuilding it on every request.
    analyzer = _get_sentiment_analyzer()

    # With return_all_scores=True the first result is read as a list of
    # {"label", "score"} dicts, one per class.
    raw_sentiment = analyzer(corpus[:512], return_all_scores=True)

    return {
        _SENTIMENT_NAMES.get(entry["label"], "neutral"): entry["score"]
        for entry in raw_sentiment[0]
    }
104
+
105
+
106
def gen_report(stock, length):
    """Produce a news summary and sentiment scores for a stock.

    Args:
        stock: Company name or ticker to search for.
        length: Desired summary length, used to size every stage: one
            article per five units (rounded up), a corpus of three times
            this many words, and a summary of roughly this length.

    Returns:
        A ``(summary, sentiment)`` tuple: the bullet-list summary string and
        the dict returned by ``gen_sentiment``.

    Raises:
        ValueError: If no usable text mentioning the stock was found.
    """
    import math

    # The article count must be a whole number; a fractional value such as
    # 12 / 5 can never equal an integer counter downstream.
    n_articles = math.ceil(length / 5)

    news = get_news(stock, n_articles)
    corpus = gen_corpus(news, stock, length * 3)

    # With no matching text there is nothing real to summarise or score, so
    # fail loudly rather than run the models on an empty string.
    if not corpus.strip():
        raise ValueError(f"No relevant news found for {stock!r}")

    summary = gen_summary(corpus, length)
    sentiment = gen_sentiment(corpus)

    return summary, sentiment
113
+
114
+
115
# Build the UI: a text box for the stock and a number for the summary length
# in, a text summary and a label (sentiment scores) out.
# Bind ``iface`` to the Interface itself; chaining ``.launch()`` onto the
# constructor would store launch()'s return value under that name instead.
iface = gr.Interface(
    fn=gen_report, inputs=["text", "number"], outputs=["text", "label"]
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # These requirements were autogenerated by pipenv
3
+ # To regenerate from the project's Pipfile, run:
4
+ #
5
+ # pipenv lock --requirements
6
+ #
7
+
8
+ -i https://pypi.org/simple
9
+ absl-py==1.0.0; python_version >= '3.6'
10
+ aiohttp==3.8.1; python_version >= '3.6'
11
+ aiosignal==1.2.0; python_version >= '3.6'
12
+ analytics-python==1.4.0
13
+ anyio==3.5.0; python_full_version >= '3.6.2'
14
+ asgiref==3.5.0; python_version >= '3.7'
15
+ astunparse==1.6.3
16
+ async-timeout==4.0.2; python_version >= '3.6'
17
+ attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
18
+ backoff==1.10.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
19
+ bcrypt==3.2.0; python_version >= '3.6'
20
+ beautifulsoup4==4.10.0
21
+ black==22.1.0
22
+ bs4==0.0.1
23
+ cachetools==5.0.0; python_version ~= '3.7'
24
+ certifi==2021.10.8
25
+ cffi==1.15.0
26
+ charset-normalizer==2.0.12; python_version >= '3'
27
+ click==8.0.4; python_version >= '3.6'
28
+ cryptography==36.0.1; python_version >= '3.6'
29
+ cssselect==1.1.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
30
+ cycler==0.11.0; python_version >= '3.6'
31
+ fake-headers==1.0.2
32
+ fastapi==0.74.1; python_full_version >= '3.6.1'
33
+ feedfinder2==0.0.4
34
+ feedparser==6.0.8; python_version >= '3.6'
35
+ ffmpy==0.3.0
36
+ filelock==3.6.0; python_version >= '3.7'
37
+ flatbuffers==2.0
38
+ fonttools==4.29.1; python_version >= '3.7'
39
+ frozenlist==1.3.0; python_version >= '3.7'
40
+ gast==0.5.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
41
+ google-auth-oauthlib==0.4.6; python_version >= '3.6'
42
+ google-auth==2.6.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
43
+ google-pasta==0.2.0
44
+ gradio==2.8.5
45
+ grpcio==1.44.0; python_version >= '3.6'
46
+ h11==0.13.0; python_version >= '3.6'
47
+ h5py==3.6.0; python_version >= '3.7'
48
+ html5lib==1.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
49
+ huggingface-hub==0.4.0; python_version >= '3.6'
50
+ idna==3.3; python_version >= '3'
51
+ importlib-metadata==4.11.2; python_version < '3.10'
52
+ isort==5.10.1
53
+ jieba3k==0.35.1
54
+ jinja2==3.0.3
55
+ joblib==1.1.0; python_version >= '3.6'
56
+ keras-preprocessing==1.1.2
57
+ keras==2.8.0
58
+ kiwisolver==1.3.2; python_version >= '3.7'
59
+ libclang==13.0.0
60
+ linkify-it-py==1.0.3
61
+ lxml==4.8.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
62
+ markdown-it-py[linkify,plugins]==2.0.1; python_version ~= '3.6'
63
+ markdown==3.3.6; python_version >= '3.6'
64
+ markupsafe==2.1.0; python_version >= '3.7'
65
+ matplotlib==3.5.1; python_version >= '3.7'
66
+ mdit-py-plugins==0.3.0
67
+ mdurl==0.1.0; python_version >= '3.6'
68
+ monotonic==1.6
69
+ multidict==6.0.2; python_version >= '3.7'
70
+ mypy-extensions==0.4.3
71
+ newspaper3k==0.2.8
72
+ nltk==3.7
73
+ numpy==1.22.2; python_version >= '3.8'
74
+ oauthlib==3.2.0; python_version >= '3.6'
75
+ opt-einsum==3.3.0; python_version >= '3.5'
76
+ orjson==3.6.7; python_version >= '3.7'
77
+ packaging==21.3; python_version >= '3.6'
78
+ pandas==1.4.1; python_version >= '3.8'
79
+ paramiko==2.9.2
80
+ pathspec==0.9.0
81
+ pillow==9.0.1; python_version >= '3.7'
82
+ platformdirs==2.5.1; python_version >= '3.7'
83
+ protobuf==3.19.4; python_version >= '3.5'
84
+ pyasn1-modules==0.2.8
85
+ pyasn1==0.4.8
86
+ pycparser==2.21
87
+ pycryptodome==3.14.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
88
+ pydantic==1.9.0; python_full_version >= '3.6.1'
89
+ pydub==0.25.1
90
+ pynacl==1.5.0; python_version >= '3.6'
91
+ pyparsing==3.0.7; python_version >= '3.6'
92
+ python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
93
+ python-multipart==0.0.5
94
+ pytz==2021.3
95
+ pyyaml==6.0; python_version >= '3.6'
96
+ regex==2022.1.18
97
+ requests-file==1.5.1
98
+ requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
99
+ requests==2.27.1
100
+ rsa==4.8; python_version >= '3.6'
101
+ sacremoses==0.0.47
102
+ scipy==1.8.0; python_version < '3.11' and python_version >= '3.8'
103
+ setuptools==60.9.3; python_version >= '3.7'
104
+ sgmllib3k==1.0.0
105
+ six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
106
+ sniffio==1.2.0; python_version >= '3.5'
107
+ soupsieve==2.3.1; python_version >= '3.6'
108
+ starlette==0.17.1; python_version >= '3.6'
109
+ summa==1.2.0
110
+ tensorboard-data-server==0.6.1; python_version >= '3.6'
111
+ tensorboard-plugin-wit==1.8.1
112
+ tensorboard==2.8.0; python_version >= '3.6'
113
+ tensorflow-io-gcs-filesystem==0.24.0; python_version < '3.11' and python_version >= '3.7'
114
+ tensorflow==2.8.0
115
+ termcolor==1.1.0
116
+ tf-estimator-nightly==2.8.0.dev2021122109
117
+ tinysegmenter==0.3
118
+ tldextract==3.2.0; python_version >= '3.7'
119
+ tokenizers==0.11.5
120
+ tomli==2.0.1; python_version >= '3.7'
121
+ torch==1.10.2
122
+ tqdm==4.62.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
123
+ transformers==4.16.2
124
+ typing-extensions==4.1.1; python_version < '3.10'
125
+ uc-micro-py==1.0.1; python_version >= '3.6'
126
+ urllib3==1.26.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
127
+ uvicorn==0.17.5; python_version >= '3.7'
128
+ webencodings==0.5.1
129
+ werkzeug==2.0.3; python_version >= '3.6'
130
+ wheel==0.37.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
131
+ wrapt==1.13.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
132
+ yarl==1.7.2; python_version >= '3.6'
133
+ zipp==3.7.0; python_version >= '3.7'