ndiy committed on
Commit
d08bb4e
•
1 Parent(s): c2286d1

add application file

Browse files
Files changed (2) hide show
  1. app.py +75 -0
  2. pages/01_debug.py +37 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import streamlit as st
16
+ from streamlit.logger import get_logger
17
+ import pandas as pd
18
+ from utils import plotly_line_chart
19
+ import plotly.graph_objects as go
20
+
21
+
22
# Module-level logger, created through Streamlit's logging helper.
LOGGER = get_logger(__name__)
23
+
24
+
25
def _rolling_ratio(numerator, denominator, window):
    """Return rolling-sum(numerator) / rolling-sum(denominator).

    Both operands are summed over the same ``window`` so the result is a
    true proportion whatever window size is chosen.
    """
    return numerator.rolling(window).sum() / denominator.rolling(window).sum()


def run():
    """Render the restaurant review analysis dashboard.

    Reads the pickled ``reviews`` and ``stats`` tables from the working
    directory, builds rolling trends from them and draws:

    * sentiment / emotion trend lines,
    * the rolling mean rating,
    * per-aspect positivity for the aspects picked in the sidebar,
    * a bar chart of the most frequently mentioned aspects.

    The sidebar controls the first year shown and the rolling window size.
    """
    st.set_page_config(
        page_title="Restaurant review analysis",
        page_icon="👋",
    )

    st.write("# Restaurant review analysis")
    start_year = st.sidebar.slider('Year', 2016, 2023, 2017)
    smooth = st.sidebar.selectbox('Rolling window', [7, 14, 30, 60, 90], index=2)

    # NOTE(review): unpickling executes arbitrary code; 'reviews' must be a
    # trusted, locally produced file.
    reviews = pd.read_pickle('reviews')
    by_period = reviews.groupby('period')
    scored = by_period['s'].count()  # non-null sentiment values per period
    trend = pd.DataFrame(
        {
            # Numerator and denominator share the selected window; a fixed
            # 30-period denominator skewed the ratio for any other window.
            'sentiment': _rolling_ratio(by_period['s'].sum(), scored, smooth),
            'emotion': _rolling_ratio(scored, by_period['starRating'].count(), smooth),
            'rating': by_period['rating'].mean().rolling(smooth).mean(),
        }
    )[str(start_year):]
    trend.index = pd.to_datetime(trend.index.astype(str))

    with st.expander('Customer sentiment and emotions', expanded=True):
        st.write('''emotion defined as percentage of response with review comment''')
        plotly_line_chart(
            trend,
            columns=['sentiment', 'emotion'],
            styles={'emotion': dict(dash='dot', color=('rgb(128, 128, 128)'))},
        )
    with st.expander('score rating', expanded=False):
        # NOTE(review): the yaxis mapping names 'sentiment', which is not
        # plotted here; kept as-is because plotly_line_chart is defined
        # elsewhere - confirm whether it can be dropped.
        plotly_line_chart(
            trend,
            columns=['rating'],
            yaxis={'sentiment': 'y2'},
        )
        # NOTE(review): original nesting was lost; assumed the raw trend
        # table belongs inside this expander - confirm.
        st.write(trend)

    # NOTE(review): same pickle trust caveat as for 'reviews'.
    absa = pd.read_pickle('stats')
    cols = st.sidebar.multiselect(
        'customer view point',
        options=['food', 'service', 'atmosphere', 'staff', 'dish', 'price',
                 'restaurant', 'owner', 'cuisine', 'rice', 'drinks'],
        default=['food', 'service', 'price'],
    )

    aspects_by_period = absa.groupby('Period')
    positivity = _rolling_ratio(
        aspects_by_period.sum(), aspects_by_period.count(), smooth
    )
    positivity = positivity[str(start_year):]
    # Only plot the selected aspects that actually exist as columns.
    new_cols = [c for c in cols if c in positivity]
    positivity.index = pd.to_datetime(positivity.index.astype(str))
    plotly_line_chart(positivity, columns=new_cols,)

    top_words = st.slider('top X customer concerns', 5, 50, 30)
    top_mentions = absa.count().sort_values(ascending=False).head(top_words)
    with st.expander('Customer top mentions', expanded=True):
        fig = go.Figure()
        fig.add_trace(
            go.Bar(x=top_mentions.index, y=top_mentions.values)
        )
        st.plotly_chart(fig)
72
+
73
+
74
# Render the dashboard when this file is executed as the main script.
if __name__ == "__main__":
    run()
pages/01_debug.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit.logger import get_logger
3
+ import pandas as pd
4
+ from transformers import pipeline
5
+ from setfit import AbsaModel
6
+ import translators as ts
7
+ from config import base_model, senti_map, absa_model, senti_color
8
+ from annotated_text import annotated_text
9
+
10
+
11
LOGGER = get_logger(__name__)
#ts.preaccelerate_and_speedtest()


@st.cache_resource
def _load_models():
    """Build the sentiment pipeline and the aspect-based sentiment model.

    Streamlit re-executes this page script on every widget interaction.
    Caching the loaded models as a shared resource means they are built
    once per server process instead of on every rerun.

    Returns:
        A ``(sentiment_pipeline, absa_model)`` tuple.
    """
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=base_model, tokenizer=base_model
    )
    aspect_model = AbsaModel.from_pretrained(absa_model[0], absa_model[1])
    return sentiment_pipeline, aspect_model


# Module-level names kept so the rest of the page can use them unchanged.
senti_task, absa = _load_models()
15
+
16
def run():
    """Analyse a single pasted customer review interactively.

    The review is translated into the selected language, scored with the
    overall sentiment pipeline, then passed to the aspect model. The
    detected aspect spans are listed and highlighted inline in the
    translated text, coloured by their polarity.

    Nothing is rendered beyond the input box while the text area is empty.
    """
    import string  # function-scope: only needed for token normalisation

    st.write('Copy and paste comment into below text box.')
    txt = st.text_area('customer review')
    if not txt.strip():
        return

    lang = st.selectbox('pick output language', ['en', 'hi', 'zh'], index=0)
    with st.spinner(f'translate to {lang}'):
        translated = ts.translate_text(txt, to_language=lang, translator='google')

    with st.spinner('working on comment sentiment, please wait...'):
        sentiment = senti_task(translated)
    overall = sentiment[0]
    st.write(f"it's {senti_map[overall['label']]} feedback with a confidence of {overall['score']:.1%}")

    with st.spinner('detecting aspect sentiment...'):
        preds = absa(translated)
    spans = ','.join(p['span'] for p in preds)
    st.write(f"the comment talks about: {spans}, detailed sentiments as follow:")

    polarity_by_span = {p['span']: p['polarity'] for p in preds}
    pieces = []
    for token in translated.split(' '):
        # Prefer an exact match; otherwise ignore punctuation glued to the
        # token (e.g. "food," vs span "food") so the aspect is still marked.
        key = token if token in polarity_by_span else token.strip(string.punctuation)
        if key in polarity_by_span:
            polarity = polarity_by_span[key]
            pieces.append((token + ' ', polarity, senti_color[polarity]))
        else:
            pieces.append(token + ' ')
    annotated_text(pieces)
35
+
36
# Render the debug page when this file is executed as the main script.
if __name__ == "__main__":
    run()