grapplerulrich committed on
Commit
2164d57
1 Parent(s): 164690b

Add new cache delete buttons

Browse files
Files changed (1) hide show
  1. main.py +50 -25
main.py CHANGED
@@ -1,5 +1,5 @@
1
 
2
- from os import getenv
3
  from os.path import exists
4
  from functools import cache
5
  import json
@@ -52,6 +52,23 @@ def search_results( query ):
52
 
53
  return results
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def main():
56
  st.title('Google Search')
57
  query = st.text_input('Search query')
@@ -66,39 +83,47 @@ def main():
66
  number_of_results = len( results )
67
  st.success( 'Found {} results.'.format( number_of_results ) )
68
 
 
 
 
 
69
  progress_bar = st.progress(0)
70
 
71
  # for result in results:
72
  for index, result in enumerate(results):
73
- url_id = uuid.uuid5( uuid.NAMESPACE_URL, result['link'] ).hex
74
- st.write(result['link'])
75
- st.write(url_id)
 
 
 
 
 
 
76
 
77
- try:
78
- content = get_url_content( result['link'] )
79
- except Exception as exception:
80
- st.exception(exception)
81
- progress_bar.progress( ( index + 1 ) / number_of_results )
82
- continue
83
-
84
- file_path = 'summaries/' + url_id + '.json'
85
- if exists( file_path ):
86
- with open( file_path, 'r' ) as file:
87
- summary = json.load( file )
88
- else:
89
  try:
90
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
91
- summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
92
  except Exception as exception:
93
- raise exception
 
 
94
 
95
- with open( file_path, 'w' ) as file:
96
- json.dump( summary, file )
97
-
98
- for sentence in summary:
99
- st.write(sentence['summary_text'])
 
 
 
 
 
 
 
 
 
 
100
 
101
- progress_bar.progress( ( index + 1 ) / number_of_results )
102
 
103
  if __name__ == '__main__':
104
  main()
 
1
 
2
+ from os import getenv, remove
3
  from os.path import exists
4
  from functools import cache
5
  import json
 
52
 
53
  return results
54
 
55
def content_summary( url_id, content ):
    """Return the summary for a page, reusing the on-disk cache when possible.

    The summary is cached as JSON in 'summaries/<url_id>.json'. On a cache
    hit the stored value is returned as-is and the model is not loaded. On a
    miss the text is summarized with the distilbart summarization pipeline,
    written to the cache, and returned.

    Args:
        url_id: Identifier used as the cache file name (the caller passes
            the hex UUID derived from the result URL).
        content: Text to summarize; only used when no valid cache exists.

    Returns:
        The value produced by the summarization pipeline (or its cached JSON
        copy). The caller iterates it and reads 'summary_text' from each item.

    Raises:
        Any exception raised by the pipeline or by writing the cache file
        propagates unchanged to the caller.
    """
    # Function-scope import: the file's top-level `from os import ...` line
    # does not bring `makedirs` into scope.
    from os import makedirs

    cache_dir = 'summaries'
    file_path = cache_dir + '/' + url_id + '.json'

    if exists( file_path ):
        try:
            with open( file_path, 'r' ) as file:
                return json.load( file )
        except json.JSONDecodeError:
            # A truncated or corrupt cache file (e.g. an interrupted write)
            # would otherwise fail on every run; regenerate it below instead.
            pass

    # The original wrapped these calls in `try/except Exception: raise`,
    # which is a no-op; errors simply propagate.
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)

    # Make sure the cache directory exists so a fresh checkout does not
    # fail with FileNotFoundError on the first write.
    makedirs( cache_dir, exist_ok=True )
    with open( file_path, 'w' ) as file:
        json.dump( summary, file )

    return summary
71
+
72
  def main():
73
  st.title('Google Search')
74
  query = st.text_input('Search query')
 
83
  number_of_results = len( results )
84
  st.success( 'Found {} results.'.format( number_of_results ) )
85
 
86
+ # if st.button('Search results JSON'):
87
+ with st.expander("Search results JSON"):
88
+ st.json( results )
89
+
90
  progress_bar = st.progress(0)
91
 
92
  # for result in results:
93
  for index, result in enumerate(results):
94
+ with st.container():
95
+ url_id = uuid.uuid5( uuid.NAMESPACE_URL, result['link'] ).hex
96
+ st.write(result['link'])
97
+ st.write(url_id)
98
+ # if st.button('URL HTML'):
99
+ # st.json( results )
100
+
101
+ # if st.button('Page content'):
102
+ # st.json( results )
103
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  try:
105
+ content = get_url_content( result['link'] )
 
106
  except Exception as exception:
107
+ st.exception(exception)
108
+ progress_bar.progress( ( index + 1 ) / number_of_results )
109
+ continue
110
 
111
+ summary = content_summary( url_id, content )
112
+
113
+ for sentence in summary:
114
+ st.write(sentence['summary_text'])
115
+
116
+ progress_bar.progress( ( index + 1 ) / number_of_results )
117
+
118
+ col1, col2 = st.columns([.5,1])
119
+ with col1:
120
+ if st.button('Delete summary cache', key=url_id + 'summary'):
121
+ remove( 'summaries/' + url_id + '.json' )
122
+
123
+ with col2:
124
+ if st.button('Delete content cache', key=url_id + 'content'):
125
+ remove( 'page-content/' + url_id + '.txt' )
126
 
 
127
 
128
  if __name__ == '__main__':
129
  main()