grapplerulrich committed on
Commit
9c1234d
1 Parent(s): 037af6c

Add summary cache

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. main.py +17 -4
.gitignore CHANGED
@@ -4,3 +4,4 @@ __pycache__
4
  /search-results
5
  /web-pages
6
  /page-content
 
 
4
  /search-results
5
  /web-pages
6
  /page-content
7
+ /summaries
main.py CHANGED
@@ -8,6 +8,7 @@ from dotenv import load_dotenv
8
  from googleapiclient.discovery import build
9
  from slugify import slugify
10
  from transformers import pipeline
 
11
 
12
  from beautiful_soup.app import get_url_content
13
 
@@ -60,15 +61,27 @@ def main():
60
 
61
  for result in results:
62
  st.write(result['link'])
 
63
  try:
64
  content = get_url_content( result['link'] )
65
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
66
- summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
67
- for sentence in summary:
68
- st.write(sentence['summary_text'])
69
  except Exception as exception:
70
  st.exception(exception)
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  if __name__ == '__main__':
74
  main()
 
8
  from googleapiclient.discovery import build
9
  from slugify import slugify
10
  from transformers import pipeline
11
+ import uuid
12
 
13
  from beautiful_soup.app import get_url_content
14
 
 
61
 
62
  for result in results:
63
  st.write(result['link'])
64
+
65
  try:
66
  content = get_url_content( result['link'] )
 
 
 
 
67
  except Exception as exception:
68
  st.exception(exception)
69
 
70
+ file_path = 'summaries/' + uuid.uuid5( uuid.NAMESPACE_URL, result['link'] ).hex + '.json'
71
+ if exists( file_path ):
72
+ with open( file_path, 'r' ) as file:
73
+ summary = json.load( file )
74
+ else:
75
+ try:
76
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
77
+ summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
78
+ except Exception as exception:
79
+ raise exception
80
+ with open( file_path, 'w' ) as file:
81
+ json.dump( summary, file )
82
+
83
+ for sentence in summary:
84
+ st.write(sentence['summary_text'])
85
 
86
  if __name__ == '__main__':
87
  main()