lalithadevi committed on
Commit
a3697ff
1 Parent(s): 45e0954

Update find_similar_news.py

Browse files
Files changed (1) hide show
  1. find_similar_news.py +12 -5
find_similar_news.py CHANGED
@@ -43,17 +43,16 @@ def get_milvus_collection():
43
  logger.warning('Exiting get_milvus_collection()')
44
  return collection
45
 
46
- def find_similar_news(text, search_vec, collection, vectorizer, sent_model, ce_model, top_n: int=5):
47
  logger.warning('Entering find_similar_news')
48
  search_params = {"metric_type": "IP"}
49
- # search_vec = vectorizer.vectorize_(text)
50
  logger.warning('Querying Milvus for most similar results')
51
  results = collection.search([search_vec],
52
  anns_field='article_embed', # annotations field specified in the schema definition
53
  param=search_params,
54
  limit=top_n,
55
  guarantee_timestamp=1,
56
- output_fields=['article_title', 'article_src', 'article_url', 'article_date'])[0] # which fields to return in output
57
 
58
  logger.warning('retrieved search results from Milvus')
59
  logger.warning('Computing cross encoder similarity scores')
@@ -64,11 +63,19 @@ def find_similar_news(text, search_vec, collection, vectorizer, sent_model, ce_m
64
 
65
  logger.warning('Generating HTML output')
66
  html_output = ""
 
67
  for n, i in enumerate(similarity_idxs):
68
  title_ = results[i].entity.get('article_title')
69
  url_ = results[i].entity.get('article_url')
70
- html_output += f'''<a style="font-weight: bold; font-size:14px; color: black;" href="{url_}" target="_blank">{title_}</a><br>
71
- '''
 
 
 
 
 
 
 
72
  logger.warning('Successfully generated HTML output')
73
  logger.warning('Exiting find_similar_news')
74
  return html_output
 
43
  logger.warning('Exiting get_milvus_collection()')
44
  return collection
45
 
46
+ def find_similar_news(text, search_vec, collection, vectorizer, sent_model, ce_model, top_n: int=10):
47
  logger.warning('Entering find_similar_news')
48
  search_params = {"metric_type": "IP"}
 
49
  logger.warning('Querying Milvus for most similar results')
50
  results = collection.search([search_vec],
51
  anns_field='article_embed', # annotations field specified in the schema definition
52
  param=search_params,
53
  limit=top_n,
54
  guarantee_timestamp=1,
55
+ output_fields=['article_title', 'article_url'])[0] # which fields to return in output
56
 
57
  logger.warning('retrieved search results from Milvus')
58
  logger.warning('Computing cross encoder similarity scores')
 
63
 
64
  logger.warning('Generating HTML output')
65
  html_output = ""
66
+ article_count = 0
67
  for n, i in enumerate(similarity_idxs):
68
  title_ = results[i].entity.get('article_title')
69
  url_ = results[i].entity.get('article_url')
70
+ if title_ != text:
71
+ html_output += f'''<a class="similar-news-item" href="{url_}" target="_blank">{title_}</a><br>
72
+ '''
73
+ article_count += 1
74
+
75
+ if article_count == 5 :
76
+ break
77
+
78
+
79
  logger.warning('Successfully generated HTML output')
80
  logger.warning('Exiting find_similar_news')
81
  return html_output