Upload feed_to_llm_v2.py
Browse files- feed_to_llm_v2.py +40 -21
feed_to_llm_v2.py
CHANGED
@@ -65,33 +65,52 @@ def feed_articles_to_gpt_with_links(information, question):
|
|
65 |
print("LLM Response Content:", response_content)
|
66 |
|
67 |
# Extract sources from the response content
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
71 |
return response_content, [], [], []
|
72 |
|
73 |
-
|
74 |
-
|
|
|
|
|
75 |
|
76 |
-
# Create citation
|
|
|
77 |
citations = []
|
78 |
-
for idx,
|
79 |
-
|
|
|
|
|
80 |
citations.append(citation)
|
81 |
|
82 |
-
# Replace article
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
#
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
if __name__ == "__main__":
|
97 |
question = "How is United States fighting against tobacco addiction?"
|
|
|
65 |
print("LLM Response Content:", response_content)
|
66 |
|
67 |
# Extract sources from the response content
|
68 |
+
inline_matches = re.findall(r'Article \d+', response_content)
|
69 |
+
parenthetical_matches = re.findall(r'\(Article \d+\)', response_content)
|
70 |
+
|
71 |
+
if not (inline_matches or parenthetical_matches):
|
72 |
return response_content, [], [], []
|
73 |
|
74 |
+
# Combine and get unique article numbers
|
75 |
+
all_matches = inline_matches + [m.strip('()') for m in parenthetical_matches]
|
76 |
+
unique_articles = list(set(all_matches))
|
77 |
+
used_article_nums = [int(re.findall(r'\d+', match)[0]) - 1 for match in unique_articles]
|
78 |
|
79 |
+
# Create citation mapping
|
80 |
+
citation_map = {}
|
81 |
citations = []
|
82 |
+
for idx, article_num in enumerate(used_article_nums, start=1):
|
83 |
+
original = f"Article {article_num + 1}"
|
84 |
+
citation_map[original] = f"[{idx}]"
|
85 |
+
citation = f"[{idx}] {titles_list[article_num]} ({domains_list[article_num]})"
|
86 |
citations.append(citation)
|
87 |
|
88 |
+
# Replace all article references with citation numbers
|
89 |
+
modified_response = response_content
|
90 |
+
for original, citation_num in citation_map.items():
|
91 |
+
# Replace both inline and parenthetical references
|
92 |
+
modified_response = modified_response.replace(f"({original})", citation_num)
|
93 |
+
modified_response = modified_response.replace(original, citation_num)
|
94 |
+
|
95 |
+
# Format final response with citations
|
96 |
+
response_with_citations = (
|
97 |
+
f"{modified_response}\n\n"
|
98 |
+
f"References:\n"
|
99 |
+
f"{chr(10).join(citations)}"
|
100 |
+
)
|
101 |
+
|
102 |
+
# Prepare links only for cited articles
|
103 |
+
cited_links = []
|
104 |
+
cited_titles = []
|
105 |
+
cited_domains = []
|
106 |
+
for article_num in used_article_nums:
|
107 |
+
uuid = uuids[article_num]
|
108 |
+
link = f"https://tobaccowatcher.globaltobaccocontrol.org/articles/{uuid}/"
|
109 |
+
cited_links.append(link)
|
110 |
+
cited_titles.append(titles_list[article_num])
|
111 |
+
cited_domains.append(domains_list[article_num])
|
112 |
+
|
113 |
+
return response_with_citations, cited_links, cited_titles, cited_domains
|
114 |
|
115 |
if __name__ == "__main__":
|
116 |
question = "How is United States fighting against tobacco addiction?"
|