Spaces:
Running
Running
Update feed_to_llm_v2.py
Browse files
- feed_to_llm_v2.py +17 -13
feed_to_llm_v2.py
CHANGED
@@ -61,26 +61,30 @@ def feed_articles_to_gpt_with_links(information, question):
|
|
61 |
response_content = response.content # Access the content of the AIMessage
|
62 |
print("LLM Response Content:", response_content)
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
67 |
print("No sources found in the response.")
|
68 |
return response_content, [], [], []
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
# Get integers from source
|
73 |
-
source = re.findall(r'\d+', source)
|
74 |
-
used_article_num = [int(i) - 1 for i in source]
|
75 |
-
|
76 |
links = [f"https://tobaccowatcher.globaltobaccocontrol.org/articles/{uuid}/" for uuid in uuids]
|
77 |
titles = [titles for score, contents, uuids, titles, domains in information]
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
|
84 |
return response_without_source, links, titles, domains
|
85 |
|
86 |
if __name__ == "__main__":
|
|
|
61 |
response_content = response.content # Access the content of the AIMessage
|
62 |
print("LLM Response Content:", response_content)
|
63 |
|
64 |
+
# Use regex to extract Sources field
|
65 |
+
sources_pattern = r"Sources:\s*([\d,]+)"
|
66 |
+
sources_match = re.search(sources_pattern, response_content)
|
67 |
+
if sources_match:
|
68 |
+
# Extract the list of article numbers
|
69 |
+
source_numbers = sources_match.group(1).split(',')
|
70 |
+
used_article_num = [int(num.strip()) - 1 for num in source_numbers]
|
71 |
+
else:
|
72 |
print("No sources found in the response.")
|
73 |
return response_content, [], [], []
|
74 |
|
75 |
+
# Prepare links, titles, and domains for the cited articles
|
|
|
|
|
|
|
|
|
|
|
76 |
links = [f"https://tobaccowatcher.globaltobaccocontrol.org/articles/{uuid}/" for uuid in uuids]
|
77 |
titles = [titles for score, contents, uuids, titles, domains in information]
|
78 |
|
79 |
+
# Filter to only the cited articles
|
80 |
+
links = [links[i] for i in used_article_num if 0 <= i < len(links)]
|
81 |
+
titles = [titles[i] for i in used_article_num if 0 <= i < len(titles)]
|
82 |
+
domains = [domains[i] for i in used_article_num if 0 <= i < len(domains)]
|
83 |
+
|
84 |
+
# Remove the Sources field from the response if needed
|
85 |
+
response_without_source = re.sub(r"Sources:\s*[\d,]+", "", response_content).strip()
|
86 |
|
87 |
+
# Return the cleaned response and the citations
|
88 |
return response_without_source, links, titles, domains
|
89 |
|
90 |
if __name__ == "__main__":
|