using OpenAI, Dates, DataFrames, CSV, ProgressMeter, JSON3

# OpenAI API key, read once when this file is loaded. Indexing `ENV` raises a `KeyError`
# right here if `OPENAI_API_KEY` is not set, so a missing key is reported at load time.
api_key = ENV["OPENAI_API_KEY"]

# System prompt sent with every request: it tells the model to expand a short misleading
# claim into a user / fact-checker dialogue and to answer with bare JSON holding
# `user_statements` and `fact_checker_responses` arrays of `{"message": ...}` objects.
# NOTE(review): step 3 asks for at least 2 exchanges, yet both examples show a single
# exchange and the format marks the second entries "if needed" — confirm which is intended.
systemprompt = """
Create a conversation between a misinformed user and a fact-checker. Given a misleading claim, expand on that claim to make it sound credible, then provide the fact-checker's response to correct it. Structure the conversation as alternating exchanges, with each misleading claim followed by a fact-checked response.

# Steps

1. Elaborate on the misleading claim, providing reasoning that a misinformed user might use to justify their belief.
2. Construct a response from the fact-checker that addresses each erroneous point, correcting the misinformation using clear and reliable information.
3. Alternate between "User" and "Fact-checker" dialogue, ensuring there are **at least 2 exchanges** per conversation.
4. Present results such that each interaction is divided into separate payloads for an API response.

# Output Format

Result should be formatted as JSON without code blocks:
{
"user_statements": [
{
"message": "[First misinformed user statement]"
},
{
"message": "[Second misinformed user statement if needed]"
}
],
"fact_checker_responses": [
{
"message": "[Fact-checker's response to the first user statement]"
},
{
"message": "[Fact-checker's response to the second user statement if needed]"
}
]
}

# Examples

Input:

The earth is flat

Output:

{
"user_statements": [
{
"message": "I've heard that the Earth is flat because if it were round, we would all fall off. Plus, they say there's no real proof of a round Earth, just some photoshopped images by space agencies. It just makes sense when you think about it."
}
],
"fact_checker_responses": [
{
"message": "Actually, the Earth isn't flat. Gravity keeps everything attached to the Earth's surface regardless of where we are on the globe, which explains why we don't fall off. Additionally, countless photos and scientific missions over decades have demonstrated that the Earth is round. The images of Earth from space are verified by experts worldwide and they come from many different agencies and companies, not just government entities. Private organizations, like SpaceX, have also provided evidence that the Earth is round."
}
]
}

Input:

Vaccines are dangerous

Output:

{
"user_statements": [
{
"message": "I read somewhere that vaccines are dangerous because they contain harmful chemicals like mercury, and they can cause severe diseases. Isn't that a huge risk to take?"
}
],
"fact_checker_responses": [
{
"message": "Vaccines do contain ingredients to help enhance their effectiveness, but they are used in very small, safe amounts. For instance, mercury is found in the form of Thimerosal, which serves as a preservative to prevent contamination and has been repeatedly found to be safe in those minimal amounts. Moreover, most modern vaccines no longer contain any mercury at all. Decades of research have shown that vaccines are far safer than the dangerous diseases they prevent, protecting millions of lives worldwide."
}
]
}

# Notes

- Ensure each claim is expanded to appear credible, using reasoning or information one might encounter from unreliable sources.
- Fact-checking responses should be direct and supported with verified facts.
- Keep each user statement clearly differentiated from the fact-checker's response to make it easy to parse through the API."""

"""
    create_expansive_claim(claimprompt, systemprompt=systemprompt;
                           model="gpt-4o", api_key=ENV["OPENAI_API_KEY"])

Send a short misleading claim to the OpenAI chat endpoint and return the raw result of
`OpenAI.create_chat`.

# Arguments
- `claimprompt`: the simple claim to expand; sent as the `"user"` message.
- `systemprompt`: sent as the `"system"` message. Defaults to the file-level
  `systemprompt`.

# Keywords
- `model`: chat model name handed to `OpenAI.create_chat` (default `"gpt-4o"`).
- `api_key`: API key handed to `OpenAI.create_chat`. The default reads
  `ENV["OPENAI_API_KEY"]` on every call and throws a `KeyError` when it is not set.

# Example
```julia
claimprompt = "vaccines are dangerous"
response = create_expansive_claim(claimprompt, systemprompt)
println(response.response.choices[1].message.content)
```
"""
function create_expansive_claim(
    claimprompt, systemprompt=systemprompt; model="gpt-4o", api_key=ENV["OPENAI_API_KEY"]
)
    messages = [
        Dict("role" => "system", "content" => systemprompt),
        Dict("role" => "user", "content" => claimprompt),
    ]
    return OpenAI.create_chat(api_key, model, messages)
end

"""
    get_misinfo_claim(response; kwargs...)

Extract the misinformed-user statements from a response returned by
`create_expansive_claim`.

The text at `response.response.choices[1].message.content` is parsed as JSON and the
`"message"` of every entry under `"user_statements"` is returned, in order, as a
`Vector{String}`.

Before parsing, newline characters are removed from the text, and a Markdown code fence
wrapped around the payload (three backticks, optionally followed by a language tag) is
stripped, since a fenced reply is not valid JSON on its own.

`kwargs` are accepted and ignored. Parse errors and missing keys are not caught here.
"""
function get_misinfo_claim(response; kwargs...)
    content = response.response.choices[1].message.content
    flattened = replace(content, "\n" => "")
    # Drop a leading "```json" / "```" and a trailing "```" if the model added them.
    payload = replace(flattened, r"^\s*```[A-Za-z]*\s*|\s*```\s*$" => "")
    parsed = JSON3.read(payload)
    return String[entry["message"] for entry in parsed["user_statements"]]
end

"""
    expansive_combined_library(path="data/Combined Misinformation Library.csv")

Read the claims table at `path` into a `DataFrame`, add an `ExpandedClaim` column, and
fill it row by row with the first user statement generated for each entry of the
`Claims` column (one `create_expansive_claim` call per row).

Rows for which the model returns no user statement keep the empty-string placeholder
and a warning is logged, so the rows already processed are not lost. Errors raised by
the request or by parsing are not caught.

# Example
```julia
expansive_claims_library = expansive_combined_library()
query_categories = ["climate change", "jewish people", "black people",
                    "immigration", "LGBTQ", "sexual and reproductive health"]
replace_dict = Dict("Climate Change" => "climate change",
                    "Anti-semitic" => "jewish people",
                    "Black" => "black people",
                    "Immigration" => "immigration",
                    "LGBTQ" => "LGBTQ",
                    "Reproductive health" => "sexual and reproductive health")
## Use replace dict to generate category where .Model equal the dict key
expansive_claims_library[!, :category] =
    [replace_dict[x] for x in expansive_claims_library.Model]
expansive_claims_library[!, :text] = expansive_claims_library.ExpandedClaim
CSV.write("data/expansive_claims_library.csv", expansive_claims_library)
```
"""
function expansive_combined_library(
    path::AbstractString="data/Combined Misinformation Library.csv"
)
    library = CSV.read(path, DataFrame)
    # "" is the placeholder for "not expanded yet"; every row starts out with it.
    library[!, :ExpandedClaim] .= ""
    @showprogress for (i, claim) in enumerate(library.Claims)
        response = create_expansive_claim(claim)
        statements = get_misinfo_claim(response)
        if isempty(statements)
            # Indexing [1] here would throw and discard every row completed so far.
            @warn "No user statement returned; leaving ExpandedClaim empty" row = i claim
        else
            library[i, :ExpandedClaim] = first(statements)
        end
    end
    return library
end

"""
    fill_expansive_claims_library!(cl::DataFrame)

Fill the gaps in `cl.ExpandedClaim` in place and return `cl`.

A row counts as unfilled when its `ExpandedClaim` is `missing` or the empty string. For
each such row the claim in `cl.Claims` is sent through `create_expansive_claim` and the
first returned user statement is stored. Rows for which no statement comes back are left
untouched and a warning is logged.

Before writing, the `ExpandedClaim` column is replaced by a
`Vector{Union{Missing,String}}` copy of itself so that it can hold arbitrary strings.
# NOTE(review): motivated by CSV.jl's column type inference (all-empty columns, short
# inline strings) — confirm against the CSV.jl version in use.

# Example
```julia
include("scripts/expansive_claims_with_LLM.jl")
cl = CSV.read("data/expansive_claims_library.csv", DataFrame)
fill_expansive_claims_library!(cl)
CSV.write("data/expansive_claims_library_expanded.csv", cl)
```
"""
function fill_expansive_claims_library!(cl::DataFrame)
    pending = findall(x -> ismissing(x) || x == "", cl.ExpandedClaim)
    isempty(pending) && return cl

    # Widen the column first: assigning a `String` into a narrower eltype would throw.
    cl[!, :ExpandedClaim] = convert(Vector{Union{Missing,String}}, cl.ExpandedClaim)

    @showprogress for i in pending
        response = create_expansive_claim(cl.Claims[i])
        statements = get_misinfo_claim(response)
        if isempty(statements)
            @warn "No user statement returned; row left unfilled" row = i
        else
            cl[i, :ExpandedClaim] = first(statements)
        end
    end
    return cl
end