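# Anti-Faker: a Gradio app for misinformation detection.
# A LangChain ReAct agent (GPT-3.5-turbo) combines Tavily web search, SerpAPI
# (Baidu News / Google Images) and GPT-4o image summarization to assess the
# truthfulness of a news claim, optionally together with an uploaded image.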
from langchain.agents import AgentExecutor, create_openai_functions_agent, create_react_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
import os
from langchain_core.output_parsers import StrOutputParser, PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate
from serpapi import GoogleSearch
from langchain.agents import initialize_agent, Tool
import gradio as gr
from css import *
import time
from openai import OpenAI
from PIL import Image
from langchain.schema import (
    HumanMessage,
    SystemMessage,
)
import base64
import io

os.environ["OPENAI_API_KEY"] = "sb-6a683cb3bd63a9b72040aa2dd08feff8b68f08a0e1d959f5"
os.environ['OPENAI_BASE_URL'] = "https://api.openai-sb.com/v1/"
os.environ["SERPAPI_API_KEY"] = "dcc98b22d5f7d413979a175ff7d75b721c5992a3ee1e2363020b2bbdf4f82404"
# os.environ['TAVILY_API_KEY'] = "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"  # you
os.environ['TAVILY_API_KEY'] = "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"  # zeng
os.environ["REPLICATE_API_TOKEN"] = "r8_IYJpjwjrxegcUfBeBbyUxErJXXsnHDM4AlSQQ"

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.6)
def search(query) -> str:
    """Fallback news search via SerpAPI's Baidu News engine; returns a formatted digest."""
    params = {
        "engine": "baidu_news",
        "q": query,
        "ct": "1",
        "api_key": "dcc98b22d5f7d413979a175ff7d75b721c5992a3ee1e2363020b2bbdf4f82404"
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    organic_results = results.get("organic_results", [])
    if not organic_results:
        return ""
    final_output = "\n\n".join([
        f"Title: {news.get('title', 'No Title')}\nSnippet: {news.get('snippet', 'No Snippet')}\nDate: {news.get('date', 'No Date')}\nSource: {news.get('source', 'No Source')}"
        for news in organic_results
    ])
    return final_output
def img_size(image):
    """Downscale the uploaded image until it fits within roughly 500x400 pixels."""
    # img = Image.open(image)
    img = Image.fromarray(image.astype("uint8"))
    width, height = img.size
    while width >= 500 or height >= 400:
        width = width * 0.8
        height = height * 0.8
    width = int(width)
    height = int(height)
    resized_img = img.resize((width, height))
    # resized_img.save(image_path)
    return resized_img
def search_image(query) -> str:
    """Search Google Images via SerpAPI and caption up to three thumbnails with GPT-4o-mini."""
    params = {
        "engine": "google_images",
        "q": query,
        "gl": "cn",
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    suggested_searches = results.get('suggested_searches', [])
    if suggested_searches:
        thumbnails = [item['thumbnail'] for item in suggested_searches][:3]
    else:
        thumbnails = []
    # thumbnails = [item['thumbnail'] for item in results['suggested_searches']][:3]
    information = ""
    client = OpenAI()
    for idx, thumbnail in enumerate(thumbnails):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image? Give a brief answer."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": thumbnail,
                            },
                        },
                    ],
                }
            ],
            max_tokens=100,
        )
        response_content = response.choices[0].message.content
        information = information + str(idx + 1) + ": " + response_content + "\n"
    return information
def encode_image(image):
    """Encode a numpy image array as a base64 PNG string."""
    image = Image.fromarray(image.astype("uint8"))
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
    # with open(image_path, "rb") as image_file:
    #     return base64.b64encode(image_file.read()).decode("utf-8")
def image_summarize(img_base64, prompt):
    """Ask GPT-4o to summarize a base64-encoded image according to the given prompt."""
    chat = ChatOpenAI(model="gpt-4o", max_tokens=256)
    msg = chat.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{img_base64}"},
                    },
                ]
            )
        ]
    )
    return msg.content
def generate_img_summaries(image):
    """Generate a retrieval-oriented summary of the uploaded image."""
    # img_size(path)
    image_summaries = []
    prompt = """You are an assistant responsible for summarizing images for retrieval. \
These summaries will be embedded and used to retrieve the original images. \
Provide a detailed summary of the image, optimized for retrieval."""
    base64_image = encode_image(image)
    image_summaries.append(image_summarize(base64_image, prompt))
    return image_summaries
search_tool = Tool(
    name="Baidu News Search",  # tool name
    func=search,  # wraps the search() function above
    description="备用搜索引擎,当主要搜索引擎不返回结果时调用,输入是检索query"  # fallback search engine; call it when the primary search returns nothing; input is the query
)
image_search_tool = Tool(
    name="Image Search",  # tool name (renamed variable to avoid shadowing the search_image() function)
    func=search_image,  # wraps the search_image() function above
    description="搜索图片引擎,当你需要检索相关图片的时候调用,输入是检索query,输出是与query有关的图片的信息"  # image search engine; input is the query, output is information about related images
)
| en_prompt = """ | |
| Remember, you are an AI called Anti-Faker, used for misinformation detection. | |
| You should asses the authenticity of every detail in the input information as thoroughly as possible. | |
| Your goal is to use the provided tools :{tools} and information about news related images:{image_information} to evaluate the truthfulness of the claims in the news article. | |
| Use the following format: | |
| Question: the input information (e.g., claim, post, news) you must assess. | |
| Thought: to evaluate first detail in input information, you should always think about what to do. | |
| Action: the action to take, should be one of [{tool_names}]. | |
| Action Input: the input to the action. | |
| Observation: the result of the action. | |
| Thought: to evaluate second detail in input information, you should always think about what to do. | |
| Action: the action to take, should be one of [{tool_names}]. | |
| Action Input: the input to the action. | |
| Observation: the result of the action. | |
| (this Thought/Action/Action Input/Observation can repeat N times) | |
| Thought: I now know the final answer. | |
| Final Answer: The definitive response, including reasons on all details and a conclusion, for assessing the original input information. | |
| Additional Notes: | |
| 1. If the input information contains multiple statements or details, you'd better to assess them one by one. | |
| 2. The results of tools may also be incorrect, pay attention to conflicts and distinguishing them base on your knowledge. | |
| 3. The conclusion in the Final Answer should include the detected labels,and you need to give a clear label without any possible label results. | |
| ( For information lacking clear evidence (e.g., rumors), candidate labels are: [Highly Likely Correct, Highly Likely Incorrect, Slightly Incorrect, Slightly Correct, Neutral]; | |
| For information with clear evidence (e.g., verified by specific proof), candidate labels are: [Completely Incorrect, Mostly Incorrect, Mixed Authenticity, Mostly Correct, Completely Correct].) | |
| 4. You should answer in English. | |
| 5. If you feel the need to search for image information. You can use the tools in {tools} to obtain information | |
| Begin! | |
| Question: {input} | |
| image_information: {image_information} | |
| Thought:{agent_scratchpad} | |
| """ | |
# Chinese counterpart of en_prompt; kept in Chinese because this is the prompt sent to
# the model when the UI language is set to Chinese.
ch_prompt = '''
记住, 你是一个人工智能工具, 名叫'鉴伪者', 用于虚假信息检测.
请尽可能详细地评估 输入信息 中每个细节的真实性.
您的目标是使用提供的工具:{tools}和有关新闻相关图像的信息:{image_information}来评估新闻文章中声明的真实性。
使用以下格式:
Question: 您必须评估的 输入信息(例如, 声明、帖子、新闻).
Thought: 要评估输入信息中的第一个细节, 你应该怎么做.
Action: 只能是此工具列表({tool_names})之一的工具名,不能有其他内容
Action Input: 行为的输入.
Observation: 行动的执行结果.
Thought: 要评估输入信息中的第二个细节, 你应该怎么做.
Action: 只能是此工具列表({tool_names})之一的工具名,不能有其他内容
Action Input: 动作的输入.
Observation: 行动的结果.
(这个 'Thought/Action/Action Input/Observation' 逻辑可以最多重复5次)
Thought: 我现在知道最后的答案了.
Final Answer: 对输入信息的评估结果 (包括对每个细节的推理验证或者预测, 整体评估结果以及相关URL).
补充说明:
1.如果输入信息包含多个陈述或细节, 你最好逐一评估, 请注意对时间、地点的判断.
2.检索工具的结果也可能不正确, 请注意其中的冲突, 并根据你的知识进行区分.
3.Final Answer的评估结果应包含一个检测标签, 你需要给出一个明确的标签, 没有任何"可能的标签结果"。
对于缺乏明确证据进行严格验证的信息(如谣言), 需预测标签, 候选标签为: 极有可能正确、极有可能不正确、可能错误、可能正确、中性;
对于有明确证据进行验证的信息, 候选标签为: 完全不正确、大部分不正确、真假参半、大部分正确、完全正确.
4.你的Final Answer需要用中文回答, 必须给出明确的标签和解释.
5.如果你觉得需要搜索图像信息, 可以使用{tools}中的工具来获取信息.
开始!
Question: {input}
image_information: {image_information}
Thought: {agent_scratchpad}
'''
# origin_prompt = ChatPromptTemplate.from_template(template)
# chain = origin_prompt | llm | StrOutputParser()
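# NOTE: react() below also builds a plain, tool-free baseline chain from template_en /
# template_zh, but neither template is defined in this file. The definitions below are
# hypothetical placeholders (an assumption) so the script runs end to end; swap in the
# project's real baseline prompts if they live elsewhere.
template_en = (
    "You are a misinformation-detection assistant. Assess the truthfulness of the "
    "following information and answer in English with a clear label and a brief explanation:\n{input}"
)
template_zh = (
    "你是一个虚假信息检测助手。请评估以下信息的真实性，用中文给出明确的标签和简要解释：\n{input}"
)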
selected_language = "ch"

def SelectLanguage(option):
    """Record the UI language choice ("英文" selects English prompts, otherwise Chinese)."""
    global selected_language
    if option == "英文":
        selected_language = "en"
    else:
        selected_language = "ch"

language_option = ["中文", "英文"]
def react(input, image_information):
    """Run the ReAct agent on the input claim plus the extracted image information,
    and also query the plain LLM (no tools) as a baseline for comparison."""
    tools = [TavilySearchResults(max_results=1), image_search_tool]
    if selected_language == "en":
        origin_prompt = ChatPromptTemplate.from_template(template_en)
        prompt = ChatPromptTemplate.from_template(en_prompt)
    elif selected_language == "ch":
        prompt = ChatPromptTemplate.from_template(ch_prompt)
        origin_prompt = ChatPromptTemplate.from_template(template_zh)
    chain = origin_prompt | llm | StrOutputParser()
    ori_result = chain.invoke(input)
    agent = create_react_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
    result = agent_executor.invoke({"input": input, "image_information": image_information})
    return result['output'], ori_result
# result = react(
#     "On September 23rd, the leaders of the United States, Japan, India, and Australia discussed and issued a joint statement on issues such as quadripartite security cooperation, regional and international situations",
#     "en")
# print(result['output'])
# result = result['output']
#
# translator = Translator(from_lang="EN", to_lang="ZH")
# if len(result) >= 500:
#     result_cn = []
#     while len(result) > 300:
#         pos = min(result[300:].find('.'), result[300:].find(','))
#         sub_result = result[:300+pos]
#         result_cn = str(result_cn) + str(translator.translate(sub_result))
#         result = result[300+pos:]
#     result_cn = str(result_cn) + str(translator.translate(result))
# else:
#     result_cn = translator.translate(result)
# print(result_cn[2:])

# with gr.Blocks(css=css1) as demo:
#     gr.Markdown("<h1 style='font-size: 36px; text-align: center; color: #333333; margin-bottom: 20px;'>虚假信息检测</h1>")
#
#     with gr.Row():
#         with gr.Column(scale=3):
#             input_text = gr.Textbox(label="Enter your news", elem_classes="gradio-input")
#             language_text = gr.Textbox(label="language", elem_classes="gradio-input")
#             greet_button = gr.Button("提交", elem_classes="gradio-button")
#         with gr.Column(scale=2):
#             output_text = gr.Textbox(label="结果", elem_classes="gradio-output", lines=5)

# result1, result2 = react("On September 23rd, the leaders of the United States, Japan, India, and Australia discussed and issued a joint statement on issues such as quadripartite security cooperation, regional and international situations")
# print(result1)
# print(result2)
title = "# 虚假信息检测"

with gr.Blocks(css=css1) as demo:
    gr.Markdown(title, elem_id="title")
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(elem_classes="gradio-output")
            language_select = gr.Dropdown(choices=language_option, elem_classes="gradio-input", label="请选择要使用的语言")
            # input_box = gr.Textbox(label="输入", elem_classes="gradio-input", placeholder="请输入要判断的新闻", lines=3)
            img_info = gr.Textbox(label="提取到的信息", lines=5, elem_classes="gradio-output")
            # ans_box = gr.Textbox(label="gpt-4o", lines=5, elem_classes="gradio-output")
        with gr.Column(scale=2):
            ans_box = gr.Textbox(label="gpt-4o", lines=5, elem_classes="gradio-output")
            img_input = gr.Image(label="上传图像", type="numpy")
            img_output = gr.Image(label="处理后的图像", type="numpy", visible=False)
            react_box = gr.Textbox(label="React", lines=5, elem_classes="gradio-output", visible=False)
    input_box = gr.Textbox(label="输入", elem_classes="gradio-input", placeholder="请输入要判断的新闻", lines=3)
    with gr.Row():
        # with gr.Column(scale=1):
        clear = gr.Button("清空页面", elem_classes="gradio-button", scale=1)
        # with gr.Column(scale=1):
        submit_btn = gr.Button("提交", elem_classes="gradio-button", scale=1)
        # clear = gr.Button("清空页面", elem_classes="gradio-button")
        # submit_btn = gr.Button("提交", elem_classes="gradio-button")
    language_select.change(SelectLanguage, language_select)

    def user(user_input, history):
        """Append the user's message to the chat history."""
        if history is None:
            history = []
        return user_input, history + [[user_input, None]]

    def bot(history, rag_box):
        """Stream the agent's verdict into the chat, character by character."""
        if history is None or len(history) == 0:
            return
        bot_rag = str(rag_box)
        history[-1][1] = ""
        for character in bot_rag:
            history[-1][1] += character
            time.sleep(0.01)
            yield history

    # Pipeline: resize the image, summarize it, run the ReAct agent, then stream the result.
    submit_btn.click(img_size, img_input, img_output).then(
        generate_img_summaries, img_output, img_info
    ).then(
        react, [input_box, img_info], [react_box, ans_box]
    ).then(
        user, [input_box, chatbot], [input_box, chatbot]
    ).then(
        bot, [chatbot, react_box], chatbot
    )
    clear.click(lambda: (None, None, None, None, None, None), inputs=None,
                outputs=[chatbot, ans_box, react_box, img_input, img_info, img_output])

if __name__ == "__main__":
    demo.launch()