SP151 committed on
Commit
a2e33f7
1 Parent(s): e8a6868

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +126 -0
main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables import RunnablePassthrough
5
+ from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
6
+ from langchain.schema.runnable import RunnableLambda
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ from dotenv import load_dotenv
10
+ import os
11
+ import json
12
+ import streamlit as st
13
+
14
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises an opaque TypeError when the variable is missing. Check explicitly
# and fail with an actionable message instead.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment or to a .env file."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key

# Tracing is optional: only switch it on when an API key is actually available,
# rather than crashing at import time when it is absent.
_langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if _langchain_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = _langchain_api_key
18
+
19
+
20
# Default number of search hits requested per query.
RESULTS_PER_QUESTION = 3
ddg_search = DuckDuckGoSearchAPIWrapper()


def web_search(query: str, num_results: int = RESULTS_PER_QUESTION) -> list[str]:
    """Return the result links DuckDuckGo reports for *query*.

    Args:
        query: Search string passed to the DuckDuckGo wrapper.
        num_results: Maximum number of results to request.

    Returns:
        The ``"link"`` value of every result that has one. Entries without a
        link (for example a "no result found" placeholder, which some wrapper
        versions appear to return) are skipped instead of raising ``KeyError``.
    """
    results = ddg_search.results(query, num_results)
    return [r["link"] for r in results if r.get("link")]
26
+
27
# Prompt used to summarize one scraped page with respect to one question.
# Placeholders: {text} is the page text, {question} is the search query.
summary_template = """
{text}

---------------------------
Using the above text, answer in short the following question:

> {question}
---------------------------
If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats, etc if available.
"""

summary_prompt = ChatPromptTemplate.from_template(summary_template)
39
+
40
def scrape_text(url: str, timeout: float = 10.0) -> str:
    """Download *url* and return its visible text.

    This is deliberately best-effort: it never raises. On any failure it
    returns a human-readable message starting with
    ``"Failed to retrieve the webpage"`` so the caller always gets a string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block indefinitely.

    Returns:
        The page text with tags removed and fragments joined by single
        spaces, or a failure message.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            return soup.get_text(separator=" ", strip=True)
        return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        # Broad on purpose: network and parsing errors alike are turned into a
        # message string so that one bad page cannot abort the whole run.
        print(e)
        return f"Failed to retrieve the webpage: {e}"
52
+
53
# Sample article address; no other statement in this file reads this variable.
url = "https://blog.langchain.dev/announcing-langsmith/"

# Input:  {"question": ..., "url": ...}
# Output: a string "URL : <url>\n\nSummary:\n\n<summary>".
# The scraped text is cut to its first 10000 characters before it is placed in
# the summary prompt.
scrape_and_summarize_chain = RunnablePassthrough.assign(
    summary=RunnablePassthrough.assign(
        text=lambda x: scrape_text(x["url"])[:10000]
    )
    | summary_prompt
    | ChatOpenAI(model="gpt-4o")
    | StrOutputParser()
    # Single quotes inside the f-string: reusing the outer double quotes is a
    # SyntaxError on Python versions older than 3.12.
) | (lambda x: f"URL : {x['url']}\n\nSummary:\n\n{x['summary']}")
60
+
61
def _question_url_pairs(state):
    """Expand one search state into a {"question", "url"} dict per found URL."""
    return [{"question": state["question"], "url": link} for link in state["urls"]]


# Input:  {"question": ...}
# Output: one summary string per URL found for that question.
web_search_chain = (
    RunnablePassthrough.assign(urls=lambda state: web_search(state["question"]))
    | _question_url_pairs
    | scrape_and_summarize_chain.map()
)
64
+
65
# User message asking the model for three search queries about {question},
# returned as a list of single-item lists.
_SEARCH_QUERY_INSTRUCTIONS = """
Write 3 google search queries to search online that form an objective
opinion from the following: {question}\n
You must respond with a list of strings in the following format:
[["query1"], ["query2"], ["query3"]]
"""

search_prompt = ChatPromptTemplate.from_messages(
    [("user", _SEARCH_QUERY_INSTRUCTIONS)]
)
78
+
79
def _parse_search_queries(raw: str) -> list:
    """Parse the model's reply into a list of non-empty query lists.

    The prompt asks for ``[["query1"], ["query2"], ["query3"]]``, but a model
    may wrap the JSON in a Markdown code fence or return a flat list of
    strings. Both variations are accepted and normalized to the requested
    nested shape.

    Args:
        raw: Text produced by the model.

    Returns:
        A list whose items are non-empty lists; the first element of each item
        is the query string.

    Raises:
        ValueError: If the reply is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass) or does not decode to a list.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Remove the surrounding fence and an optional "json" language tag.
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:]
    parsed = json.loads(text)
    if not isinstance(parsed, list):
        raise ValueError(f"Expected a JSON list of search queries, got: {raw!r}")

    queries = []
    for item in parsed:
        if isinstance(item, str):
            item = [item]
        if item:  # skip empty entries, which carry no query
            queries.append(list(item))
    return queries


# Input: {"question": ...}; output: [[query], [query], ...].
search_question_chain = (
    search_prompt
    | ChatOpenAI(model="gpt-4o")
    | StrOutputParser()
    | _parse_search_queries
)

# Runs web_search_chain once per generated query, giving a list (one entry per
# query) of lists (one summary string per URL).
full_research_chain = (
    search_question_chain
    | (lambda queries: [{"question": q[0]} for q in queries])
    | web_search_chain.map()
)
82
+
83
# System message that sets the report writer's role.
WRITER_SYSTEM_PROMPT = (
    "You are an AI critical thinker research assistant. "
    "Your sole purpose is to write well written, critically acclaimed, "
    "objective and structured reports on given text."
)

# The task sentence is a single line of the template.
_REPORT_TASK = (
    'Using the above information, answer the following question or topic: "{question}" in a detailed report -- '
    "The report should focus on the answer to the question, should be well structured, informative, "
    "in depth, with facts and numbers if available and a minimum of 1,200 words."
)

# User message template; placeholders are {research_summary} and {question}.
# The empty first and last items reproduce the leading and trailing newline.
RESEARCH_REPORT_TEMPLATE = "\n".join(
    [
        "",
        "Information:",
        "--------",
        "{research_summary}",
        "--------",
        _REPORT_TASK,
        "You should strive to write the report as long as you can using all relevant and necessary information provided.",
        "You must write the report with markdown syntax.",
        "You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.",
        "Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.",
        "You must write the report in apa format.",
        "Please do your best, this is very important to my career.",
        "",
    ]
)
100
+
101
# Report-writing prompt: the system role first, then the user task template.
_report_messages = [
    ("system", WRITER_SYSTEM_PROMPT),
    ("user", RESEARCH_REPORT_TEMPLATE),
]
prompt = ChatPromptTemplate.from_messages(_report_messages)
107
+
108
def collapse_list_of_lists(list_of_lists: list[list[str]]) -> str:
    """Flatten nested lists of strings into one blank-line-separated string.

    Each inner list is joined with a blank line (``"\\n\\n"``), and the
    resulting chunks are joined with a blank line again.

    Args:
        list_of_lists: Iterable of iterables of strings.

    Returns:
        The combined text; an empty string when there is nothing to join.
    """
    return "\n\n".join("\n\n".join(group) for group in list_of_lists)
113
+
114
# Research step: run the full research chain and merge its nested summaries
# into a single text block.
_research_summary = full_research_chain | collapse_list_of_lists

# Input: {"question": ...}; output: the report text as a string.
chain = (
    RunnablePassthrough.assign(research_summary=_research_summary)
    | prompt
    | ChatOpenAI(model="gpt-4o")
    | StrOutputParser()
)
117
+
118
st.set_page_config(page_title="Research Assistant/Report Generation")
st.header("Research -> Report")
# Named `question` rather than `input` so the builtin is not shadowed.
question = st.text_input("Input Question:", key="input")
submit = st.button("Ask Question")

# Run the chain only after the button is pressed. Invoking it unconditionally
# would start the whole search/scrape/LLM pipeline on every script run,
# including the first page load with an empty text box.
if submit:
    if not question.strip():
        st.warning("Please enter a question first.")
    else:
        # The chain starts with RunnablePassthrough.assign and its prompts read
        # a "question" variable, so the input must be a dict, not a bare string.
        response = chain.invoke({"question": question})
        st.subheader("Response:")
        st.write(response.strip())