jeroenherczeg
committed on
Commit
·
b1d9047
1
Parent(s):
699f5da
Search
Browse files
app.py
CHANGED
@@ -20,77 +20,77 @@ with st.form(key='search_form'):
|
|
20 |
search_query = st.text_input("", value="How long is a day on the moon?")
|
21 |
submit_button = st.form_submit_button(label='Search')
|
22 |
|
23 |
-
if
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
20 |
search_query = st.text_input("", value="How long is a day on the moon?")
|
21 |
submit_button = st.form_submit_button(label='Search')
|
22 |
|
23 |
+
if submit_button:
|
24 |
+
with st.status(":blue[Generating an answer...]", expanded=True) as status:
|
25 |
+
st.write(":blue[Searching google...]")
|
26 |
+
params = {
|
27 |
+
"api_key": st.secrets["SERPAPI_API_KEY"],
|
28 |
+
"engine": "google",
|
29 |
+
"q": search_query,
|
30 |
+
"location": "Belgium",
|
31 |
+
"google_domain": "google.com",
|
32 |
+
"gl": "us",
|
33 |
+
"hl": "en"
|
34 |
+
}
|
35 |
+
|
36 |
+
search = GoogleSearch(params)
|
37 |
+
search_results = search.get_dict()
|
38 |
+
for s in search_results['organic_results']:
|
39 |
+
st.write(s['title'])
|
40 |
+
|
41 |
+
st.write(":blue[Scraping first result...]")
|
42 |
+
scrape_response = requests.get('https://r.jina.ai/'+search_results['organic_results'][0]['link'])
|
43 |
+
scraped_content = scrape_response.text
|
44 |
+
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
45 |
+
num_tokens = len(encoding.encode(scraped_content))
|
46 |
+
|
47 |
+
if num_tokens > 4096:
|
48 |
+
st.write("The scraped content is too long for the model. Truncating to 4096 tokens.")
|
49 |
+
scraped_content = encoding.decode(encoding.encode(scraped_content)[:4096])
|
50 |
+
|
51 |
+
st.write("Scraped content:")
|
52 |
+
st.write(scraped_content)
|
53 |
+
|
54 |
+
st.write(":blue[Providing LLM with the scraped data...]")
|
55 |
+
|
56 |
+
system_prompt = """
|
57 |
+
You are an AI designed to provide concise (maximum one sentence), accurate, and clear responses in the style of Google's Answer Box. You must strictly use only the information provided within the user's prompt. Do not add any information, assumptions, or external knowledge. Your response should be brief, focused, and directly answer the question or provide the requested information. Format your answer as a clear, informative snippet, suitable for immediate user consumption.
|
58 |
+
|
59 |
+
Example user prompt:
|
60 |
+
Prompt:
|
61 |
+
- What is the capital of France?
|
62 |
+
Information: The capital of France is Paris. It is known for its cultural and historical landmarks, including the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral.
|
63 |
+
|
64 |
+
Example response:
|
65 |
+
The capital of France is Paris.
|
66 |
+
|
67 |
+
Remember, your responses should always follow this format and strictly use only the provided information.
|
68 |
+
If you do not know the answer, you can respond with "I do not know."
|
69 |
+
"""
|
70 |
+
st.write("System prompt:")
|
71 |
+
st.write(system_prompt)
|
72 |
+
|
73 |
+
user_content = f"Prompt: {search_query}\nInformation: {scraped_content}"
|
74 |
+
st.write("User content:")
|
75 |
+
st.write(user_content)
|
76 |
+
|
77 |
+
llm_response = client.chat.completions.create(
|
78 |
+
model="gpt-3.5-turbo",
|
79 |
+
messages=[
|
80 |
+
{"role": "system", "content": system_prompt},
|
81 |
+
{"role": "user", "content": user_content}
|
82 |
+
],
|
83 |
+
stream=False,
|
84 |
+
)
|
85 |
+
|
86 |
+
status.update(label="Pipeline completed!", state="complete", expanded=False)
|
87 |
+
|
88 |
+
st.subheader('Question-Answering System - Answer Box')
|
89 |
+
st.write(llm_response.choices[0].message.content)
|
90 |
+
st.subheader('Search Results')
|
91 |
+
|
92 |
+
for result in search_results['organic_results']:
|
93 |
+
st.write(result['title'])
|
94 |
+
st.write(result['snippet'])
|
95 |
+
st.write(result['link'])
|
96 |
+
st.divider()
|