Octave Lory committed on
Commit
939c121
β€’
1 Parent(s): 7002947

Auto update

Browse files
Files changed (6) hide show
  1. Dockerfile +1 -1
  2. app.py +85 -27
  3. browser.py +108 -0
  4. sandbox.py β†’ modal_sandbox.py +0 -0
  5. modal_scraper.py +22 -0
  6. system_prompt.txt +6 -0
Dockerfile CHANGED
@@ -8,7 +8,7 @@ COPY --chown=user . $HOME/app
8
  RUN chown -R user:user $HOME/app
9
  RUN chmod -R 755 $HOME/app
10
  COPY ./requirements.txt ~/app/requirements.txt
11
- RUN pip install chainlit openai modal
12
  RUN python3 -m modal setup
13
  COPY . .
14
  CMD ["chainlit", "run", "app.py", "--port", "7860"]
 
8
  RUN chown -R user:user $HOME/app
9
  RUN chmod -R 755 $HOME/app
10
  COPY ./requirements.txt ~/app/requirements.txt
11
+ RUN pip install chainlit openai modal search-engines
12
  RUN python3 -m modal setup
13
  COPY . .
14
  CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py CHANGED
@@ -4,8 +4,10 @@ import base64
4
  import hashlib
5
  from datetime import datetime
6
  import os
 
7
  import json
8
  import modal
 
9
 
10
  model = "gpt-4-turbo"
11
  client = OpenAI()
@@ -15,30 +17,31 @@ with open("system_prompt.txt", "r") as file:
15
  system_prompt = file.read()
16
  system_prompt.format(date = datetime.now().strftime("%Y-%m-%d"))
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  tools = [
19
- {
20
- "type": "function",
21
- "function": {
22
- "name": "python",
23
- "description": "Run Python code in a sandboxed environment and return the output.",
24
- "parameters": {
25
- "type": "object",
26
- "properties": {
27
- "command": {
28
- "type": "string",
29
- "description": "Optional. Command to run before the code. e.g. 'pip install numpy'"
30
- },
31
- "code": {
32
- "type": "string",
33
- "description": "The Python code to run."
34
- }
35
- },
36
- "required": [
37
- "code"
38
- ],
39
- },
40
- },
41
- },
42
  {
43
  "type": "function",
44
  "function": {
@@ -61,6 +64,25 @@ tools = [
61
  ],
62
  },
63
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
  ]
66
 
@@ -130,6 +152,13 @@ def handle_vision_call(msg, image_history):
130
  user_message = {"role": "user", "content": [{"type": "text", "text": msg.content}]}
131
  for image in image_base64:
132
  user_message["content"].append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}", "detail": "high"}})
 
 
 
 
 
 
 
133
  image_history.append(user_message)
134
  stream = gpt_vision_call(image_history)
135
  return stream
@@ -139,7 +168,6 @@ def gpt_vision_call(image_history: list = []):
139
  model=model,
140
  messages=image_history,
141
  tools = tools,
142
- max_tokens=4096,
143
  temperature=0,
144
  stream=True
145
  )
@@ -157,10 +185,20 @@ async def on_message(msg: cl.Message):
157
  stream_msg = cl.Message(content="")
158
  stream = None
159
 
160
- if msg.elements:
161
  stream = handle_vision_call(msg, image_history)
162
  else:
163
- image_history.append({"role": "user", "content": msg.content})
 
 
 
 
 
 
 
 
 
 
164
  stream = gpt_vision_call(image_history)
165
 
166
  if stream:
@@ -170,7 +208,15 @@ async def on_message(msg: cl.Message):
170
  image_history.append({"role": "assistant", "content": stream_infos.get("assistant_content", ""), "tool_calls": tool_calls_list})
171
  print(stream_infos.get("tool_calls"))
172
  for tool_call_id, tool_call in stream_infos.get("tool_calls").items():
173
- arguments_json = json.loads(tool_call["arguments"])
 
 
 
 
 
 
 
 
174
  if tool_call["name"] == "dalle":
175
  function_response = generate_image(arguments_json["prompt"], arguments_json.get("size", "1024x1024"))
176
  if function_response["status"] == "success":
@@ -186,7 +232,19 @@ async def on_message(msg: cl.Message):
186
  }
187
  )
188
  elif tool_call["name"] == "python":
 
189
  function_response = remote_execution.remote(command = arguments_json.get("command", None), code = arguments_json["code"])
 
 
 
 
 
 
 
 
 
 
 
190
  image_history.append(
191
  {
192
  "tool_call_id": tool_call_id,
 
4
  import hashlib
5
  from datetime import datetime
6
  import os
7
+ from ast import literal_eval
8
  import json
9
  import modal
10
+ from browser import browse
11
 
12
  model = "gpt-4-turbo"
13
  client = OpenAI()
 
17
  system_prompt = file.read()
18
  system_prompt.format(date = datetime.now().strftime("%Y-%m-%d"))
19
 
20
+ #{
21
+ # "type": "function",
22
+ # "function": {
23
+ # "name": "python",
24
+ # "description": "Run Python code in a sandboxed environment and return the output.",
25
+ # "parameters": {
26
+ # "type": "object",
27
+ # "properties": {
28
+ # "command": {
29
+ # "type": "string",
30
+ # "description": "Optional. Command to run before the code. e.g. 'pip install numpy'"
31
+ # },
32
+ # "code": {
33
+ # "type": "string",
34
+ # "description": "The Python code to run."
35
+ # }
36
+ # },
37
+ # "required": [
38
+ # "code"
39
+ # ],
40
+ # },
41
+ # },
42
+ #},
43
+
44
  tools = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  {
46
  "type": "function",
47
  "function": {
 
64
  ],
65
  },
66
  },
67
+ },
68
+ {
69
+ "type": "function",
70
+ "function": {
71
+ "name": "browse",
72
+ "description": "Browse the web to look for a given information. You only have to provide the information you are looking for and a system of advanced AI agents (which you can refer to as 'The Agent Swarm') will do the rest.",
73
+ "parameters": {
74
+ "type": "object",
75
+ "properties": {
76
+ "information": {
77
+ "type": "string",
78
+ "description": "A description of the information you are looking for. e.g 'When will the next SpaceX launch take place ?'"
79
+ }
80
+ },
81
+ "required": [
82
+ "information"
83
+ ],
84
+ },
85
+ }
86
  }
87
  ]
88
 
 
152
  user_message = {"role": "user", "content": [{"type": "text", "text": msg.content}]}
153
  for image in image_base64:
154
  user_message["content"].append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}", "detail": "high"}})
155
+ files = [file for file in msg.elements if "image" not in file.mime]
156
+ file_string = " <uploaded-files>"
157
+ for file in files:
158
+ file_string += f"'{file.path}', "
159
+ if file_string != " <uploaded-files>":
160
+ file_string += "</uploaded-files>"
161
+ user_message["content"][0]["text"] += file_string
162
  image_history.append(user_message)
163
  stream = gpt_vision_call(image_history)
164
  return stream
 
168
  model=model,
169
  messages=image_history,
170
  tools = tools,
 
171
  temperature=0,
172
  stream=True
173
  )
 
185
  stream_msg = cl.Message(content="")
186
  stream = None
187
 
188
+ if any("image" in file.mime for file in msg.elements): # if the message contains an image
189
  stream = handle_vision_call(msg, image_history)
190
  else:
191
+ print("no images detected")
192
+ files = [file for file in msg.elements if "image" not in file.mime]
193
+ content = msg.content
194
+ if files:
195
+ print("files detected")
196
+ file_string = " <uploaded-files>"
197
+ for file in files:
198
+ file_string += f"'{file.path}', "
199
+ file_string += "</uploaded-files>"
200
+ content = msg.content + file_string
201
+ image_history.append({"role": "user", "content": content})
202
  stream = gpt_vision_call(image_history)
203
 
204
  if stream:
 
208
  image_history.append({"role": "assistant", "content": stream_infos.get("assistant_content", ""), "tool_calls": tool_calls_list})
209
  print(stream_infos.get("tool_calls"))
210
  for tool_call_id, tool_call in stream_infos.get("tool_calls").items():
211
+ try:
212
+ print(tool_call["arguments"])
213
+ arguments_json = json.loads(tool_call["arguments"])
214
+ except json.JSONDecodeError as e:
215
+ print(f"Error decoding JSON: {str(e)}")
216
+ try:
217
+ arguments_json = eval(tool_call["arguments"])
218
+ except Exception as e:
219
+ arguments_json = literal_eval(tool_call["arguments"])
220
  if tool_call["name"] == "dalle":
221
  function_response = generate_image(arguments_json["prompt"], arguments_json.get("size", "1024x1024"))
222
  if function_response["status"] == "success":
 
232
  }
233
  )
234
  elif tool_call["name"] == "python":
235
+ print("Starting sandbox...")
236
  function_response = remote_execution.remote(command = arguments_json.get("command", None), code = arguments_json["code"])
237
+ print("Code executed!")
238
+ image_history.append(
239
+ {
240
+ "tool_call_id": tool_call_id,
241
+ "role": "tool",
242
+ "name": tool_call["name"],
243
+ "content": str(function_response)
244
+ }
245
+ )
246
+ elif tool_call["name"] == "browse":
247
+ function_response = browse(arguments_json["information"])
248
  image_history.append(
249
  {
250
  "tool_call_id": tool_call_id,
browser.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import modal
2
+ import json
3
+ from openai import OpenAI
4
+ import time
5
+ from datetime import datetime
6
+ from tavily import TavilyClient
7
+ import os
8
+
9
# Module-level clients and the agent's tool schema for the browsing loop.
tavily = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))

client = OpenAI()
# Remote Modal function deployed by modal_scraper.py.
web_scraper = modal.Function.lookup("web-scraper", "extract_text")
day = datetime.now().day
month = datetime.now().strftime("%B")
year = datetime.now().year
print(f"We are the {day} of {month} {year}.")
# BUG FIX: this must be an f-string. The original plain string sent the
# literal placeholders '{day} of {month} {year}' to the model instead of
# the current date.
messages = [{"role": "system", "content": f"You are in charge of browsing the web for information. To reach your goal, you can use 2 differents functions: `google_search` and `scrape`. Always start with a google search except if you already know the URL of the information you are looking for. Next, you can use the `scrape` function to extract the text from any page given its url. You can repeat this process as many times as you want. When you found the information you were looking for, you can use the `return` function to send it back to the user. We are the {day} of {month} {year}."}]
# JSON schemas for the three actions exposed to the browsing model.
functions = [
    {
        "name": "google_search",
        "description": "Search for information on Google. Returns the URL of the 10 first results.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The query to search for."
                }
            },
            "required": [
                "query"
            ],
        },
    },
    {
        "name": "scrape",
        "description": "Extract text from a web page given its URL. This will return all the text on the page.",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL of the page to scrape."
                }
            },
            "required": [
                "url"
            ],
        },
    },
    {
        "name": "return",
        "description": "Return a report of the search to the user.",
        "parameters": {
            "type": "object",
            "properties": {
                "response": {
                    "type": "string",
                    "description": "The report to send back to the user."
                }
            },
            "required": [
                "response"
            ],
        },
    }
]
68
+
69
def google_search(query):
    """Run an advanced-depth Tavily search and return a list of
    {"url", "content"} dicts, one per search result."""
    search_results = tavily.search(query=query, search_depth="advanced")
    print(search_results)
    return [
        {"url": result["url"], "content": result["content"]}
        for result in search_results["results"]
    ]
74
+
75
def scrape(url):
    # Extract all visible text from the page at `url` via the remote Modal
    # scraper (extract_text in modal_scraper.py).
    return web_scraper.remote(url)
77
+
78
def browse(information: str):
    """Drive the browsing-agent loop until the model returns a report.

    Parameters:
        information: natural-language description of the information to find.

    Returns:
        dict with "status" ("success" or "error") and "response" keys.

    Raises:
        Exception: if the model requests a function that does not exist.
    """
    start = time.time()
    # NOTE(review): `messages` is module-level, so conversation history
    # accumulates across browse() calls — confirm this is intentional.
    messages.append({"role": "user", "content": f"Here is the information you need to find: '{information}'"})
    max_steps = 15  # guard against an endless search loop
    for _ in range(max_steps):
        print("Calling Agent Swarm...")

        action = client.chat.completions.create(
            model = "gpt-4-turbo",
            messages = messages,
            temperature = 0,
            functions = functions
        ).choices[0].message
        messages.append({"role": "assistant", "content": action.content, "function_call": {"name": action.function_call.name, "arguments": action.function_call.arguments} if action.function_call is not None else None})
        if action.content is not None and action.function_call is None:
            # Model answered with prose instead of an action: nudge it back.
            print(action)
            print("Assistant responded with content instead of a function call.")
            messages.append({"role": "user", "content": "Please use a function to continue."})
        elif action.function_call is not None:
            print(f"Function: {action.function_call}")
            function_name = action.function_call.name
            arguments = json.loads(action.function_call.arguments)
            if function_name == "return":
                # Terminal action: hand the report back to the caller.
                print("Returning information to user...")
                print(arguments)
                print(f"Time elapsed: {time.time() - start} seconds")
                return {"status": "success", "response": arguments["response"]}
            function = globals().get(function_name)
            if not callable(function):
                raise Exception(f"Function '{function_name}' not found.")
            # Call the tool directly instead of eval()-ing a string built
            # from model output — never eval untrusted input.
            function_response = function(**arguments)
            print(f"Function response: {function_response}")
            messages.append({"role": "function", "content": str(function_response), "name": function_name})
    # Loop cap reached without a `return` action from the model.
    return {"status": "error", "response": f"No answer found within {max_steps} steps."}
sandbox.py β†’ modal_sandbox.py RENAMED
File without changes
modal_scraper.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import modal
2
+ from playwright.async_api import async_playwright
3
+
4
+ playwright_image = modal.Image.debian_slim(python_version="3.10").run_commands(
5
+ "pip install playwright",
6
+ "playwright install-deps",
7
+ "playwright install"
8
+ )
9
+ app = modal.App(name="web-scraper", image=playwright_image)
10
+
11
+ @app.function()
12
+ async def extract_text(url, wait_for = None):
13
+ print('Extracting all text...', end="")
14
+ async with async_playwright() as p:
15
+ browser = await p.chromium.launch(headless=True)
16
+ page = await browser.new_page()
17
+ await page.goto(url)
18
+ if wait_for:
19
+ await page.wait_for_selector(wait_for)
20
+ text = await page.inner_text('body')
21
+ print(' Done')
22
+ return text
system_prompt.txt CHANGED
@@ -5,4 +5,10 @@ Current date: {date}
5
 
6
  Image input capabilities: Enabled
7
 
 
 
 
 
 
 
8
  Always answer in the user's language.
 
5
 
6
  Image input capabilities: Enabled
7
 
8
+ Important terms you can use:
9
+ 'The Creator': You can use it to refer to Octave
10
+ 'The Agent Swarm': You can use it to refer to the browsing AI agents that are in charge of looking for information on the web when you need it.
11
+
12
+ Sometimes, the user will upload files that you can use with the python tool. You will be provided with the relative path of these files.
13
+
14
  Always answer in the user's language.