KingNish committed on
Commit 7ea7941
1 Parent(s): 81daf63

Update app.py: compact function metadata, inline generation kwargs, move inference clients to module scope, and add section comments

Files changed (1)
  1. app.py +54 -130
app.py CHANGED
@@ -9,39 +9,30 @@ import requests
 import random
 from gradio_client import Client, file
 
+# Define functions for image captioning, web search, and text extraction
 def generate_caption_instructblip(image_path, question):
     client = Client("hysts/image-captioning-with-blip")
     return client.predict(file(image_path), f"{question}", api_name="/caption")
 
 def extract_text_from_webpage(html_content):
-    """Extracts visible text from HTML content using BeautifulSoup."""
     soup = BeautifulSoup(html_content, 'html.parser')
-    # Remove unwanted tags
     for tag in soup(["script", "style", "header", "footer"]):
         tag.extract()
     return soup.get_text(strip=True)
 
-# Perform a Google search and return the results
 def search(query):
-    """Performs a Google search and returns the results."""
-    term=query
+    term = query
     print(f"Running web search for query: {term}")
     start = 0
     all_results = []
-    # Limit the number of characters from each webpage to stay under the token limit
-    max_chars_per_page = 8000 # Adjust this value based on your token limit and average webpage length
-
-    with requests.Session() as session:
-        resp = session.get(
+    max_chars_per_page = 8000
+    with requests.Session() as session:
+        resp = session.get(
             url="https://www.google.com/search",
-            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
-            params={
-                "q": term,
-                "num": 3,
-                "udm": 14,
-            },
-            timeout=5,
-            verify=None,
+            headers={"User-Agent": "Mozilla/5.0"},
+            params={"q": term, "num": 3, "udm": 14},
+            timeout=5,
+            verify=None,
         )
         resp.raise_for_status()
         soup = BeautifulSoup(resp.text, "html.parser")
@@ -50,10 +41,9 @@ def search(query):
             link = result.find("a", href=True)
             link = link["href"]
             try:
-                webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}, timeout=5,verify=False)
+                webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=5, verify=False)
                 webpage.raise_for_status()
                 visible_text = extract_text_from_webpage(webpage.text)
-                # Truncate text if it's too long
                 if len(visible_text) > max_chars_per_page:
                     visible_text = visible_text[:max_chars_per_page]
                 all_results.append({"link": link, "text": visible_text})
@@ -61,114 +51,43 @@ def search(query):
                 all_results.append({"link": link, "text": None})
     return all_results
 
-
+# Initialize inference clients for different models
 client = InferenceClient("google/gemma-1.1-7b-it")
+client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
 
-def respond(
-    message, history
-):
-    messages = []
-    vqa=""
+# Define the main chat function
+def respond(message, history):
+    global messages  # Make messages global for persistent storage
+    messages = []  # Initialize messages list (this gets overwritten each turn)
+    vqa = ""
+
+    # Handle image processing
     if message["files"]:
         try:
-            for image in message["files"]:
+            for image in message["files"]:
                 vqa += "[CAPTION of IMAGE] "
                 gr.Info("Analyzing image")
                 vqa += generate_caption_instructblip(image, message["text"])
                 print(vqa)
         except:
             vqa = ""
-
-
-
+
+    # Define function metadata for user interface
     functions_metadata = [
-        {
-            "type": "function",
-            "function": {
-                "name": "web_search",
-                "description": "Search query on google and find latest information.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "query": {
-                            "type": "string",
-                            "description": "web search query",
-                        }
-                    },
-                    "required": ["query"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "general_query",
-                "description": "Reply general query of USER through LLM like you, it does'nt know latest information. But very helpful in general query. Its very powerful LLM. It knows many thing just like you except latest things, or thing that you don't know.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "prompt": {
-                            "type": "string",
-                            "description": "A detailed prompt so that an LLm can understand better, what user wants.",
-                        }
-                    },
-                    "required": ["prompt"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "image_generation",
-                "description": "Generate image for user.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "query": {
-                            "type": "string",
-                            "description": "image generation prompt in detail.",
-                        },
-                        "number_of_image": {
-                            "type": "integer",
-                            "description": "number of images to generate.",
-                        }
-                    },
-                    "required": ["query"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "image_qna",
-                "description": "Answer question asked by user related to image.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "query": {
-                            "type": "string",
-                            "description": "Question by user",
-                        }
-                    },
-                    "required": ["query"],
-                },
-            },
-        }
+        {"type": "function", "function": {"name": "web_search", "description": "Search query on google", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "web search query"}}, "required": ["query"]}}},
+        {"type": "function", "function": {"name": "general_query", "description": "Reply general query of USER", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed prompt"}}, "required": ["prompt"]}}},
+        {"type": "function", "function": {"name": "image_generation", "description": "Generate image for user", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "image generation prompt"}, "number_of_image": {"type": "integer", "description": "number of images to generate"}}, "required": ["query"]}}},
+        {"type": "function", "function": {"name": "image_qna", "description": "Answer question asked by user related to image", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Question by user"}}, "required": ["query"]}}},
     ]
 
     message_text = message["text"]
-
 
-    client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
-    client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
-    generate_kwargs = dict( max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False )
 
-    for qas in history:
-        messages.append({"role": "user", "content": f"{str(qas[0])}"})
-
-    messages.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant with access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message} {vqa}'})
-
-    response = client.chat_completion( messages, max_tokens=150)
+    # Append user messages and system instructions to the messages list
+    messages.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message} {vqa}'})
+
+    # Call the LLM for response generation
+    response = client.chat_completion(messages, max_tokens=150)
     response = str(response)
     try:
         response = response[int(response.find("{")):int(response.index("</"))]
@@ -178,7 +97,8 @@ def respond(
     response = response.replace("\\'", "'")
    response = response.replace('\\"', '"')
     print(f"\n{response}")
-    # Extract JSON content from the response
+
+    # Process and return the response based on the function call
     try:
         json_data = json.loads(str(response))
         if json_data["name"] == "web_search":
@@ -192,7 +112,7 @@ def respond(
             messages += f"\n<|im_start|>user\n{str(msg[0])}<|im_end|>"
             messages += f"\n<|im_start|>assistant\n{str(msg[1])}<|im_end|>"
         messages+=f"\n<|im_start|>user\n{message_text} {vqa}<|im_end|>\n<|im_start|>web_result\n{web2}<|im_end|>\n<|im_start|>assistant\n"
-        stream = client_mixtral.text_generation(messages, **generate_kwargs)
+        stream = client_mixtral.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "<|im_end|>":
@@ -212,7 +132,7 @@ def respond(
             messages += f"\n<|start_header_id|>user\n{str(msg[0])}<|end_header_id|>"
             messages += f"\n<|start_header_id|>assistant\n{str(msg[1])}<|end_header_id|>"
         messages+=f"\n<|start_header_id|>user\n{message_text} {vqa}<|end_header_id|>\n<|start_header_id|>assistant\n"
-        stream = client_llama.text_generation(messages, **generate_kwargs)
+        stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "<|eot_id|>":
@@ -224,7 +144,7 @@ def respond(
             messages += f"\n<|start_header_id|>user\n{str(msg[0])}<|end_header_id|>"
             messages += f"\n<|start_header_id|>assistant\n{str(msg[1])}<|end_header_id|>"
         messages+=f"\n<|start_header_id|>user\n{message_text} {vqa}<|end_header_id|>\n<|start_header_id|>assistant\n"
-        stream = client_llama.text_generation(messages, **generate_kwargs)
+        stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "<|eot_id|>":
@@ -236,24 +156,28 @@ def respond(
             messages += f"\n<|start_header_id|>user\n{str(msg[0])}<|end_header_id|>"
             messages += f"\n<|start_header_id|>assistant\n{str(msg[1])}<|end_header_id|>"
         messages+=f"\n<|start_header_id|>user\n{message_text} {vqa}<|end_header_id|>\n<|start_header_id|>assistant\n"
-        stream = client_llama.text_generation(messages, **generate_kwargs)
+        stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "<|eot_id|>":
                 output += response.token.text
                 yield output
 
-demo = gr.ChatInterface(fn=respond,
-    chatbot=gr.Chatbot(show_copy_button=True, likeable=True, layout="panel"),
-    title="OpenGPT 4o mini",
-    textbox=gr.MultimodalTextbox(),
-    multimodal=True,
-    concurrency_limit=20,
-    examples=[{"text": "Hy, who are you?",},
-        {"text": "What's the current price of Bitcoin",},
-        {"text": "Create A Beautiful image of Effiel Tower at Night",},
-        {"text": "Write me a Python function to calculate the first 10 digits of the fibonacci sequence.",},
-        {"text": "What's the colour of both of Car in given image","files": ["./car1.png", "./car2.png"]},],
-    cache_examples=False)
-
+# Create the Gradio interface
+demo = gr.ChatInterface(
+    fn=respond,
+    chatbot=gr.Chatbot(show_copy_button=True, likeable=True, layout="panel"),
+    title="OpenGPT 4o mini",
+    textbox=gr.MultimodalTextbox(),
+    multimodal=True,
+    concurrency_limit=20,
+    examples=[
+        {"text": "Hy, who are you?",},
+        {"text": "What's the current price of Bitcoin",},
+        {"text": "Create A Beautiful image of Effiel Tower at Night",},
+        {"text": "Write me a Python function to calculate the first 10 digits of the fibonacci sequence.",},
+        {"text": "What's the colour of both of Car in given image", "files": ["./car1.png", "./car2.png"]},
+    ],
+    cache_examples=False,
+)
 demo.launch()
 