Shreyas94 committed
Commit ddfb119 · verified · 1 Parent(s): bf295f9

Update app.py

Files changed (1): app.py +209 -99
app.py CHANGED
@@ -1,21 +1,52 @@
  import os
  import urllib
  import requests
  from typing import List, Dict, Union
  import torch
  import gradio as gr
- from bs4 import BeautifulSoup
  from huggingface_hub import InferenceClient
- from functools import lru_cache
- import logging
-
- # Set up logging
- logging.basicConfig(level=logging.DEBUG)

- # Set device to CUDA if available, otherwise CPU
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

- # Extract text from webpage
  @lru_cache(maxsize=128)
  def extract_text_from_webpage(html_content):
      soup = BeautifulSoup(html_content, "html.parser")
@@ -24,120 +55,199 @@ def extract_text_from_webpage(html_content):
      visible_text = soup.get_text(strip=True)
      return visible_text

- # Perform a Google search and return the results
  def search(term, num_results=2, lang="en", timeout=5, safe="active", ssl_verify=None):
      escaped_term = urllib.parse.quote_plus(term)
      start = 0
      all_results = []
-     max_chars_per_page = 8000  # Limit the number of characters from each webpage

      with requests.Session() as session:
          while start < num_results:
-             try:
-                 resp = session.get(
-                     url="https://www.google.com/search",
-                     headers={"User-Agent": "Mozilla/5.0"},
-                     params={"q": term, "num": num_results - start, "hl": lang, "start": start, "safe": safe},
-                     timeout=timeout,
-                     verify=ssl_verify,
-                 )
-                 resp.raise_for_status()
-                 logging.debug(f"Raw HTML response from Google: {resp.text[:1000]}")  # Log the first 1000 characters of the HTML
-
-                 soup = BeautifulSoup(resp.text, "html.parser")
-                 result_block = soup.find_all("div", attrs={"class": "g"})
-                 if not result_block:
-                     start += 1
-                     continue
-
-                 for result in result_block:
-                     link_tag = result.find("a", href=True)
-                     if link_tag:
-                         link = link_tag["href"]
-                         try:
-                             webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0"})
-                             webpage.raise_for_status()
-                             visible_text = extract_text_from_webpage(webpage.text)
-                             if len(visible_text) > max_chars_per_page:
-                                 visible_text = visible_text[:max_chars_per_page] + "..."
-                             all_results.append({"link": link, "text": visible_text})
-                         except requests.exceptions.RequestException as e:
-                             logging.error(f"Error fetching or processing {link}: {e}")
-                             all_results.append({"link": link, "text": None})
-                     else:
-                         all_results.append({"link": None, "text": None})
-                 start += len(result_block)
-             except requests.exceptions.RequestException as e:
-                 logging.error(f"Error during search request: {e}")
-                 break
-     logging.debug(f"Web search results: {all_results}")
      return all_results

- # Format the prompt for the language model
  def format_prompt(user_prompt, chat_history):
      prompt = "<s>"
      for item in chat_history:
          if isinstance(item, tuple):
-             prompt += f"[INST] {item[0]} [/INST]"
-             prompt += f" {item[1]}</s>"
          else:
              prompt += f" [Image] "
      prompt += f"[INST] {user_prompt} [/INST]"
      return prompt

- # Model inference function
- def start_inference(prompt, enable_web_search):
-     return next(model_inference(prompt, enable_web_search))

- def model_inference(prompt, enable_web_search):
-     for response in fetch_response(prompt, enable_web_search):
-         yield response

- def fetch_response(prompt, enable_web_search):
-     if enable_web_search:
-         # Perform web search and generate text based on the retrieved results
-         web_results = search(prompt)
-         if not web_results:
-             web2 = "No results found."
-         else:
-             web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results if res['text']])
-         logging.debug(f"Formatted web search results: {web2}")

-         client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
-         generate_kwargs = dict(max_new_tokens=4000, do_sample=True)
-         formatted_prompt = format_prompt(
-             f"""You are OpenGPT 4o... [USER] {prompt} [WEB] {web2} [OpenGPT 4o]""",
-             [(prompt, web2)])
-         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-         output = ""
-         for response in stream:
-             if not response.token.text == "</s>":
-                 output += response.token.text
-             yield output
-     else:
-         # Use the microsoft/Phi-3-mini-4k-instruct model for generating text based on user prompts
-         client = InferenceClient("microsoft/Phi-3-mini-4k-instruct")
-         generate_kwargs = dict(max_new_tokens=5000, do_sample=True)
-         formatted_prompt = format_prompt(f"""You are OpenGPT 4o... [USER] {prompt} [OpenGPT 4o]""", [(prompt, )])
-         logging.debug(f"Formatted prompt without web search: {formatted_prompt}")
-
-         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-         output = ""
-         for response in stream:
-             if not response.token.text == "</s>":
-                 output += response.token.text
-             yield output

- # Create a chatbot interface with a Fetch button
- chatbot = gr.Interface(
-     fn=start_inference,
      inputs=[
-         gr.Textbox(label="User Prompt", placeholder="Enter your prompt here..."),
-         gr.Checkbox(label="Enable Web Search", value=False)
      ],
-     outputs=gr.Textbox(label="Response", placeholder="Responses will appear here..."),
-     live=True
  )

- # Launch the Gradio interface
- chatbot.launch()
 
+ # Import necessary libraries
  import os
+ import time
+ import copy
  import urllib
  import requests
+ import random
+ from threading import Thread
  from typing import List, Dict, Union
+ from functools import lru_cache
+ from bs4 import BeautifulSoup
  import torch
  import gradio as gr
+ from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
  from huggingface_hub import InferenceClient

+ # Define device and load model and tokenizer
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+ # Phi-3 is a decoder-only model, so it must be loaded as a causal LM
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ # Set system prompt
+ SYSTEM_PROMPT = [
+     {
+         "role": "system",
+         "content": [
+             {
+                 "type": "text",
+                 "text": """You are OpenGPT 4o, an exceptionally capable and versatile AI assistant. Designed to assist human users through insightful conversations, your key attributes include intelligence and knowledge, image generation and perception, and providing reliable information. Always ensure a seamless and enjoyable experience for the user.""",
+             },
+         ],
+     },
+     {
+         "role": "assistant",
+         "content": [
+             {
+                 "type": "text",
+                 "text": "Hello, I'm OpenGPT 4o. How can I help you today?",
+             },
+         ],
+     }
+ ]

+ # Function to check if a turn in the chat history only contains media
+ def turn_is_pure_media(turn):
+     return turn[1] is None
+
+ # Function to extract visible text from HTML content
  @lru_cache(maxsize=128)
  def extract_text_from_webpage(html_content):
      soup = BeautifulSoup(html_content, "html.parser")
      visible_text = soup.get_text(strip=True)
      return visible_text

+ # Function to perform a Google search and return the results
  def search(term, num_results=2, lang="en", timeout=5, safe="active", ssl_verify=None):
      escaped_term = urllib.parse.quote_plus(term)
      start = 0
      all_results = []
+     max_chars_per_page = 8000

      with requests.Session() as session:
          while start < num_results:
+             resp = session.get(
+                 url="https://www.google.com/search",
+                 headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
+                 params={
+                     "q": term,
+                     "num": num_results - start,
+                     "hl": lang,
+                     "start": start,
+                     "safe": safe,
+                 },
+                 timeout=timeout,
+                 verify=ssl_verify,
+             )
+             resp.raise_for_status()
+             soup = BeautifulSoup(resp.text, "html.parser")
+             result_block = soup.find_all("div", attrs={"class": "g"})
+             if not result_block:
+                 start += 1
+                 continue
+             for result in result_block:
+                 link = result.find("a", href=True)
+                 if link:
+                     link = link["href"]
+                     try:
+                         webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+                         webpage.raise_for_status()
+                         visible_text = extract_text_from_webpage(webpage.text)
+                         if len(visible_text) > max_chars_per_page:
+                             visible_text = visible_text[:max_chars_per_page] + "..."
+                         all_results.append({"link": link, "text": visible_text})
+                     except requests.exceptions.RequestException as e:
+                         print(f"Error fetching or processing {link}: {e}")
+                         all_results.append({"link": link, "text": None})
+                 else:
+                     all_results.append({"link": None, "text": None})
+             start += len(result_block)
      return all_results

+ # Function to format the prompt for the language model
  def format_prompt(user_prompt, chat_history):
      prompt = "<s>"
      for item in chat_history:
          if isinstance(item, tuple):
+             prompt += f"[INST] {item[0]} [/INST] {item[1]}</s>"
          else:
              prompt += f" [Image] "
      prompt += f"[INST] {user_prompt} [/INST]"
      return prompt

+ # Function for model inference
+ def model_inference(
+     user_prompt,
+     chat_history,
+     web_search,
+     decoding_strategy,
+     temperature,
+     max_new_tokens,
+     repetition_penalty,
+     top_p,
+ ):
+     if not user_prompt["files"]:
+         if web_search:
+             web_results = search(user_prompt["text"])
+             web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
+             formatted_prompt = format_prompt(f"{user_prompt['text']} [WEB] {web2}", chat_history)
+             inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
+             outputs = model.generate(
+                 **inputs,
+                 max_new_tokens=max_new_tokens,
+                 repetition_penalty=repetition_penalty,
+                 do_sample=True,
+                 temperature=temperature,
+                 top_p=top_p
+             )
+             response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+             return response
+         else:
+             formatted_prompt = format_prompt(user_prompt["text"], chat_history)
+             inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
+             outputs = model.generate(
+                 **inputs,
+                 max_new_tokens=max_new_tokens,
+                 repetition_penalty=repetition_penalty,
+                 do_sample=True,
+                 temperature=temperature,
+                 top_p=top_p
+             )
+             response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+             return response
+     else:
+         return "Image input not supported in this implementation."

+ # Define Gradio interface components
+ max_new_tokens = gr.Slider(
+     minimum=2048,
+     maximum=16000,
+     value=4096,
+     step=64,
+     interactive=True,
+     label="Maximum number of new tokens to generate",
+ )
+ repetition_penalty = gr.Slider(
+     minimum=0.01,
+     maximum=5.0,
+     value=1,
+     step=0.01,
+     interactive=True,
+     label="Repetition penalty",
+     info="1.0 is equivalent to no penalty",
+ )
+ decoding_strategy = gr.Radio(
+     [
+         "Greedy",
+         "Top P Sampling",
+     ],
+     value="Top P Sampling",
+     label="Decoding strategy",
+     interactive=True,
+     info="Greedy picks the most likely token at each step; Top P samples from the nucleus.",
+ )
+ temperature = gr.Slider(
+     minimum=0.0,
+     maximum=2.0,
+     value=0.5,
+     step=0.05,
+     visible=True,
+     interactive=True,
+     label="Sampling temperature",
+     info="Higher values will produce more diverse outputs.",
+ )
+ top_p = gr.Slider(
+     minimum=0.01,
+     maximum=0.99,
+     value=0.9,
+     step=0.01,
+     visible=True,
+     interactive=True,
+     label="Top P",
+     info="Higher values are equivalent to sampling more low-probability tokens.",
+ )

+ # Create a chatbot interface
+ chatbot = gr.Chatbot(
+     label="OpenGPT-4o-Chatty",
+     show_copy_button=True,
+     likeable=True,
+     layout="panel"
+ )

+ # Define Gradio interface
+ def chat_interface(user_input, history, web_search, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p):
+     # model_inference expects a dict with "text" and "files" keys; the
+     # Textbox delivers a plain string, so wrap it here
+     response = model_inference(
+         {"text": user_input, "files": []},
+         history,
+         web_search,
+         decoding_strategy,
+         temperature,
+         max_new_tokens,
+         repetition_penalty,
+         top_p,
+     )
+     history.append((user_input, response))
+     return history, history

+ # Create Gradio interface
+ interface = gr.Interface(
+     fn=chat_interface,
      inputs=[
+         gr.Textbox(label="User Input"),
+         gr.State([]),
+         gr.Checkbox(label="Web Search", value=True),
+         decoding_strategy,
+         temperature,
+         max_new_tokens,
+         repetition_penalty,
+         top_p
+     ],
+     outputs=[
+         chatbot,
+         gr.State([])
      ],
+     title="OpenGPT-4o-Chatty",
+     description="An AI assistant capable of insightful conversations and web search."
  )

+ if __name__ == "__main__":
+     interface.launch()
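
Two loose ends in the new code are worth noting: TextIteratorStreamer and Thread are imported but never used, and the decoding_strategy choice is collected by the interface while model_inference always samples. Below is a minimal sketch of how both could be wired in, assuming the model, tokenizer, and DEVICE globals defined above; stream_generate is a hypothetical helper, not part of this commit.

# Sketch only (not part of this commit): hook up the unused
# TextIteratorStreamer/Thread imports and the decoding_strategy choice.
def stream_generate(formatted_prompt, decoding_strategy, temperature, top_p, max_new_tokens):
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
    # The streamer yields decoded text chunks as generate() produces tokens
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    if decoding_strategy == "Top P Sampling":
        generate_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:  # "Greedy"
        generate_kwargs.update(do_sample=False)
    # Run generation in a background thread so tokens can be consumed live
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    output = ""
    for chunk in streamer:
        output += chunk
        yield output

Yielding the accumulated output keeps the behavior consistent with the incremental updates the old fetch_response generator produced.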