|
import gradio as gr |
|
import urllib.request |
|
import requests |
|
import bs4 |
|
import lxml |
|
import os |
|
|
|
from huggingface_hub import InferenceClient,HfApi |
|
import random |
|
import json |
|
import datetime |
|
|
|
from prompts import ( |
|
FINDER, |
|
COMPRESS_HISTORY_PROMPT, |
|
COMPRESS_DATA_PROMPT, |
|
COMPRESS_DATA_PROMPT_SMALL, |
|
LOG_PROMPT, |
|
LOG_RESPONSE, |
|
PREFIX, |
|
TASK_PROMPT, |
|
) |
|
# Hugging Face Hub API handle, created once at import time.
api = HfApi()

# Shared inference client bound to the Mixtral instruct model; run_gpt
# streams every completion through this object.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
|
|
def parse_action(string: str):
    """Split an agent ``action:`` line into a tool name and its optional input.

    Expected shapes are ``action: NAME`` and
    ``action: NAME action_input=VALUE``. Whitespace around the name and the
    value is ignored, and one layer of surrounding single/double quotes is
    removed from the value.

    Args:
        string: A single line that starts with ``action:``.

    Returns:
        A ``(action_name, action_input)`` tuple. ``action_input`` is ``None``
        when the line carries no ``action_input=`` marker.

    Raises:
        ValueError: If ``string`` does not start with ``action:``.
    """
    prefix = "action:"
    marker = "action_input="

    print("PARSING:")
    print(string)

    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if not string.startswith(prefix):
        raise ValueError(
            f"expected a line starting with {prefix!r}, got {string!r}"
        )

    # Partitioning on the marker avoids the fixed-offset slicing that left
    # stray spaces on the tool name when the spacing was not exactly one blank.
    head, found, tail = string[len(prefix):].partition(marker)
    action_name = head.strip()

    if not found:
        print(f"parsed action: {action_name!r} (no action_input)")
        return action_name, None

    # Trim whitespace first so quotes followed by trailing blanks are removed.
    action_input = tail.strip().strip("'\"")
    print(f"parsed action: {action_name!r} input: {action_input!r}")
    return action_name, action_input
|
|
|
|
|
|
|
# When true, run_gpt prints every prompt/response and run_action announces
# history compression.
VERBOSE = True

# Threshold shared by run_action (number of history lines before compression)
# and find_all (number of delimiter characters before a scrape is compressed).
MAX_HISTORY = 100

# Size limit used by compress_data when it cuts scraped text into pieces.
MAX_DATA = 20_000
|
|
|
def format_prompt(message, history):
    """Render a chat transcript plus a new message as a Mixtral-style prompt.

    Args:
        message: The new user message, placed in the final ``[INST]`` block.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        ``<s>`` followed by one ``[INST] user [/INST] bot</s> `` segment per
        history pair and a closing ``[INST] message [/INST]`` segment.
    """
    segments = ["<s>"]
    segments.extend(
        f"[INST] {asked} [/INST] {answered}</s> "
        for asked, answered in history
    )
    segments.append(f"[INST] {message} [/INST]")
    return "".join(segments)
|
|
|
|
|
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Fill a prompt template, stream a completion, and return its text.

    The prompt is ``PREFIX`` (formatted with the current timestamp and
    ``purpose``) followed by ``prompt_template`` formatted with
    ``prompt_kwargs``.

    Args:
        prompt_template: ``str.format`` template for the task-specific prompt.
        stop_tokens: Accepted for interface compatibility.
            NOTE(review): this value is never forwarded to the client, so it
            currently has no effect on generation.
        max_tokens: Passed to the client as ``max_new_tokens``.
        seed: Sampling seed passed to the client (also printed).
        purpose: Inserted into ``PREFIX``.
        **prompt_kwargs: Fields substituted into ``prompt_template``.

    Returns:
        The concatenated text of every streamed token.
    """
    now = datetime.datetime.now()
    print(seed)

    sampling = {
        "temperature": 0.9,
        "max_new_tokens": max_tokens,
        "top_p": 0.95,
        "repetition_penalty": 1.0,
        "do_sample": True,
        "seed": seed,
    }

    header = PREFIX.format(timestamp=now, purpose=purpose)
    content = header + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    token_stream = client.text_generation(
        content,
        **sampling,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Each streamed item exposes its text at ``.token.text``.
    resp = "".join(piece.token.text for piece in token_stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
|
|
|
def compress_data(c, purpose, task, history, result):
    """Condense a large scraped text by feeding it to the model piece by piece.

    ``history`` is cut into consecutive ``MAX_DATA``-character slices. Each
    slice is sent through ``COMPRESS_DATA_PROMPT`` together with the summary
    produced for the previous slice (as ``knowledge``), so the final response
    is a running digest of the whole input.

    The number of passes is derived from ``len(history)``. It used to be
    derived from ``c`` (a delimiter count) while slicing by characters, which
    silently dropped everything past the first few slices; the old float
    arithmetic also divided by zero when ``c`` was 0.

    Args:
        c: Delimiter count computed by the caller; only logged here.
        purpose: Overall user purpose, forwarded to the prompt.
        task: Current task; wrapped in a "Compile this data..." instruction.
        history: The text to compress (sliced by character index).
        result: Unused; kept so the signature matches the caller.

    Returns:
        ``"result: <final model response>\n"``.
    """
    # One seed for every pass of this compression run.
    seed = random.randint(1, 1000000000)
    print(c)

    chunk = MAX_DATA
    # Always make at least one pass, as before, even for empty input.
    starts = list(range(0, len(history), chunk)) or [0]
    print(f'chunk:: {chunk}')
    print(f'passes:: {len(starts)}')

    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'

    knowledge = ""
    for s in starts:
        e = s + chunk
        print(f's:e :: {s}:{e}')
        # The previous summary is carried forward so later slices refine it.
        knowledge = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=knowledge,
            history=history[s:e],
        ).strip('\n')
        print(knowledge)

    print("final" + knowledge)
    return "result: {}\n".format(knowledge)
|
|
|
|
|
|
|
|
|
def compress_history(purpose, task, history):
    """Replace a long agent history with a single model-written observation.

    Args:
        purpose: Overall user purpose, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: The accumulated history text to summarise.

    Returns:
        ``"observation: <model response>\n"``.
    """
    summary = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return "observation: {}\n".format(summary)
|
|
|
|
|
def call_main(purpose, task, history, action_input, result):
    """Ask the model for its next step and pick out the first action it names.

    The model response is scanned line by line: ``thought: `` lines are
    appended to ``history``; the first ``action: `` line is parsed with
    ``parse_action`` and returned immediately.

    Args:
        purpose: Overall user purpose, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: Accumulated history text.
        action_input: Ignored on entry; overwritten when an action is parsed.
        result: Passed through unchanged.

    Returns:
        ``(action_name, action_input, history, task, result)``. When the
        response contains no action line, ``action_name`` is ``"MAIN"`` and
        ``action_input`` is ``None`` so the caller asks the model again.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:"],
        max_tokens=2048,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )

    for line in resp.strip().strip("\n").split("\n"):
        if line.startswith("thought: "):
            history += "{}\n".format(line)
        elif line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            return action_name, action_input, history, task, result

    # Was ``if "VERBOSE":`` - a non-empty string literal, hence always true.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task, result
|
|
|
|
|
def call_set_task(purpose, task, history, action_input, result):
    """Have the model rewrite the current task and record the change.

    Args:
        purpose: Overall user purpose, forwarded to the prompt.
        task: Current task, forwarded to the prompt and then replaced.
        history: Accumulated history text.
        action_input: Unused.
        result: Passed through unchanged.

    Returns:
        ``("MAIN", None, history, task, result)`` where ``task`` is the model
        response (newlines trimmed from both ends) and ``history`` has an
        observation about the update appended.
    """
    generated = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    task = generated.strip("\n")
    history += "observation: task has been updated to: {}\n".format(task)
    return "MAIN", None, history, task, result
|
|
|
|
|
|
|
|
|
def search_all(url):
    """Placeholder: ignores ``url`` and always returns an empty string."""
    return ""
|
|
|
|
|
|
|
def find_all(purpose, task, history, url, result):
    """Fetch ``url``, extract its text and links, and record them as an observation.

    On HTTP 200 the page text plus one entry per ``<a>`` tag (title, href,
    string) is stringified. If that string contains more than ``MAX_HISTORY``
    delimiter characters it is condensed with ``compress_data`` and the
    condensed text is also appended to ``result``; otherwise the raw string is
    used and ``result`` is left untouched. Every failure mode is reported to
    the model as an observation instead of raising.

    Args:
        purpose: Overall user purpose, forwarded to ``compress_data``.
        task: Current task; replaced with "compile report or complete?" after
            a successful scrape.
        history: Accumulated history text.
        url: Address to fetch; empty or ``None`` yields a syntax hint.
        result: Accumulated result text.

    Returns:
        ``("MAIN", None, history, task, result)`` in every case.
    """
    print(url)
    print(f"trying URL:: {url}")

    if not url:
        history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task, result

    try:
        # A timeout keeps an unresponsive host from stalling the agent loop
        # indefinitely; a timeout error lands in the handler below.
        source = requests.get(url, timeout=30)
        if source.status_code != 200:
            history += f"observation: That URL string returned an error: {source.status_code}, I should try a different URL string\n"
            return "MAIN", None, history, task, result

        soup = bs4.BeautifulSoup(source.content, 'lxml')
        out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
        out.extend(
            [{"LINK TITLE": link.get('title'), "URL": link.get('href'), "STRING": link.string}]
            for link in soup.find_all("a")
        )
        out = str(out)
        print(f'rl:: {len(out)}')

        # Rough size estimate: number of separator characters in the text.
        c = sum(out.count(ch) for ch in (" ", ",", "\n", "/", ".", "<"))
        print(f'c:: {c}')

        if c > MAX_HISTORY:
            print("compressing...")
            rawp = compress_data(c, purpose, task, out, result)
            result += rawp
        else:
            rawp = out

        history += "observation: the search results are:\n {}\n".format(rawp)
        task = "compile report or complete?"
    except Exception as e:
        # Deliberate best-effort boundary: any fetch/parse/compress failure is
        # turned into a hint for the model rather than crashing the loop.
        print(e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"

    return "MAIN", None, history, task, result
|
|
|
|
|
|
|
|
|
|
|
# Dispatch table used by run_action: tool name emitted by the model -> handler.
# Every handler takes (purpose, task, history, action_input, result).
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    # Both tool names are served by the same scraper.
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}
|
|
|
|
|
def run_action(purpose, task, history, action_name, action_input, result):
    """Execute one agent step by dispatching ``action_name`` to its handler.

    Args:
        purpose: Overall user purpose.
        task: Current task.
        history: Accumulated history text.
        action_name: Tool name chosen by the model.
        action_input: Argument for the tool, if any.
        result: Accumulated result text.

    Returns:
        ``(next_action_name, next_action_input, history, task, result)``.
        Any name containing "COMPLETE" ends the run; an unknown name adds an
        error observation and hands control back to "MAIN".
    """
    if "COMPLETE" in action_name:
        print("Complete - Exiting")
        return "COMPLETE", None, history, task, result

    # Number of lines == number of newlines + 1.
    if history.count("\n") >= MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    handler = NAME_TO_FUNC.get(action_name)
    if handler is None:
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task, result

    print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
    return handler(purpose, task, history, action_input, result)
|
|
|
def run(purpose, history):
    """Drive the agent loop for the chat UI, yielding progress as it goes.

    Args:
        purpose: The user's message; used as the agent's purpose.
        history: Chat history supplied by the UI; stringified when non-empty,
            otherwise replaced by an empty string.

    Yields:
        "Searching..." first, then after every step either the accumulated
        result or "More Searching..." while the result is still empty.

    Returns:
        The final accumulated result once a step reports "COMPLETE".
    """
    yield "Searching..."

    history = str(history) if history else ""
    task = None
    result = ""
    action_name, action_input = "MAIN", None

    # The initial name is "MAIN", so the body always runs at least once.
    while action_name != "COMPLETE":
        print("")
        print("")
        print("---")
        print("task:", task)
        print("---")
        print("---")

        action_name, action_input, history, task, result = run_action(
            purpose, task, history, action_name, action_input, result
        )

        yield result if result else "More Searching..."

    return result
|
|
|
|
|
|
|
|
|
# Sample prompts passed to the chat interface as its ``examples``.
examples = [
    "What is the current weather in Florida?",
    "Find breaking news about Texas",
    "Find the best deals on flippers for scuba diving",
    "Teach me to fly a helicopter",
]
|
|
|
|
|
# Build the chat UI around the ``run`` generator and start serving it.
chat_window = gr.Chatbot(
    show_label=False,
    show_share_button=True,
    show_copy_button=True,
    likeable=True,
    layout="panel",
    height="800px",
)
demo = gr.ChatInterface(
    fn=run,
    chatbot=chat_window,
    title="Mixtral 46.7B Powered <br> Search",
    examples=examples,
    concurrency_limit=20,
)
demo.launch()
|
''' |
|
with gr.Blocks() as app: |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
inp = gr.Textbox() |
|
with gr.Column(scale=2): |
|
q = gr.Textbox(value="p") |
|
with gr.Column(scale=2): |
|
num = gr.Textbox() |
|
with gr.Row(): |
|
all_btn = gr.Button("Load") |
|
find_btn = gr.Button("Find") |
|
with gr.Row(): |
|
rawp = gr.JSON() |
|
outp = gr.JSON() |
|
outl = gr.Textbox() |
|
|
|
all_btn.click(find_all,[inp,q,num],[rawp]) |
|
find_btn.click(find_it,[inp,q,num],[outp,outl]) |
|
|
|
app.launch() |
|
''' |