import urllib.request

import gradio as gr
from huggingface_hub import get_token

from chatbot import get_retrieval_qa
from flagging import HuggingFaceDatasetSaver


# get the html data and save it to a file
def download_html(_url: str, _filename: str):
    html = urllib.request.urlopen(_url).read()
    with open(_filename, "wb") as f:
        f.write(html)


url = "https://sea.ai/faq"
filename = "FAQ_SEA.AI.html"
download_html(url, filename)

# load the retrieval QA model
qa = get_retrieval_qa(filename)

# dataset callback
dataset_name = "SEA-AI/seadog-chat-history"
hf_writer = HuggingFaceDatasetSaver(get_token(), dataset_name)


def answer_question(message, history, system):
    print(f"{message=}, {history=}, {system=}")

    # concatenate the message and system prompt (history is not used yet)
    query = " ".join([message, system])

    retrieval_qa = qa.invoke(query)
    result = retrieval_qa["result"]  # "query" and "source_documents" are also available
    result = result.replace('"', "").strip()  # clean up the result

    # save the query and result to the dataset
    hf_writer.flag(flag_data=[query, [dict(role="assistant", content=result)]])

    return result


title = "✨ SEA Dog"
description = """
DISCLAIMERS
I can't remember conversations yet, be patient with me.
Your queries will be saved to [this dataset](https://huggingface.co/datasets/SEA-AI/seadog-chat-history) for analytics purposes.