from functools import cache
import os

import gradio as gr
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
import tiktoken
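# This script loads Alphabet's Q1 2023 MD&A from a local text file, splits it
# into token-sized chunks, and serves a Gradio app that can summarize the
# document and answer questions about it.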


@cache
def tiktoken_len_builder(model_name):
    """Build a token-counting function for model_name, caching the tokenizer."""
    tokenizer = tiktoken.encoding_for_model(model_name)

    def token_len(text):
        tokens = tokenizer.encode(text, disallowed_special=())
        return len(tokens)

    return token_len
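
# For example, tiktoken_len_builder("text-davinci-003") returns a counter
# backed by that model's encoding (p50k_base):
#   token_len = tiktoken_len_builder("text-davinci-003")
#   token_len("hello world")  # -> 2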


def split_documents(docs, length_function, chunk_size=400):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=20,
        length_function=length_function,
    )
    return text_splitter.split_documents(docs)
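
# chunk_size and chunk_overlap are measured by length_function, so with a
# tiktoken-based counter the chunks are sized in model tokens, not characters.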


def summarize_docs(llm, docs):
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(docs)
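
# chain_type="map_reduce" summarizes each chunk separately, then combines the
# partial summaries, so documents longer than the model's context window can
# still be summarized.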


class MdnaQA:
    def __init__(self, llm, docs):
        self.docs = docs
        self.chain = load_qa_chain(llm, chain_type="stuff")
        # OpenAIEmbeddings reads the API key from the OPENAI_API_KEY
        # environment variable.
        embeddings = OpenAIEmbeddings()
        self.docsearch = Chroma.from_documents(docs, embeddings)

    def ask(self, question):
        input_documents = self.docsearch.similarity_search(question)
        return self.chain.run(input_documents=input_documents, question=question)
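
# A minimal usage sketch, assuming OPENAI_API_KEY is set in the environment
# (the question below is only illustrative):
#   qa = MdnaQA(OpenAI(), docs)
#   answer = qa.ask("How did advertising revenue change?")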


filename = '2023-05-12_2023_q1_goog_mdna.txt'
loader = TextLoader(filename)
documents = loader.load()
model_name = "text-davinci-003"
tiktoken_len = tiktoken_len_builder(model_name)
docs = split_documents(documents, tiktoken_len)
tokens_sum = sum(tiktoken_len(d.page_content) for d in docs)
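
# tokens_sum estimates how many input tokens the embeddings and the
# summarization calls will consume; it is displayed in the UI below.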


title = "Alphabet's Q1 2023 10-Q MD&A"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f'# {title}')
    gr.Markdown('Video tutorial')
    gr.Markdown('Blog post accompanying the video tutorial')
    gr.Markdown("You can get an API key [from OpenAI](https://platform.openai.com/account/api-keys)")
    openai_api_key = gr.Text(
        value=os.getenv("OPENAI_API_KEY"),
        type="password",
        label="OpenAI API key",
    )
    temperature = gr.Slider(
        0, 2, value=0, step=0.1, label="Temperature",
        info="Adjusts the model's output from predictable to random",
    )
    mdna = gr.State(docs)
    tokens_total = gr.Textbox(
        label="Total input tokens",
        value=tokens_sum,
        info="How many tokens will be spent on input / embeddings",
    )
    with gr.Tabs(visible=True) as tabs:
        with gr.TabItem("Summary"):
            # Each click spends tokens on the input, the instructions,
            # and the output.
            summarize = gr.Button("Summarize MD&A", variant='primary')
            summary = gr.TextArea(label='Summary')

            def summarize_mdna(docs, api_key, temp):
                llm = OpenAI(temperature=temp, openai_api_key=api_key, model_name=model_name)
                return summarize_docs(llm, docs)

            summarize.click(
                summarize_mdna, inputs=[mdna, openai_api_key, temperature], outputs=[summary]
            )
        with gr.TabItem("QA with MD&A"):
            start_qa = gr.Button("Start QA with MD&A", variant='primary')
            chatbot = gr.Chatbot(label="QA with MD&A", visible=False)
            question = gr.Textbox(
                label="Your question", interactive=True, visible=False
            )
            qa_chat = gr.State()
            send = gr.Button("Ask question", variant='primary', visible=False)

            def start_chat(docs, api_key):
                llm = OpenAI(openai_api_key=api_key, model_name=model_name)
                qa_chat = MdnaQA(llm, docs)
                return (
                    qa_chat,
                    gr.Chatbot.update(visible=True),
                    gr.Textbox.update(visible=True),
                    gr.Button.update(visible=True),
                )

            start_qa.click(
                start_chat, [mdna, openai_api_key], [qa_chat, chatbot, question, send]
            )

            def respond(qa_chat, question, chat_history):
                answer = qa_chat.ask(question)
                chat_history.append((question, answer))
                return "", chat_history

            send.click(respond, [qa_chat, question, chatbot], [question, chatbot])
            question.submit(
                respond, [qa_chat, question, chatbot], [question, chatbot]
            )


# launch() starts a local web server; pass share=True for a temporary public link.
demo.launch()