File size: 5,081 Bytes
e436366 b1191e1 e436366 b1191e1 e436366 e92c8bf e436366 1cefb16 e436366 d28a1cf e436366 f27ab79 e436366 d28a1cf f27ab79 96a6b1e d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 a98467a e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 d28a1cf e436366 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
from functools import cache
import os
import gradio as gr
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
import tiktoken
@cache
def tiktoken_len_builder(model_name):
    """Build a token-counting function for the given OpenAI model name.

    The encoding is looked up once with ``tiktoken.encoding_for_model`` and
    captured by the returned closure. ``functools.cache`` memoises the result,
    so repeated calls with the same ``model_name`` return the same function.

    Args:
        model_name: Model identifier passed to ``tiktoken.encoding_for_model``.

    Returns:
        A callable ``token_len(text)`` returning the number of tokens in
        ``text``.
    """
    encoding = tiktoken.encoding_for_model(model_name)

    def token_len(text):
        """Return how many tokens ``text`` encodes to."""
        # An empty ``disallowed_special`` turns off tiktoken's special-token
        # check, so such text is counted as ordinary text.
        return len(encoding.encode(text, disallowed_special=()))

    return token_len
def split_documents(docs, length_function, chunk_size=400, chunk_overlap=20):
    """Split documents into overlapping chunks.

    Args:
        docs: Documents to split; passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        length_function: Callable used by the splitter to measure a piece of
            text (for example a token counter).
        chunk_size: Maximum chunk size, in ``length_function`` units.
        chunk_overlap: Overlap between consecutive chunks, in
            ``length_function`` units. Defaults to 20, the value that was
            previously hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``docs``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=length_function,
    )
    return text_splitter.split_documents(docs)
def summarize_docs(llm, docs):
    """Summarize ``docs`` with a "map_reduce" summarize chain built on ``llm``.

    Args:
        llm: Language model handed to ``load_summarize_chain``.
        docs: Documents passed to the chain's ``run`` method.

    Returns:
        The result of running the chain on ``docs``.
    """
    return load_summarize_chain(llm, chain_type="map_reduce").run(docs)
class MdnaQA:
    """Question answering over a set of document chunks.

    On construction the chunks are embedded with ``OpenAIEmbeddings`` and
    stored in a Chroma vector store. ``ask`` retrieves the chunks most similar
    to the question and runs a "stuff" QA chain over them.
    """

    def __init__(self, llm, docs):
        """Build the QA chain and the vector index.

        Args:
            llm: Language model for the QA chain. Its ``openai_api_key``
                attribute is reused for the embeddings client.
            docs: Document chunks to index.
        """
        self.docs = docs
        self.chain = load_qa_chain(llm, chain_type="stuff")
        embeddings = OpenAIEmbeddings(openai_api_key=llm.openai_api_key)
        self.docsearch = Chroma.from_documents(docs, embeddings)

    def ask(self, question, k=4):
        """Answer ``question`` from the indexed chunks.

        Args:
            question: The question text.
            k: Number of similar chunks to retrieve and pass to the chain.
                Defaults to 4, which is LangChain's own default for
                ``similarity_search``, so existing callers are unaffected.

        Returns:
            The result of running the QA chain on the retrieved chunks.
        """
        input_documents = self.docsearch.similarity_search(question, k=k)
        return self.chain.run(input_documents=input_documents, question=question)
# Static page content.
title = "Alphabet's Q1 2023 10-Q MD&A"
video = (
    '<iframe width="560" height="315" '
    'src="https://www.youtube.com/embed/LuXtsWQfmFg" '
    'title="YouTube video player" frameborder="0" '
    'allow="accelerometer; autoplay; clipboard-write; encrypted-media; '
    'gyroscope; picture-in-picture; web-share" '
    "allowfullscreen></iframe>"
)

# Load the MD&A text once at import time.
filename = "2023-05-12_2023_q1_goog_mdna.txt"
loader = TextLoader(filename)
documents = loader.load()

# Split the text into chunks measured in tokens of the model's tokenizer, and
# total the tokens across all chunks for display in the UI.
model_name = "text-davinci-003"
tiktoken_len = tiktoken_len_builder(model_name)
docs = split_documents(documents, tiktoken_len)
tokens_sum = sum(map(tiktoken_len, (chunk.page_content for chunk in docs)))
# Gradio UI: shared settings (API key, temperature) followed by two tabs, one
# for summarizing the MD&A and one for asking questions about it.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.HTML(video)
    gr.Markdown("Blog post https://blog.experienced.dev/earnings-report-insights-programmer-decodes-alphabets-q1-2023-10-q-form/")
    gr.Markdown(
        "You can get an API key [from OpenAI](https://platform.openai.com/account/api-keys)"
    )
    openai_api_key = gr.Text(
        value=os.getenv("OPENAI_API_KEY"),
        type="password",
        label="OpenAI API key",
    )
    temperature = gr.Slider(
        0,
        2,
        value=0,
        step=0.1,
        label="Temperature",
        info="adjusts a model's output from predictable to random",
    )
    # The pre-split chunks are passed to the callbacks through a State.
    mdna = gr.State(docs)
    tokens_total = gr.Textbox(
        label="Total input tokens",
        value=tokens_sum,
        info="how many tokens will be spent on input / embeddings",
    )
    with gr.Tabs(visible=True) as tabs:
        with gr.TabItem("Summary"):
            summarize = gr.Button(
                "Summarize MD&A",
                variant="primary",
                info="On click you spent tokens on input, instructions and output",
            )
            summary = gr.TextArea(label="Summary")

            def summarize_mdna(docs, api_key, temp):
                """Summarize the chunks with an LLM built from the UI settings."""
                llm = OpenAI(temperature=temp, openai_api_key=api_key)
                return summarize_docs(llm, docs)

            summarize.click(
                summarize_mdna,
                inputs=[mdna, openai_api_key, temperature],
                outputs=[summary],
            )
        with gr.TabItem("QA with MD&A"):
            start_qa = gr.Button("Start QA with MD&A", variant="primary")
            # The chat widgets stay hidden until start_chat has built the index.
            chatbot = gr.Chatbot(label="QA with MD&A", visible=False)
            question = gr.Textbox(
                label="Your question", interactive=True, visible=False
            )
            qa_chat = gr.State()
            send = gr.Button("Ask question", variant="primary", visible=False)

            def start_chat(docs, api_key, temp):
                """Build the QA session and reveal the chat widgets.

                Returns one value per output of ``start_qa.click``: the new
                ``MdnaQA`` object, then a visibility update for the chatbot,
                the question box and the send button.
                """
                llm = OpenAI(temperature=temp, openai_api_key=api_key)
                qa_session = MdnaQA(llm, docs)
                return (
                    qa_session,
                    # Use each component's own updater; the chatbot was
                    # previously updated through gr.Textbox.update.
                    gr.Chatbot.update(visible=True),
                    gr.Textbox.update(visible=True),
                    gr.Button.update(visible=True),
                )

            start_qa.click(
                start_chat,
                [mdna, openai_api_key, temperature],
                [qa_chat, chatbot, question, send],
            )

            def respond(qa_chat, question, chat_history):
                """Answer ``question`` and append the exchange to the history.

                Returns an empty string (clears the question box) and the
                updated chat history.
                """
                # Tolerate a missing history instead of failing on None.append.
                chat_history = chat_history or []
                # Skip blank input so no LLM/embedding call is made for it.
                if not question or not question.strip():
                    return "", chat_history
                answer = qa_chat.ask(question)
                chat_history.append((question, answer))
                return "", chat_history

            # Clicking the button and pressing Enter in the box do the same.
            send.click(respond, [qa_chat, question, chatbot], [question, chatbot])
            question.submit(respond, [qa_chat, question, chatbot], [question, chatbot])

demo.launch()
|