File size: 5,081 Bytes
e436366
 
b1191e1
e436366
 
 
 
 
 
 
 
b1191e1
e436366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e92c8bf
e436366
 
 
 
1cefb16
e436366
 
 
 
 
 
 
d28a1cf
e436366
 
 
 
 
 
 
 
f27ab79
e436366
 
d28a1cf
f27ab79
96a6b1e
d28a1cf
 
 
e436366
 
 
 
 
d28a1cf
 
 
 
 
 
 
 
e436366
d28a1cf
 
 
 
 
e436366
 
d28a1cf
 
 
 
 
 
e436366
 
 
a98467a
 
e436366
d28a1cf
 
 
 
 
e436366
d28a1cf
e436366
 
 
 
 
d28a1cf
e436366
d28a1cf
 
 
e436366
 
 
 
d28a1cf
e436366
 
 
d28a1cf
 
 
e436366
 
 
 
 
 
 
 
d28a1cf
e436366
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from functools import cache
import os
import gradio as gr
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
import tiktoken


@cache
def tiktoken_len_builder(model_name):
    """Return a token-counting callable for *model_name*.

    The tokenizer lookup is memoized via @cache, so each model name
    resolves its encoding exactly once per process.
    """
    encoding = tiktoken.encoding_for_model(model_name)

    def _count_tokens(text):
        # disallowed_special=() treats special-token text as plain text
        # instead of raising, so arbitrary document content is countable
        return len(encoding.encode(text, disallowed_special=()))

    return _count_tokens


def split_documents(docs, length_function, chunk_size=400):
    """Split *docs* into chunks of at most *chunk_size* tokens.

    *length_function* measures chunk length (token count here) and a fixed
    overlap of 20 keeps neighbouring chunks contextually connected.
    """
    splitter_config = {
        "chunk_size": chunk_size,
        "chunk_overlap": 20,
        "length_function": length_function,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_config)
    return splitter.split_documents(docs)


def summarize_docs(llm, docs):
    """Summarize *docs* with a map_reduce chain (per-chunk summaries, then a combined one)."""
    return load_summarize_chain(llm, chain_type="map_reduce").run(docs)


class MdnaQA:
    """Question answering over MD&A chunks via embedding search plus a "stuff" QA chain."""

    def __init__(self, llm, docs):
        # Keep the raw chunks around and index them in an in-memory Chroma store
        self.docs = docs
        self.chain = load_qa_chain(llm, chain_type="stuff")
        self.docsearch = Chroma.from_documents(
            docs, OpenAIEmbeddings(openai_api_key=llm.openai_api_key)
        )

    def ask(self, question):
        """Answer *question* using the chunks most similar to it as context."""
        relevant_docs = self.docsearch.similarity_search(question)
        return self.chain.run(input_documents=relevant_docs, question=question)


# --- One-time setup at import: load, split and measure the MD&A document ---
filename = "2023-05-12_2023_q1_goog_mdna.txt"
loader = TextLoader(filename)  # expects the file in the working directory
documents = loader.load()
model_name = "text-davinci-003"
tiktoken_len = tiktoken_len_builder(model_name)  # token counter matched to the model
docs = split_documents(documents, tiktoken_len)
# Total tokens across all chunks — surfaced in the UI as a cost estimate
tokens_sum = sum(tiktoken_len(d.page_content) for d in docs)

title = "Alphabet's Q1 2023 10-Q MD&A"
video = '<iframe width="560" height="315" src="https://www.youtube.com/embed/LuXtsWQfmFg" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'

# Build the Gradio UI; all components below are registered inside this context.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.HTML(video)  # embedded walkthrough video
    gr.Markdown("Blog post https://blog.experienced.dev/earnings-report-insights-programmer-decodes-alphabets-q1-2023-10-q-form/")
    gr.Markdown(
        "You can get an API key [from OpenAI](https://platform.openai.com/account/api-keys)"
    )
    # API key is prefilled from the environment when available, masked in the UI
    openai_api_key = gr.Text(
        value=os.getenv("OPENAI_API_KEY"),
        type="password",
        label="OpenAI API key",
    )
    temperature = gr.Slider(
        0,
        2,
        value=0,
        step=0.1,
        label="Temperature",
        info="adjusts a model's output from predictable to random",
    )
    # Pre-split document chunks shared by both tabs via per-session state
    mdna = gr.State(docs)
    tokens_total = gr.Textbox(
        label="Total input tokens",
        value=tokens_sum,
        info="how many tokens will be spent on input / embeddings",
    )
    with gr.Tabs(visible=True) as tabs:
        with gr.TabItem("Summary"):
            summarize = gr.Button(
                "Summarize MD&A",
                variant="primary",
                # NOTE(review): `info` is documented for Slider/Textbox but it is
                # unclear gr.Button supports it — confirm it renders (or is
                # silently ignored) in the target gradio version
                info="On click you spent tokens on input, instructions and output",
            )
            summary = gr.TextArea(label="Summary")

            def summarize_mdna(docs, api_key, temp):
                """Summarize the MD&A chunks with a fresh LLM built from the UI inputs."""
                client = OpenAI(temperature=temp, openai_api_key=api_key)
                return summarize_docs(client, docs)

            # Wire the summary button to the LLM call; result lands in `summary`
            summarize.click(
                summarize_mdna,
                inputs=[mdna, openai_api_key, temperature],
                outputs=[summary],
            )
        with gr.TabItem("QA with MD&A"):
            start_qa = gr.Button("Start QA with MD&A", variant="primary")
            # Chat widgets start hidden; start_chat() reveals them once the
            # embedding index has been built
            chatbot = gr.Chatbot(label="QA with MD&A", visible=False)
            question = gr.Textbox(
                label="Your question", interactive=True, visible=False
            )
            qa_chat = gr.State()  # holds the per-session MdnaQA object
            send = gr.Button("Ask question", variant="primary", visible=False)
            def start_chat(docs, api_key, temp):
                """Build the QA session and reveal the hidden chat widgets.

                Returns, in output order: the MdnaQA session for the qa_chat
                state, then visibility updates for the chatbot, question box
                and send button.
                """
                llm = OpenAI(temperature=temp, openai_api_key=api_key)
                # renamed from `qa_chat` to avoid shadowing the gr.State
                # component of the same name defined in the enclosing scope
                session = MdnaQA(llm, docs)
                return (
                    session,
                    # use each target component's own update so every output
                    # slot is explicit (the chatbot slot previously reused
                    # gr.Textbox.update, which only worked because gradio-3
                    # update dicts are generic)
                    gr.Chatbot.update(visible=True),
                    gr.Textbox.update(visible=True),
                    gr.Button.update(visible=True),
                )

            # Clicking "Start QA" builds the vector index and unhides the chat UI
            start_qa.click(
                start_chat,
                [mdna, openai_api_key, temperature],
                [qa_chat, chatbot, question, send],
            )

            def respond(qa_chat, question, chat_history):
                """Answer *question* and append the (question, answer) pair to the chat.

                Returns ("", updated_history): the empty string clears the
                question textbox, the history re-renders the chatbot.
                """
                # Guard: a Chatbot with no initial value can deliver None here,
                # which previously raised AttributeError on .append
                chat_history = chat_history or []
                answer = qa_chat.ask(question)
                chat_history.append((question, answer))
                return "", chat_history

            # Both the button and pressing Enter in the textbox submit a question
            send.click(respond, [qa_chat, question, chatbot], [question, chatbot])
            question.submit(respond, [qa_chat, question, chatbot], [question, chatbot])


# Start the app on the default local server
demo.launch()