# for setting/extracting environment variables such as API keys
import os

### 1. For Web Scraping
# for querying the Financial Modeling Prep API (used when the transcripts were
# scraped; urlopen and json are not called anywhere else in this app)
from urllib.request import urlopen
import json

### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
# for tokenizing texts and splitting them into chunks of documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
# for turning documents into embeddings before putting them in the vector store
from langchain_community.embeddings import HuggingFaceEmbeddings
# for the vector store that holds the documents
from langchain_community.vectorstores import Chroma

### 3. For Querying the LLM
# for loading LLM models hosted on the Hugging Face Hub
from langchain_community.llms import HuggingFaceHub
# for conveniently querying the LLM with retrieved documents as context
from langchain.chains.question_answering import load_qa_chain

### 4. For the Gradio App UI
import gradio as gr
from huggingface_hub import InferenceClient
# read the FMP API key if present; .get avoids a KeyError when the secret is
# not set (the key is only needed for the scraping step, not by this app)
fmp_api_key = os.environ.get("FMP_API_KEY")

# initialize the default embedding model (HuggingFaceEmbeddings defaults to
# sentence-transformers/all-mpnet-base-v2); the transcript chunks are stored in
# the vector database in this embedded form, so this must match the model used
# when the downloaded store was built
hf_embeddings = HuggingFaceEmbeddings()

# download and extract a pre-built Chroma vector store of the transcripts,
# removing any leftovers from a previous run first
if os.path.exists("chromadb_earnings_transcripts_extracted"):
    os.system("rm -r chromadb_earnings_transcripts_extracted")
if os.path.exists("earnings_transcripts_chromadb.zip"):
    os.system("rm earnings_transcripts_chromadb.zip")
os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")

chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts', embedding_function=hf_embeddings)
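
# RecursiveCharacterTextSplitter is imported above but never used at runtime,
# since the app downloads a pre-built vector store. The helper below is a
# minimal sketch of how such a store could have been built from raw transcript
# texts; the chunk sizes and the `transcripts` input format are illustrative
# assumptions, not the exact values used to create the downloaded database.
def build_vectorstore_sketch(transcripts: list[dict], persist_directory: str) -> Chroma:
    """Sketch: chunk transcript texts and persist them as a Chroma store."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts, metadatas = [], []
    for t in transcripts:  # each dict is assumed to hold 'content' and 'title'
        for chunk in splitter.split_text(t["content"]):
            texts.append(chunk)
            metadatas.append({"title": t["title"]})
    # embed every chunk with the same model used at query time and persist
    return Chroma.from_texts(texts, embedding=hf_embeddings,
                             metadatas=metadatas, persist_directory=persist_directory)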

# Load the LLM through a Hugging Face inference endpoint.
# Name of the LLM model we are using, feel free to try others!
model = "mistralai/Mistral-7B-Instruct-v0.1"
# This is an inference API from Hugging Face: the model is not run locally,
# it runs on Hugging Face's servers
hf_llm = HuggingFaceHub(repo_id=model, model_kwargs={'temperature': 0.5, "max_new_tokens": 200})
print("### Chroma DB and LLM model loaded successfully...")
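
# InferenceClient is imported above but never used. As an alternative to the
# HuggingFaceHub wrapper (deprecated in recent LangChain releases), the same
# hosted model can be called directly; this helper is a sketch and is not
# wired into the app below.
def query_llm_directly(prompt: str) -> str:
    """Sketch: call the Mistral endpoint via huggingface_hub.InferenceClient."""
    client = InferenceClient(model=model)
    return client.text_generation(prompt, temperature=0.5, max_new_tokens=200)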

def source_question_answer(query: str, vectorstore: Chroma = chroma_db, llm: HuggingFaceHub = hf_llm):
    """
    Answer the query using the k=4 most similar transcript chunks as context,
    and return the answer together with the source texts and their titles.
    """
    input_docs = vectorstore.similarity_search(query, k=4)
    qa_chain = load_qa_chain(llm, chain_type="stuff")
    # wrap the question in Mistral's instruction format (note the closing [/INST])
    query = f"[INST]According to the earnings calls transcripts earlier, {query}[/INST]"
    response = qa_chain.run(input_documents=input_docs, question=query)
    source_docs = [doc.page_content for doc in input_docs]
    source_titles = [doc.metadata['title'] for doc in input_docs]
    return response, *source_docs, *source_titles
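
# Quick smoke test before launching the UI (the question is the same example
# used as the Textbox default below; uncomment to run):
# answer, *sources_and_titles = source_question_answer("How is Tesla planning to expand?")
# print(answer)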

with gr.Blocks() as app:
    with gr.Row():
        gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
    with gr.Row():
        query = gr.Textbox("How is Tesla planning to expand?", placeholder="Enter question here...", label="Enter question")
        btn = gr.Button("Ask Question")
    with gr.Row():
        gr.HTML("<h3>Answer</h3>")
    with gr.Row():
        answer = gr.Textbox(label="Answer")
    with gr.Row():
        gr.HTML("<h3>Sources Referenced from Tesla 2023 Earnings Calls Transcripts</h3>")
    with gr.Row():
        with gr.Column():
            source_title_1 = gr.Markdown()
            source1 = gr.Textbox(label="Source Text 1")
        with gr.Column():
            source_title_2 = gr.Markdown()
            source2 = gr.Textbox(label="Source Text 2")
    with gr.Row():
        with gr.Column():
            source_title_3 = gr.Markdown()
            source3 = gr.Textbox(label="Source Text 3")
        with gr.Column():
            source_title_4 = gr.Markdown()
            source4 = gr.Textbox(label="Source Text 4")

    query.submit(fn=source_question_answer, inputs=[query],
                 outputs=[answer, source1, source2, source3, source4,
                          source_title_1, source_title_2, source_title_3, source_title_4])
    btn.click(fn=source_question_answer, inputs=[query],
              outputs=[answer, source1, source2, source3, source4,
                       source_title_1, source_title_2, source_title_3, source_title_4])

app.launch()