Update app.py
app.py CHANGED

@@ -8,7 +8,6 @@ import json
 
 ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
 # for tokenizing texts and splitting them into chunks of documents
-from transformers import GPT2TokenizerFast
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 # for turning documents into embeddings before putting them in vector store
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -26,26 +25,9 @@ import gradio as gr
 
 fmp_api_key = os.environ['FMP_API_KEY']
 
-
-def get_jsonparsed_data(url):
-    response = urlopen(url)
-    data = response.read().decode("utf-8")
-    return json.loads(data)
-
-# initialize the following tokenizers and splitters to tokenize and split the texts into chunks later (feel free to try others)
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=200, chunk_overlap=20)
-
-# initialize the default model for embedding the tokenized texts, the articles will be stored in this embedded form in the vector database
+# initialize the default model for embedding the tokenized texts, the articles are stored in this embedded form in the vector database
 hf_embeddings = HuggingFaceEmbeddings()
 
-# Load the huggingface inference endpoint of an LLM model
-# Name of the LLM model we are using, feel free to try others!
-model = "mistralai/Mistral-7B-Instruct-v0.1"
-
-# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
-hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
-
 os.system("rm -r chromadb_earnings_transcripts_extracted")
 os.system("rm earnings_transcripts_chromadb.zip")
 os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
@@ -53,6 +35,12 @@ os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
 
 chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
 
+# Load the huggingface inference endpoint of an LLM model
+# Name of the LLM model we are using, feel free to try others!
+model = "mistralai/Mistral-7B-Instruct-v0.1"
+
+# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
+hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
 
 
 def source_question_answer(query:str,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
@@ -117,4 +105,4 @@ with gr.Blocks() as app:
     btn.click(fn=source_question_answer, inputs=[query],
               outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
 
-app.launch(
+app.launch()
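For context, the body of `source_question_answer` is not shown in this diff; only its signature and the Gradio wiring appear above, which suggest it returns one answer plus four source chunks and four source titles. Below is a minimal sketch of what such a function could look like, assuming `k=4` retrieval from the Chroma store, a simple stuffed prompt, and a `"title"` metadata key on each chunk; none of these details are confirmed by the commit.

```python
# Minimal sketch only -- not the actual body of source_question_answer in app.py.
def source_question_answer(query: str, vectorstore: Chroma = chroma_db, llm: HuggingFaceHub = hf_llm):
    # retrieve the four transcript chunks most similar to the query (k=4 is an assumption)
    docs = vectorstore.similarity_search(query, k=4)
    sources = [doc.page_content for doc in docs]
    # the "title" metadata key is a guess at how each chunk's source is labelled
    titles = [doc.metadata.get("title", "") for doc in docs]

    # stuff the retrieved chunks into one prompt and send it to the hosted LLM endpoint
    context = "\n\n".join(sources)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    answer = llm(prompt)

    # one answer + four sources + four titles, matching the Gradio outputs list above
    return [answer] + sources + titles
```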