Update app.py
app.py CHANGED

@@ -8,7 +8,6 @@ import json
 
 ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
 # for tokenizing texts and splitting them into chunks of documents
-from transformers import GPT2TokenizerFast
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 # for turning documents into embeddings before putting them in vector store
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -26,26 +25,9 @@ import gradio as gr
 
 fmp_api_key = os.environ['FMP_API_KEY']
 
-
-def get_jsonparsed_data(url):
-    response = urlopen(url)
-    data = response.read().decode("utf-8")
-    return json.loads(data)
-
-# initialize the following tokenizers and splitters to tokenize and split the texts into chunks later (feel free to try others)
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=200, chunk_overlap=20)
-
-# initialize the default model for embedding the tokenized texts, the articles will be stored in this embedded form in the vector database
+# initialize the default model for embedding the tokenized texts, the articles are stored in this embedded form in the vector database
 hf_embeddings = HuggingFaceEmbeddings()
 
-# Load the huggingface inference endpoint of an LLM model
-# Name of the LLM model we are using, feel free to try others!
-model = "mistralai/Mistral-7B-Instruct-v0.1"
-
-# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
-hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
-
 os.system("rm -r chromadb_earnings_transcripts_extracted")
 os.system("rm earnings_transcripts_chromadb.zip")
 os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
@@ -53,6 +35,12 @@ os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
 
 chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
 
+# Load the huggingface inference endpoint of an LLM model
+# Name of the LLM model we are using, feel free to try others!
+model = "mistralai/Mistral-7B-Instruct-v0.1"
+
+# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
+hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
 
 
 def source_question_answer(query:str,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
@@ -117,4 +105,4 @@ with gr.Blocks() as app:
     btn.click(fn=source_question_answer, inputs=[query],
               outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
 
-app.launch(
+app.launch()
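For context, the body of `source_question_answer` is not shown in this diff; only its signature and the Gradio wiring appear above, which suggest it returns one answer plus four source chunks and four source titles. Below is a minimal sketch of what such a function could look like, assuming `k=4` retrieval from the Chroma store, a simple stuffed prompt, and a `"title"` metadata key on each chunk; none of these details are confirmed by the commit.

```python
# Minimal sketch only -- not the actual body of source_question_answer in app.py.
def source_question_answer(query: str, vectorstore: Chroma = chroma_db, llm: HuggingFaceHub = hf_llm):
    # retrieve the four transcript chunks most similar to the query (k=4 is an assumption)
    docs = vectorstore.similarity_search(query, k=4)
    sources = [doc.page_content for doc in docs]
    # the "title" metadata key is a guess at how each chunk's source is labelled
    titles = [doc.metadata.get("title", "") for doc in docs]

    # stuff the retrieved chunks into one prompt and send it to the hosted LLM endpoint
    context = "\n\n".join(sources)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    answer = llm(prompt)

    # one answer + four sources + four titles, matching the Gradio outputs list above
    return [answer] + sources + titles
```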