bohmian committed
Commit 2b8a96e · 1 Parent(s): 1de170c

Update app.py

Files changed (1): app.py +8 -20
app.py CHANGED
@@ -8,7 +8,6 @@ import json
 
 ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
 # for tokenizing texts and splitting them into chunks of documents
-from transformers import GPT2TokenizerFast
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 # for turning documents into embeddings before putting them in vector store
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -26,26 +25,9 @@ import gradio as gr
 
 fmp_api_key = os.environ['FMP_API_KEY']
 
-
-def get_jsonparsed_data(url):
-    response = urlopen(url)
-    data = response.read().decode("utf-8")
-    return json.loads(data)
-
-# initialize the following tokenizers and splitters to tokenize and split the texts into chunks later (feel free to try others)
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=200, chunk_overlap=20)
-
-# initialize the default model for embedding the tokenized texts, the articles will be stored in this embedded form in the vector database
+# initialize the default model for embedding the tokenized texts, the articles are stored in this embedded form in the vector database
 hf_embeddings = HuggingFaceEmbeddings()
 
-# Load the huggingface inference endpoint of an LLM model
-# Name of the LLM model we are using, feel free to try others!
-model = "mistralai/Mistral-7B-Instruct-v0.1"
-
-# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
-hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
-
 os.system("rm -r chromadb_earnings_transcripts_extracted")
 os.system("rm earnings_transcripts_chromadb.zip")
 os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
@@ -53,6 +35,12 @@ os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcri
 
 chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
 
+# Load the huggingface inference endpoint of an LLM model
+# Name of the LLM model we are using, feel free to try others!
+model = "mistralai/Mistral-7B-Instruct-v0.1"
+
+# This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
+hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
 
 
 def source_question_answer(query:str,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
@@ -117,4 +105,4 @@ with gr.Blocks() as app:
     btn.click(fn=source_question_answer, inputs=[query],
               outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
 
-app.launch(share=True, debug=True)
+app.launch()
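
The diff shows only the signature of source_question_answer; its body is unchanged and elided here. For orientation, below is a minimal, hypothetical sketch of how the objects wired up above (the persisted Chroma store and the HuggingFaceHub endpoint) are typically combined for retrieval-augmented QA. The k=4 retrieval, the prompt wording, and the "title" metadata key are assumptions chosen to line up with the nine Gradio outputs in btn.click, not the app's actual implementation; import paths assume the same legacy LangChain version used in app.py.

# Hypothetical sketch only -- not the app's actual function body.
from langchain.vectorstores import Chroma    # import paths are assumptions
from langchain.llms import HuggingFaceHub

def source_question_answer(query: str,
                           vectorstore: Chroma = chroma_db,
                           llm: HuggingFaceHub = hf_llm):
    # retrieve the four transcript chunks most similar to the query
    docs = vectorstore.similarity_search(query, k=4)

    # build a grounded prompt from the retrieved chunks and call the hosted LLM
    context = "\n\n".join(doc.page_content for doc in docs)
    prompt = (f"Answer the question using only the context below.\n\n"
              f"Context:\n{context}\n\nQuestion: {query}\nAnswer:")
    answer = llm(prompt)

    # one value per Gradio output: answer, four source chunks, four titles
    sources = [doc.page_content for doc in docs]
    titles = [doc.metadata.get("title", "") for doc in docs]
    return [answer, *sources, *titles]

The switch from app.launch(share=True, debug=True) to app.launch() also fits a Spaces deployment: the Space already serves the app publicly, so a share link is redundant, and debug mode is mainly useful for local development.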