alexkueck committed
Commit: 1590faf
1 Parent(s): 7a812ee

Update utils.py

Files changed (1)
  1. utils.py +4 -6
utils.py CHANGED
@@ -29,10 +29,9 @@ from pygments.lexers import guess_lexer,get_lexer_by_name
 from pygments.formatters import HtmlFormatter
 
 from langchain.chains import LLMChain, RetrievalQA
-from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader, UnstructuredWordDocumentLoader, DirectoryLoader
+from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, DirectoryLoader
 from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
-from langchain.document_loaders.generic import GenericLoader
-from langchain.document_loaders.parsers import OpenAIWhisperParser
+from langchain.document_loaders import GenericLoader
 from langchain.schema import AIMessage, HumanMessage
 from langchain_community.llms import HuggingFaceHub
 from langchain_community.llms import HuggingFaceTextGenInference
@@ -47,7 +46,6 @@ from langchain import hub
 from langchain.output_parsers.openai_tools import PydanticToolsParser
 from langchain.prompts import PromptTemplate
 from langchain.schema import Document
-from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.vectorstores import Chroma
 from langchain_core.messages import BaseMessage, FunctionMessage
 from langchain_core.output_parsers import StrOutputParser
@@ -189,10 +187,10 @@ def document_loading_splitting():
 #Chroma DB: store the splits - vectorized...
 def document_storage_chroma(splits):
     #OpenAi embeddings----------------------------------
-    Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
+    #Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
 
     #HF embeddings--------------------------------------
-    #Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
+    Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
 
 
 ############################################
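After this change the Chroma collection is written with HuggingFace embeddings instead of OpenAI embeddings. Below is a minimal sketch (not code from the repository) of how such a persisted store could be reloaded and queried, assuming the PATH_WORK and CHROMA_DIR constants defined elsewhere in utils.py and the same sentence-transformers model; the helper name load_and_query_chroma, the default k, and the call shape are illustrative only.

# Sketch only: reload the persisted Chroma store and run a similarity search,
# using the same embedding model that document_storage_chroma() now indexes with.
# PATH_WORK and CHROMA_DIR are assumed to be the constants defined in utils.py.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

def load_and_query_chroma(query, k=4):
    # Queries must be embedded with the same model used at indexing time,
    # otherwise query vectors and stored vectors are not comparable.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": False},
    )
    # Open the collection previously written by Chroma.from_documents(...).
    db = Chroma(persist_directory=PATH_WORK + CHROMA_DIR, embedding_function=embeddings)
    # Return the k most similar document chunks for the query.
    return db.similarity_search(query, k=k)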