kanishka089 committed
Commit 388b261 · verified · 1 Parent(s): 079820c

Update app.py

Files changed (1): app.py +6 -5
app.py CHANGED

@@ -7,19 +7,20 @@ from langchain_community.document_loaders import pdf
 from langchain_community.embeddings import OllamaEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.vectorstores import VectorStoreRetriever
-
+from langchain.embeddings import GPT4AllEmbeddings
 # instructions to start
 # https://www.linkedin.com/pulse/enhance-document-management-ai-extract-insights-from-pdfs-le-sueur-kfd5f/
 # https://github.com/RexiaAI/codeExamples/blob/main/localRAG/RAG.py
 # ollama pull nomic-embed-text
-load_dotenv() #remove string if hosting in huggingface
+load_dotenv('secret.env') #remove string if hosting in huggingface
 token = os.getenv('HUGGINGFACE_TOKEN')
 client = InferenceClient(
     "meta-llama/Meta-Llama-3-8B-Instruct",
     token=token,
 )
-print(token)
 
+model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
+gpt4all_kwargs = {'allow_download': 'false'}
 # Load, split, and retrieve documents from a local PDF file
 def loadAndRetrieveDocuments() -> VectorStoreRetriever:
     loader = pdf.PyPDFLoader("k.pdf") #constitution
@@ -27,7 +28,7 @@ def loadAndRetrieveDocuments() -> VectorStoreRetriever:
     textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     documentSplits = textSplitter.split_documents(documents)
     embeddings = OllamaEmbeddings(model="nomic-embed-text")
-    vectorStore = Chroma.from_documents(documents=documentSplits, embedding=embeddings)
+    vectorStore = Chroma.from_documents(documents=documentSplits, embedding=GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs))
     return vectorStore.as_retriever()
 
 
@@ -85,4 +86,4 @@ interface = gr.Interface(
 )
 
 # Launch the app
-interface.launch()
+interface.launch()
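Taken together, the change swaps the Ollama embedding backend for a local GPT4All GGUF model when building the Chroma index, loads the Hugging Face token from secret.env instead of a default .env file, and drops the print(token) debug line. Below is a minimal, self-contained sketch of the updated retrieval path, pieced together from this diff; it is not the committed file verbatim. The RecursiveCharacterTextSplitter import and the documents = loader.load() line sit outside the changed hunks and are assumed here. The sketch also imports GPT4AllEmbeddings from langchain_community (matching the file's other imports, rather than the older langchain path used in the diff) and passes allow_download as the boolean False, since the committed string 'false' is truthy in Python and likely would not disable downloads.

```python
# Minimal sketch of the updated retrieval path; assumptions noted in comments.
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain_community.document_loaders import pdf
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.vectorstores import VectorStoreRetriever
# Assumed import: the splitter is used in the diff but its import is outside the hunks.
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv('secret.env')  # remove the argument when hosting on Hugging Face
token = os.getenv('HUGGINGFACE_TOKEN')
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)

# Local GGUF embedding model; allow_download=False assumes the model file is
# already on disk (a real boolean, since the string 'false' would be truthy).
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
gpt4all_kwargs = {'allow_download': False}


def loadAndRetrieveDocuments() -> VectorStoreRetriever:
    # Load the PDF, split it into overlapping chunks, embed locally, index in Chroma.
    loader = pdf.PyPDFLoader("k.pdf")  # constitution
    documents = loader.load()  # assumed: this line falls between the diff hunks
    textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documentSplits = textSplitter.split_documents(documents)
    embedding = GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs)
    vectorStore = Chroma.from_documents(documents=documentSplits, embedding=embedding)
    return vectorStore.as_retriever()


# Hypothetical usage: fetch chunks relevant to a question about the document.
retriever = loadAndRetrieveDocuments()
relevantChunks = retriever.invoke("What does the constitution say about elections?")
```

Note that the committed version still constructs the now-unused embeddings = OllamaEmbeddings(...) object immediately before the Chroma call; the sketch drops it, so Ollama is only required if that line is kept.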