Spaces:
Runtime error
Runtime error
kanishka089
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -7,19 +7,20 @@ from langchain_community.document_loaders import pdf
|
|
7 |
from langchain_community.embeddings import OllamaEmbeddings
|
8 |
from langchain_community.vectorstores import Chroma
|
9 |
from langchain_core.vectorstores import VectorStoreRetriever
|
10 |
-
|
11 |
# instructions to start
|
12 |
# https://www.linkedin.com/pulse/enhance-document-management-ai-extract-insights-from-pdfs-le-sueur-kfd5f/
|
13 |
# https://github.com/RexiaAI/codeExamples/blob/main/localRAG/RAG.py
|
14 |
# ollama pull nomic-embed-text
|
15 |
-
load_dotenv() #remove string if hosting in huggingface
|
16 |
token = os.getenv('HUGGINGFACE_TOKEN')
|
17 |
client = InferenceClient(
|
18 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
19 |
token=token,
|
20 |
)
|
21 |
-
print(token)
|
22 |
|
|
|
|
|
23 |
# Load, split, and retrieve documents from a local PDF file
|
24 |
def loadAndRetrieveDocuments() -> VectorStoreRetriever:
|
25 |
loader = pdf.PyPDFLoader("k.pdf") #constitution
|
@@ -27,7 +28,7 @@ def loadAndRetrieveDocuments() -> VectorStoreRetriever:
|
|
27 |
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
28 |
documentSplits = textSplitter.split_documents(documents)
|
29 |
embeddings = OllamaEmbeddings(model="nomic-embed-text")
|
30 |
-
vectorStore = Chroma.from_documents(documents=documentSplits, embedding=
|
31 |
return vectorStore.as_retriever()
|
32 |
|
33 |
|
@@ -85,4 +86,4 @@ interface = gr.Interface(
|
|
85 |
)
|
86 |
|
87 |
# Launch the app
|
88 |
-
interface.launch()
|
|
|
7 |
from langchain_community.embeddings import OllamaEmbeddings
|
8 |
from langchain_community.vectorstores import Chroma
|
9 |
from langchain_core.vectorstores import VectorStoreRetriever
|
10 |
+
from langchain.embeddings import GPT4AllEmbeddings
|
11 |
# instructions to start
|
12 |
# https://www.linkedin.com/pulse/enhance-document-management-ai-extract-insights-from-pdfs-le-sueur-kfd5f/
|
13 |
# https://github.com/RexiaAI/codeExamples/blob/main/localRAG/RAG.py
|
14 |
# ollama pull nomic-embed-text
|
15 |
+
load_dotenv('secret.env') #remove string if hosting in huggingface
|
16 |
token = os.getenv('HUGGINGFACE_TOKEN')
|
17 |
client = InferenceClient(
|
18 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
19 |
token=token,
|
20 |
)
|
|
|
21 |
|
22 |
+
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
|
23 |
+
gpt4all_kwargs = {'allow_download': 'false'}
|
24 |
# Load, split, and retrieve documents from a local PDF file
|
25 |
def loadAndRetrieveDocuments() -> VectorStoreRetriever:
|
26 |
loader = pdf.PyPDFLoader("k.pdf") #constitution
|
|
|
28 |
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
29 |
documentSplits = textSplitter.split_documents(documents)
|
30 |
embeddings = OllamaEmbeddings(model="nomic-embed-text")
|
31 |
+
vectorStore = Chroma.from_documents(documents=documentSplits, embedding=GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs))
|
32 |
return vectorStore.as_retriever()
|
33 |
|
34 |
|
|
|
86 |
)
|
87 |
|
88 |
# Launch the app
|
89 |
+
interface.launch()
|