Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,19 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from dotenv import load_dotenv
|
4 |
-
from langchain.vectorstores.faiss import FAISS
|
5 |
-
from langchain.[…]
|
6 |
from langchain.document_loaders import PyPDFLoader
|
7 |
from langchain.text_splitter import CharacterTextSplitter
|
8 |
-
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
|
9 |
-
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
10 |
|
11 |
# Load environment variables
|
12 |
load_dotenv()
|
13 |
|
14 |
-
# Use Hugging Face Inference API embeddings
|
15 |
-
inference_api_key = os.getenv('HF') # Use getenv to retrieve environment variable
|
16 |
-
api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
|
17 |
-
api_key=inference_api_key,
|
18 |
-
model_name="sentence-transformers/all-MiniLM-l6-v2"
|
19 |
-
)
|
20 |
-
|
21 |
# Load and process the PDF files
|
22 |
loader = PyPDFLoader("./new_papers/ALiBi.pdf")
|
23 |
documents = loader.load()
|
24 |
|
25 |
-
# Split the documents into chunks and embed them using […]
|
26 |
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
|
27 |
vdocuments = text_splitter.split_documents(documents)
|
28 |
|
@@ -50,7 +41,7 @@ api_tool = gr.Interface(
|
|
50 |
outputs=gr.Textbox(),
|
51 |
live=True,
|
52 |
title="API PDF Retrieval Tool",
|
53 |
-
description="This tool indexes PDF documents and retrieves relevant answers based on a given query ([…]
|
54 |
)
|
55 |
|
56 |
# Launch the Gradio interface
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from dotenv import load_dotenv
|
4 |
+
from langchain.vectorstores.faiss import FAISS
|
5 |
+
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
6 |
from langchain.document_loaders import PyPDFLoader
|
7 |
from langchain.text_splitter import CharacterTextSplitter
|
|
|
|
|
8 |
|
9 |
# Load environment variables
|
10 |
load_dotenv()
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Load and process the PDF files
|
13 |
loader = PyPDFLoader("./new_papers/ALiBi.pdf")
|
14 |
documents = loader.load()
|
15 |
|
16 |
+
# Split the documents into chunks and embed them using HuggingFaceBgeEmbeddings
|
17 |
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
|
18 |
vdocuments = text_splitter.split_documents(documents)
|
19 |
|
|
|
41 |
outputs=gr.Textbox(),
|
42 |
live=True,
|
43 |
title="API PDF Retrieval Tool",
|
44 |
+
description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HuggingFaceBgeEmbeddings).",
|
45 |
)
|
46 |
|
47 |
# Launch the Gradio interface
|