Chris4K committed on
Commit
c33d1d0
1 Parent(s): 229718e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -27
app.py CHANGED
@@ -1,21 +1,18 @@
1
- import os
2
- #!pip install -q gradio langchain pypdf chromadb
3
  import gradio as gr
4
  from dotenv import load_dotenv
5
- from PyPDF2 import PdfReader
6
- from langchain.vectorstores import Chroma
7
- from langchain.vectorstores import FAISS
8
  from langchain.document_loaders import PyPDFLoader
9
  from langchain.text_splitter import CharacterTextSplitter
10
  from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
11
  from langchain.embeddings import HuggingFaceBgeEmbeddings
12
- from langchain.memory import ConversationBufferMemory
13
- from langchain.chains import ConversationalRetrievalChain
14
- from langchain.llms import HuggingFaceHub
15
 
 
 
16
 
17
  # Use Hugging Face Inference API embeddings
18
- inference_api_key = os.environ['HF']
19
  api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
20
  api_key=inference_api_key,
21
  model_name="sentence-transformers/all-MiniLM-l6-v2"
@@ -24,17 +21,11 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
24
  # Load and process the PDF files
25
  loader = PyPDFLoader("./new_papers/ALiBi.pdf")
26
  documents = loader.load()
27
- print("-----------")
28
- print(documents[0])
29
- print("-----------")
30
 
31
  # Split the documents into chunks and embed them using the HfApiEmbeddingTool
32
  text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
33
  vdocuments = text_splitter.split_documents(documents)
34
 
35
-
36
-
37
-
38
  model = "BAAI/bge-base-en-v1.5"
39
  encode_kwargs = {
40
  "normalize_embeddings": True
@@ -42,17 +33,9 @@ encode_kwargs = {
42
  embeddings = HuggingFaceBgeEmbeddings(
43
  model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
44
  )
45
- api_db = FAISS.from_texts(texts=vdocuments, embedding=embeddings)
46
- api_db.as_retriever.similarity("What is ICD?")
47
 
48
-
49
- # Extract the embedding arrays from the PDF documents
50
- #embeddings = []
51
- #for doc in vdocuments:
52
- # embeddings.extend(api_hf_embeddings.get_embeddings(doc))
53
-
54
- # Create Chroma vector store for API embeddings
55
- #api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
56
 
57
  # Define the PDF retrieval function
58
  def pdf_retrieval(query):
@@ -60,7 +43,6 @@ def pdf_retrieval(query):
60
  response = api_db.similarity_search(query)
61
  return response
62
 
63
- # Create Gradio interface for the API retriever
64
  # Create Gradio interface for the API retriever
65
  api_tool = gr.Interface(
66
  fn=pdf_retrieval,
@@ -72,4 +54,4 @@ api_tool = gr.Interface(
72
  )
73
 
74
  # Launch the Gradio interface
75
- api_tool.launch()
 
1
+ import os
 
2
  import gradio as gr
3
  from dotenv import load_dotenv
4
+ from langchain.vectorstores.faiss import FAISS # Import FAISS
5
+ from langchain.vectorstores.chroma import Chroma # Import Chroma
 
6
  from langchain.document_loaders import PyPDFLoader
7
  from langchain.text_splitter import CharacterTextSplitter
8
  from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
9
  from langchain.embeddings import HuggingFaceBgeEmbeddings
 
 
 
10
 
11
+ # Load environment variables
12
+ load_dotenv()
13
 
14
  # Use Hugging Face Inference API embeddings
15
+ inference_api_key = os.getenv('HF') # Use getenv to retrieve environment variable
16
  api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
17
  api_key=inference_api_key,
18
  model_name="sentence-transformers/all-MiniLM-l6-v2"
 
21
  # Load and process the PDF files
22
  loader = PyPDFLoader("./new_papers/ALiBi.pdf")
23
  documents = loader.load()
 
 
 
24
 
25
  # Split the documents into chunks and embed them using the HfApiEmbeddingTool
26
  text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
27
  vdocuments = text_splitter.split_documents(documents)
28
 
 
 
 
29
  model = "BAAI/bge-base-en-v1.5"
30
  encode_kwargs = {
31
  "normalize_embeddings": True
 
33
  embeddings = HuggingFaceBgeEmbeddings(
34
  model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
35
  )
 
 
36
 
37
+ # Create FAISS vector store for API embeddings
38
+ api_db = FAISS.from_texts(texts=vdocuments, embedding=embeddings)
 
 
 
 
 
 
39
 
40
  # Define the PDF retrieval function
41
  def pdf_retrieval(query):
 
43
  response = api_db.similarity_search(query)
44
  return response
45
 
 
46
  # Create Gradio interface for the API retriever
47
  api_tool = gr.Interface(
48
  fn=pdf_retrieval,
 
54
  )
55
 
56
  # Launch the Gradio interface
57
+ api_tool.launch()