TypeError: _load_sbert_model() got an unexpected keyword argument 'token'
Attempts:
import streamlit as st # type: ignore
from PyPDF2 import PdfReader # type: ignore
from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore
from langchain.embeddings import HuggingFaceInstructEmbeddings # type: ignore
from langchain.vectorstores import FAISS # type: ignore
from dotenv import load_dotenv # type: ignore
from InstructorEmbedding import INSTRUCTOR # type: ignore
from sentence_transformers import SentenceTransformer # Use SentenceTransformer module to use Hugging face Model
#import torch
with st.sidebar:
st.title('LLM Chat App')
st.markdown('''
## About
This app is an LLM-powered chatbot built using:
- Streamlit
- Langchain
- HuggingFace
''')
def main():
load_dotenv()
st.header("Chat with PDF ๐๐ฌ")
# upload PDF file
pdf = st.file_uploader("Upload your PDF", type='pdf')
#st.write(pdf) # this code displays file name, indicating if any file is uploaded
#check if file is uploaded before reading (prevent error on first run, when no doc is uploaded)
if pdf is not None:
pdf_reader = PdfReader(pdf)
#st.write(pdf_reader)
text = ""
for page in pdf_reader.pages:
# extract text from every page
text += page.extract_text()
#st.write(text) #check if text are sucessfully read
text_splitter = RecursiveCharacterTextSplitter(
chunk_size = 1000,
chunk_overlap = 200,
length_function = len
)
chunks = text_splitter.split_text(text=text)
#st.write(chunks)
# attempt 1: embeddings (failed)
#model = INSTRUCTOR('hkunlp/instructor-xl')
#embeddings = model.encode([[chunk] for chunk in chunks])
# attempt 2: embeddings (failed)
#model = SentenceTransformer('hkunlp/instructor-xl') #loaded for a very long time
#model = "hkunlp/instructor-xl"
#embeddings = HuggingFaceInstructEmbeddings(model_name=model)
# attempt 3: embeddings
#embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
#vectorstore = FAISS.from_text(chunks, embeddings=embeddings)
# attempt 4 (failed)
# Initialize the INSTRUCTOR model
model = INSTRUCTOR('hkunlp/instructor-xl')
# Encode chunks
embeddings = []
for chunk in chunks:
instruction = "Embed PDF text"
embeddings.append(model.encode([instruction, chunk]))
# Initialize vector store
vectorstore = FAISS.from_text(chunks, embeddings=embeddings)
if name == 'main':
main()
requirements.txt :
langchain==0.0.154
pyPDF2==3.0.1
python-dotenv==1.0.0
streamlit==1.18.1
faiss-cpu==1.7.4
streamlit-extras
altair==4.1.0
huggingface-hub==0.14.1
InstructorEmbedding==1.0.1
sentence-transformers==2.2.2
using python 3.9
having the same problem,have u solved it?