File size: 2,600 Bytes
7eaa9e0
 
 
 
 
 
 
 
 
2092f50
7eaa9e0
 
 
 
 
 
 
 
b74f07b
7eaa9e0
b74f07b
 
 
 
 
 
 
 
7eaa9e0
 
 
ff6fd42
7eaa9e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2092f50
b74f07b
7eaa9e0
 
 
 
 
 
 
 
 
cc58b68
7eaa9e0
 
 
1c287e5
7eaa9e0
1c287e5
186d68d
7eaa9e0
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import streamlit as st
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
from langchain import HuggingFaceHub, LLMChain
from langchain.embeddings import HuggingFaceHubEmbeddings,HuggingFaceInferenceAPIEmbeddings
# Hugging Face API token, read from the environment at import time.
# A missing HF_TOKEN variable raises KeyError here.
token = os.environ["HF_TOKEN"]

# Hub repository of the sentence-embedding model.
repo_id = "sentence-transformers/all-mpnet-base-v2"

# Module-level embedding client, created once and reused by main().
hf = HuggingFaceHubEmbeddings(
    huggingfacehub_api_token=token,
    task="feature-extraction",
    repo_id=repo_id,
)

# from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

# embeddings = HuggingFaceInferenceAPIEmbeddings(
#     api_key=token, model_name="sentence-transformers/all-MiniLM-l6-v2"
# )
# hf = HuggingFaceHubEmbeddings(
#     repo_id=repo_id,
#     task="feature-extraction",
#     huggingfacehub_api_token= HUGGINGFACEHUB_API_TOKEN,
# )


def _extract_pdf_text(pdf_reader):
    """Return the text of every page in *pdf_reader*, concatenated in order."""
    # Guard against a falsy result so a page with no extractable text
    # cannot break the concatenation; join avoids repeated string copies.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main():
    """Render the "Ask your PDF" Streamlit page.

    Flow: upload a PDF, extract its text, split it into overlapping
    chunks, index the chunks in a FAISS store using the module-level
    ``hf`` embeddings, then answer the user's question with a
    map-reduce QA chain backed by a Hugging Face Hub LLM.

    The function returns early (rendering nothing further) when no file
    has been uploaded, when the PDF yields no text, or when no question
    has been entered yet.
    """
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # extract the text
    text = _extract_pdf_text(PdfReader(pdf))

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Nothing to embed (e.g. a scanned/image-only PDF); building a
        # vector store from an empty list would fail, so stop here.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings and the searchable index
    knowledge_base = FAISS.from_texts(chunks, hf)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = HuggingFaceHub(
        repo_id='mistralai/Mistral-7B-Instruct-v0.3',
        model_kwargs={'temperature': 0.01, "max_length": 2048},
        huggingfacehub_api_token=token,
    )
    chain = load_qa_chain(llm, chain_type="map_reduce")

    # The question must be a plain string: it is interpolated into the
    # chain's prompt, and a list would be rendered as its Python repr.
    response = chain.run(input_documents=docs, question=user_question)

    st.write(response)

# Launch the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()