Javeralopez committed on
Commit
c43c04e
verified
1 Parent(s): 7fc96e6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+
4
+
5
+
6
+ from PyPDF2 import PdfReader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.embeddings import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_community.chat_models import ChatOpenAI
11
+ from langchain.chains.question_answering import load_qa_chain
12
+
13
+
14
+
15
# Page chrome: browser-tab title and the visible heading.
st.set_page_config('preguntaDOC')
st.header("Pregunta a tu PDF")

# Each user supplies their own OpenAI key at runtime. The first argument of
# st.text_input is the widget *label*, which is rendered in clear text on the
# page, so it must be a human-readable prompt and never a real secret.
# type='password' masks what the user types into the field.
OPENAI_API_KEY = st.text_input('OpenAI API Key', type='password')

# Any change to the uploaded file clears every st.cache_resource entry, so the
# index built by create_embeddings is recomputed for the new document instead
# of being served from the cache.
pdf_obj = st.file_uploader("Carga tu documento", type="pdf", on_change=st.cache_resource.clear)
19
+
20
@st.cache_resource
def create_embeddings(pdf) -> FAISS:
    """Build a FAISS similarity index from the text of a PDF.

    The text of every page is concatenated, split into overlapping chunks
    and embedded with a multilingual sentence-transformers model. The
    function is wrapped in ``st.cache_resource`` so Streamlit reruns reuse
    the index instead of re-embedding the document.

    Args:
        pdf: A PDF source accepted by ``PdfReader``; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        A FAISS vector store with one entry per text chunk.

    Raises:
        ValueError: If no text chunks could be produced from the PDF
            (for example a scanned document without a text layer).
    """
    pdf_reader = PdfReader(pdf)
    # Join once instead of growing a string with +=. The `or ""` keeps the
    # join safe should a page yield a falsy result instead of text.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1800,
        chunk_overlap=1000,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # Building a FAISS index from zero texts fails deep inside the library
    # with an unhelpful error; fail early with an actionable message instead.
    if not chunks:
        raise ValueError(
            "No se pudo extraer texto del PDF; "
            "comprueba que no sea un documento escaneado o vacío."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    return knowledge_base
38
+
39
# Main flow: once a PDF is uploaded, index it and answer questions about it.
if pdf_obj:
    knowledge_base = create_embeddings(pdf_obj)
    user_question = st.text_input("Haz una pregunta sobre tu PDF:")

    if user_question:
        if not OPENAI_API_KEY:
            # Without a key the OpenAI client would fail with a raw traceback;
            # tell the user what is missing instead.
            st.warning("Introduce tu clave de API de OpenAI para obtener una respuesta.")
        else:
            # Retrieve the 3 chunks most similar to the question as context.
            docs = knowledge_base.similarity_search(user_question, 3)
            # Pass the key straight to the client rather than writing it to
            # os.environ: environment variables are process-global, so the
            # key would be shared by every session served by this process.
            llm = ChatOpenAI(model_name='gpt-3.5-turbo', openai_api_key=OPENAI_API_KEY)
            # "stuff" puts all retrieved chunks into a single prompt.
            chain = load_qa_chain(llm, chain_type="stuff")
            respuesta = chain.run(input_documents=docs, question=user_question)

            st.write(respuesta)
51
+
52
+
53
+