Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
from dotenv import load_dotenv
|
3 |
from PyPDF2 import PdfReader
|
4 |
-
from langchain.document_loaders import DirectoryLoader
|
5 |
-
from langchain.document_loaders import TextLoader
|
6 |
-
from langchain.document_loaders.csv_loader import CSVLoader
|
7 |
-
from langchain.document_loaders import JSONLoader
|
8 |
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
9 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
|
10 |
from langchain.vectorstores import FAISS, Chroma
|
@@ -18,6 +14,8 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
|
|
18 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
19 |
import os
|
20 |
|
|
|
|
|
21 |
|
22 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
23 |
def get_pdf_text(pdf_docs):
|
@@ -36,7 +34,7 @@ def get_text_file(docs):
|
|
36 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
37 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
38 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
39 |
-
f.write(docs.getvalue()) #
|
40 |
loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=TextLoader)
|
41 |
data = loader.load()
|
42 |
return data
|
@@ -45,8 +43,9 @@ def get_csv_file(docs):
|
|
45 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
46 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
47 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
48 |
-
f.write(docs.getvalue()) #
|
49 |
-
loader = CSVLoader(temp_filepath)
|
|
|
50 |
data = loader.load()
|
51 |
return data
|
52 |
|
@@ -54,7 +53,7 @@ def get_json_file(docs):
|
|
54 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
55 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
56 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
57 |
-
f.write(docs.getvalue()) #
|
58 |
loader = JSONLoader(
|
59 |
file_path=temp_filepath,
|
60 |
jq_schema='.messages[].content',
|
|
|
1 |
import streamlit as st
|
2 |
from dotenv import load_dotenv
|
3 |
from PyPDF2 import PdfReader
|
|
|
|
|
|
|
|
|
4 |
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
5 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
|
6 |
from langchain.vectorstores import FAISS, Chroma
|
|
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
|
17 |
+
from langchain.document_loaders import DirectoryLoader
|
18 |
+
|
19 |
|
20 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
21 |
def get_pdf_text(pdf_docs):
|
|
|
34 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
35 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
36 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
37 |
+
f.write(docs.getvalue()) # text 파일의 내용을 임시 파일에 씁니다.
|
38 |
loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=TextLoader)
|
39 |
data = loader.load()
|
40 |
return data
|
|
|
43 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
44 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
45 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
46 |
+
f.write(docs.getvalue()) # csv 파일의 내용을 임시 파일에 씁니다.
|
47 |
+
#loader = CSVLoader(file_path=temp_filepath)
|
48 |
+
loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=CSVLoader)
|
49 |
data = loader.load()
|
50 |
return data
|
51 |
|
|
|
53 |
temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
|
54 |
temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
|
55 |
with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
|
56 |
+
f.write(docs.getvalue()) # json 내용을 임시 파일에 씁니다.
|
57 |
loader = JSONLoader(
|
58 |
file_path=temp_filepath,
|
59 |
jq_schema='.messages[].content',
|