gaf7921 committed on
Commit
d6781b8
•
1 Parent(s): 10cd33c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -1,10 +1,6 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
- from langchain.document_loaders import DirectoryLoader
5
- from langchain.document_loaders import TextLoader
6
- from langchain.document_loaders.csv_loader import CSVLoader
7
- from langchain.document_loaders import JSONLoader
8
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
9
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
10
  from langchain.vectorstores import FAISS, Chroma
@@ -18,6 +14,8 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
18
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
19
  import os
20
 
 
 
21
 
22
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
23
  def get_pdf_text(pdf_docs):
@@ -36,7 +34,7 @@ def get_text_file(docs):
36
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
37
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
38
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
39
- f.write(docs.getvalue()) # PDF 문서의 내용을 임시 파일에 씁니다.
40
  loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=TextLoader)
41
  data = loader.load()
42
  return data
@@ -45,8 +43,9 @@ def get_csv_file(docs):
45
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
46
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
47
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
48
- f.write(docs.getvalue()) # PDF 문서의 내용을 임시 파일에 씁니다.
49
- loader = CSVLoader(temp_filepath)
 
50
  data = loader.load()
51
  return data
52
 
@@ -54,7 +53,7 @@ def get_json_file(docs):
54
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
55
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
56
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
57
- f.write(docs.getvalue()) # PDF 문서의 내용을 임시 파일에 씁니다.
58
  loader = JSONLoader(
59
  file_path=temp_filepath,
60
  jq_schema='.messages[].content',
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
 
 
 
 
4
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS, Chroma
 
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
 
17
+ from langchain.document_loaders import DirectoryLoader
18
+
19
 
20
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
21
  def get_pdf_text(pdf_docs):
 
34
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
35
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
36
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
37
+ f.write(docs.getvalue()) # text 파일의 내용을 임시 파일에 씁니다.
38
  loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=TextLoader)
39
  data = loader.load()
40
  return data
 
43
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
44
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
45
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
46
+ f.write(docs.getvalue()) # csv 파일의 내용을 임시 파일에 씁니다.
47
+ #loader = CSVLoader(file_path=temp_filepath)
48
+ loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=CSVLoader)
49
  data = loader.load()
50
  return data
51
 
 
53
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
54
  temp_filepath = os.path.join(temp_dir.name, docs.name) # 임시 파일 경로를 생성합니다.
55
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
56
+ f.write(docs.getvalue()) # json 내용을 임시 파일에 씁니다.
57
  loader = JSONLoader(
58
  file_path=temp_filepath,
59
  jq_schema='.messages[].content',