lds204 committed on
Commit
1d332b7
•
1 Parent(s): c834d78
Files changed (1) hide show
  1. app.py +31 -8
app.py CHANGED
@@ -8,6 +8,9 @@ from langchain.chains import ConversationalRetrievalChain
8
  from htmlTemplates import css, bot_template, user_template
9
  from langchain.llms import LlamaCpp # For loading transformer models.
10
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
 
 
 
11
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
12
  import os
13
  from huggingface_hub import hf_hub_download # Hugging Face Hub에서 모델을 다운로드하기 위한 함수입니다.
@@ -24,15 +27,35 @@ def get_pdf_text(pdf_docs):
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
- def get_text_file(docs):
28
- pass
 
 
 
 
 
 
29
 
30
- def get_csv_file(docs):
31
- pass
32
-
33
- def get_json_file(docs):
34
- pass
35
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
38
  def get_text_chunks(documents):
 
8
  from htmlTemplates import css, bot_template, user_template
9
  from langchain.llms import LlamaCpp # For loading transformer models.
10
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
11
+ import json
12
+ from pathlib import Path
13
+ from pprint import pprint
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
  from huggingface_hub import hf_hub_download # Hugging Face Hub에서 모델을 다운로드하기 위한 함수입니다.
 
27
 
28
  # 과제
29
  # 아래 텍스트 추출 함수를 작성
30
def get_text_file(txt_docs):
    """Load an uploaded plain-text file into LangChain documents.

    The upload only exists in memory while ``TextLoader`` reads from a path,
    so the bytes are first written to a file inside a temporary directory.

    Args:
        txt_docs: Uploaded file object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` -- confirm with caller).

    Returns:
        The documents returned by ``TextLoader.load()``.
    """
    # The context manager removes the directory deterministically instead of
    # relying on implicit cleanup when the object is garbage-collected.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the write inside temp_dir even if the upload name
        # carries directory components.
        temp_filepath = os.path.join(temp_dir, os.path.basename(txt_docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(txt_docs.getvalue())
        # load() must run before the block exits and the file is deleted.
        return TextLoader(temp_filepath).load()
38
 
39
def get_csv_file(csv_docs):
    """Load an uploaded CSV file into LangChain documents.

    The upload only exists in memory while ``CSVLoader`` reads from a path,
    so the bytes are first written to a file inside a temporary directory.

    Args:
        csv_docs: Uploaded file object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` -- confirm with caller).

    Returns:
        The documents returned by ``CSVLoader.load()``.
    """
    # The context manager removes the directory deterministically instead of
    # relying on implicit cleanup when the object is garbage-collected.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the write inside temp_dir even if the upload name
        # carries directory components.
        temp_filepath = os.path.join(temp_dir, os.path.basename(csv_docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(csv_docs.getvalue())
        csv_loader = CSVLoader(temp_filepath)
        # Fix: the original called the undefined name ``pdf_loader`` here,
        # which raised NameError on every call.
        return csv_loader.load()
47
+
48
def get_json_file(json_docs, jq_schema='.messages[].content'):
    """Load an uploaded JSON file into LangChain documents.

    The upload only exists in memory while ``JSONLoader`` reads from a path,
    so the bytes are first written to a file inside a temporary directory.

    Args:
        json_docs: Uploaded file object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` -- confirm with caller).
        jq_schema: jq expression selecting the values that become documents.
            The default is the schema the original code hard-coded; pass a
            different expression for JSON files with another layout.

    Returns:
        The documents returned by ``JSONLoader.load()``.
    """
    # The context manager removes the directory deterministically instead of
    # relying on implicit cleanup when the object is garbage-collected.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the write inside temp_dir even if the upload name
        # carries directory components.
        temp_filepath = os.path.join(temp_dir, os.path.basename(json_docs.name))
        # Binary mode: getvalue() hands back the raw uploaded bytes.
        with open(temp_filepath, "wb") as f:
            f.write(json_docs.getvalue())
        # Fix: load the file that was just written. The original pointed the
        # loader at the sample path './example_data/facebook_chat.json', so
        # the uploaded content was never read.
        json_loader = JSONLoader(
            file_path=temp_filepath,
            jq_schema=jq_schema,
            text_content=False,
        )
        return json_loader.load()
59
 
60
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
61
  def get_text_chunks(documents):