kmc0003a committed on
Commit
032e2f7
•
1 Parent(s): 7092843

Upload 4 files

Browse files
Files changed (1) hide show
  1. app.py +20 -38
app.py CHANGED
@@ -8,53 +8,36 @@ from langchain.chains import ConversationalRetrievalChain
8
  from htmlTemplates import css, bot_template, user_template
9
  from langchain.llms import LlamaCpp # For loading transformer models.
10
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
11
- import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
12
  import os
13
- import csv
14
- import json
15
- from huggingface_hub import hf_hub_download # Hugging Face Hub에서 모델을 다운로드하기 위한 함수입니다.
16
-
17
 
18
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
19
  def get_pdf_text(pdf_docs):
20
- temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
21
- temp_filepath = os.path.join(temp_dir.name, pdf_docs.name) # 임시 파일 경로를 생성합니다.
22
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
23
- f.write(pdf_docs.getvalue()) # PDF 문서의 내용을 임시 파일에 씁니다.
24
- pdf_loader = PyPDFLoader(temp_filepath) # PyPDFLoader를 사용해 PDF를 로드합니다.
25
- pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
26
- return pdf_doc # 추출한 텍스트를 반환합니다.
27
-
28
 
29
  # 과제
30
  # 아래 텍스트 추출 함수를 작성
31
  def get_text_file(docs):
32
- with tempfile.NamedTemporaryFile(delete=False) as temp:
33
- temp.write(docs.getvalue())
34
- temp.seek(0)
35
- text_loader = TextLoader(temp.name)
36
- text_doc = text_loader.load()
37
- return [text_doc]
38
-
39
-
40
  def get_csv_file(docs):
41
- with tempfile.NamedTemporaryFile(delete=False) as temp:
42
- temp.write(docs.getvalue())
43
- temp.seek(0)
44
- csv_loader = CSVLoader(temp.name)
45
- csv_doc = csv_loader.load()
46
- return [csv_doc]
47
-
48
 
49
  def get_json_file(docs):
50
- with tempfile.NamedTemporaryFile(delete=False) as temp:
51
- temp.write(docs.getvalue())
52
- temp.seek(0)
53
- json_loader = JSONLoader(temp.name)
54
- json_doc = json_loader.load()
55
- return [json_doc]
56
-
57
-
58
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
59
  def get_text_chunks(documents):
60
  text_splitter = RecursiveCharacterTextSplitter(
@@ -94,8 +77,7 @@ def get_conversation_chain(vectorstore):
94
  retriever=vectorstore.as_retriever(),
95
  memory=memory
96
  )
97
- return conversation_chain # 생성된 대화 체인을 반환합니다.
98
-
99
 
100
  # 사용자 입력을 처리하는 함수입니다.
101
  def handle_userinput(user_question):
@@ -166,4 +148,4 @@ def main():
166
 
167
 
168
  if __name__ == '__main__':
169
- main()
 
8
  from htmlTemplates import css, bot_template, user_template
9
  from langchain.llms import LlamaCpp # For loading transformer models.
10
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
11
+ import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
12
  import os
13
+ from huggingface_hub import hf_hub_download # Hugging Face Hub에서 모델을 다운로드하기 위한 함수입니다.
 
 
 
14
 
15
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
16
  def get_pdf_text(pdf_docs):
17
+ temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
18
+ temp_filepath = os.path.join(temp_dir.name, pdf_docs.name) # 임시 파일 경로를 생성합니다.
19
  with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
20
+ f.write(pdf_docs.getvalue()) # PDF 문서의 내용을 임시 파일에 씁니다.
21
+ pdf_loader = PyPDFLoader(temp_filepath) # PyPDFLoader를 사용해 PDF를 로드합니다.
22
+ pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
23
+ return pdf_doc # 추출한 텍스트를 반환합니다.
 
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
  def get_text_file(docs):
28
+ text = docs.read().decode()
29
+ return [text]
30
+
 
 
 
 
 
31
  def get_csv_file(docs):
32
+ csv_reader = csv.reader(docs)
33
+ text = ' '.join([' '.join(row) for row in csv_reader])
34
+ return [text]
 
 
 
 
35
 
36
  def get_json_file(docs):
37
+ json_data = json.load(docs)
38
+ text = json.dumps(json_data)
39
+ return [text]
40
+
 
 
 
 
41
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
42
  def get_text_chunks(documents):
43
  text_splitter = RecursiveCharacterTextSplitter(
 
77
  retriever=vectorstore.as_retriever(),
78
  memory=memory
79
  )
80
+ return conversation_chain # 생성된 대화 체인을 반환합니다.
 
81
 
82
  # 사용자 입력을 처리하는 함수입니다.
83
  def handle_userinput(user_question):
 
148
 
149
 
150
  if __name__ == '__main__':
151
+ main()