sujeongim0402@gmail.com committed on
Commit
beefdef
•
1 Parent(s): 8b057ae

edit codes

Browse files
Files changed (1) hide show
  1. app.py +39 -38
app.py CHANGED
@@ -19,59 +19,60 @@ import os
19
 
20
  # Extracts text from a PDF document.
21
  def get_pdf_text(pdf_docs):
22
- temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
23
- temp_filepath = os.path.join(temp_dir.name, pdf_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
24
- with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
25
- f.write(pdf_docs.getvalue()) # PDF ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
26
- pdf_loader = PyPDFLoader(temp_filepath) # PyPDFLoader๋ฅผ ์‚ฌ์šฉํ•ด PDF๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
27
- pdf_doc = pdf_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
28
- return pdf_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
29
 
30
  # Assignment
31
  # Write the text-extraction functions below.
32
-
33
  def get_text_file(docs):
34
- temp_dir2 = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
35
- temp_filepath2 = os.path.join(temp_dir2.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
36
- with open(temp_filepath2, "wb") as f:
37
- f.write(docs.getvalue())
38
- txt_loader = TextLoader(
39
- file_path=temp_filepath2,
40
  txt_args={
41
- 'delimiter': ' '
 
 
42
  }
43
  )
44
- txt_data = txt_loader.load()
45
- return txt_data
46
 
47
  def get_csv_file(docs):
48
- temp_dir3 = tempfile.TemporaryDirectory()
49
- temp_filepath3 = os.path.join(temp_dir3.name, docs.name)
50
- with open(temp_filepath3, "wb") as f:
51
- f.write(docs.getvalue())
52
- csv_loader = CSVLoader(
53
- file_path=temp_filepath3,
54
  csv_args={
55
- "delimiter": ",",
56
- "quotechar": '"',
57
- "fieldnames": ["name", "school", "address", "phone"],
58
  },
59
  )
60
- csv_data = csv_loader.load()
61
- return csv_data
62
 
63
  def get_json_file(docs):
64
- temp_dir4 = tempfile.TemporaryDirectory()
65
- temp_filepath4 = os.path.join(temp_dir4.name, docs.name)
66
- with open(temp_filepath4, "wb") as f:
67
- f.write(docs.getvalue())
68
- json_loader = JSONLoader(
69
- file_path=temp_filepath4,
70
- jq_schema='.messages[].content',
71
- text_content=False
72
  )
73
- json_data = json_loader.load()
74
- return json_data
75
 
76
 
77
  # Processes the documents and splits them into text chunks.
 
19
 
20
  # Extracts text from a PDF document.
21
  def get_pdf_text(pdf_docs):
22
+ temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
23
+ temp_filepath = os.path.join(temp_dir.name, pdf_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
24
+ with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ด๊ธฐ
25
+ f.write(pdf_docs.getvalue()) # PDF ๋ฌธ์„œ ๋‚ด์šฉ ์ž„์‹œ ํŒŒ์ผ์— ์“ฐ๊ธฐ
26
+ pdf_loader = PyPDFLoader(temp_filepath) # PyPDFLoader๋กœ PDF ๋กœ๋“œ
27
+ pdf_doc = pdf_loader.load() # ํ…์ŠคํŠธ ์ถ”์ถœ
28
+ return pdf_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
29
 
30
  # Assignment
31
  # Write the text-extraction functions below.
 
32
  def get_text_file(docs):
33
+ temp_dir2 = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
34
+ temp_filepath2 = os.path.join(temp_dir2.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
35
+ with open(temp_filepath2, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ด๊ธฐ
36
+ f.write(docs.getvalue()) # text ๋ฌธ์„œ์˜ ๋‚ด์šฉ ์ž„์‹œ ํŒŒ์ผ์— ์“ฐ๊ธฐ
37
+ txt_loader = TextLoader( # TextLoader๋กœ text ํŒŒ์ผ ๋กœ๋“œ
38
+ file_path=temp_filepath2, # text ๋ฌธ์„œ์˜ ๋‚ด์šฉ์ด ์“ฐ์ธ ํŒŒ์ผ ๊ฒฝ๋กœ
39
  txt_args={
40
+ "delimiter": " ", # ๋‚ด์šฉ์€ ๋„์–ด์“ฐ๊ธฐ๋กœ ๊ตฌ๋ถ„
41
+ # ์ž‘๋™์„ ์•ˆ ํ•ด์„œ ์ž„์˜๋กœ ๋‚ด์šฉ ๋„ฃ๊ธฐ
42
+ #"content":'"What is the most important thing in Team project? I think it is communication. No matter how good an individual ability is I think it is difficult to achieve good results without communicating with each other a lot."'
43
  }
44
  )
45
+ txt_data = txt_loader.load() # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ์ €์žฅ
46
+ return txt_data # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
47
 
48
  def get_csv_file(docs):
49
+ temp_dir3 = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
50
+ temp_filepath3 = os.path.join(temp_dir3.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
51
+ with open(temp_filepath3, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ด๊ธฐ
52
+ f.write(docs.getvalue()) # csv ๋ฌธ์„œ์˜ ๋‚ด์šฉ ์ž„์‹œ ํŒŒ์ผ์— ์“ฐ๊ธฐ
53
+ csv_loader = CSVLoader( # CSVLoader๋กœ csv ํŒŒ์ผ ๋กœ๋“œ
54
+ file_path=temp_filepath3, # CSV ๋ฌธ์„œ์˜ ๋‚ด์šฉ์ด ์“ฐ์ธ ํŒŒ์ผ ๊ฒฝ๋กœ
55
  csv_args={
56
+ "delimiter": ",", # ๋‚ด์šฉ์€ ์‰ผํ‘œ๋กœ ๊ตฌ๋ถ„
57
+ "quotechar": '"', # ๋ฌธ์ž์—ด์€ "" ์•ˆ์— ์“ฐ์ž„
58
+ "fieldnames": ["name", "school", "address", "phone"], # ํ•„๋“œ ์ด๋ฆ„ ๋‚˜์—ด
59
  },
60
  )
61
+ csv_data = csv_loader.load() # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ์ €์žฅ
62
+ return csv_data # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
63
 
64
  def get_json_file(docs):
65
+ temp_dir4 = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
66
+ temp_filepath4 = os.path.join(temp_dir4.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ƒ์„ฑ
67
+ with open(temp_filepath4, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ด๊ธฐ
68
+ f.write(docs.getvalue()) # json ๋ฌธ์„œ์˜ ๋‚ด์šฉ ์ž„์‹œ ํŒŒ์ผ์— ์“ฐ๊ธฐ
69
+ json_loader = JSONLoader( # JSONLoader๋กœ json ํŒŒ์ผ ๋กœ๋“œ
70
+ file_path=temp_filepath4, # json ๋ฌธ์„œ์˜ ๋‚ด์šฉ์ด ์“ฐ์ธ ํŒŒ์ผ ๊ฒฝ๋กœ
71
+ jq_schema='.messages[].content', # json ๋ฌธ์„œ์—์„œ ์ถ”์ถœํ•  ๋‚ด์šฉ ์„ค์ •(์ฑ„ํŒ… ๋ฉ”์‹œ์ง€)
72
+ text_content=False # ์ถ”์ถœํ•œ ๋ฐ์ดํ„ฐ๋Š” ํ…์ŠคํŠธ ํ˜•์‹์œผ๋กœ
73
  )
74
+ json_data = json_loader.load() # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ์ €์žฅ
75
+ return json_data # ์ถ”์ถœ๋œ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
76
 
77
 
78
  # Processes the documents and splits them into text chunks.