Files changed (1) hide show
  1. app.py +22 -4
app.py CHANGED
@@ -25,13 +25,31 @@ def get_pdf_text(pdf_docs):
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
  def get_text_file(docs):
28
- pass
 
29
 
30
  def get_csv_file(docs):
31
- pass
32
-
 
 
 
 
 
 
 
 
33
  def get_json_file(docs):
34
- pass
 
 
 
 
 
 
 
 
 
35
 
36
 
37
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
  def get_text_file(docs):
28
+ text = file.getvalue().decode("utf-8") # ν…μŠ€νŠΈ νŒŒμΌμ„ λ¬Έμžμ—΄λ‘œ λ””μ½”λ”©ν•©λ‹ˆλ‹€.
29
+ return [text]
30
 
31
  def get_csv_file(docs):
32
+ import pandas as pd
33
+
34
+ # CSV νŒŒμΌμ„ Pandas DataFrame으둜 μ½μŠ΅λ‹ˆλ‹€.
35
+ df = pd.read_csv(file)
36
+
37
+ # μ—¬κΈ°μ—μ„œ μ›ν•˜λŠ” 열을 μ„ νƒν•˜κ±°λ‚˜ 전체 νŒŒμΌμ„ μ½μ–΄μ˜¬ 수 μžˆμŠ΅λ‹ˆλ‹€.
38
+ # 예λ₯Ό λ“€μ–΄, 'text_column' μ—΄μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•˜λŠ” 방법은 λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€.
39
+ texts = df['text_column'].tolist()
40
+
41
+ return texts
42
  def get_json_file(docs):
43
+ import json
44
+
45
+ # JSON νŒŒμΌμ„ λ””μ½”λ”©ν•˜μ—¬ 데이터λ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
46
+ data = json.load(file)
47
+
48
+ # μ—¬κΈ°μ—μ„œ μ μ ˆν•œ λ°©λ²•μœΌλ‘œ JSON λ°μ΄ν„°μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
49
+ # 예λ₯Ό λ“€μ–΄, 'text' 킀에 ν•΄λ‹Ήν•˜λŠ” 값을 μΆ”μΆœν•˜λŠ” 방법은 λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€.
50
+ texts = [item['text'] for item in data]
51
+
52
+ return texts
53
 
54
 
55
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.