Thback committed on
Commit
2dd3bbc
•
1 Parent(s): 2759478

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -20
app.py CHANGED
@@ -25,26 +25,14 @@ def get_pdf_text(pdf_docs):
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
 
28
- def get_text_file(text_docs):
29
- # 임시 디렉토리를 생성합니다.
30
- temp_dir = tempfile.TemporaryDirectory()
31
-
32
- # 임시 파일 경로를 생성합니다.
33
- temp_filepath = os.path.join(temp_dir.name, "temp_file.txt")
34
-
35
- # 텍스트 문서의 내용을 임시 파일에 씁니다.
36
- with open(temp_filepath, "w", encoding="utf-8") as f:
37
- f.write(text_docs.getvalue())
38
-
39
- # 임시 파일에서 텍스트를 읽어옵니다.
40
- with open(temp_filepath, "r", encoding="utf-8") as f:
41
- text_content = f.read()
42
-
43
- # 임시 디렉토리를 자동으로 정리합니다.
44
- temp_dir.cleanup()
45
-
46
- return [text_content] # 추출한 텍스트를 리스트에 담아 반환합니다.
47
-
48
 
49
 
50
 
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
 
28
+ def get_pdf_text(text_docs):
29
+ temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
30
+ temp_filepath = os.path.join(temp_dir.name, text_docs.name) # 임시 파일 경로를 생성합니다.
31
+ with open(temp_filepath, "wb") as f: # 임시 파일을 바이너리 쓰기 모드로 엽니다.
32
+ f.write(text_docs.getvalue()) # Text 문서의 내용을 임시 파일에 씁니다.
33
+ text_loader = TextLoader(temp_filepath) # TextLoader๋ฅผ ์‚ฌ์šฉํ•ด PDF๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
34
+ text_doc = text_loader.load() # 텍스트를 추출합니다.
35
+ return text_doc # 추출한 텍스트를 반환합니다.
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
 
38