DDingcheol committed on
Commit
aa0948a
•
1 Parent(s): 4b11872

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -18
app.py CHANGED
@@ -11,10 +11,6 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
11
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
12
  import os
13
  from huggingface_hub import hf_hub_download # Hugging Face Hub์—์„œ ๋ชจ๋ธ์„ ๋‹ค์šด๋กœ๋“œํ•˜๊ธฐ ์œ„ํ•œ ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
14
- #추가로 필요한 것들
15
- from PyPDF2 import PdfReader
16
- import io
17
- import fitz
18
 
19
 
20
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
@@ -30,20 +26,7 @@ def get_pdf_text(pdf_docs):
30
  # 과제
31
  # 아래 텍스트 추출 함수를 작성
32
  def get_text_file(docs):
33
- text_list = []
34
- for doc in docs:
35
- if isinstance(doc, bytes):
36
- # Bytes ๊ฐ์ฒด๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ PdfReader๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.
37
- pdf_reader = PdfReader(io.BytesIO(doc))
38
- else:
39
- # ํŒŒ์ผ ๊ฐ์ฒด๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ PdfReader๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.
40
- pdf_reader = PdfReader(doc)
41
-
42
- # ๊ฐ ํŽ˜์ด์ง€์˜ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜์—ฌ text_list์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
43
- for page in pdf_reader.pages:
44
- text = page.extract_text()
45
- text_list.append(text)
46
- return text_list
47
 
48
 
49
 
 
11
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
12
  import os
13
  from huggingface_hub import hf_hub_download # Hugging Face Hub์—์„œ ๋ชจ๋ธ์„ ๋‹ค์šด๋กœ๋“œํ•˜๊ธฐ ์œ„ํ•œ ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
 
 
 
 
14
 
15
 
16
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
 
26
  # 과제
27
  # 아래 텍스트 추출 함수를 작성
28
  def get_text_file(docs):
29
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32