Spaces:

bin20
/

weektest11

Sleeping

App Files Community

bin20 committed on Nov 21, 2023

Commit

a907b29

1 Parent(s): 91f2adf

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -18

app.py CHANGED Viewed

@@ -24,32 +24,36 @@ def get_pdf_text(pdf_docs):
 # 과제
 # 아래 텍스트 추출 함수를 작성
-def get_text_file(text_contents):
-    temp_dir = tempfile.TemporaryDirectory()  # 임시 디렉토리를 생성합니다.
-    temp_filepath = os.path.join(temp_dir.name, text_contents.name)  # 임시 텍스트 파일 경로를 생성합니다.
-    with open(temp_filepath, "w", encoding="utf-8") as f:  # 텍스트 파일을 쓰기 모드로 엽니다.
-        f.write(text_contents.getvalue())  # 텍스트 데이터를 파일에 씁니다.
-    text_loader = TextLoader(temp_filepath)  # TextLoader를 사용해 텍스트 파일을 로드합니다.
-    text_content = text_loader.load()  # 텍스트를 추출합니다.
-    return text_content  # 추출한 텍스트를 반환합니다.
-def get_csv_file(csv_datas):
-    temp_dir = tempfile.TemporaryDirectory()  # 임시 디렉토리를 생성합니다.
-    temp_filepath = os.path.join(temp_dir.name, csv_datas.name)  # 임시 CSV 파일 경로를 생성합니다.
-    with open(temp_filepath, "w", newline="", encoding="utf-8") as csvfile:  # CSV 파일을 쓰기 모드로 엽니다.
-        csvfile.write(csv_datas.getvalue())  # CSV 데이터를 파일에 씁니다.
-    csv_loader = CSVLoader(temp_filepath)
     csv_data = csv_loader.load()
-    return csv_data
 def get_json_file(json_datas):
     temp_dir = tempfile.TemporaryDirectory()  # 임시 디렉토리를 생성합니다.
     temp_filepath = os.path.join(temp_dir.name, json_datas.name)  # 임시 JSON 파일 경로를 생성합니다.
     with open(temp_filepath, "w", encoding="utf-8") as jsonfile:  # JSON 파일을 쓰기 모드로 엽니다.
         jsonfile.write(json_datas.getvalue())  # JSON 데이터를 파일에 씁니다.
-    json_loader = JSONLoader(temp_filepath)
     json_data = json_loader.load()
-    return json_data
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):

 # 과제
 # 아래 텍스트 추출 함수를 작성
+def get_text_file(text_data):
+    temp_dir = tempfile.TemporaryDirectory()
+    temp_filepath = os.path.join(temp_dir.name, text_data.name)
+    with open(temp_filepath, "w", encoding="utf-8") as textfile:
+        textfile.write(text_data.getvalue())
+    text_loader = TextLoader(temp_filepath)
+    text = text_loader.load()
+    return text
+def get_csv_text(csv_file):
+    temp_dir = tempfile.TemporaryDirectory()
+    temp_filepath = os.path.join(temp_dir.name, csv_file.name)
+    with open(temp_filepath, "w", encoding="utf-8") as csvfile:
+        csvfile.write(csv_file.getvalue())
+    csv_loader = CSVLoader(temp_filepath, text_column='text_column_name')
     csv_data = csv_loader.load()
+    text_from_csv = '\n'.join(csv_data['text_column_name']) if 'text_column_name' in csv_data else ''
+    return text_from_csv
 def get_json_file(json_datas):
     temp_dir = tempfile.TemporaryDirectory()  # 임시 디렉토리를 생성합니다.
     temp_filepath = os.path.join(temp_dir.name, json_datas.name)  # 임시 JSON 파일 경로를 생성합니다.
     with open(temp_filepath, "w", encoding="utf-8") as jsonfile:  # JSON 파일을 쓰기 모드로 엽니다.
         jsonfile.write(json_datas.getvalue())  # JSON 데이터를 파일에 씁니다.
+    json_loader = JSONLoader(
+        temp_filepath,
+        jq_schema='messages[].content',
+        text_content=False)
     json_data = json_loader.load()
+    return json_data
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):