ginipick committed on
Commit
46b5d6c
·
verified ·
1 Parent(s): b058138

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -6,6 +6,7 @@ from typing import List, Dict, Tuple
6
  import json
7
  import io
8
  import traceback
 
9
 
10
  # 추론 API 클라이언트 설정
11
  hf_client = InferenceClient(
@@ -120,7 +121,14 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
120
  from io import StringIO
121
  # CSV ๋ฐ์ดํ„ฐ๋ฅผ StringIO๋ฅผ ํ†ตํ•ด ์ฝ๊ธฐ
122
  csv_data = StringIO(text)
123
- df = pd.read_csv(csv_data, sep=',', dtype=str)
 
 
 
 
 
 
 
124
  # ๋ฐ์ดํ„ฐ ์œ ํ˜• ์ตœ์ ํ™”
125
  df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
126
  # Parquet 파일로 변환
@@ -130,13 +138,16 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
130
  parquet_content = load_parquet(parquet_filename)
131
  return f"{parquet_filename} 파일이 성공적으로 변환되었습니다.", parquet_content, parquet_filename
132
  except Exception as e:
133
- return f"텍스트 변환 중 오류가 발생했습니다: {str(e)}", "", ""
 
 
134
 
135
  def preprocess_text_with_llm(input_text: str) -> str:
136
  # LLM์—๊ฒŒ ์ž…๋ ฅ ํ…์ŠคํŠธ๋ฅผ ์ „์ฒ˜๋ฆฌํ•˜๋„๋ก ์š”์ฒญ
137
  system_prompt = """당신은 입력된 긴 텍스트를 데이터셋 형식에 맞게 전처리하는 역할을 합니다.
138
  - ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์€ id,text,label,metadata์ž…๋‹ˆ๋‹ค.
139
- - 각 행은 쉼표로 구분되며, **텍스트나 다른 필드 내에 쉼표가 있을 경우 해당 필드를 큰따옴표(")로 감쌉니다.**
 
140
  - 텍스트를 의미 단위로 분할하고, 적절히 문장을 재구성하고 편집하여 최적화된 문장으로 만듭니다.
141
  - ๊ฐ ๋ฌธ์žฅ์— ๋Œ€ํ•ด id๋ฅผ ๋ถ€์—ฌํ•˜๊ณ , ์ ์ ˆํ•œ label(์นดํ…Œ๊ณ ๋ฆฌ)์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
142
  - metadata์—๋Š” ์ถœ์ฒ˜๋‚˜ ๋‚ ์งœ ๋“ฑ์˜ ์ถ”๊ฐ€ ์ •๋ณด๋ฅผ ํฌํ•จํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
@@ -334,7 +345,7 @@ with gr.Blocks(css=css) as demo:
334
  text_input = gr.Textbox(
335
  label="텍스트 입력 (각 행은 `id,text,label,metadata` 형식으로 입력)",
336
  lines=10,
337
- placeholder="예: 1,이순신,장군,거북선\n2,원균,장군,모함\n3,선조,왕,시기\n4,도요토미 히데요시,왕,침략"
338
  )
339
  convert_button = gr.Button("변환 및 다운로드")
340
  convert_status = gr.Textbox(label="변환 상태", interactive=False)
@@ -390,3 +401,4 @@ if __name__ == "__main__":
390
  demo.launch()
391
 
392
 
 
 
6
  import json
7
  import io
8
  import traceback
9
+ import csv
10
 
11
  # 추론 API 클라이언트 설정
12
  hf_client = InferenceClient(
 
121
  from io import StringIO
122
  # CSV ๋ฐ์ดํ„ฐ๋ฅผ StringIO๋ฅผ ํ†ตํ•ด ์ฝ๊ธฐ
123
  csv_data = StringIO(text)
124
+ df = pd.read_csv(
125
+ csv_data,
126
+ sep=',',
127
+ dtype=str,
128
+ quoting=csv.QUOTE_ALL, # 모든 필드를 큰따옴표로 감싸는 것으로 처리
129
+ escapechar='\\', # 이스케이프 문자 설정
130
+ engine='python' # Python 엔진 사용
131
+ )
132
  # ๋ฐ์ดํ„ฐ ์œ ํ˜• ์ตœ์ ํ™”
133
  df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
134
  # Parquet 파일로 변환
 
138
  parquet_content = load_parquet(parquet_filename)
139
  return f"{parquet_filename} 파일이 성공적으로 변환되었습니다.", parquet_content, parquet_filename
140
  except Exception as e:
141
+ error_message = f"텍스트 변환 중 오류가 발생했습니다: {str(e)}\n{traceback.format_exc()}"
142
+ print(error_message)
143
+ return error_message, "", ""
144
 
145
  def preprocess_text_with_llm(input_text: str) -> str:
146
  # LLM์—๊ฒŒ ์ž…๋ ฅ ํ…์ŠคํŠธ๋ฅผ ์ „์ฒ˜๋ฆฌํ•˜๋„๋ก ์š”์ฒญ
147
  system_prompt = """당신은 입력된 긴 텍스트를 데이터셋 형식에 맞게 전처리하는 역할을 합니다.
148
  - ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์€ id,text,label,metadata์ž…๋‹ˆ๋‹ค.
149
+ - 각 행은 쉼표로 구분되며, **텍스트나 다른 필드 내에 쉼표가 있을 경우 해당 필드를 반드시 큰따옴표(")로 감쌉니다.**
150
+ - **필드 내에 큰따옴표가 포함된 경우, 해당 큰따옴표 앞에 백슬래시(\\)를 추가하여 이스케이프 처리합니다.**
151
  - 텍스트를 의미 단위로 분할하고, 적절히 문장을 재구성하고 편집하여 최적화된 문장으로 만듭니다.
152
  - ๊ฐ ๋ฌธ์žฅ์— ๋Œ€ํ•ด id๋ฅผ ๋ถ€์—ฌํ•˜๊ณ , ์ ์ ˆํ•œ label(์นดํ…Œ๊ณ ๋ฆฌ)์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
153
  - metadata์—๋Š” ์ถœ์ฒ˜๋‚˜ ๋‚ ์งœ ๋“ฑ์˜ ์ถ”๊ฐ€ ์ •๋ณด๋ฅผ ํฌํ•จํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
 
345
  text_input = gr.Textbox(
346
  label="텍스트 입력 (각 행은 `id,text,label,metadata` 형식으로 입력)",
347
  lines=10,
348
+ placeholder='예: 1,"이순신","장군","거북선"\n2,"원균","장군","모함"\n3,"선조","왕","시기"\n4,"도요토미 히데요시","왕","침략"'
349
  )
350
  convert_button = gr.Button("변환 및 다운로드")
351
  convert_status = gr.Textbox(label="변환 상태", interactive=False)
 
401
  demo.launch()
402
 
403
 
404
+