openfree committed on
Commit
2af89cf
•
1 Parent(s): 7ddfb78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -15,20 +15,38 @@ LLM_MODELS = {
15
  def get_client(model_name):
16
  return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def read_uploaded_file(file):
19
  if file is None:
20
- return ""
21
  try:
22
  if file.name.endswith('.parquet'):
23
  df = pd.read_parquet(file.name, engine='pyarrow')
24
- return df.head(10).to_markdown(index=False)
 
25
  else:
26
  content = file.read()
27
  if isinstance(content, bytes):
28
- return content.decode('utf-8')
29
- return content
30
  except Exception as e:
31
- return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
32
 
33
  def format_history(history):
34
  formatted_history = []
@@ -39,19 +57,25 @@ def format_history(history):
39
  return formatted_history
40
 
41
  def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
42
- system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ 닡변할것. λ„ˆλŠ” 주어진 μ†ŒμŠ€μ½”λ“œλ‚˜ 데이터λ₯Ό 기반으둜 "μ„œλΉ„μŠ€ μ‚¬μš© μ„€λͺ… 및 μ•ˆλ‚΄, Q&Aλ₯Ό ν•˜λŠ” 역할이닀". μ•„μ£Ό μΉœμ ˆν•˜κ³  μžμ„Έν•˜κ²Œ 4000토큰 이상 Markdown ν˜•μ‹μœΌλ‘œ μž‘μ„±ν•˜λΌ. λ„ˆλŠ” μž…λ ₯된 λ‚΄μš©μ„ 기반으둜 μ‚¬μš© μ„€λͺ… 및 질의 응닡을 μ§„ν–‰ν•˜λ©°, μ΄μš©μžμ—κ²Œ 도움을 μ£Όμ–΄μ•Ό ν•œλ‹€. μ΄μš©μžκ°€ κΆκΈˆν•΄ ν•  λ§Œν•œ λ‚΄μš©μ— μΉœμ ˆν•˜κ²Œ μ•Œλ €μ£Όλ„λ‘ ν•˜λΌ. 전체 λ‚΄μš©μ— λŒ€ν•΄μ„œλŠ” λ³΄μ•ˆμ„ μœ μ§€ν•˜κ³ , ν‚€ κ°’ 및 μ—”λ“œν¬μΈνŠΈμ™€ ꡬ체적인 λͺ¨λΈμ€ κ³΅κ°œν•˜μ§€ 마라."""
43
 
44
  if uploaded_file:
45
- content = read_uploaded_file(uploaded_file)
46
- file_extension = os.path.splitext(uploaded_file.name)[1].lower()
 
 
 
 
47
 
48
- if file_extension == '.parquet':
49
  system_message += f"\n\n파일 λ‚΄μš©:\n```markdown\n{content}\n```"
50
  else:
51
  system_message += f"\n\n파일 λ‚΄μš©:\n```python\n{content}\n```"
52
 
53
  if message == "파일 뢄석을 μ‹œμž‘ν•©λ‹ˆλ‹€.":
54
- message = """μ—…λ‘œλ“œλœ νŒŒμΌμ„ λΆ„μ„ν•˜μ—¬ λ‹€μŒ λ‚΄μš©μ„ ν¬ν•¨ν•˜μ—¬ μƒμ„Ένžˆ μ„€λͺ…ν•˜λΌ:
 
 
55
  1. 파일의 μ£Όμš” λͺ©μ κ³Ό κΈ°λŠ₯
56
  2. μ£Όμš” νŠΉμ§•κ³Ό κ΅¬μ„±μš”μ†Œ
57
  3. ν™œμš© 방법 및 μ‚¬μš© μ‹œλ‚˜λ¦¬μ˜€
@@ -86,7 +110,6 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
86
  css = """
87
  footer {visibility: hidden}
88
  """
89
- # ... (이전 μ½”λ“œ 동일)
90
 
91
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
92
  with gr.Row():
@@ -101,7 +124,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
101
  clear = gr.ClearButton([msg, chatbot])
102
 
103
  with gr.Column(scale=1):
104
- model_name = gr.Dropdown(
105
  choices=list(LLM_MODELS.keys()),
106
  value="Default",
107
  label="LLM λͺ¨λΈ 선택",
@@ -110,7 +133,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
110
 
111
  file_upload = gr.File(
112
  label="파일 μ—…λ‘œλ“œ",
113
- file_types=["text", ".parquet"], # 파일 νƒ€μž… μˆ˜μ •
114
  type="filepath"
115
  )
116
 
@@ -120,8 +143,6 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
120
  temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
121
  top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
122
 
123
-
124
-
125
  # 이벀트 바인딩
126
  msg.submit(
127
  chat,
 
def get_client(model_name):
    """Create an InferenceClient for the model registered under ``model_name``.

    The name is looked up in ``LLM_MODELS`` (an unknown name raises
    ``KeyError``) and the value of the ``HF_TOKEN`` environment variable is
    passed as the client's token.
    """
    model_id = LLM_MODELS[model_name]
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_id, token=hf_token)
17
 
def analyze_file_content(content, file_type):
    """Return a one-line summary describing an uploaded file's content.

    Args:
        content: The file's text. When ``file_type`` is ``'parquet'`` this is
            assumed to be a Markdown pipe table (header row first).
        file_type: ``'parquet'`` for a table preview; any other value is
            treated as plain text or source code.

    Returns:
        A single-line summary string (Korean, user-facing): the column count
        for a table, the line/function/class counts for text that contains
        Python definitions, or the total and non-blank line counts otherwise.
    """
    # splitlines() does not report a phantom extra line for a trailing
    # newline, and yields zero lines for empty content.
    lines = content.splitlines()

    if file_type == 'parquet':
        # A pipe-table row with N cells contains N + 1 pipes. Only the header
        # row is inspected; counting pipes across the whole table would make
        # the result grow with the number of data rows.
        header = next((line for line in lines if line.strip()), "")
        columns = max(header.count('|') - 1, 0)
        return f"데이터셋 분석: {columns}개 컬럼의 데이터 테이블"

    total_lines = len(lines)
    non_empty_lines = sum(1 for line in lines if line.strip())

    # Heuristic: only lines that *start* with a definition keyword count, so
    # prose that merely contains "class " or "def " is not taken for code.
    stripped = [line.lstrip() for line in lines]
    functions = sum(
        1 for line in stripped if line.startswith(('def ', 'async def '))
    )
    classes = sum(1 for line in stripped if line.startswith('class '))

    if functions or classes:
        return (
            f"코드 분석: {total_lines}줄의 Python 코드 "
            f"({functions}개 함수, {classes}개 클래스 포함)"
        )
    return (
        f"텍스트 분석: {total_lines}줄의 텍스트 문서 "
        f"(유효 내용 {non_empty_lines}줄)"
    )
34
+
def read_uploaded_file(file):
    """Read an uploaded file and return its content together with a type tag.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or a
            file-like object exposing ``read()`` and optionally ``name``.
            A plain path is what an upload widget configured with
            ``type="filepath"`` hands to its callback.

    Returns:
        A ``(content, file_type)`` tuple where ``file_type`` is one of:
        ``""`` (no file; content is ``""``), ``"parquet"`` (content is a
        Markdown table of the first 10 rows), ``"text"`` (content is the
        decoded text), or ``"error"`` (content is a user-facing error
        message).
    """
    if file is None:
        return "", ""
    try:
        # Resolve a path for both call styles: a bare path, or an object
        # that carries its path in ``.name``.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = getattr(file, "name", "")

        if str(path).lower().endswith('.parquet'):
            df = pd.read_parquet(path, engine='pyarrow')
            content = df.head(10).to_markdown(index=False)
            return content, "parquet"

        if hasattr(file, "read"):
            content = file.read()
        else:
            # Bare path: open it ourselves and make sure it is closed again.
            with open(path, "rb") as handle:
                content = handle.read()
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        return content, "text"
    except Exception as e:
        # Best-effort by design: any failure is reported to the caller as an
        # "error" result instead of propagating into the UI callback.
        return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}", "error"
50
 
51
  def format_history(history):
52
  formatted_history = []
 
57
  return formatted_history
58
 
59
  def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
60
+ system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ 닡변할것. λ„ˆλŠ” 주어진 μ†ŒμŠ€μ½”λ“œλ‚˜ 데이터λ₯Ό 기반으둜 "μ„œλΉ„μŠ€ μ‚¬μš© μ„€λͺ… 및 μ•ˆλ‚΄, Q&Aλ₯Ό ν•˜λŠ” 역할이닀". μ•„μ£Ό μΉœμ ˆν•˜κ³  μžμ„Έν•˜κ²Œ 4000토큰 이상 Markdown ν˜•μ‹μœΌλ‘œ μž‘μ„±ν•˜λΌ. λ„ˆλŠ” μž…λ ₯된 λ‚΄μš©μ„ 기반으둜 μ‚¬μš© μ„€λͺ… 및 질의 응닡을 μ§„ν–‰ν•˜λ©°, μ΄μš©μžμ—κ²Œ 도움을 μ£Όμ–΄μ•Ό ν•œλ‹€."""
61
 
62
  if uploaded_file:
63
+ content, file_type = read_uploaded_file(uploaded_file)
64
+ if file_type == "error":
65
+ return "", history + [[message, content]]
66
+
67
+ # 파일 λ‚΄μš© 뢄석 및 μš”μ•½
68
+ file_summary = analyze_file_content(content, file_type)
69
 
70
+ if file_type == 'parquet':
71
  system_message += f"\n\n파일 λ‚΄μš©:\n```markdown\n{content}\n```"
72
  else:
73
  system_message += f"\n\n파일 λ‚΄μš©:\n```python\n{content}\n```"
74
 
75
  if message == "파일 뢄석을 μ‹œμž‘ν•©λ‹ˆλ‹€.":
76
+ message = f"""[파일 μš”μ•½] {file_summary}
77
+
78
+ λ‹€μŒ λ‚΄μš©μ„ ν¬ν•¨ν•˜μ—¬ μƒμ„Ένžˆ μ„€λͺ…ν•˜λΌ:
79
  1. 파일의 μ£Όμš” λͺ©μ κ³Ό κΈ°λŠ₯
80
  2. μ£Όμš” νŠΉμ§•κ³Ό κ΅¬μ„±μš”μ†Œ
81
  3. ν™œμš© 방법 및 μ‚¬μš© μ‹œλ‚˜λ¦¬μ˜€
 
110
  css = """
111
  footer {visibility: hidden}
112
  """
 
113
 
114
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
115
  with gr.Row():
 
124
  clear = gr.ClearButton([msg, chatbot])
125
 
126
  with gr.Column(scale=1):
127
+ model_name = gr.Radio(
128
  choices=list(LLM_MODELS.keys()),
129
  value="Default",
130
  label="LLM λͺ¨λΈ 선택",
 
133
 
134
  file_upload = gr.File(
135
  label="파일 μ—…λ‘œλ“œ",
136
+ file_types=["text", ".parquet"],
137
  type="filepath"
138
  )
139
 
 
143
  temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
144
  top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
145
 
 
 
146
  # 이벀트 바인딩
147
  msg.submit(
148
  chat,