openfree commited on
Commit
4c295f1
·
verified ·
1 Parent(s): 9e0b0c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -274
app.py CHANGED
@@ -1,275 +1,2 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  import os
4
- import pandas as pd
5
- import pdfplumber
6
- from typing import List, Tuple
7
-
8
# LLM Models Definition
# Maps the human-readable name shown in the UI model selector to the
# Hugging Face Hub model id handed to InferenceClient in get_client().
LLM_MODELS = {
    "Cohere c4ai-crp-08-2024": "CohereForAI/c4ai-command-r-plus-08-2024",  # Default
    "Meta Llama3.3-70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Mistral Nemo 2407": "mistralai/Mistral-Nemo-Instruct-2407",
    "Alibaba Qwen QwQ-32B": "Qwen/QwQ-32B-Preview"
}
15
-
16
def get_client(model_name):
    """Build an InferenceClient for the model registered under *model_name*.

    *model_name* must be a key of LLM_MODELS; the HF_TOKEN environment
    variable (if set) is used for authentication.
    """
    hub_model_id = LLM_MODELS[model_name]
    auth_token = os.getenv("HF_TOKEN")
    return InferenceClient(hub_model_id, token=auth_token)
18
-
19
def analyze_file_content(content, file_type):
    """Return a one-line structural summary of *content*.

    Parameters
    ----------
    content : str or path/file-like
        For ``file_type == 'pdf'`` this is passed to ``pdfplumber.open``;
        otherwise it is the file's text (for parquet/csv, a markdown table
        produced by ``DataFrame.to_markdown``).
    file_type : str
        File-type tag such as ``'csv'``, ``'parquet'``, ``'pdf'`` or a
        plain-text/code extension.

    Returns
    -------
    str
        An emoji-prefixed summary string, or an error string when dataset
        analysis fails.
    """
    if file_type in ['parquet', 'csv', 'pdf']:
        try:
            if file_type == 'pdf':
                with pdfplumber.open(content) as pdf:
                    lines = []
                    for page in pdf.pages:
                        # BUG FIX: extract_text() returns None for image-only
                        # pages; the original crashed on None.split('\n').
                        text = page.extract_text()
                        if text:
                            lines.extend(text.split('\n'))
            else:
                lines = content.split('\n')
            header = lines[0]
            # BUG FIX (off-by-one): "| a | b |".split('|') yields empty edge
            # cells, so subtract 2 (not 1) to count real columns; the header
            # and separator rows precede the data, so subtract 2 (not 3) rows.
            columns = max(len(header.split('|')) - 2, 0)
            rows = max(len(lines) - 2, 0)
            return f"📊 Dataset Structure: {columns} columns, {rows} data samples"
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are no longer swallowed.
            return "❌ Dataset structure analysis failed"

    lines = content.split('\n')
    total_lines = len(lines)

    # Heuristic: content mentioning code keywords is summarized as source code.
    if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
        functions = len([line for line in lines if 'def ' in line])
        classes = len([line for line in lines if 'class ' in line])
        imports = len([line for line in lines if 'import ' in line or 'from ' in line])
        return f"💻 Code Structure: {total_lines} lines (Functions: {functions}, Classes: {classes}, Imports: {imports})"

    # Otherwise treat it as prose: blank-line-separated paragraphs.
    paragraphs = content.count('\n\n') + 1
    words = len(content.split())
    return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, ~{words} words"
51
-
52
def read_uploaded_file(file):
    """Read an uploaded file and return ``(content, file_ext)``.

    Parameters
    ----------
    file : object or None
        An object exposing a ``.name`` filesystem path (as provided by
        ``gr.File(type="filepath")``), or ``None`` when nothing is uploaded.

    Returns
    -------
    tuple[str, str]
        ``("", "")`` when *file* is None, ``(text, extension)`` on success,
        or ``(error_message, "error")`` on any failure.
    """
    if file is None:
        return "", ""
    try:
        file_ext = os.path.splitext(file.name)[1].lower()

        if file_ext == '.parquet':
            df = pd.read_parquet(file.name, engine='pyarrow')
            return df.head(10).to_markdown(index=False), file_ext

        if file_ext == '.pdf':
            # BUG FIX: PDFs were previously fed to pd.read_csv (which cannot
            # parse PDF data); extract the text with pdfplumber instead.
            with pdfplumber.open(file.name) as pdf:
                # extract_text() may return None for image-only pages.
                pages_text = [page.extract_text() or "" for page in pdf.pages]
            return "\n".join(pages_text), file_ext

        if file_ext == '.csv':
            df = pd.read_csv(file.name)
            # Build a human-readable report: preview, shape, dtypes, nulls.
            content = f"📊 Data Preview:\n{df.head(10).to_markdown(index=False)}\n\n"
            content += f"\n📈 Data Information:\n"
            content += f"- Total Rows: {len(df)}\n"
            content += f"- Total Columns: {len(df.columns)}\n"
            content += f"- Column List: {', '.join(df.columns)}\n"
            content += f"\n📋 Column Data Types:\n"
            for col, dtype in df.dtypes.items():
                content += f"- {col}: {dtype}\n"
            null_counts = df.isnull().sum()
            if null_counts.any():
                content += f"\n⚠️ Missing Values:\n"
                for col, null_count in null_counts[null_counts > 0].items():
                    content += f"- {col}: {null_count} missing\n"
            return content, file_ext

        # Anything else is treated as text/code; try common encodings in order.
        encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
        for encoding in encodings:
            try:
                with open(file.name, 'r', encoding=encoding) as f:
                    return f.read(), file_ext
            except UnicodeDecodeError:
                continue
        # BUG FIX: UnicodeDecodeError requires five constructor arguments, so
        # the original one-argument call raised TypeError with a confusing
        # message; ValueError conveys the same failure correctly.
        raise ValueError(f"❌ Unable to read file with supported encodings ({', '.join(encodings)})")
    except Exception as e:
        # All failures are reported in-band so the chat handler can show them.
        return f"❌ Error reading file: {str(e)}", "error"
93
-
94
def format_history(history):
    """Convert (user, assistant) tuple pairs into role/content message dicts.

    A falsy assistant entry (None or empty string) produces only the user
    message, mirroring an in-flight turn.
    """
    messages = []
    for user_text, assistant_text in history:
        turn = [{"role": "user", "content": user_text}]
        if assistant_text:
            turn.append({"role": "assistant", "content": assistant_text})
        messages.extend(turn)
    return messages
101
-
102
def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
    """Streaming chat handler: analyses the uploaded file (if any), builds the
    message list, and yields ("", updated_history) pairs as tokens arrive.

    Yields the cleared textbox value and a messages-format history suitable
    for gr.Chatbot(type="messages").
    """
    system_prefix = """You are a file analysis expert. Analyze the uploaded file in depth from the following perspectives:
1. πŸ“‹ Overall structure and composition
2. πŸ“Š Key content and pattern analysis
3. πŸ“ˆ Data characteristics and meaning
- For datasets: Column meanings, data types, value distributions
- For text/code: Structural features, main patterns
4. πŸ’‘ Potential applications
5. ✨ Data quality and areas for improvement
Provide detailed and structured analysis from an expert perspective, but explain in an easy-to-understand way. Format the analysis results in Markdown and include specific examples where possible."""

    if uploaded_file:
        content, file_type = read_uploaded_file(uploaded_file)
        if file_type == "error":
            # NOTE(review): `return <value>` inside a generator only sets
            # StopIteration.value — confirm Gradio actually displays this
            # error history instead of silently ending the stream.
            return "", [{"role": "user", "content": message}, {"role": "assistant", "content": content}]

        file_summary = analyze_file_content(content, file_type)

        # Tabular formats are embedded in a markdown fence so the model reads
        # them as a table; everything else goes into a plain code fence.
        if file_type in ['parquet', 'csv', 'pdf']:
            system_message += f"\n\nFile Content:\n```markdown\n{content}\n```"
        else:
            system_message += f"\n\nFile Content:\n```\n{content}\n```"

        # NOTE(review): the auto-analysis trigger (file_upload.change) sends a
        # Korean placeholder, so this English sentinel appears never to match
        # and the detailed analysis prompt below is likely dead code — verify.
        if message == "Starting file analysis...":
            message = f"""[ꡬ쑰 뢄석] {file_summary}
μžμ„Ένžˆ λΆ„μ„ν•΄μ£Όμ„Έμš”:
1. πŸ“‹ 전체 ꡬ쑰 및 ν˜•μ‹
2. πŸ“Š μ£Όμš” λ‚΄μš© 및 κ΅¬μ„±μš”μ†Œ 뢄석
3. πŸ“ˆ 데이터/λ‚΄μš©μ˜ νŠΉμ„± 및 νŒ¨ν„΄
4. ⭐ ν’ˆμ§ˆ 및 μ™„μ „μ„± 평가
5. πŸ’‘ μ œμ•ˆν•˜λŠ” κ°œμ„ μ 
6. 🎯 μ‹€μš©μ μΈ ν™œμš© 및 ꢌμž₯사항"""

    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]

    # Convert history to message format; accepts both dict entries and
    # legacy (user, assistant) pairs.
    if history is not None:
        for item in history:
            if isinstance(item, dict):
                messages.append(item)
            elif isinstance(item, (list, tuple)) and len(item) == 2:
                messages.append({"role": "user", "content": item[0]})
                if item[1]:
                    messages.append({"role": "assistant", "content": item[1]})

    messages.append({"role": "user", "content": message})

    try:
        client = get_client(model_name)
        partial_message = ""
        current_history = []

        # Stream tokens and re-yield the growing assistant message each time.
        for msg in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # NOTE(review): assumes the streamed delta supports dict-style
            # .get(); confirm against the huggingface_hub version in use.
            token = msg.choices[0].delta.get('content', None)
            if token:
                partial_message += token
                current_history = [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": partial_message}
                ]
                yield "", current_history

    except Exception as e:
        # Surface inference failures in the chat window rather than crashing.
        error_msg = f"❌ Inference error: {str(e)}"
        error_history = [
            {"role": "user", "content": message},
            {"role": "assistant", "content": error_msg}
        ]
        yield "", error_history
177
-
178
# Hide the default Gradio footer.
css = """
footer {visibility: hidden}
"""

# ... (same as the previous code)

# Top-level UI definition: layout, controls, and event wiring.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="EveryChat πŸ€–") as demo:
    # Page header banner.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1 style="font-size: 3em; font-weight: 600; margin: 0.5em;">EveryChat πŸ€–</h1>
            <h3 style="font-size: 1.2em; margin: 1em;">Your Intelligent File Analysis Assistant πŸ“Š</h3>
        </div>
        """
    )

    with gr.Row():
        # Left column: chat window, message box, send button.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                height=600,
                label="μ±„νŒ… μΈν„°νŽ˜μ΄μŠ€ πŸ’¬",
                type="messages"
            )
            msg = gr.Textbox(
                label="λ©”μ‹œμ§€λ₯Ό μž…λ ₯ν•˜μ„Έμš”",
                show_label=False,
                placeholder="μ—…λ‘œλ“œλœ νŒŒμΌμ— λŒ€ν•΄ λ¬Όμ–΄λ³΄μ„Έμš”... πŸ’­",
                container=False
            )
            send = gr.Button("전솑 πŸ“€")

        # Right column: model picker, file upload, advanced generation settings.
        with gr.Column(scale=1):
            model_name = gr.Radio(
                choices=list(LLM_MODELS.keys()),
                value="Cohere c4ai-crp-08-2024",
                label="LLM λͺ¨λΈ 선택 πŸ€–",
                info="μ„ ν˜Έν•˜λŠ” AI λͺ¨λΈμ„ μ„ νƒν•˜μ„Έμš”"
            )

            gr.Markdown("### 파일 μ—…λ‘œλ“œ πŸ“\n지원: ν…μŠ€νŠΈ, μ½”λ“œ, CSV, Parquet, PDF 파일")
            file_upload = gr.File(
                label="파일 μ—…λ‘œλ“œ",
                file_types=["text", ".csv", ".parquet", ".pdf"],
                type="filepath"
            )

            with gr.Accordion("κ³ κΈ‰ μ„€μ • βš™οΈ", open=False):
                system_message = gr.Textbox(label="μ‹œμŠ€ν…œ λ©”μ‹œμ§€ πŸ“", value="")
                max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="μ΅œλŒ€ 토큰 πŸ“Š")
                temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="μ˜¨λ„ 🌑️")
                top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P πŸ“ˆ")

    # Event bindings: Enter key and Send button both invoke chat(), then
    # re-enable the textbox once the stream finishes.
    msg.submit(
        chat,
        inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
        outputs=[msg, chatbot],
        queue=True
    ).then(
        lambda: gr.update(interactive=True),
        None,
        [msg]
    )

    send.click(
        chat,
        inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
        outputs=[msg, chatbot],
        queue=True
    ).then(
        lambda: gr.update(interactive=True),
        None,
        [msg]
    )

    # Auto-analysis on file upload.
    # NOTE(review): constructing gr.Textbox(...) inline in `inputs=` creates a
    # hidden component just to pass a constant, and its Korean value does not
    # match the English "Starting file analysis..." sentinel inside chat() —
    # so the special analysis prompt appears never to fire; verify intended.
    file_upload.change(
        chat,
        inputs=[gr.Textbox(value="파일 뢄석 μ‹œμž‘..."), chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
        outputs=[msg, chatbot],
        queue=True
    )

    # Example queries shown below the message box.
    gr.Examples(
        examples=[
            ["파일의 전체 ꡬ쑰와 νŠΉμ§•μ„ μžμ„Ένžˆ μ„€λͺ…ν•΄μ£Όμ„Έμš” πŸ“‹"],
            ["파일의 μ£Όμš” νŒ¨ν„΄κ³Ό νŠΉμ„±μ„ λΆ„μ„ν•΄μ£Όμ„Έμš” πŸ“Š"],
            ["파일의 ν’ˆμ§ˆκ³Ό κ°œμ„ μ μ„ ν‰κ°€ν•΄μ£Όμ„Έμš” πŸ’‘"],
            ["이 νŒŒμΌμ„ μ–΄λ–»κ²Œ μ‹€μš©μ μœΌλ‘œ ν™œμš©ν•  수 μžˆμ„κΉŒμš”? 🎯"],
            ["μ£Όμš” λ‚΄μš©μ„ μš”μ•½ν•˜κ³  핡심 톡찰λ ₯을 λ„μΆœν•΄μ£Όμ„Έμš” ✨"],
            ["더 μžμ„Έν•œ 뢄석을 κ³„μ†ν•΄μ£Όμ„Έμš” πŸ“ˆ"],
        ],
        inputs=msg,
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
import os

# SECURITY WARNING: this executes arbitrary Python code taken from the APP
# environment variable — whoever controls the process environment controls
# the entire application, and the real source is hidden from review. This is
# a known obfuscation/backdoor pattern; the payload should be reviewed and
# committed as plain source instead.
# NOTE(review): os.environ.get('APP') returns None when APP is unset, and
# exec(None) raises TypeError — confirm APP is always provided.
exec(os.environ.get('APP'))