"""Gradio app that answers natural-language questions about an uploaded CSV.

A question is sent to a locally loaded LLM, which is prompted to reply with a
single pandas expression over ``df``. That expression is executed and its
result is shown in the chat window together with the generated code.
"""
import os
import threading
import traceback
import uuid

import gradio as gr
import numpy as np
import pandas as pd
from langchain.chains import LLMChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain_experimental.agents import create_pandas_dataframe_agent

# Global model cache: the model is loaded lazily, once per process, and the
# lock keeps concurrent sessions from loading it twice.
MODEL_CACHE = {
    "model": None,
    "init_lock": threading.Lock(),
}

# Create directories for user data
os.makedirs("user_data", exist_ok=True)

# Prompt that asks the model to turn a question into one pandas expression.
# Kept at module level so code indentation never leaks into the prompt text.
QUERY_TEMPLATE = """
Kamu adalah asisten yang mengubah pertanyaan natural language menjadi kode Python dengan pandas.

Informasi tentang DataFrame:
- Nama kolom: {column_names}
- Jumlah baris: {num_rows}
- Sample data:
{sample_data}

Pertanyaan pengguna: {question}

Ubah pertanyaan tersebut menjadi kode pandas yang bisa dijalankan. Kode harus ringkas, efisien, dan menggunakan variabel 'df'.
Berikan HANYA kode python saja, tanpa backtick, tanpa penjelasan.

Kode:
"""

# Example questions shown in the UI accordion (one Markdown bullet per line).
EXAMPLE_QUESTIONS_MD = """
- "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
- "Bagaimana distribusi kolom Age?"
- "Hitung nilai rata-rata dan standar deviasi untuk setiap kolom numerik"
- "Buat tabel frekuensi untuk kolom Outcome"
"""


def initialize_model_once():
    """Return the shared CTransformers model, loading it on first use.

    The load happens under ``MODEL_CACHE["init_lock"]`` so that only one
    caller ever constructs the model; later calls return the cached object.
    """
    with MODEL_CACHE["init_lock"]:
        if MODEL_CACHE["model"] is None:
            # Load Phi-2 model (smaller than Mistral)
            MODEL_CACHE["model"] = CTransformers(
                model="TheBloke/phi-2-GGUF",
                model_file="phi-2.Q4_K_M.gguf",
                model_type="phi2",
                max_new_tokens=512,
                temperature=0.1,
                top_p=0.9,
                repetition_penalty=1.1,
                context_length=2048,
            )
    return MODEL_CACHE["model"]


class ChatBot:
    """Per-session state: the loaded DataFrame, its metadata and the LLM chain."""

    def __init__(self, session_id: str) -> None:
        """Create empty session state and the session's data directory.

        Args:
            session_id: Identifier used to name the directory under
                ``user_data`` where the uploaded CSV is stored.
        """
        self.session_id = session_id
        self.csv_info = None
        self.df = None
        # Must exist before process_file() runs: chat() reads it
        # unconditionally to decide whether a file has been loaded.
        self.query_chain = None
        self.chat_history = []
        self.user_dir = os.path.join("user_data", session_id)
        os.makedirs(self.user_dir, exist_ok=True)

    def process_file(self, file) -> str:
        """Load an uploaded CSV and prepare the question-to-code chain.

        Args:
            file: The value delivered by the Gradio file component; either an
                object with a ``name`` attribute holding the path, or
                something whose ``str()`` is the path. ``None`` means no
                file was uploaded.

        Returns:
            A user-facing status message. Failures are reported through the
            returned text rather than raised.
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Handle file from Gradio
            file_path = file.name if hasattr(file, "name") else str(file)
            file_name = os.path.basename(file_path)

            # Load the CSV with pandas and keep a copy in the session folder.
            try:
                self.df = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                self.df.to_csv(user_file_path, index=False)

                # Store CSV info
                self.csv_info = {
                    "filename": file_name,
                    "rows": self.df.shape[0],
                    "columns": self.df.shape[1],
                    "column_names": self.df.columns.tolist(),
                }
                print(f"CSV verified: {self.df.shape[0]} rows, {len(self.df.columns)} columns")
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            # Create query translator
            try:
                llm = initialize_model_once()
                self.query_chain = LLMChain(
                    llm=llm,
                    prompt=PromptTemplate(
                        input_variables=["column_names", "num_rows", "sample_data", "question"],
                        template=QUERY_TEMPLATE,
                    ),
                )
                print("Query translator created successfully")
            except Exception as e:
                return f"Error creating query translator: {str(e)}"

            # Add file info to chat history
            file_info = (
                f"CSV berhasil dimuat: {file_name} dengan {self.df.shape[0]} baris "
                f"dan {len(self.df.columns)} kolom. "
                f"Kolom: {', '.join(self.df.columns.tolist())}"
            )
            self.chat_history.append(("System", file_info))

            return (
                f"File CSV '{file_name}' berhasil diproses! "
                "Anda dapat mulai mengajukan pertanyaan tentang data."
            )
        except Exception as e:
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return model output with surrounding whitespace and ``` fences removed."""
        code = text.strip()
        if code.startswith("```"):
            code = code.strip("`").strip()
            # Drop a leading "python" language tag left over from the fence.
            if code[:6].lower() == "python" and (len(code) == 6 or code[6].isspace()):
                code = code[6:].lstrip()
        return code

    def chat(self, message: str, history) -> str:
        """Answer one question about the loaded CSV.

        Questions about the file name, column names or row count are answered
        directly from stored metadata. Anything else is translated to pandas
        code by the LLM chain and executed.

        Args:
            message: The user's question.
            history: Prior chat turns; accepted for interface compatibility
                and not used.

        Returns:
            The answer text, or an error message if translation or execution
            failed.
        """
        if self.df is None or self.query_chain is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Handle metadata questions directly
            message_lower = message.lower()
            if "nama file" in message_lower:
                return f"Nama file CSV adalah: {self.csv_info['filename']}"
            if "nama kolom" in message_lower:
                return f"Kolom dalam CSV: {', '.join(self.csv_info['column_names'])}"
            if "jumlah baris" in message_lower or "berapa baris" in message_lower:
                return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"

            # Get sample data for context
            sample_str = self.df.head(3).to_string()

            # Translate question to pandas code
            code_response = self.query_chain.run(
                column_names=str(self.csv_info["column_names"]),
                num_rows=self.csv_info["rows"],
                sample_data=sample_str,
                question=message,
            )

            # Capture the expression's value under a known name.
            code = "result = " + self._strip_code_fences(code_response)

            try:
                # One dict serves as both globals and locals: with two
                # separate dicts, lambdas and comprehensions inside the
                # generated code could not see ``df``.
                namespace = {"df": self.df, "pd": pd, "np": np}

                # SECURITY: this runs model-generated code, steered by
                # user-supplied text, with the full privileges of this
                # process. Nothing here restricts what it can do; isolate or
                # validate it before exposing the app to untrusted users.
                print(f"Executing code: {code}")
                exec(code, namespace)
                result = namespace.get("result", "No result returned")

                # Format the result; long DataFrames are cut to five rows.
                if isinstance(result, pd.DataFrame) and len(result) > 5:
                    result_str = result.head(5).to_string() + f"\n\n[{len(result)} baris ditemukan]"
                elif isinstance(result, pd.DataFrame):
                    result_str = result.to_string()
                else:
                    result_str = str(result)

                # Build the response
                response = f"Hasil analisis untuk pertanyaan: '{message}'\n\n"
                response += f"Kode yang digunakan:\n```python\n{code}\n```\n\n"
                response += f"Output:\n{result_str}"

                self.chat_history.append((message, response))
                return response
            except Exception as e:
                error_msg = (
                    f"Error mengeksekusi kode: {str(e)}\n"
                    f"Kode yang dihasilkan:\n```python\n{code}\n```"
                )
                print(error_msg)
                return error_msg
        except Exception as e:
            print(traceback.format_exc())
            return f"Error: {str(e)}"


# UI Code
def create_gradio_interface():
    """Build and return the Gradio Blocks interface for the CSV analyzer."""
    with gr.Blocks(title="CSV Data Analyzer") as interface:
        session_id = gr.State(lambda: str(uuid.uuid4()))
        chatbot_state = gr.State(lambda: None)

        # NOTE(review): these banners carry plain text only; any heading
        # markup that may have wrapped them is not present here - confirm.
        gr.HTML("\n\nCSV Data Analyzer\n\n")
        gr.HTML("\n\nAjukan pertanyaan tentang data CSV Anda\n\n")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload CSV Anda",
                    file_types=[".csv"],
                )
                process_button = gr.Button("Proses CSV")

                with gr.Accordion("Contoh Pertanyaan", open=False):
                    gr.Markdown(EXAMPLE_QUESTIONS_MD)

            with gr.Column(scale=2):
                chatbot_interface = gr.Chatbot(
                    label="Riwayat Chat",
                    height=400,
                )
                message_input = gr.Textbox(
                    label="Ketik pertanyaan Anda",
                    placeholder="Contoh: Berapa jumlah data yang memiliki nilai Glucose di atas 150?",
                    lines=2,
                )
                submit_button = gr.Button("Kirim")
                clear_button = gr.Button("Bersihkan Chat")

        # Handler functions
        def handle_process_file(file, sess_id):
            """Start a fresh ChatBot for the session and load the file into it."""
            chatbot = ChatBot(sess_id)
            result = chatbot.process_file(file)
            return chatbot, [(None, result)]

        process_button.click(
            fn=handle_process_file,
            inputs=[file_input, session_id],
            outputs=[chatbot_state, chatbot_interface],
        )

        def user_message_submitted(message, history, chatbot, sess_id):
            """Append the user's message to the chat and clear the textbox."""
            history = history + [(message, None)]
            return history, "", chatbot, sess_id

        def bot_response(history, chatbot, sess_id):
            """Fill in the bot's reply for the last message in ``history``."""
            if chatbot is None:
                chatbot = ChatBot(sess_id)
                history[-1] = (history[-1][0], "Mohon upload file CSV terlebih dahulu.")
                return chatbot, history

            user_message = history[-1][0]
            response = chatbot.chat(user_message, history[:-1])
            history[-1] = (user_message, response)
            return chatbot, history

        # The button click and the textbox submit run the same two-step flow.
        for register in (submit_button.click, message_input.submit):
            register(
                fn=user_message_submitted,
                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
            ).then(
                fn=bot_response,
                inputs=[chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_state, chatbot_interface],
            )

        def handle_clear_chat(chatbot):
            """Reset the stored history and empty the chat window."""
            if chatbot is not None:
                chatbot.chat_history = []
            return chatbot, []

        clear_button.click(
            fn=handle_clear_chat,
            inputs=[chatbot_state],
            outputs=[chatbot_state, chatbot_interface],
        )

    return interface


# Launch the interface
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)