import gradio as gr
import subprocess
import os
import torch
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from huggingface_hub import hf_hub_download
from llama_index.core.node_parser import SentenceSplitter


def check_cuda_availability():
    """Check whether CUDA is available."""
    if torch.cuda.is_available():
        print("CUDA is available on this system.")
        return True
    print("CUDA was not found on this system.")
    return False


def install_llama_with_cuda():
    """Reinstall llama-cpp-python with CUDA support, if CUDA is available."""
    try:
        if check_cuda_availability():
            print("Reinstalling llama-cpp-python with CUDA support...")
            pip_path = "/home/user/.pyenv/versions/3.10.16/bin/pip"  # adjust to your environment
            result = subprocess.run(
                [pip_path, "install", "llama-cpp-python",
                 "--force-reinstall", "--no-cache-dir"],
                # Extend os.environ rather than replacing it; a bare
                # env={"CMAKE_ARGS": ...} would strip PATH and break pip.
                env={**os.environ, "CMAKE_ARGS": "-DGGML_CUDA=on"},
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=True,
            )
            print(result.stdout)
            print("llama-cpp-python was reinstalled with CUDA support.")
        else:
            print("CUDA is not available. Running in CPU mode.")
    except subprocess.CalledProcessError as e:
        print(f"Error while reinstalling llama-cpp-python: {e.stderr}")
    except Exception as e:
        print(f"Unexpected error: {e}")


def initialize_llama_model():
    """Download the GGUF model from the Hugging Face Hub."""
    model_path = hf_hub_download(
        repo_id="TheBloke/zephyr-7b-beta-GGUF",  # fixed typo: "TheBLoke" -> "TheBloke"
        filename="zephyr-7b-beta.Q4_K_M.gguf",
        cache_dir="./models",
    )
    return model_path


def initialize_settings(model_path):
    """Register the local model as the global llama-index LLM."""
    # Use the LlamaCPP wrapper (already imported) instead of the raw
    # llama_cpp.Llama: Settings.llm expects a llama-index LLM, and the raw
    # Llama constructor does not accept temperature/top_p.
    Settings.llm = LlamaCPP(
        model_path=model_path,
        temperature=0.7,
        generate_kwargs={"top_p": 0.9},
        model_kwargs={"n_gpu_layers": 1},  # offload a layer to the GPU if available
    )


def initialize_index():
    """Build the vector index from the input documents."""
    documents = SimpleDirectoryReader(input_files=[
        "bahandokumen/K3.txt",
        "bahandokumen/bonus.txt",
        "bahandokumen/cuti.txt",
        "bahandokumen/disiplinkerja.txt",
        "bahandokumen/fasilitas&bantuan.txt",
        "bahandokumen/upahlembur.txt",
        "bahandokumen/waktukerja.txt",
    ]).load_data()
    parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
    nodes = parser.get_nodes_from_documents(documents)
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
    index = VectorStoreIndex(nodes)
    return index


def initialize_chat_engine(index):
    """Initialize the chat engine from the index."""
    from llama_index.core.chat_engine import CondensePlusContextChatEngine

    retriever = index.as_retriever(similarity_top_k=3)
    # The system prompt used to be built as an unused ChatMessage inside
    # generate_response(); wiring it in here makes it actually take effect.
    # (Kept in Indonesian: it instructs the bot to answer in Indonesian.)
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        system_prompt=(
            "Anda adalah chatbot yang selalu menjawab pertanyaan secara "
            "singkat, ramah, dan jelas dalam bahasa Indonesia."
        ),
        verbose=True,
    )
    return chat_engine
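# Quick smoke test (a sketch, not part of the original flow): exercise the
# retrieval + generation pipeline end to end before launching the UI. The
# example question is hypothetical.
#
#     initialize_settings(initialize_llama_model())
#     engine = initialize_chat_engine(initialize_index())
#     print(engine.chat("Berapa lama cuti tahunan yang diberikan?"))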
def generate_response(message, history, chat_engine):
    """Stream a reply from the chat engine and return it as a single string."""
    # history is supplied by gr.ChatInterface; the engine keeps its own memory.
    response = chat_engine.stream_chat(message)
    text = "".join(response.response_gen)  # join the streamed tokens
    return text  # ChatInterface expects the reply string, not the history list


def clear_history(chat_engine):
    """Clear the chat history."""
    chat_engine.reset()  # chat engines expose reset(), not clear()


def launch_gradio(chat_engine):
    """Launch the Gradio interface."""
    with gr.Blocks() as demo:
        clear_btn = gr.Button("Clear")
        clear_btn.click(lambda: clear_history(chat_engine))
        gr.ChatInterface(
            lambda message, history: generate_response(message, history, chat_engine)
        )
    demo.launch()


def main():
    """Main entry point for the application."""
    try:
        install_llama_with_cuda()
        model_path = initialize_llama_model()
        initialize_settings(model_path)
        index = initialize_index()
        chat_engine = initialize_chat_engine(index)
        launch_gradio(chat_engine)
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--install", action="store_true",
                        help="Reinstall llama-cpp-python")
    args = parser.parse_args()
    if args.install:
        install_llama_with_cuda()
    else:
        main()
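# Usage (assuming the script is saved as app.py; the filename is not given here):
#
#     python app.py --install    # reinstall llama-cpp-python with CUDA support
#     python app.py              # download the model, build the index, launch the UI
#
# The index expects the bahandokumen/*.txt files listed in initialize_index()
# to exist relative to the working directory.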