"""Tiny NumPy next-word (bigram) chatbot served through a Gradio interface.

At import time the module reads ``corpus.txt`` (one sentence per line) and
``dataset.json`` (a JSON object mapping question strings to answer strings),
builds a whitespace-token vocabulary, and trains a two-layer linear softmax
model in two stages: first on the corpus, then on the question/answer pairs.
Replies are produced by greedy decoding starting from the first word of the
question.
"""
import json

import gradio as gr
import numpy as np

# Marker appended to every training sentence so the model can learn where a
# reply ends. Whitespace-split tokens are never empty, so an empty string can
# never be predicted; an explicit token is required for decoding to stop early.
EOS_TOKEN = "<eos>"

# === LOAD CORPUS & DATASET ===
with open("corpus.txt", "r", encoding="utf-8") as f:
    corpus = f.read().splitlines()

with open("dataset.json", "r", encoding="utf-8") as f:
    qa_data = json.load(f)

# === BUILD VOCAB ===
all_texts = corpus + list(qa_data.keys()) + list(qa_data.values())
# Sorted so word ids are the same on every run; iteration order of a set of
# strings changes between interpreter runs, which would defeat the fixed seed.
vocab = sorted(set(" ".join(all_texts).split()) | {EOS_TOKEN})
word2id = {w: i for i, w in enumerate(vocab)}
id2word = {i: w for w, i in word2id.items()}
vocab_size = len(vocab)


def one_hot(word):
    """Return a length-``vocab_size`` one-hot vector for *word*.

    Words that are not in the vocabulary yield an all-zero vector.
    """
    vec = np.zeros(vocab_size)
    if word in word2id:
        vec[word2id[word]] = 1
    return vec


# === MODEL PARAMETERS ===
np.random.seed(42)
hidden_size = 512  # can be changed
W1 = np.random.randn(vocab_size, hidden_size) * 0.01
W2 = np.random.randn(hidden_size, vocab_size) * 0.01
lr = 0.05


def softmax(x):
    """Return the softmax of 1-D array *x* (max-shifted for stability)."""
    e = np.exp(x - np.max(x))
    return e / e.sum()


def _forward(word_id):
    """Return ``(hidden, probabilities)`` for the word with id *word_id*.

    ``hidden`` is a view of row ``word_id`` of ``W1`` (equal to multiplying a
    one-hot vector by ``W1``), so callers must not rely on it after updating
    that row.
    """
    h = W1[word_id]
    return h, softmax(h @ W2)


def train_step(sentence):
    """Run one SGD pass over every adjacent word pair in *sentence*.

    Each word is used to predict its successor. ``W1`` and ``W2`` are updated
    in place after every pair. Pairs containing a word outside the vocabulary
    are skipped.

    Returns the summed cross-entropy loss over the trained pairs, which is
    the loss whose gradient (``y_pred - y_true``) is actually applied.
    """
    global W1, W2
    words = sentence.split()
    loss = 0.0
    for current_word, next_word in zip(words, words[1:]):
        src = word2id.get(current_word)
        tgt = word2id.get(next_word)
        if src is None or tgt is None:
            continue

        h, y_pred = _forward(src)
        # Small epsilon guards against log(0) if a probability underflows.
        loss += -np.log(y_pred[tgt] + 1e-12)

        # Gradient of softmax cross-entropy w.r.t. the logits.
        grad_o = y_pred.copy()
        grad_o[tgt] -= 1.0

        # Both gradients are computed before either weight matrix changes:
        # grad_h needs the old W2, and dW2 needs h, which aliases W1[src].
        grad_h = W2 @ grad_o
        dW2 = np.outer(h, grad_o)

        # Only the input word's row of W1 receives a non-zero gradient.
        W1[src] -= lr * grad_h
        W2 -= lr * dW2
    return loss


# === PRETRAIN ON THE CORPUS ===
for epoch in range(200):
    total_loss = 0
    for line in corpus:
        total_loss += train_step(f"{line} {EOS_TOKEN}")
    if epoch % 50 == 0:
        print(f"Pretrain Epoch {epoch}, Loss: {total_loss:.4f}")

# === FINE-TUNE ON THE Q&A PAIRS ===
for epoch in range(200):
    total_loss = 0
    for q, a in qa_data.items():
        total_loss += train_step(f"{q} {a} {EOS_TOKEN}")
    if epoch % 50 == 0:
        print(f"Finetune Epoch {epoch}, Loss: {total_loss:.4f}")


# === GENERATE A REPLY ===
def generate_reply(question, max_len=30):
    """Greedily generate a reply seeded by the first word of *question*.

    Only the first whitespace-separated word of the question is used as the
    starting point. Decoding stops when the end-of-sequence token is predicted
    or after *max_len* words. If the question is empty or its first word is
    not in the vocabulary, a fixed apology string is returned.
    """
    words = (question or "").split()
    if not words or words[0] not in word2id:
        return "Maaf, aku belum ngerti kata itu 🥺"

    current = word2id[words[0]]
    reply = []
    for _ in range(max_len):
        _, y_pred = _forward(current)
        current = int(np.argmax(y_pred))  # argmax keeps output deterministic
        pred_word = id2word[current]
        if pred_word == EOS_TOKEN:
            break
        reply.append(pred_word)
    return " ".join(reply)


# === GRADIO INTERFACE ===
def chatbot(input_text):
    """Gradio callback: return the generated reply for *input_text*."""
    return generate_reply(input_text)


demo = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Chatbot Numpy ala Cici 🤭",
    description="Mini chatbot dengan training 2 tahap: corpus + Q&A",
)

if __name__ == "__main__":
    demo.launch()