saintyboy committed on
Commit
b304835
1 Parent(s): 59cb44c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "microsoft/DialoGPT-small"

# Every dialogue is padded/truncated to this many tokens. An explicit value is
# passed so padding="max_length" does not depend on whatever maximum length the
# tokenizer config happens to declare.
MAX_SEQUENCE_LENGTH = 128


@st.cache_resource
def _load_tokenizer():
    """Load the DialoGPT tokenizer once per Streamlit server process."""
    loaded = AutoTokenizer.from_pretrained(MODEL_NAME)
    # GPT-2 style tokenizers ship without a padding token; reuse EOS so that
    # padding="max_length" in tokenize_function does not raise.
    if loaded.pad_token is None:
        loaded.pad_token = loaded.eos_token
    return loaded


tokenizer = _load_tokenizer()


# Prepare the data for training
def tokenize_function(examples):
    """Tokenize one batch of examples for causal-LM fine-tuning.

    Args:
        examples: A batch mapping column names to lists. A ``"dialog"`` column
            (one list of utterances per example, as in ``daily_dialog``) is
            flattened into a single string with the EOS token after every
            turn. Batches without it fall back to a plain ``"text"`` column.

    Returns:
        The tokenizer output, padded/truncated to ``MAX_SEQUENCE_LENGTH``.
    """
    eos = tokenizer.eos_token
    if "dialog" in examples:
        texts = [
            eos.join(turn.strip() for turn in turns) + eos
            for turns in examples["dialog"]
        ]
    else:
        texts = examples["text"]
    return tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
    )


@st.cache_resource
def _load_fine_tuned_model():
    """Fine-tune DialoGPT on daily_dialog and return the trained model.

    Streamlit re-executes the whole script on every widget interaction, so the
    training is wrapped in ``st.cache_resource`` to run once per server process
    instead of once per user message.
    """
    # Load and prepare the dataset
    dataset = load_dataset("daily_dialog")
    base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

    # Define training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
    )

    # Drop the raw columns so only tokenizer outputs reach the collator.
    tokenized_datasets = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset["train"].column_names,
    )

    # Initialize the Trainer. The collator copies input_ids into labels
    # (mlm=False); without labels the model returns no loss to optimize.
    trainer = Trainer(
        model=base_model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
    )

    # Training the model
    trainer.train()

    # Leave training mode so dropout is disabled when generating replies.
    base_model.eval()
    return base_model


model = _load_fine_tuned_model()

# Streamlit interface: a single text box; each submitted message produces one
# generated reply (no conversation history is kept between messages).
st.title('Simple Chatbot')
user_input = st.text_input("You: ")

# Ignore empty or whitespace-only submissions.
if user_input and user_input.strip():
    # Encode the user input, terminated by EOS as DialoGPT expects per turn.
    inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
    # The model may have been moved to an accelerator during training; the
    # prompt tensor must live on the same device.
    inputs = inputs.to(model.device)

    reply_ids = model.generate(inputs, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # generate() returns prompt + continuation; keep only the new tokens so
    # the bot does not echo the user's own message back.
    new_token_ids = reply_ids[0, inputs.shape[-1]:]
    reply = tokenizer.decode(new_token_ids, skip_special_tokens=True)

    st.write("Bot:", reply)