# NOTE: "Spaces: Sleeping / Sleeping" is status-banner text accidentally pasted
# from the Hugging Face Spaces UI; kept as a comment so the file is valid Python.
from datasets import Dataset | |
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer | |
from datafile import QL, resume_data_dict | |
import streamlit as st | |
from tensorflow import keras | |
import tensorflow as tf | |
# Load your custom data: pair each query with the resume answer its label maps to.
# zip() iterates queries and labels in lockstep, replacing the index-based
# `for i in range(len(...))` loop; QL is assumed to hold parallel
# "queries"/"labels" lists (defined in datafile — confirm there).
data = [
    {"question": query, "answer": resume_data_dict[label]}
    for query, label in zip(QL["queries"], QL["labels"])
]
# Create a Dataset usable by datasets/Trainer from the list of records.
dataset = Dataset.from_list(data)
# Load the DialoGPT tokenizer. DialoGPT ships without a dedicated pad token,
# so reuse the end-of-sequence token for padding when none is configured.
base_checkpoint = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
def preprocess_function(examples):
    """Tokenize question/answer pairs for causal-LM fine-tuning.

    DialoGPT is a decoder-only (causal) model, so labels must be aligned
    token-for-token with ``input_ids``. The original seq2seq-style approach
    (separate target tokenization via the deprecated ``as_target_tokenizer``)
    produced labels that did not correspond to the input positions. Instead,
    concatenate question and answer into one sequence, use its own token ids
    as labels, and mask padding positions with -100 so they are ignored by
    the loss.

    Args:
        examples: batch dict with parallel "question" and "answer" lists
            (supplied by ``dataset.map(..., batched=True)``).

    Returns:
        dict with "input_ids", "attention_mask", and "labels", all lists of
        equal-length token-id sequences.
    """
    # EOS separates the prompt from the answer and terminates the sequence,
    # matching DialoGPT's turn-delimiting convention.
    texts = [
        f"Question: {q}{tokenizer.eos_token}{a}{tokenizer.eos_token}"
        for q, a in zip(examples["question"], examples["answer"])
    ]
    # 256 tokens to leave room for question + answer in a single sequence.
    model_inputs = tokenizer(texts, padding="max_length", truncation=True, max_length=256)
    # Use attention_mask (not pad_token_id, which equals eos_token_id here)
    # to decide which positions are padding, so real EOS tokens keep a label.
    model_inputs["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs
# Apply preprocessing; batched=True hands preprocess_function a dict of lists
# (one slice of the dataset at a time) rather than single examples.
tokenized_dataset = dataset.map(preprocess_function, batched=True)
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
# No new tokens were added above (pad_token reuses the existing eos_token), so
# this resize is effectively a no-op; harmless safety net if tokens are added later.
model.resize_token_embeddings(len(tokenizer))
# Define training arguments
# NOTE(review): `evaluation_strategy` was renamed to `eval_strategy` in newer
# transformers releases — confirm against the pinned transformers version.
training_args = TrainingArguments(
    output_dir="./resume_bot",        # checkpoints are written under this dir
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=10,                  # short LR warmup; dataset is small
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,                   # checkpoint every 500 steps
    evaluation_strategy="steps"
)
# Initialize the Trainer
# NOTE(review): eval_dataset is the training set itself, so eval metrics only
# measure memorization, not generalization — consider a held-out split.
# NOTE(review): the `tokenizer=` kwarg is deprecated in favor of
# `processing_class=` in recent transformers — verify the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer               # saved alongside checkpoints; used for padding
)
# Train the model
trainer.train()
model_name = "./resume_bot"  # Path to your fine-tuned model
# trainer.train() only writes periodic checkpoints (checkpoint-500/, ...) under
# output_dir; nothing is saved at the directory root, so from_pretrained() on
# "./resume_bot" would fail. Persist the final model and tokenizer explicitly
# before reloading them for inference.
trainer.save_model(model_name)
tokenizer.save_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
st.title("Resume Chatbot")
# Create the per-session chat transcript the first time this session runs.
if "history" not in st.session_state:
    st.session_state["history"] = []
user_input = st.text_input("You: ", "")
if user_input:
    # Encode the user turn terminated with EOS, as DialoGPT expects; keep the
    # attention mask so generate() can tell real tokens from padding (the
    # original call omitted it, which triggers a transformers warning and can
    # misbehave because pad_token_id == eos_token_id for this model).
    encoded = tokenizer(user_input + tokenizer.eos_token, return_tensors="pt")
    try:
        response_ids = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=1000,
            pad_token_id=tokenizer.eos_token_id,
        )
        # generate() returns prompt + continuation; decode only the newly
        # generated tokens so the bot does not echo the user's question back.
        new_tokens = response_ids[0, encoded["input_ids"].shape[-1]:]
        bot_response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        # Update the chat history
        st.session_state.history.append(f"You: {user_input}")
        st.session_state.history.append(f"Bot: {bot_response}")
        # Display the bot response
        st.write(f"Bot: {bot_response}")
    except Exception as e:
        # Surface generation failures in the UI instead of crashing the app.
        st.error(f"Error generating response: {e}")
# Offer a button that wipes the conversation transcript and starts over.
if st.button("Reset Conversation"):
    st.session_state["history"] = []