# Spaces: Running
import pandas as pd | |
from datasets import Dataset | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
import streamlit as st | |
def fine_tune_model(uploaded_file):
    """Render the Streamlit fine-tuning workflow for an uploaded CSV file.

    The CSV is loaded into a DataFrame and previewed. If it has no ``text``
    column, the user is asked to pick the column to use and a ``text`` column
    is created from it. When the "Fine-tune Model" button is pressed, the
    selected model and tokenizer are loaded and the dataset is tokenized.
    No training is actually run: the training arguments are only assembled
    as a placeholder and a demo success message is shown.

    Args:
        uploaded_file: CSV source passed straight to ``pandas.read_csv``
            (presumably the object returned by a Streamlit file uploader --
            confirm against the caller).

    Returns:
        None. All output, including errors, is reported through Streamlit.
    """
    # Read CSV file. An empty, malformed or badly encoded upload is reported
    # to the user instead of crashing the app with a raw traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as e:
        st.error(f"Could not read the uploaded CSV file: {e}")
        return

    st.subheader("Dataset Preview")
    st.write(df.head())

    # Check for a 'text' column or allow user to choose a column
    if 'text' not in df.columns:
        st.warning("No 'text' column found. Please select the column to use for fine-tuning.")
        column_choice = st.selectbox("Select the column containing text data", df.columns)
        df['text'] = df[column_choice]  # Create a 'text' column based on user selection

    # The tokenizer only accepts strings: blank CSV cells arrive as NaN and
    # numeric columns as numbers, so normalise every value to str up front
    # (missing cells become empty strings) rather than failing at tokenization.
    df['text'] = ["" if pd.isna(value) else str(value) for value in df['text']]

    # Convert CSV to Hugging Face dataset format
    dataset = Dataset.from_pandas(df)

    model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

    # Nothing more to do until the user explicitly asks for fine-tuning.
    if not st.button("Fine-tune Model"):
        return

    if not model_name:
        st.warning("Please select a model for fine-tuning.")
        return

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # while loading or tokenizing is surfaced to the user as an error message.
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        def preprocess_function(examples):
            # Called with batches of rows because map() uses batched=True.
            return tokenizer(examples['text'], truncation=True, padding=True)

        tokenized_datasets = dataset.map(preprocess_function, batched=True)

        # Fine-tuning logic (example): these settings are placeholders and
        # are not consumed by any trainer in this demo.
        train_args = {
            "output_dir": "./results",
            "num_train_epochs": 3,
            "per_device_train_batch_size": 16,
            "logging_dir": "./logs",
        }
        st.success("Fine-tuning started (demo)!")  # Fine-tuning process goes here
    except Exception as e:
        st.error(f"Error during fine-tuning: {e}")