Model Trained Using AutoTrain

This model was trained using AutoTrain. For more information, please visit AutoTrain.

Usage

Install the required dependencies and clone the dataset first:

pip install transformers datasets matplotlib pandas git-lfs jiwer tqdm numpy
git clone https://huggingface.co/datasets/404NotF0und/MtG-json-to-ForgeScribe

The following code is an example of usage, as run in a Kaggle notebook:

import torch
import random
import csv
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from collections.abc import Sequence

# Function to read the CSV files and extract the relevant columns
def read_dataset(file_path):
    """Read a dataset CSV and return (prompt, target) pairs.

    Each row must provide the columns ``instruction``, ``input`` and
    ``output``. The prompt is the instruction and the input joined by a
    single space; the target is the ``output`` column unchanged.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list of ``(json_input, target_dsl)`` tuples, one per data row.

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    print(f"Reading dataset from {file_path}")
    # newline="" hands line-ending handling to the csv module, so line
    # breaks embedded inside quoted fields are preserved exactly as written.
    with open(file_path, encoding="utf-8", newline="") as csv_file:
        return [
            (f"{row['instruction']} {row['input']}", row["output"])
            for row in csv.DictReader(csv_file)  # DictReader: columns by name
        ]


# Function to load the model and tokenizer from Hugging Face
def load_model(model_name, read_token, device):
    """Load the tokenizer and causal-LM model for ``model_name``.

    Args:
        model_name: Identifier passed to ``from_pretrained``.
        read_token: Access token passed to ``from_pretrained`` as ``token``.
        device: Accepted for interface compatibility; this function does
            not use it, so the model is returned as loaded.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Both loaders receive the same authentication argument.
    auth_kwargs = {"token": read_token}
    tokenizer = AutoTokenizer.from_pretrained(model_name, **auth_kwargs)
    model = AutoModelForCausalLM.from_pretrained(model_name, **auth_kwargs)
    return tokenizer, model

# Function to run inference (text generation)
def run_inference(model, tokenizer, prompt, max_length=300):
    """Generate text for ``prompt`` and return the section after the first '###'.

    The decoded output is split on ``'###'`` and the second piece (the text
    between the first and second separator) is printed and returned. If the
    generated text contains no ``'###'`` at all, the whole decoded text is
    printed and returned instead of raising ``IndexError``.

    Args:
        model: Model object exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``encode``, ``decode`` and
            ``eos_token_id``.
        prompt: Prompt text to encode and feed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The extracted response text (a ``str``).
    """
    # Encode the prompt text and keep it on the same device as the model,
    # so generation also works when the model has been moved to a GPU.
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    input_ids = input_ids.to(model.device)

    # Generate text using the model (sampling enabled)
    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
    )

    # Decode the generated text
    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

    # Split once; fall back to the full text when no separator was generated.
    sections = generated_text.split('###')
    response = sections[1] if len(sections) > 1 else generated_text

    print(response)

    return response

# Hugging Face access token placeholder; replace with your own read token.
read_token = 'hf_YOUR_TOKEN'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): the model page title in this document reads
# "lunar-llm-phi-2-3epochs" (with a trailing "s") — confirm the repo id.
model_name = '404NotF0und/lunar-llm-phi-2-3epoch'

# Paths of the dataset splits inside the cloned MtG-json-to-ForgeScribe repo
validation_path = "MtG-json-to-ForgeScribe/compiled_cards_data_validation.csv"
test_path = "MtG-json-to-ForgeScribe/compiled_cards_data_test.csv"
train_path = "MtG-json-to-ForgeScribe/compiled_cards_data_train.csv"

# Read the datasets
validation_data = read_dataset(validation_path)
test_data = read_dataset(test_path)
# The training split is read from train_path (it previously re-read test_path).
train_data = read_dataset(train_path)

def get_random_prompts(dataset, num_samples=3):
    """Pick ``num_samples`` random elements of ``dataset`` as prompt dicts.

    Args:
        dataset: Elements indexable as ``element[0]`` (prompt text) and
            ``element[1]`` (target text). Anything that is not a
            ``Sequence`` is first materialized into a list.
        num_samples: How many elements to draw without replacement.

    Returns:
        A list of dicts with the keys ``'json_input'`` (``element[0]``) and
        ``'max_length'`` (character count of the prompt, the
        ``"\\n### Response: "`` separator and the target combined).

    Raises:
        ValueError: If the dataset holds fewer than ``num_samples`` items.
    """
    pool = dataset if isinstance(dataset, Sequence) else list(dataset)

    if num_samples > len(pool):
        raise ValueError(f"Dataset does not have enough elements to sample {num_samples} items.")

    prompts = []
    for picked in random.sample(pool, num_samples):
        # Length, in characters, of prompt + separator + expected response
        full_text = f"{picked[0]}\n### Response: {picked[1]}"
        prompts.append({'json_input': picked[0], 'max_length': len(full_text)})

    return prompts

# Prompts to run through the model. A single hand-written example is used
# here; each entry is a dict with 'json_input' (the prompt text) and
# 'max_length'. Building a list literal cannot raise ValueError, so no
# try/except is needed around it.
prompts = [
    {
        'json_input': "Create the Forge script for this magic card { \"name\": \"Wrench\", \"mana_cost\": \"{W}\", \"type_line\": \"Artifact\u2014 Clue Equipment\", \"oracle_text\":  \"Equipped creature gets +1/+1 and has vigilance and \"{3}, {T}: Tap target creature.\"\n{2}, Sacrifice CARD_NAME: Draw a card.\nEquip {2}'\"}",
        'max_length': 100
    }
]

# Fetch the tokenizer and model once, before looping over the prompts
tokenizer, model = load_model(model_name, read_token, device)

# Run every prompt through the model; run_inference prints the extracted
# response itself, so only the question and the separators are printed here.
for prompt in prompts:
    question = prompt['json_input']

    print(f"### Question: {question} \n")
    print("\n" + "-" * 80 + "\n")

    # Text generation (uses run_inference's default max_length)
    generated_text = run_inference(model, tokenizer, question)

    print("\n" + "=" * 80 + "\n")  # Separator for readability

Lastly, this is an example of the output you should get:

### Question: Create the Forge script for this magic card { "name": "Wrench", "mana_cost": "{W}", "type_line": "Artifact— Clue Equipment", "oracle_text":  "Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."
{2}, Sacrifice CARD_NAME: Draw a card.
Equip {2}'"} 


--------------------------------------------------------------------------------

 Response: Name:Wrench\nManaCost:W\nTypes:Artifact Clue Equipment\nK:Equip:2\nS:Mode$ Continuous | Affected$ Creature.EquippedBy | AddPower$ 1 | AddToughness$ 1 | AddKeyword$ Vigilance | AddAbility$ TrigTap | Description$ Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\nSVar:TrigTap:AB$ Tap | Cost$ 3 T | ValidTgts$ Creature | TgtPrompt$ Select target creature | SpellDescription$ Tap target creature.\nA:AB$ Draw | Cost$ 2 Sac<1/CARDNAME> | NumCards$ 1 | SpellDescription$ Draw a card.\nOracle:Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\n{2}, Sacrifice Wrench: Draw

================================================================================

404NotF0und
/

lunar-llm-phi-2-3epochs

Model Trained Using AutoTrain

Usage

Dataset used to train 404NotF0und/lunar-llm-phi-2-3epochs