
Model Trained Using AutoTrain

This model was trained using AutoTrain. For more information, please visit the AutoTrain documentation (https://huggingface.co/docs/autotrain).

Usage

  • Install the dependencies and clone the dataset first
pip install transformers datasets matplotlib pandas git-lfs jiwer tqdm numpy
git clone https://huggingface.co/datasets/404NotF0und/MtG-json-to-ForgeScribe
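
After cloning, you can sanity-check the CSV layout with pandas before running anything. This is a quick sketch; the instruction/input/output column names are the ones the reader below expects:

import pandas as pd

# Peek at one of the cloned splits to confirm the expected columns are present
df = pd.read_csv("MtG-json-to-ForgeScribe/compiled_cards_data_train.csv")
print(df.columns.tolist())  # expect ['instruction', 'input', 'output']
print(df.head(2))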

The following code is an example of usage, taken from a Kaggle notebook.

import torch
import random
import csv
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from collections.abc import Sequence

# Function to read the CSV files and extract the relevant columns
def read_dataset(file_path):
    print(f"Reading dataset from {file_path}")
    data = []
    with open(file_path, encoding="utf-8") as csv_file:
        csv_reader = csv.DictReader(csv_file)  # Use DictReader to handle columns by name
        for row in csv_reader:
            json_input = f"{row['instruction']} {row['input']}"  # Assuming 'input' column contains the JSON input
            target_dsl = row["output"]  # Assuming 'output' column contains the target DSL
            data.append((json_input, target_dsl))
    return data


# Function to load the model and tokenizer from Hugging Face
def load_model(model_name, read_token, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=read_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=read_token).to(device)
    return tokenizer, model

# Function to run inference (text generation)
def run_inference(model, tokenizer, prompt, max_length=300):
    # Encode the prompt text
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    
    # Generate text using the model
    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )
    
    # Decode the generated text
    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

    # The model echoes the prompt; the response follows the first '###' marker
    response = generated_text.split('###')[1]
    print(response)

    return response

read_token = 'hf_YOUR_TOKEN'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = '404NotF0und/lunar-llm-phi-2-3epoch'

# Load the datasets
validation_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_validation.csv"
test_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_test.csv"
train_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_train.csv"

# Read the datasets
validation_data = read_dataset(validation_path)
test_data = read_dataset(test_path)
train_data = read_dataset(train_path)

def get_random_prompts(dataset, num_samples=3):
    if not isinstance(dataset, Sequence):
        dataset = list(dataset)
    
    if len(dataset) < num_samples:
        raise ValueError(f"Dataset does not have enough elements to sample {num_samples} items.")
    
    random_elements = random.sample(dataset, num_samples)
    
    # Create a list of dictionaries with 'json_input' and 'max_length' for each selected element
    prompts = [
        {
            'json_input': element[0],
            'max_length': len(f"{element[0]}\n### Response: {element[1]}")  # Character count of the prompt plus the reference response
        }
        for element in random_elements
    ]
    
    return prompts

# Populate the prompts variable. Here we hard-code a single example prompt;
# a sketch for sampling prompts from a dataset instead follows below.
prompts = [
    {
        'json_input': "Create the Forge script for this magic card { \"name\": \"Wrench\", \"mana_cost\": \"{W}\", \"type_line\": \"Artifact\u2014 Clue Equipment\", \"oracle_text\":  \"Equipped creature gets +1/+1 and has vigilance and \"{3}, {T}: Tap target creature.\"\n{2}, Sacrifice CARD_NAME: Draw a card.\nEquip {2}'\"}",
        'max_length': 100
    }
]
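
Alternatively, you can draw prompts at random from one of the loaded splits with get_random_prompts; this sketch samples the validation split (the split choice and sample size are assumptions):

# Sample random prompts from the validation split instead of hard-coding one
try:
    prompts = get_random_prompts(validation_data, num_samples=3)
except ValueError as e:
    print(e)
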
# Load the model and tokenizer
tokenizer, model = load_model(model_name, read_token, device)



for prompt in prompts:
    print(f"### Question: {prompt['json_input']} \n")
    print("\n" + "-"*80 + "\n")
    # Run inference (text generation)
    generated_text = run_inference(model, tokenizer, prompt['json_input'])

    # Print the generated text
    # print(generated_text)
    print("\n" + "="*80 + "\n")  # Separator for readability

Lastly, this is an example of the output you should get:

### Question: Create the Forge script for this magic card { "name": "Wrench", "mana_cost": "{W}", "type_line": "Artifact— Clue Equipment", "oracle_text":  "Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."
{2}, Sacrifice CARD_NAME: Draw a card.
Equip {2}'"} 


--------------------------------------------------------------------------------

 Response: Name:Wrench\nManaCost:W\nTypes:Artifact Clue Equipment\nK:Equip:2\nS:Mode$ Continuous | Affected$ Creature.EquippedBy | AddPower$ 1 | AddToughness$ 1 | AddKeyword$ Vigilance | AddAbility$ TrigTap | Description$ Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\nSVar:TrigTap:AB$ Tap | Cost$ 3 T | ValidTgts$ Creature | TgtPrompt$ Select target creature | SpellDescription$ Tap target creature.\nA:AB$ Draw | Cost$ 2 Sac<1/CARDNAME> | NumCards$ 1 | SpellDescription$ Draw a card.\nOracle:Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\n{2}, Sacrifice Wrench: Draw

================================================================================
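
The install step above pulls in jiwer, which the notebook snippet never uses. Below is a minimal sketch of how you might score generated scripts against the test split with word error rate; this evaluation loop is an assumption, not part of the original notebook:

from jiwer import wer

# Score a small random sample of test cards (sample size is arbitrary)
samples = random.sample(test_data, 3)
scores = []
for json_input, target_dsl in samples:
    generated = run_inference(model, tokenizer, json_input)
    # run_inference returns a ' Response: ...' segment; drop the label before scoring
    generated = generated.split('Response:', 1)[-1].strip()
    scores.append(wer(target_dsl, generated))
print(f"Mean WER over {len(samples)} samples: {sum(scores) / len(scores):.3f}")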
