---
tags:
- autotrain
- text-generation
widget:
- text: >-
    Create the Forge script for this magic card { "name": "Wrench", "mana_cost": "{W}", "type_line": "Artifact— Clue Equipment", "oracle_text": "Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature." {2}, Sacrifice CARD_NAME: Draw a card. Equip {2}'"}
license: mit
metrics:
- accuracy
- perplexity
datasets:
- 404NotF0und/MtG-json-to-ForgeScript
---

# Model Trained Using AutoTrain

This model was trained using AutoTrain. For more information, please visit [AutoTrain](https://hf.co/docs/autotrain).

# Usage

- Install the dependencies and clone the dataset first

```
pip install transformers datasets matplotlib pandas git-lfs jiwer tqdm numpy
git clone https://huggingface.co/datasets/404NotF0und/MtG-json-to-ForgeScribe
```

The following code is an example of usage, taken from a Kaggle notebook.

```python
import torch
import random
import csv
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from collections.abc import Sequence

# Function to read the CSV files and extract the relevant columns
def read_dataset(file_path):
    print(f"Reading dataset from {file_path}")
    data = []
    with open(file_path, encoding="utf-8") as csv_file:
        csv_reader = csv.DictReader(csv_file)  # Use DictReader to handle columns by name
        for row in csv_reader:
            json_input = f"{row['instruction']} {row['input']}"  # Assuming 'input' column contains the JSON input
            target_dsl = row["output"]  # Assuming 'output' column contains the target DSL
            data.append((json_input, target_dsl))
    return data

# Function to load the model and tokenizer from Hugging Face
def load_model(model_name, read_token, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=read_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=read_token)
    return tokenizer, model

# Function to run inference (text generation)
def run_inference(model, tokenizer, prompt, max_length=300):
    # Encode the prompt text
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Generate text using the model
    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )

    # Decode the generated text
    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
    print(generated_text.split('###')[1])
    return generated_text.split('###')[1]
```

```python
read_token = 'hf_YOUR_TOKEN'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = '404NotF0und/lunar-llm-phi-2-3epoch'

# Load the datasets
validation_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_validation.csv"
test_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_test.csv"
train_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_train.csv"

# Read the datasets
validation_data = read_dataset(validation_path)
test_data = read_dataset(test_path)
train_data = read_dataset(train_path)
```

```python
def get_random_prompts(dataset, num_samples=3):
    if not isinstance(dataset, Sequence):
        dataset = list(dataset)
    if len(dataset) < num_samples:
        raise ValueError(f"Dataset does not have enough elements to sample {num_samples} items.")
    random_elements = random.sample(dataset, num_samples)

    # Create a list of dictionaries with 'json_input' and 'max_length' for each selected element
    prompts = [
        {
            'json_input': element[0],
            'max_length': len(f"{element[0]}\n### Response: {element[1]}")  # Calculate the length of the response
        }
        for element in random_elements
    ]
    return prompts

# Now you can populate the prompts variable with 6 random elements from each dataset
try:
    prompts = [
        {
            'json_input': "Create the Forge script for this magic card { \"name\": \"Wrench\", \"mana_cost\": \"{W}\", \"type_line\": \"Artifact\u2014 Clue Equipment\", \"oracle_text\": \"Equipped creature gets +1/+1 and has vigilance and \"{3}, {T}: Tap target creature.\"\n{2}, Sacrifice CARD_NAME: Draw a card.\nEquip {2}'\"}",
            'max_length': 100
        }
    ]
except ValueError as e:
    print(e)
```

```python
# Load the model and tokenizer
tokenizer, model = load_model(model_name, read_token, device)
for prompt in prompts:
    print(f"### Question: {prompt['json_input']} \n")
    print("\n" + "-"*80 + "\n")
    # Run inference (text generation)
    generated_text = run_inference(model, tokenizer, prompt['json_input'])
    # Print the generated text
    # print(generated_text)
    print("\n" + "="*80 + "\n")  # Separator for readability
```

Lastly, this is an example of the output you should get:

```
### Question: Create the Forge script for this magic card { "name": "Wrench", "mana_cost": "{W}", "type_line": "Artifact— Clue Equipment", "oracle_text": "Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature." {2}, Sacrifice CARD_NAME: Draw a card. Equip {2}'"}

--------------------------------------------------------------------------------

Response: Name:Wrench\nManaCost:W\nTypes:Artifact Clue Equipment\nK:Equip:2\nS:Mode$ Continuous | Affected$ Creature.EquippedBy | AddPower$ 1 | AddToughness$ 1 | AddKeyword$ Vigilance | AddAbility$ TrigTap | Description$ Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\nSVar:TrigTap:AB$ Tap | Cost$ 3 T | ValidTgts$ Creature | TgtPrompt$ Select target creature | SpellDescription$ Tap target creature.\nA:AB$ Draw | Cost$ 2 Sac<1/CARDNAME> | NumCards$ 1 | SpellDescription$ Draw a card.\nOracle:Equipped creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."\n{2}, Sacrifice Wrench: Draw

================================================================================
```