404NotF0und
/

lunar-llm-phi-2-3epochs

@@ -3,8 +3,17 @@ tags:
 - autotrain
 - text-generation
 widget:
-- text: "I love AutoTrain because "
-license: other
 ---
 # Model Trained Using AutoTrain
@@ -12,29 +21,128 @@ license: other
 This model was trained using AutoTrain. For more information, please visit [AutoTrain](https://hf.co/docs/autotrain).
 # Usage
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model_path = "PATH_TO_THIS_REPO"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    torch_dtype='auto'
-).eval()
-# Prompt content: "hi"
-messages = [
-    {"role": "user", "content": "hi"}
-]
-input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
-output_ids = model.generate(input_ids.to('cuda'))
-response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
-# Model response: "Hello! How can I assist you today?"
-print(response)
 ```

 - autotrain
 - text-generation
 widget:
+- text: >-
+    Create the Forge script for this magic card { "name": "Wrench", "mana_cost":
+    "{W}", "type_line": "Artifact— Clue Equipment", "oracle_text":  "Equipped
+    creature gets +1/+1 and has vigilance and "{3}, {T}: Tap target creature."
+    {2}, Sacrifice CARD_NAME: Draw a card. Equip {2}'"}
+license: mit
+metrics:
+- accuracy
+- perplexity
+datasets:
+- 404NotF0und/MtG-json-to-ForgeScript
 ---
 # Model Trained Using AutoTrain
 This model was trained using AutoTrain. For more information, please visit [AutoTrain](https://hf.co/docs/autotrain).
 # Usage
+- Install the dependencies and clone the dataset first:
+```
+pip install transformers datasets matplotlib pandas git-lfs jiwer tqdm numpy
+git clone https://huggingface.co/datasets/404NotF0und/MtG-json-to-ForgeScribe
+```
+The following code is an example of usage, run in a Kaggle notebook:
 ```python
+import torch
+import random
+import csv
+import pandas as pd
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from collections.abc import Sequence
+# Function to read the CSV files and extract the relevant columns
+def read_dataset(file_path):
+    print(f"Reading dataset from {file_path}")
+    data = []
+    with open(file_path, encoding="utf-8") as csv_file:
+        csv_reader = csv.DictReader(csv_file)  # Use DictReader to handle columns by name
+        for row in csv_reader:
+            json_input = f"{row['instruction']} {row['input']}"  # Assuming 'input' column contains the JSON input
+            target_dsl = row["output"]  # Assuming 'output' column contains the target DSL
+            data.append((json_input, target_dsl))
+    return data
+# Function to load the model and tokenizer from Hugging Face
+def load_model(model_name, read_token, device):
+    tokenizer = AutoTokenizer.from_pretrained(model_name, token=read_token)
+    model = AutoModelForCausalLM.from_pretrained(model_name, token=read_token)
+    return tokenizer, model
+# Function to run inference (text generation)
+def run_inference(model, tokenizer, prompt, max_length=300):
+    # Encode the prompt text
+    input_ids = tokenizer.encode(prompt, return_tensors='pt')
+    # Generate text using the model
+    output_sequences = model.generate(
+        input_ids=input_ids,
+        max_length=max_length,
+        temperature=0.5,
+        top_k=50,
+        top_p=0.95,
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=True
+    )
+    # Decode the generated text
+    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
+    print(generated_text.split('###')[1])
+    return generated_text.split('###')[1]
+```
+```python
+read_token = 'hf_YOUR_TOKEN'
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_name = '404NotF0und/lunar-llm-phi-2-3epoch'
+# Load the datasets
+validation_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_validation.csv"
+test_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_test.csv"
+train_path = f"MtG-json-to-ForgeScribe/compiled_cards_data_train.csv"
+# Read the datasets
+validation_data = read_dataset(validation_path)
+test_data = read_dataset(test_path)
+train_data = read_dataset(test_path)
+```
+```python
+def get_random_prompts(dataset, num_samples=3):
+    if not isinstance(dataset, Sequence):
+        dataset = list(dataset)
+    if len(dataset) < num_samples:
+        raise ValueError(f"Dataset does not have enough elements to sample {num_samples} items.")
+    random_elements = random.sample(dataset, num_samples)
+    # Create a list of dictionaries with 'json_input' and 'max_length' for each selected element
+    prompts = [
+        {
+            'json_input': element[0],
+            'max_length': len(f"{element[0]}\n### Response: {element[1]}")  # Calculate the length of the response
+        }
+        for element in random_elements
+    ]
+    return prompts
+# Now you can populate the prompts variable with 6 random elements from each dataset
+try:
+    prompts = [
+        {
+            'json_input': "Create the Forge script for this magic card { \"name\": \"Wrench\", \"mana_cost\": \"{W}\", \"type_line\": \"Artifact\u2014 Clue Equipment\", \"oracle_text\":  \"Equipped creature gets +1/+1 and has vigilance and \"{3}, {T}: Tap target creature.\"\n{2}, Sacrifice CARD_NAME: Draw a card.\nEquip {2}'\"}",
+            'max_length': 100
+        }
+    ]
+except ValueError as e:
+    print(e)
+```
+```python
+# Load the model and tokenizer
+tokenizer, model = load_model(model_name, read_token, device)
+for prompt in prompts:
+    print(f"### Question: {prompt['json_input']} \n")
+    print("\n" + "-"*80 + "\n")
+    # Run inference (text generation)
+    generated_text = run_inference(model, tokenizer, prompt['json_input'])
+    # Print the generated text
+    # print(generated_text)
+    print("\n" + "="*80 + "\n")  # Separator for readability
 ```