## Load required modules
import argparse
import json

import torch

# Block, MultiHead, Head, and FFNet must be importable so torch.load can
# unpickle the full model object (weights_only=False) further below.
from SeedGPT import SeedGPT, Block, MultiHead, Head, FFNet

## Command line arguments
parser = argparse.ArgumentParser(description="Inference")
parser.add_argument("--model_path", type=str, default="./SeedGPT.pt", help="Path to the saved model")
parser.add_argument("--tokenizer_path", type=str, default="./tokenizer.json", help="Path to the tokenizer")
parser.add_argument("--input", type=str, default="Hello", help="Input prompt for the LLM")
parser.add_argument("--max_token", type=int, default=1000, help="Number of tokens to generate")
parser.add_argument("--output_file", type=str, default="./llm_output.txt", help="Path of the output file")
parser.add_argument("--show", action="store_true", help="Also print the generated output to stdout")
args = parser.parse_args()

## Load the tokenizer & build the encoder/decoder
# tokenizer.json stores the character-level vocabulary as two maps:
# "ctoi" (character -> id) and "itoc" (id -> character).
with open(args.tokenizer_path, "r", encoding="utf-8") as f:
    tok = json.load(f)
ctoi = {k: int(v) for k, v in tok["ctoi"].items()}
itoc = {int(k): v for k, v in tok["itoc"].items()}  # JSON keys are strings, so cast back to int

def encoder(s):
    return [ctoi[c] for c in s]

def decoder(l):
    return ''.join([itoc[i] for i in l])

## Load the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load(args.model_path, map_location=device, weights_only=False)
model.to(device)
model.eval()

## Preprocess the input
llm_input = torch.tensor(encoder(args.input), dtype=torch.long, device=device)
# generate() expects a batch dimension: (batch, sequence)
if llm_input.dim() == 1:
    llm_input = llm_input.unsqueeze(0)

with torch.no_grad():  # no gradients needed at inference time
    output = model.generate(llm_input, args.max_token)

## Print/save the generated text
decoded_text = decoder(output[0].tolist())
if args.show:
    print(decoded_text)
print(f"\nThe output is saved in file {args.output_file}\n")
with open(args.output_file, "w", encoding="utf-8") as f:
    f.write(decoded_text)
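
## Example usage (a sketch: the script filename inference.py, the prompt, and
## the token count below are illustrative assumptions, not fixed by the code):
##   python inference.py --input "Once upon a time" --max_token 500 --show
##   python inference.py --model_path ./SeedGPT.pt --tokenizer_path ./tokenizer.json --output_file ./story.txt
##
## Expected shape of tokenizer.json (the specific characters and ids shown are
## hypothetical; the real vocabulary depends on the training corpus):
##   {"ctoi": {"H": 20, "e": 43, ...}, "itoc": {"20": "H", "43": "e", ...}}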