# Minimal command-line example: send one fixed prompt through llama_cpp's
# ``Llama`` object and print whatever it returns as indented JSON.
import argparse
import json

from llama_cpp import Llama

# Fixed prompt and stop sequences passed to the model call below.
PROMPT = "Question: What are the names of the planets in the solar system? Answer: "
STOP_SEQUENCES = ["Q:", "\n"]

# The only command-line option is the model file path (-m / --model).
parser = argparse.ArgumentParser()
parser.add_argument(
    "-m",
    "--model",
    type=str,
    default="../models/7B/ggml-models.bin",
)
args = parser.parse_args()

llm = Llama(model_path=args.model)

# Keyword options forwarded unchanged to the ``llm(...)`` call.
completion_options = {
    "max_tokens": 48,
    "stop": STOP_SEQUENCES,
    "echo": True,
}
output = llm(PROMPT, **completion_options)

print(json.dumps(output, indent=2))