import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Hugging Face Hub coordinates of the quantized GGUF checkpoint served by this demo.
CONST_REPO_ID = "AshtonIsNotHere/CodeLlama_7B_nlp_pp"
CONST_FILENAME = "CodeLlama_7B_nlp_pp_q8_0.gguf"

# Resolve the checkpoint to a local file path first, then hand that path to
# llama.cpp. Both steps run once, at import time.
_model_path = hf_hub_download(
    repo_id=CONST_REPO_ID,
    filename=CONST_FILENAME,
)
model = Llama(model_path=_model_path)

# Stop sequences for generation. Each of these phrases/tokens marks the start
# of a pass; for demonstration purposes they are assumed never to belong in
# the output, so encountering one is treated as a hallucination.
stop = [
    "@NODES",
    "@CODE",
    "@DECL",
]

def generate(input_text):
    """Run the model on ``input_text`` and return the prompt plus its completion.

    If the prompt starts with a ``#`` comment (ignoring leading whitespace) and
    does not already end with a newline, one newline is appended before
    inference -- presumably so the completion begins on the line after the
    comment instead of extending it.

    Args:
        input_text: Prompt text entered by the user.

    Returns:
        The ``text`` field of the first completion choice. ``echo=True`` is
        passed to the model, which asks llama-cpp-python to include the prompt
        in the returned text.
    """
    # The newline check must look at the *unstripped* text: ``strip()`` removes
    # trailing newlines, so testing the stripped string could never see one and
    # a second newline was appended to prompts that already ended with "\n".
    if input_text.lstrip().startswith("#") and not input_text.endswith("\n"):
        input_text += "\n"
    # Generation is capped at 128 new tokens and halts early on any stop sequence.
    output = model(input_text, max_tokens=128, stop=stop, echo=True)
    return output['choices'][0]['text']

# Prompt box. ``gr.Textbox`` is used instead of ``gr.inputs.Textbox``: the
# ``gr.inputs`` namespace is deprecated in Gradio 3 and removed in Gradio 4,
# while the top-level component accepts the same ``lines``/``label`` arguments.
input_text = gr.Textbox(lines=5, label="Enter your code to autocomplete")
# Code viewer that displays the echoed prompt together with the completion.
output_text = gr.Code(elem_id="q-output", lines=30, label="Output")

description = "This is a demo of CodeLlama_7B_nlp_pp: a CodeLlama🦙 model fine-tuned for code completion on the NLP++ programming language. The model has been 8-bit quantized and is running via `llama-cpp-python`."

# One-click sample prompts; each inner list holds the value for the single input.
examples = [
    ['# Find concept named parent under root and print "num" val for each child attribute\n'],
    ['L("iter") = getconcept(findroot(), L("parent_con"));\n'],
    ['# Match node _noun when preceded by _noun\n'],
]

# Build the interface under a name before launching so the app object stays
# reachable at module level (e.g. for inspection or tooling).
demo = gr.Interface(
    fn=generate,
    inputs=input_text,
    outputs=output_text,
    title="CodeLlama for NLP++",
    description=description,
    examples=examples,
)
demo.launch()