"""Gradio demo: NLP++ code completion with a quantized CodeLlama model.

Downloads the GGUF weights from the Hugging Face Hub, loads them with
``llama-cpp-python`` and serves a single-textbox autocomplete interface.
"""

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

CONST_REPO_ID = "AshtonIsNotHere/CodeLlama_7B_nlp_pp"
CONST_FILENAME = "CodeLlama_7B_nlp_pp_q8_0.gguf"

# The weights file is fetched via the Hub client and the returned local path
# is handed straight to llama.cpp.
model = Llama(model_path=hf_hub_download(repo_id=CONST_REPO_ID, filename=CONST_FILENAME))

# These phrases/tokens indicate the start of a pass. For demonstration
# purposes, it's safe to assume that these should not be encountered in the
# output and represent a hallucination.
stop = ["@NODES", "@CODE", "@DECL"]


def generate(input_text: str) -> str:
    """Complete the given NLP++ snippet and return prompt plus completion.

    Args:
        input_text: Code or a ``#`` comment describing the code to generate.

    Returns:
        The ``text`` field of the first choice returned by the model. The
        call uses ``echo=True``, so the prompt is part of the returned text.
    """
    # A comment prompt should be terminated by a newline so the model starts
    # the completion on a fresh line. The check is made on the raw string:
    # testing ``strip()``-ed text for a trailing "\n" can never succeed, which
    # previously appended a second newline to prompts that already had one.
    if input_text.lstrip().startswith("#") and not input_text.endswith("\n"):
        input_text += "\n"

    output = model(input_text, max_tokens=128, stop=stop, echo=True)
    return output['choices'][0]['text']


# ``gr.Textbox`` replaces the legacy ``gr.inputs.Textbox`` namespace, keeping
# the input consistent with the top-level ``gr.Code`` component used below.
input_text = gr.Textbox(lines=5, label="Enter your code to autocomplete")
output_text = gr.Code(elem_id="q-output", lines=30, label="Output")

description = "This is a demo of CodeLlama_7B_nlp_pp: a CodeLlama🦙 model fine-tuned for code completion on the NLP++ programming language. The model has been 8-bit quantized and is running via `llama-cpp-python`."

examples = [
    ['# Find concept named parent under root and print "num" val for each child attribute\n'],
    ['L("iter") = getconcept(findroot(), L("parent_con"));\n'],
    ['# Match node _noun when preceded by _noun\n'],
]

gr.Interface(
    fn=generate,
    inputs=input_text,
    outputs=output_text,
    title="CodeLlama for NLP++",
    description=description,
    examples=examples,
).launch()