"""Gradio front end for the Qwen/CodeQwen1.5-7B-Chat model.

The model and tokenizer are loaded once at module import, wrapped in a
``transformers`` text-generation pipeline, and exposed through a single
text-in / text-out Gradio interface.
"""

import gradio as gr
import transformers

# Load the model and tokenizer
model_name = "Qwen/CodeQwen1.5-7B-Chat"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
)

# Generation settings forwarded verbatim to the pipeline below.
max_new_tokens: int = 2048
do_sample: bool = True
num_beams: int = 1
temperature: float = 0.5
top_p: float = 0.95
top_k: int = 40  # an integer count; previously mis-annotated as float
repetition_penalty: float = 1.1

pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=max_new_tokens,
    do_sample=do_sample,
    num_beams=num_beams,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=repetition_penalty,
)

# Instruction sent as the "system" turn of every conversation.
_SYSTEM_PROMPT = (
    "You are a helpful coding chatbot. You will answer the user's "
    "questions to the best of your ability."
)


def generate_response(input_text: str) -> str:
    """Return the model's reply to a single user prompt.

    Args:
        input_text: The prompt typed into the Gradio text box.

    Returns:
        The ``content`` of the last message in the pipeline's
        ``generated_text`` conversation, with literal backslash-n
        sequences converted to real newlines.
    """
    # The system and user turns must be two separate dicts. A single dict
    # with repeated "role"/"content" keys keeps only the last value for
    # each key, which silently drops the system prompt.
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": input_text},
    ]
    conversation = pipe(messages)[0]["generated_text"]
    reply = conversation[-1]["content"]
    # NOTE(review): this also rewrites backslash-n escapes that appear
    # inside generated code (e.g. in string literals) -- confirm intended.
    return reply.replace("\\n", "\n")


# Define the Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="CodeQwen1.5-7B-Chat",
    description=(
        "A code-generation model from Qwen. Enter a prompt to get code "
        "suggestions or completions."
    ),
)

# Launch the interface
iface.launch()