from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
from nltk.tokenize import sent_tokenize
import torch

# Hugging Face Hub id of the checkpoint served by this app.
model = "janny127/autotrain-5e45b-p5z66"
tokenizer = AutoTokenizer.from_pretrained(model)

# Bound to its own name so the imported ``pipeline`` factory is not shadowed.
# The tokenizer loaded above is handed over so it is not loaded a second time.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Substrings that mark the end of the assistant's turn in a completion.
# " #" is a deliberately broad heuristic that catches the start of a new
# "### ..." section; it also truncates replies containing a literal " #".
_STOP_MARKERS = ("### Assistant:", " Human: ", " #")


def _extract_reply(completion):
    """Return *completion* cut at the earliest stop marker, stripped."""
    cut = len(completion)
    for marker in _STOP_MARKERS:
        position = completion.find(marker)
        if position != -1:
            cut = min(cut, position)
    return completion[:cut].strip()


def predict(prompt, history):
    """Generate the assistant's reply to a single user message.

    Args:
        prompt: The latest user message.
        history: Previous chat turns supplied by ``gr.ChatInterface``.
            Unused: every request is answered without conversation context.

    Returns:
        The generated reply, truncated at the first stop marker and stripped
        of surrounding whitespace.
    """
    formatted_prompt = f"### Human: {prompt}### Assistant:"

    sequences = generator(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p=0.7,
        num_return_sequences=1,
        repetition_penalty=1.1,
        max_new_tokens=500,
        # Ask for the completion only. Splitting the echoed prompt on
        # "### Assistant:" returned part of the user's own message whenever
        # that message contained the marker.
        return_full_text=False,
    )
    return _extract_reply(sequences[0]["generated_text"])


demo = gr.ChatInterface(
    predict,
    title="Tinyllama_chatBot",
    description="Ask Tiny llama any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
)
demo.launch()  # Launching the web interface.