import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# The imports below are only needed for the commented-out Llama-2 / LangChain
# experiment further down; keep them commented so the script runs without
# those extra dependencies installed.
# import torch
# from transformers import pipeline
# from huggingface_hub import login
# from langchain.llms import HuggingFacePipeline

# google/gemma-7b is a gated model: accept its license on the Hugging Face Hub
# and authenticate first, e.g. via `huggingface-cli login` or:
# login(token=token)


def greet(name):
    # Demo callback for the Gradio interface: parse the text input as an
    # integer and return it incremented by 10.
    return str(int(name) + 10)


# Load the Gemma 7B tokenizer and model and run a single generation.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b")

input_text = "Write me a poem about Machine Learning."
inputs = tokenizer(input_text, return_tensors="pt")

# Without max_new_tokens, generate() stops at the default length and cuts the
# poem short.
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0]))

# --- Alternative: Llama-2 via a transformers pipeline wrapped for LangChain ---
# model_name = "meta-llama/Llama-2-13b-chat-hf"  # or meta-llama/Llama-2-7b-hf
#
# tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
# llama_pipeline = pipeline(
#     "text-generation",            # Llama-2 is a causal LM, not text2text
#     model=model_name,
#     tokenizer=tokenizer,
#     max_length=512,
#     temperature=0.5,
#     top_p=0.95,
#     repetition_penalty=1.15,
# )
# local_llm = HuggingFacePipeline(pipeline=llama_pipeline)
#
# def get_llama_response(prompt: str) -> None:
#     """
#     Generate a response from the Llama model.
#
#     Parameters:
#         prompt (str): The user's input/question for the model.
#
#     Returns:
#         None: Prints the model's response.
#     """
#     sequences = llama_pipeline(
#         prompt,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#         max_length=256,
#         truncation=True,
#     )
#     print("Chatbot:", sequences[0]["generated_text"])
#
# prompt = 'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'
# get_llama_response(prompt)

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
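
# --- Sketch (assumption, not part of the original app): one way to wire the
# Gemma model loaded above into the Gradio interface instead of the toy
# greet() callback. The helper name generate_poem and the max_new_tokens
# value are illustrative choices, not taken from the original script.
# def generate_poem(prompt: str) -> str:
#     enc = tokenizer(prompt, return_tensors="pt")
#     out = model.generate(**enc, max_new_tokens=128)
#     return tokenizer.decode(out[0], skip_special_tokens=True)
#
# gemma_iface = gr.Interface(fn=generate_poem, inputs="text", outputs="text")
# gemma_iface.launch()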