import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login
from langchain.llms import HuggingFacePipeline

# Log in with a Hugging Face access token when gated checkpoints (e.g. the Llama 2 models below) are used.
# login(token=token)
def greet(name):
    # Placeholder Gradio handler: expects a number typed as text and returns it plus 10.
    return str(int(name) + 10)
# Load the Gemma 7B tokenizer and model, then run a quick generation check at startup.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b")

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
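
# Hedged sketch (not part of the original app): wrap the Gemma tokenizer/model loaded above in a
# reusable helper that returns a string, so a Gradio handler could serve real model output instead
# of the placeholder greet() below. The function name and max_new_tokens value are assumptions.
def generate_reply(prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)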
# Alternative 1 (kept for reference): load a Llama 2 checkpoint directly.
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

# Alternative 2 (kept for reference): build a Llama 2 chat pipeline and wrap it for LangChain.
# model = "meta-llama/Llama-2-13b-chat-hf"  # or meta-llama/Llama-2-7b-hf
# tokenizer = AutoTokenizer.from_pretrained(model, token=True)
# llama_pipeline = pipeline(
#     "text-generation",  # Llama 2 is a causal LM, so use text-generation (not text2text-generation)
#     model=model,
#     tokenizer=tokenizer,
#     max_length=512,
#     temperature=0.5,
#     top_p=0.95,
#     repetition_penalty=1.15,
# )
# local_llm = HuggingFacePipeline(pipeline=llama_pipeline)
# def get_llama_response(prompt: str) -> None:
#     """
#     Generate a response from the Llama model.
#
#     Parameters:
#         prompt (str): The user's input/question for the model.
#
#     Returns:
#         None: Prints the model's response.
#     """
#     sequences = llama_pipeline(
#         prompt,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#         max_length=256,
#         truncation=True,
#     )
#     print("Chatbot:", sequences[0]["generated_text"])
# prompt = 'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'
# get_llama_response(prompt)
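
# Hedged sketch (assumption, not in the original app): if the Llama pipeline above were enabled,
# a return-value variant of get_llama_response could be passed to gr.Interface in place of the
# placeholder greet() used below. The function name llama_chat is hypothetical.
# def llama_chat(prompt: str) -> str:
#     sequences = llama_pipeline(
#         prompt,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#         max_length=256,
#         truncation=True,
#     )
#     return sequences[0]["generated_text"]
# iface = gr.Interface(fn=llama_chat, inputs="text", outputs="text")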
# Leftover debug print confirming the script reached the Gradio setup.
print('hhh')

# Expose the placeholder greet handler as a simple text-in/text-out Gradio app.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()