# !pip install -q -U git+https://github.com/huggingface/accelerate.git
# !pip install -q -U bitsandbytes
# !pip install -q -U git+https://github.com/huggingface/transformers.git

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TextStreamer

model_id = 'Huma97/main'

# # Llama 2 inference with streaming output
# model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype='auto')
# tokenizer = AutoTokenizer.from_pretrained(model_id)
#
# def stream(user_prompt):
#     system_prompt = 'You are a helpful assistant that provides accurate and concise responses'
#     B_INST, E_INST = "[INST]", "[/INST]"
#     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
#     prompt = f"{B_INST} {B_SYS}{system_prompt.strip()}{E_SYS}{user_prompt.strip()} {E_INST}\n\n"
#     inputs = tokenizer([prompt], return_tensors="pt").to("cuda:0")
#     streamer = TextStreamer(tokenizer)
#     # Besides returning the usual output, the streamer also prints the generated text to stdout.
#     _ = model.generate(**inputs, streamer=streamer, max_new_tokens=500)
#
# st.write(stream('Count to ten'))

# Load the pre-trained LLaMA 2 model and tokenizer
# (torch_dtype is a model argument, not a tokenizer argument)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype='auto')
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Create a Streamlit text input field
input_text = st.text_input("Enter your prompt:")

# Define a function to generate text using LLaMA 2
def generate_text(input_text):
    # Tokenize the prompt and move it to the model's device before generating
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(inputs, max_length=250, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Create a Streamlit button to trigger text generation
if st.button("Generate Text"):
    output_text = generate_text(input_text)
    st.write(output_text)
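
# Optional: the bitsandbytes install and the BitsAndBytesConfig import above suggest
# quantized loading was intended. A minimal sketch (an assumption, not part of the
# original app) of loading the same model in 4-bit to reduce GPU memory:
#
# import torch
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_id, device_map='auto', quantization_config=bnb_config
# )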