# CogwiseAI falcon7b

# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# model = AutoModelForCausalLM.from_pretrained(
#     "Cogwisechat/falcon-7b-finance",
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
#     low_cpu_mem_usage=True,
# )
# tokenizer = AutoTokenizer.from_pretrained("Cogwisechat/falcon-7b-finance")


# def generate_text(input_text):
#     global output_text

#     input_ids = tokenizer.encode(input_text, return_tensors="pt")
#     attention_mask = torch.ones(input_ids.shape)

#     output = model.generate(
#         input_ids,
#         attention_mask=attention_mask,
#         max_length=200,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#     )
    

#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
#     print(output_text)

#     # Remove Prompt Echo from Generated Text
    
#     cleaned_output_text = output_text.replace(input_text, "")
#     return  cleaned_output_text

# block = gr.Blocks()


# with block:
#     gr.Markdown("""<h1><center>CogwiseAI falcon7b</center></h1>
#     """)
#     # chatbot = gr.Chatbot()
#     message = gr.Textbox(placeholder='Enter Your Question Here')
#     state = gr.State()
#     submit = gr.Button("SEND")
#     submit.click(generate_text, inputs=[message, state], outputs=[output_text, state])

# block.launch(debug = True)


# # logo = (
# #             "<div >"
# #             "<img  src='ai-icon.png'alt='image One'>"
# #             + "</div>"
# #     )
# # text_generation_interface = gr.Interface(
# #     fn=generate_text,
# #     inputs=[
# #         gr.inputs.Textbox(label="Input Text"),
# #     ],
# #     outputs=gr.inputs.Textbox(label="Generated Text"),
# #     title="Falcon-7B Instruct",
# #     image=logo
# # ).launch()


from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch


# Static text shown by the Gradio interface built at the bottom of this file.
title = "🦅Falcon 🗨️ChatBot"
# The description names the checkpoint this file actually loads; the previous
# text described Falcon-RW-1B, which is not the model used here.
description = (
    "Chat with Cogwisechat/falcon-7b-finance, a causal language model "
    "loaded through Hugging Face Transformers."
)
# One example row; each row lists the values for the interface inputs.
examples = [["How are you?"]]


# Hugging Face Hub id of the checkpoint; the tokenizer and the model weights
# are both loaded from this same repository.
MODEL_ID = "Cogwisechat/falcon-7b-finance"

# Load the tokenizer first, then the causal-LM weights (same order as before).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# NOTE(review): trust_remote_code=True lets Transformers run Python code that
# ships with the model repository -- confirm the repository is trusted.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)


def predict(input, history=None):
    """Generate the next chatbot reply and return the updated conversation.

    Args:
        input: Latest user message as plain text. The name shadows the
            builtin ``input``; it is kept so keyword callers keep working.
        history: Token ids of the conversation so far, in the nested-list
            form returned by a previous call. ``None`` or an empty list
            starts a new conversation.

    Returns:
        A ``(response, history)`` tuple. ``response`` is a list of
        ``(first, second)`` text pairs taken from consecutive EOS-delimited
        segments of the decoded conversation (user message, then reply).
        ``history`` is the full token-id output of ``model.generate`` as a
        nested list, to be passed back in on the next call.
    """
    # tokenize the new input sentence; the trailing EOS token marks the end
    # of the turn so the decoded text can be split back into turns below
    new_user_input_ids = tokenizer.encode(
        input + tokenizer.eos_token, return_tensors="pt"
    )

    # append the new user input tokens to the chat history; with no history
    # (None or empty) the new tokens are the whole prompt, which also avoids
    # building a tensor from None
    if history:
        bot_input_ids = torch.cat(
            [torch.LongTensor(history), new_user_input_ids], dim=-1
        )
    else:
        bot_input_ids = new_user_input_ids

    # generate a response; the output contains the prompt followed by the
    # newly generated tokens, so it doubles as the updated history
    # NOTE(review): max_length=4000 may exceed the model's context window --
    # confirm against the checkpoint's configuration.
    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # convert the tokens to text and split on the same EOS token that was
    # appended to each user message above
    turns = tokenizer.decode(history[0]).split(tokenizer.eos_token)

    # pair consecutive segments as (user, bot); a trailing unpaired segment
    # is dropped because zip stops at the shorter slice
    response = list(zip(turns[0::2], turns[1::2]))
    return response, history


# Build the web UI around ``predict``: the "text" input carries the user's
# message and the "state" input/output pair carries the value that ``predict``
# returns as its second result back into the next call.
demo = gr.Interface(
    fn=predict,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    title=title,
    description=description,
    examples=examples,
    theme="finlaymacklon/boxy_violet",
)

# Start serving the interface as soon as this file is executed.
demo.launch()