"""Gradio demo: ask a free-form question about an uploaded image.

The answer is produced by the ``vikhyatk/moondream1`` checkpoint. The model
and tokenizer are loaded lazily on the first request and then kept in a
module-level cache so later requests skip the (slow) load step.
"""

import gc

import gradio as gr
from PIL import Image
from pympler import asizeof
from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer

_MODEL_ID = "vikhyatk/moondream1"
_DEFAULT_QUESTION = "describe this image?"

# Process-wide cache filled by _get_model_and_tokenizer(). It has to live at
# module level: a name assigned inside the request handler is local to that
# call, so probing it there always fails and the model would be reloaded on
# every request.
_model = None
_tokenizer = None


def clear_model(model):
    """Drop this module's references to *model* and run the garbage collector.

    If *model* is the instance held in the module-level cache, the cached
    model and tokenizer are cleared too, so the next request reloads them.

    Note that ``del`` only removes this function's own binding; the memory
    can be reclaimed only once the caller has dropped its references as well.

    Args:
        model: The model object to release.
    """
    global _model, _tokenizer
    if model is not None and model is _model:
        _model = None
        _tokenizer = None
    del model
    gc.collect()


def _get_model_and_tokenizer():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use."""
    global _model, _tokenizer
    if _model is None or _tokenizer is None:
        # SECURITY: trust_remote_code=True executes Python code shipped in the
        # model repository. Only use it with repositories you trust.
        loaded_model = AutoModelForCausalLM.from_pretrained(
            _MODEL_ID, trust_remote_code=True
        )
        loaded_tokenizer = Tokenizer.from_pretrained(_MODEL_ID)
        # Publish both together so a failed tokenizer load never leaves a
        # half-initialised cache behind.
        _model, _tokenizer = loaded_model, loaded_tokenizer
    return _model, _tokenizer


def process_image_and_question(image, question):
    """Answer *question* about *image* and append a memory-usage report.

    Args:
        image: The uploaded picture as delivered by the ``gr.Image(type="pil")``
            input, or ``None`` when nothing was uploaded.
        question: The user's question. ``None``, empty or whitespace-only
            input falls back to "describe this image?".

    Returns:
        The model's answer followed by a line reporting the sizes of the
        model and tokenizer objects as measured by ``pympler.asizeof``, or a
        short prompt asking for an image when none was supplied.
    """
    if image is None:
        return "Please upload an image before asking a question."

    if not question or not question.strip():
        question = _DEFAULT_QUESTION

    model, tokenizer = _get_model_and_tokenizer()

    # encode_image / answer_question are not part of the standard transformers
    # model API; presumably they are supplied by the repository's custom code
    # enabled through trust_remote_code.
    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, question, tokenizer)

    # NOTE(review): asizeof walks the Python object graph; confirm whether it
    # accounts for the tensor storage that holds the actual weights.
    model_size = asizeof.asizeof(model)
    tokenizer_size = asizeof.asizeof(tokenizer)

    # Timing noted by the original author: model load and set-up ~1 min,
    # inference on CPU ~2 min.
    return (
        f"{answer}"
        f"\nModel size in RAM: {model_size} bytes, Tokenizer size in RAM: {tokenizer_size} bytes"
    )


# Define Gradio interface
iface = gr.Interface(
    fn=process_image_and_question,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Ask a question about the image..."),
    ],
    outputs="text",
    title="Image Question Answering",
    description="Upload an image and ask a question about it. ( 2 - 3 min response time expected )",
)

# Launch the interface
iface.launch()