Spaces:

annie08
/

nitish-caption-generator-transformer-for-vision-language

Runtime error

App Files Files Community

nitishhrms committed on 19 days ago

Commit

1ca801f

•

1 Parent(s): aa97573

new space

Browse files

Files changed (3) hide show

app.py +41 -0
model_folder/pytorch_model.bin +3 -0
model_folder/special_tokens_map.json +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import torch
+from transformers import AutoProcessor, AutoModelForCausalLM
+from PIL import Image
+import gradio as gr
+# Step 1: Load the processor from Hugging Face
+processor = AutoProcessor.from_pretrained("microsoft/git-large-textcaps")
+# Step 2: Load the model architecture from Hugging Face
+model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-textcaps")  # Load model structure
+# Step 3: Load your custom PyTorch weights
+custom_weights_path = "model_folder/pytorch_model.bin"  # Path to your custom weights
+model.load_state_dict(torch.load(custom_weights_path, map_location=torch.device("cpu")))  # Load custom weights
+model.eval()  # Set the model to evaluation mode
+# Step 4: Define the caption generation function
+def generate_caption(image):
+    # Convert the input image to PIL format (if necessary)
+    image = Image.fromarray(image)
+    # Preprocess the image using the processor
+    inputs = processor(images=image, return_tensors="pt")
+    pixel_values = inputs.pixel_values
+    # Generate caption
+    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
+# Step 5: Define the Gradio interface
+interface = gr.Interface(
+    fn=generate_caption,  # Function to process input
+    inputs=gr.Image(),    # Input as image
+    outputs=gr.Textbox(), # Output as text
+    live=True             # Enable live prediction
+)
+# Step 6: Launch the Gradio app
+interface.launch()

model_folder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74f4b3b944f2a3e17c46bf4a028fb4a652b267c12618ac74b8aca20e14919992
+size 989827505

model_folder/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}