import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch

# Load model & processor
# model_id = "llava-hf/llava-1.5-7b-hf"  # Exceeds 16 GB of memory
# model_id = "llava-hf/llava-1.5-7b-hf-int4"  # Does not exist
model_id = "bczhou/tiny-llava-v1-hf"

processor = AutoProcessor.from_pretrained(model_id)
processor.patch_size = 14  # ✅ Fix: set manually; missing from this checkpoint's processor config
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto",
)
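
# Alternative for memory-constrained setups (a sketch, not executed here):
# rather than the non-existent "-int4" checkpoint above, the full 7B model
# could be loaded with on-the-fly 4-bit quantization. Assumes a CUDA GPU and
# the bitsandbytes package; BitsAndBytesConfig ships with transformers.
#
# from transformers import BitsAndBytesConfig
#
# model = LlavaForConditionalGeneration.from_pretrained(
#     "llava-hf/llava-1.5-7b-hf",
#     quantization_config=BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_compute_dtype=torch.float16,
#     ),
#     device_map="auto",
# )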


# Main prediction function
def analyze_palm(image, question, history):
    if image is None or not question.strip():
        history.append((question, "Please provide both an image and a question."))
        return history, ""

    # Add <image> token explicitly in the text
    prompt = f"<image>\n{question}"
    print("Processing image and prompt")

    # Manual prompt construction (works for TinyLLaVA)
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
    print("Inputs encoded")

    outputs = model.generate(**inputs, max_new_tokens=256)
    print("Output generated")

    # Decode only the newly generated tokens so the reply doesn't echo the prompt
    response = processor.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    history.append((question, response))
    print("Returning response")

    return history, ""
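
# Quick smoke test without launching the UI (hypothetical local image path):
#
# img = Image.open("palm.jpg")
# chat, _ = analyze_palm(img, "What does my palm say?", [])
# print(chat[-1][1])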


# Build UI using Blocks
with gr.Blocks() as demo:
    gr.Markdown("## 🖐️ AI Palm Reader\nUpload a palm image and ask a question. Get a palmistry-style response.")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Palm Image")
            prompt_input = gr.Textbox(lines=2, label="Your Question", placeholder="What does my palm say?")
            submit_btn = gr.Button("Ask")

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Palmistry Chat")

    state = gr.State([])

    submit_btn.click(
        fn=analyze_palm,
        inputs=[image_input, prompt_input, state],
        outputs=[chatbot, prompt_input]
    )

demo.launch()
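
# Note: demo.launch(share=True) would also create a temporary public URL
# (handy from notebooks or remote machines); the default above serves the app
# locally at http://127.0.0.1:7860.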