Anshu13 commited on
Commit
4acb49a
·
verified ·
1 Parent(s): 8da5be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -55
app.py CHANGED
@@ -7,6 +7,11 @@ from transformers import AutoProcessor, AutoModelForImageTextToText
7
 
8
  processor = AutoProcessor.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
9
  model = AutoModelForImageTextToText.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
 
 
 
 
 
10
  whisper_model = whisper.load_model("base")
11
 
12
  def build_instruction(user_text):
@@ -14,81 +19,63 @@ def build_instruction(user_text):
14
 
15
  def text_to_prompt(user_text):
16
  instruction = build_instruction(user_text)
17
- inputs = processor(text=instruction, return_tensors="pt")
18
 
19
  input_len = inputs.input_ids.shape[1]
20
 
21
- output = model.generate(**inputs, max_new_tokens=150)
22
-
23
- return processor.decode(output[0][input_len:], skip_special_tokens=True)
24
 
25
  def image_text_to_prompt(image_path, user_text):
26
- image = Image.open(image_path)
 
 
 
27
  instruction = build_instruction(user_text)
28
- inputs = processor(images=image, text=instruction, return_tensors="pt")
29
 
 
30
  input_len = inputs.input_ids.shape[1]
31
 
32
- output = model.generate(**inputs, max_new_tokens=150)
33
-
34
- return processor.decode(output[0][input_len:], skip_special_tokens=True)
35
 
36
  def audio_to_prompt(audio_path):
37
  result = whisper_model.transcribe(audio_path)
38
- text = result["text"]
39
- return text_to_prompt(text)
40
 
41
  def generate_prompt_ui(input_type, text, image, audio):
42
-
43
- if input_type == "Text":
44
- return text_to_prompt(text)
45
-
46
- elif input_type == "Image + Text":
47
- if image is None:
48
- return "Please upload an image"
49
- return image_text_to_prompt(image, text)
50
-
51
- elif input_type == "Audio":
52
- if audio is None:
53
- return "Please upload audio"
54
- return audio_to_prompt(audio)
55
-
56
- return "Invalid input"
57
-
58
  with gr.Blocks() as app:
59
-
60
- gr.Markdown("# 🧠 AI Prompt Generator")
61
-
62
- input_type = gr.Radio(
63
- ["Text", "Image + Text", "Audio"],
64
- label="Select Input Type"
65
- )
66
-
67
- text_input = gr.Textbox(label="Enter your idea/prompt")
68
-
69
- image_input = gr.Image(type="filepath", label="Upload Image")
70
-
71
- audio_input = gr.Audio(type="filepath", label="Upload Audio")
72
-
73
  output = gr.Textbox(label="Generated Prompt")
 
74
 
75
- generate_btn = gr.Button("Generate Prompt 🚀")
76
-
77
- def update_inputs(choice):
78
  return (
79
- gr.update(visible=(choice == "Text" or choice == "Image + Text")),
80
  gr.update(visible=(choice == "Image + Text")),
81
  gr.update(visible=(choice == "Audio"))
82
  )
83
- input_type.change(
84
- fn=update_inputs,
85
- inputs=input_type,
86
- outputs=[text_input, image_input, audio_input]
87
- )
88
- generate_btn.click(
89
- fn=generate_prompt_ui,
90
- inputs=[input_type, text_input, image_input, audio_input],
91
- outputs=output
92
- )
93
 
94
  app.launch()
 
# --- Model setup -------------------------------------------------------------
# Janus-Pro handles text and image+text prompting; Whisper transcribes audio.
MODEL_ID = "deepseek-community/Janus-Pro-1B"

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, trust_remote_code=True)

# NOTE(review): this relies on `torch` being imported in the file's import
# block (not visible in this chunk) — confirm `import torch` is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

whisper_model = whisper.load_model("base")
16
 
17
  def build_instruction(user_text):
 
19
 
20
def text_to_prompt(user_text):
    """Generate a refined prompt string from plain text input.

    Wraps the user's text in the shared instruction template, runs the
    Janus-Pro model, and returns only the newly generated text.
    """
    instruction = build_instruction(user_text)
    batch = processor(text=instruction, return_tensors="pt").to(device)
    prompt_len = batch.input_ids.shape[1]
    generated = model.generate(**batch, max_new_tokens=200)
    # Slice off the echoed prompt tokens so only the model's answer is decoded.
    return processor.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()
 
28
 
29
def image_text_to_prompt(image_path, user_text):
    """Generate a prompt conditioned on an image plus optional user text.

    When *user_text* is empty/None, falls back to a generic description
    request so the model always receives an instruction.
    """
    user_text = user_text or "Describe this image in detail."

    # Normalize to RGB: the processor expects 3-channel images (palettized
    # or RGBA files would otherwise fail downstream).
    picture = Image.open(image_path).convert("RGB")
    instruction = build_instruction(user_text)

    batch = processor(images=[picture], text=instruction, return_tensors="pt").to(device)
    prompt_len = batch.input_ids.shape[1]
    generated = model.generate(**batch, max_new_tokens=200)
    # Decode only tokens produced after the input prompt.
    return processor.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()
42
 
43
def audio_to_prompt(audio_path):
    """Transcribe speech with Whisper, then reuse the text pipeline."""
    transcription = whisper_model.transcribe(audio_path)
    return text_to_prompt(transcription["text"])
 
46
 
47
def generate_prompt_ui(input_type, text, image, audio):
    """Dispatch UI inputs to the matching generation pipeline.

    Parameters
    ----------
    input_type : str
        One of "Text", "Image + Text", "Audio" (from the Radio widget).
    text, image, audio :
        Raw widget values; image/audio are filepaths or None when unset.

    Returns
    -------
    str
        The generated prompt, or a human-readable message when input is
        missing/invalid or generation fails.
    """
    try:
        if input_type == "Text":
            return text_to_prompt(text)
        elif input_type == "Image + Text":
            # Guard before opening: Image.open(None) would raise a
            # confusing low-level error instead of a helpful message.
            if image is None:
                return "Please upload an image"
            return image_text_to_prompt(image, text)
        elif input_type == "Audio":
            if audio is None:
                return "Please upload audio"
            return audio_to_prompt(audio)
        # Unknown input_type: return an explicit message instead of None
        # (None renders as an empty textbox with no explanation).
        return "Invalid input"
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error: {str(e)}"
57
+
58
# Gradio UI setup
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Janus-Pro Prompt Generator")

    input_type = gr.Radio(
        ["Text", "Image + Text", "Audio"],
        label="Select Input Type",
        value="Text",
    )

    # Only the text box starts visible; image/audio toggle in below.
    text_input = gr.Textbox(label="Enter your idea")
    image_input = gr.Image(type="filepath", label="Upload Image", visible=False)
    audio_input = gr.Audio(type="filepath", label="Upload Audio", visible=False)

    output = gr.Textbox(label="Generated Prompt")
    btn = gr.Button("Generate 🚀")

    def toggle(choice):
        # Text box shows for both text-bearing modes; image and audio
        # widgets appear only for their own mode.
        return (
            gr.update(visible=(choice != "Audio")),
            gr.update(visible=(choice == "Image + Text")),
            gr.update(visible=(choice == "Audio")),
        )

    input_type.change(toggle, input_type, [text_input, image_input, audio_input])
    btn.click(generate_prompt_ui, [input_type, text_input, image_input, audio_input], output)

app.launch()