VinitT committed (verified)
Commit 6255a7a · Parent(s): fbe9130

Update app.py

Files changed (1): app.py (+23 -12)
app.py CHANGED
@@ -1,22 +1,20 @@
  import streamlit as st
- from transformers import AutoProcessor, Qwen2VLForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
  from PIL import Image
  import torch
  import cv2
  import tempfile
+ from langchain import LLMChain, PromptTemplate
+ from langchain_community.llms import Ollama
+ from langchain_core.output_parsers import StrOutputParser
 
  # Load the processor and model directly
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
  model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 
- # Load Meta-Llama model and tokenizer for story generation
- llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
- llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
-
  # Check if CUDA is available and set the device accordingly
- device = torch.device("cpu")
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.to(device)
- llama_model.to(device)
 
  # Streamlit app
  st.title("Media Description Generator")
@@ -96,7 +94,7 @@ if uploaded_files:
  inputs = inputs.to(device) # Ensure inputs are on the same device as the model
 
  # Inference: Generation of the output
- generated_ids = model.generate(**inputs, max_new_tokens=128)
+ generated_ids = model.generate(**inputs, max_new_tokens=512)
  generated_ids_trimmed = [
  out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
  ]
@@ -121,10 +119,23 @@ if uploaded_files:
  # Create a custom prompt
  custom_prompt = f"Based on the following descriptions, create a short story:\n\n{combined_text}\n\nStory:"
 
- # Generate a story using Meta-Llama
- inputs = llama_tokenizer.encode(custom_prompt, return_tensors="pt").to(device)
- story_ids = llama_model.generate(inputs, max_length=500, num_return_sequences=1)
- story = llama_tokenizer.decode(story_ids[0], skip_special_tokens=True)
+ # Define the prompt template for LangChain
+ prompt_template = PromptTemplate(
+     input_variables=["descriptions"],
+     template="Based on the following descriptions, create a short story:\n\n{descriptions}\n\nStory:"
+ )
+
+ # Create the LLMChain with the Ollama model
+ ollama_llm = Ollama(model="llama3.1")
+ output_parser = StrOutputParser()
+ chain = LLMChain(
+     llm=ollama_llm,
+     prompt=prompt_template,
+     output_parser=output_parser
+ )
+
+ # Generate the story using LangChain
+ story = chain.run({"descriptions": combined_text})
 
  # Display the generated story
  st.write("Generated Story:")
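
Note: the new story-generation path assumes a local Ollama server is running and that the llama3.1 model has been pulled (for example with `ollama pull llama3.1`); otherwise the Ollama(model="llama3.1") call fails at request time. Recent LangChain releases also deprecate LLMChain in favour of composing runnables directly. The following is a minimal sketch of an equivalent pipeline, built only from the classes already imported in this commit and assuming the langchain-core and langchain-community packages are installed; the combined_text value is a hypothetical placeholder for the descriptions produced earlier in app.py.

# Sketch: same story-generation step without LLMChain, using runnable composition.
# Assumes a local Ollama server with the "llama3.1" model already pulled.
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Ollama

prompt_template = PromptTemplate(
    input_variables=["descriptions"],
    template="Based on the following descriptions, create a short story:\n\n{descriptions}\n\nStory:",
)

ollama_llm = Ollama(model="llama3.1")

# prompt | llm | parser builds the same chain declaratively; invoke() replaces run().
chain = prompt_template | ollama_llm | StrOutputParser()

combined_text = "A dog chases waves on a beach at sunset."  # hypothetical example input
story = chain.invoke({"descriptions": combined_text})
print(story)

chain.invoke({"descriptions": ...}) mirrors chain.run({"descriptions": combined_text}) in the committed code; swapping LLMChain for the pipe syntax avoids the deprecation warning without changing behaviour.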