fffiloni committed on
Commit
53b4fd0
1 Parent(s): 0b14a4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -0
app.py CHANGED
@@ -81,7 +81,15 @@ def extract_frames(video_in, interval=24, output_format='.jpg'):
81
 
82
  return frames
83
 
 
 
 
 
 
 
 
84
  def process_image(image_in):
 
85
  client = Client("https://vikhyatk-moondream1.hf.space/")
86
  result = client.predict(
87
  image_in, # filepath in 'image' Image component
@@ -91,6 +99,12 @@ def process_image(image_in):
91
  )
92
  print(result)
93
  return result
 
 
 
 
 
 
94
 
95
  def extract_audio(video_path):
96
  video_clip = VideoFileClip(video_path)
 
81
 
82
  return frames
83
 
84
+ from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer
85
+ from PIL import Image
86
+
87
+ cap_model_id = "vikhyatk/moondream1"
88
+ cap_model = AutoModelForCausalLM.from_pretrained(cap_model_id, trust_remote_code=True)
89
+ cap_tokenizer = Tokenizer.from_pretrained(cap_model_id)
90
+
91
  def process_image(image_in):
92
+ '''
93
  client = Client("https://vikhyatk-moondream1.hf.space/")
94
  result = client.predict(
95
  image_in, # filepath in 'image' Image component
 
99
  )
100
  print(result)
101
  return result
102
+ '''
103
+ image = Image.open(image_in)
104
+ enc_image = cap_model.encode_image(image)
105
+ result = cap_model.answer_question(enc_image, "Describe precisely the image in one sentence.", cap_tokenizer)
106
+ print(result)
107
+ return result
108
 
109
  def extract_audio(video_path):
110
  video_clip = VideoFileClip(video_path)