Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -9,9 +9,9 @@ from PIL import Image
|
|
9 |
device = 'cpu'
|
10 |
|
11 |
# Load the pretrained model, feature extractor, and tokenizer
|
12 |
-
model = VisionEncoderDecoderModel.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-
|
13 |
-
feature_extractor = ViTImageProcessor.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-
|
14 |
-
tokenizer = AutoTokenizer.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-
|
15 |
|
16 |
def predict(image, max_length=64, num_beams=4):
|
17 |
# Process the input image
|
@@ -25,29 +25,50 @@ def predict(image, max_length=64, num_beams=4):
|
|
25 |
caption = tokenizer.decode(caption_ids, skip_special_tokens=True)
|
26 |
return caption
|
27 |
|
28 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# Create a directory to extract images
|
30 |
extract_dir = 'extracted_images'
|
31 |
os.makedirs(extract_dir, exist_ok=True)
|
32 |
-
|
33 |
-
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
|
34 |
-
zip_ref.extractall(extract_dir)
|
35 |
|
36 |
-
# Verify extracted files and process images
|
37 |
captions = []
|
38 |
-
for
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Save the results to a CSV file
|
50 |
-
csv_file_path = '
|
51 |
with open(csv_file_path, mode='w', newline='') as file:
|
52 |
writer = csv.writer(file)
|
53 |
writer.writerow(['Image Name', 'Caption'])
|
@@ -55,9 +76,15 @@ def process_zip_file(zip_file_path):
|
|
55 |
|
56 |
return csv_file_path
|
57 |
|
58 |
-
def
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
css = '''
|
63 |
h1#title {
|
@@ -88,11 +115,12 @@ with demo:
|
|
88 |
|
89 |
with gr.Row():
|
90 |
with gr.Column(scale=1):
|
91 |
-
input_zip = gr.File(label="Upload
|
|
|
92 |
with gr.Column(scale=3):
|
93 |
output_file = gr.File(label="Download Caption File")
|
94 |
|
95 |
btn = gr.Button("Generate Captions")
|
96 |
-
btn.click(fn=
|
97 |
|
98 |
demo.launch()
|
|
|
9 |
device = 'cpu'
|
10 |
|
11 |
# Load the pretrained model, feature extractor, and tokenizer
|
12 |
+
model = VisionEncoderDecoderModel.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-02").to(device)
|
13 |
+
feature_extractor = ViTImageProcessor.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-02")
|
14 |
+
tokenizer = AutoTokenizer.from_pretrained("NourFakih/Vit-GPT2-COCO2017Flickr-02")
|
15 |
|
16 |
def predict(image, max_length=64, num_beams=4):
|
17 |
# Process the input image
|
|
|
25 |
caption = tokenizer.decode(caption_ids, skip_special_tokens=True)
|
26 |
return caption
|
27 |
|
28 |
+
def process_images(image_files):
    """Caption each image and write the results to a CSV file.

    Every path in ``image_files`` is opened with ``Image.open`` and passed
    to ``predict``. Files that cannot be opened or captioned are reported
    on stdout and skipped, so one bad upload does not abort the batch.

    Args:
        image_files: Iterable of image file paths.

    Returns:
        The path of the CSV file that was written ('image_captions.csv').
        The file holds a header row followed by one
        ``(image base name, caption)`` row per successfully captioned image.
    """
    captions = []
    for image_file in image_files:
        try:
            # Open and verify the image
            with Image.open(image_file) as img:
                caption = predict(img)
            captions.append((os.path.basename(image_file), caption))
        except Exception as e:
            # Deliberate best-effort: unreadable or non-image files are
            # logged and skipped rather than failing the whole request.
            print(f"Skipping file {image_file}: {e}")

    # Save the results to a CSV file
    # NOTE(review): the fixed file name is relative to the working
    # directory, so overlapping requests would overwrite each other's
    # output -- confirm whether that matters for this deployment.
    csv_file_path = 'image_captions.csv'
    # Pin UTF-8 so captions and file names with non-ASCII characters are
    # written the same way regardless of the platform's default encoding.
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Image Name', 'Caption'])
        writer.writerows(captions)

    return csv_file_path
|
47 |
+
|
48 |
+
def process_zip_files(zip_file_paths):
|
49 |
# Create a directory to extract images
|
50 |
extract_dir = 'extracted_images'
|
51 |
os.makedirs(extract_dir, exist_ok=True)
|
|
|
|
|
|
|
52 |
|
|
|
53 |
captions = []
|
54 |
+
for zip_file_path in zip_file_paths:
|
55 |
+
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
|
56 |
+
zip_ref.extractall(extract_dir)
|
57 |
+
|
58 |
+
# Verify extracted files and process images
|
59 |
+
for root, dirs, files in os.walk(extract_dir):
|
60 |
+
for file in files:
|
61 |
+
file_path = os.path.join(root, file)
|
62 |
+
try:
|
63 |
+
# Open and verify the image
|
64 |
+
with Image.open(file_path) as img:
|
65 |
+
caption = predict(img)
|
66 |
+
captions.append((file, caption))
|
67 |
+
except Exception as e:
|
68 |
+
print(f"Skipping file {file}: {e}")
|
69 |
|
70 |
# Save the results to a CSV file
|
71 |
+
csv_file_path = 'zip_image_captions.csv'
|
72 |
with open(csv_file_path, mode='w', newline='') as file:
|
73 |
writer = csv.writer(file)
|
74 |
writer.writerow(['Image Name', 'Caption'])
|
|
|
76 |
|
77 |
return csv_file_path
|
78 |
|
79 |
+
def gr_process(zip_files, image_files):
    """Gradio click handler: caption uploaded zip archives or loose images.

    Zip uploads take precedence: when ``zip_files`` is non-empty,
    ``image_files`` is ignored. When neither input has any files, nothing
    is processed.

    Args:
        zip_files: Uploaded zip archives, or None/empty when none were given.
        image_files: Uploaded image files, or None/empty when none were given.

    Returns:
        The CSV path returned by ``process_zip_files`` or ``process_images``,
        or None when there is nothing to process.
    """
    def _as_path(upload):
        # The file inputs are declared with type="filepath", in which case
        # each upload arrives as a plain path string and has no ``.name``
        # attribute. File-like uploads that expose the path via ``.name``
        # are still accepted for backward compatibility.
        return upload if isinstance(upload, str) else upload.name

    if zip_files:
        zip_file_paths = [_as_path(zip_file) for zip_file in zip_files]
        return process_zip_files(zip_file_paths)
    if image_files:
        image_file_paths = [_as_path(image_file) for image_file in image_files]
        return process_images(image_file_paths)
    return None
|
88 |
|
89 |
css = '''
|
90 |
h1#title {
|
|
|
115 |
|
116 |
with gr.Row():
|
117 |
with gr.Column(scale=1):
|
118 |
+
input_zip = gr.File(label="Upload Zip Files", type="filepath", file_count="multiple")
|
119 |
+
input_images = gr.File(label="Upload Images", type="filepath", file_count="multiple")
|
120 |
with gr.Column(scale=3):
|
121 |
output_file = gr.File(label="Download Caption File")
|
122 |
|
123 |
btn = gr.Button("Generate Captions")
|
124 |
+
btn.click(fn=gr_process, inputs=[input_zip, input_images], outputs=output_file)
|
125 |
|
126 |
demo.launch()
|