sunny-annie committed
Commit e24264c
1 Parent(s): fab7731

Upload 4 files

feature_extractor_v3.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6de31b46e55b824d28a5daab95de36f12f7cee1600bda97c7496433415c425c0
+ size 361
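The .joblib and .pt entries in this commit are Git LFS pointer files rather than the binaries themselves: each records the LFS spec version, the sha256 oid of the stored object, and its size in bytes. Below is a minimal sketch (not part of the commit) for checking a fetched file against the oid and size shown above; the helper name matches_pointer is hypothetical.

import hashlib
import os

def matches_pointer(path, expected_oid, expected_size):
    # Compare a local file's size and sha256 digest to an LFS pointer's oid/size.
    if os.path.getsize(path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example: the feature_extractor_v3.joblib pointer above records a 361-byte object.
print(matches_pointer(
    "feature_extractor_v3.joblib",
    "6de31b46e55b824d28a5daab95de36f12f7cee1600bda97c7496433415c425c0",
    361,
))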
img-2-txt.py ADDED
@@ -0,0 +1,55 @@
+ import streamlit as st
+ import torch
+ from joblib import load
+ from PIL import Image
+ from transformers import VisionEncoderDecoderModel
+
+ device = 'cpu'
+
+ # Serialized tokenizer and feature extractor uploaded in this commit
+ # tokenizer = load("./pages/tokenizer_v3.joblib")
+ # feature_extractor = load("./pages/feature_extractor_v3.joblib")
+ tokenizer = load("tokenizer_v3.joblib")
+ feature_extractor = load("feature_extractor_v3.joblib")
+
+ # Start from the pretrained captioning model, then apply the fine-tuned weights
+ model = VisionEncoderDecoderModel.from_pretrained("dumperize/movie-picture-captioning")
+ # model = load("model_img2txt_v3.joblib")
+ model.load_state_dict(torch.load("model_weights_i2t_fin.pt", map_location=torch.device('cpu')))
+ model.eval()  # inference mode: disable dropout before generation
+
+ # Beam-search settings for caption generation
+ max_length = 512
+ min_length = 32
+ num_beams = 7
+ gen_kwargs = {"max_length": max_length, "min_length": min_length, "num_beams": num_beams}
+
+ uploaded_file = st.file_uploader("Choose a book cover image in jpeg or jpg format...", type=["jpg", "jpeg"])
+ if uploaded_file is not None:
+     image = Image.open(uploaded_file)
+     st.image(image, caption='Uploaded image')
+     # Match the 224x224 RGB input expected by the vision encoder
+     image = image.resize([224, 224])
+     if image.mode != "RGB":
+         image = image.convert(mode="RGB")
+
+     pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values
+     pixel_values = pixel_values.to(device)
+
+     output_ids = model.generate(pixel_values, **gen_kwargs)
+
+     preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+     preds = [pred.strip() for pred in preds]
+     st.write(preds[0])
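For reference, the same captioning path can be run outside Streamlit. A minimal standalone sketch, assuming the four uploaded files sit in the working directory and a local image named cover.jpg (hypothetical filename) is used as input:

import torch
from joblib import load
from PIL import Image
from transformers import VisionEncoderDecoderModel

tokenizer = load("tokenizer_v3.joblib")
feature_extractor = load("feature_extractor_v3.joblib")

model = VisionEncoderDecoderModel.from_pretrained("dumperize/movie-picture-captioning")
model.load_state_dict(torch.load("model_weights_i2t_fin.pt", map_location="cpu"))
model.eval()

# Preprocess exactly as the Streamlit app does: 224x224 RGB
image = Image.open("cover.jpg").resize([224, 224]).convert("RGB")
pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values

with torch.no_grad():
    output_ids = model.generate(pixel_values, max_length=512, min_length=32, num_beams=7)

print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip())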
model_weights_i2t_fin.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6753b600e5d753ad7420a92aa4dcd3f5d860f8e9b5c933f059144e4e579d938e
+ size 1171154606
tokenizer_v3.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2cc0b03a6f6232c82c5d9afa89f8d53f71d1b324e9b3845869d4871fa0ebe87d
+ size 2617590