Desm0nt commited on
Commit
e237273
1 Parent(s): b194f04

Upload 2 files

Browse files
Files changed (2) hide show
  1. LICENSE +21 -0
  2. phi_captioning_example.py +86 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) Microsoft Corporation.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
phi_captioning_example.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from swift.tuners import Swift  # ms-swift: toolkit for finetuning and inference

from swift.llm import (
    get_model_tokenizer, get_template, inference, ModelType,
    get_default_template_type, inference_stream
)
from swift.utils import seed_everything
import torch
from tqdm import tqdm
import time

# Model / chat-template selection for Phi-3-Vision (128k, instruct).
model_type = ModelType.phi3_vision_128k_instruct
template_type = get_default_template_type(model_type)
print(f'template_type: {template_type}')

# Path to the LoRA adapter checkpoint.
# NOTE(review): only verified with a LoRA path — behavior with a merged
# checkpoint is unconfirmed.
model_path = "./phi3-1476"
model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16, model_kwargs={'device_map': 'auto'})
# Generation params. Greedy decoding (do_sample=False) with defaults was found
# to work better than any sampling configuration tried.
model.generation_config.max_new_tokens = 1256
model.generation_config.do_sample = False
#model.generation_config.top_p = 0.7
#model.generation_config.temperature = 0.3
model = Swift.from_pretrained(model, model_path, "lora", inference_mode=True)
template = get_template(template_type, tokenizer)
#seed_everything(6321)

image_dir = './images/'     # path to images
txt_dir = './tags/'         # path to txt files with tags (from danbooru or from WD_Tagger)
maintxt_dir = './maintxt/'  # path for resulting txt captions in natural language
33
+
# Collect every .jpg in image_dir for captioning.
image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]

total_files = len(image_files)

progress_bar = tqdm(total=total_files, unit='file', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]')
total_elapsed_time = 0
processed_files = 0

# Main captioning cycle: for each image, read its tag file, prompt the model,
# and write the generated caption next to it under maintxt_dir.
for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)
    if os.path.exists(image_path):
        txt_file = os.path.splitext(image_file)[0] + '.txt'
        txt_path = os.path.join(txt_dir, txt_file)

        if os.path.exists(txt_path):
            with open(txt_path, 'r', encoding='utf-8') as f:
                tags = f.read().strip()

            text = f'<img>{image_path}</img> Make a caption that describe this image. Here is the tags describing image: {tags}\n Find the relevant character\'s names in the tags and use it.'
            print(text)
            step_start_time = time.time()
            # BUGFIX: the original passed do_sample=True together with
            # temperature=0, which is contradictory (sampling at zero
            # temperature) and rejected by recent generation backends.
            # Greedy decoding is what was intended — see the generation-config
            # comment above — so request it explicitly.
            response, _history = inference(model, template, text, do_sample=False, repetition_penalty=1.05)
            step_end_time = time.time()
            step_time = step_end_time - step_start_time
            total_elapsed_time += step_time
            # ETA from the running average per file. The current file is now
            # done, so it counts toward processed work (hence the +1 on both
            # the average divisor and the remaining count — the original
            # over-counted the current file as still remaining).
            remaining_time = (total_elapsed_time / (processed_files + 1)) * (total_files - (processed_files + 1))

            remaining_hours = int(remaining_time // 3600)
            remaining_minutes = int((remaining_time % 3600) // 60)
            remaining_seconds = int(remaining_time % 60)

            # BUGFIX: the original set the postfix to a literal '\n' and never
            # showed the ETA it had just computed; render it as HH:MM:SS.
            progress_bar.set_postfix(remaining=f'{remaining_hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}', refresh=False)
            print(f"\n\n\nFile {image_file}\nConsumed time: {step_time:.2f} s\n{response}")

            # Build the output filename for the caption.
            output_file = os.path.splitext(image_file)[0] + '.txt'
            output_path = os.path.join(maintxt_dir, output_file)

            # Write the generated caption to the output file.
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(response)

            print(f"Caption saved in file: {output_file} \n")
            processed_files += 1
            progress_bar.update(1)
        else:
            print(f"File {txt_file} doesn't exist.")
    else:
        print(f"Image {image_file} not found.")
progress_bar.close()