EduTechTeam committed on
Commit
c481ee6
·
verified ·
1 Parent(s): 1db4769

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import BlipProcessor, BlipForConditionalGeneration
2
+ from PIL import Image, ImageEnhance, ImageOps, ImageFilter
3
+ from translate import Translator
4
+
5
+
6
+ from IPython.display import Image
7
# Notebook-style webcam snippet: capture a photo, report where it was saved,
# then preview it inline.
# NOTE(review): neither `take_photo` nor `display` is defined or imported in
# the visible part of this file; if they are not provided elsewhere, this
# block only ends up printing the resulting error -- confirm it is still
# needed.
try:
    filename = take_photo()
    print(f'Saved to {filename}')

    # Preview the picture that was just captured.
    display(Image(filename))
except Exception as capture_error:
    # Raised when no webcam is available or the page has not been granted
    # permission to use it.
    print(capture_error)
17
+
18
+ from transformers import BlipProcessor, BlipForConditionalGeneration
19
+ from PIL import Image, ImageEnhance, ImageOps, ImageFilter
20
+ from translate import Translator
21
+
22
# Load the BLIP processor and captioning model once, at import time. Both are
# restored from the same pretrained checkpoint.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
25
+
26
def generate_caption(image, language="中文"):
    """Caption *image* with the BLIP model and optionally translate the result.

    Args:
        image: The picture to describe; handed directly to the module-level
            ``processor``.
        language: Display name of the output language. ``"English"`` returns
            the model's caption untranslated. ``None`` (for example when no
            option was selected in the UI) falls back to ``"中文"``.

    Returns:
        The caption text. On any failure, a string that starts with
        ``"描述生成失敗: "`` followed by the error detail is returned instead
        of raising.
    """
    # Display name -> language code passed to ``Translator``.
    lang_code_map = {
        "中文": "zh-tw",
        "法文": "fr",
        "德文": "de",
        "西班牙文": "es",
        "日文": "ja",
        "阿拉伯文": "ar",
    }

    if language is None:
        language = "中文"

    # Reject unknown languages up front so we neither run the model for
    # nothing nor surface an opaque KeyError message to the user.
    if language != "English" and language not in lang_code_map:
        return f"描述生成失敗: 不支援的語言 {language!r}"

    try:
        # Convert the picture into the tensor inputs the model expects.
        inputs = processor(image, return_tensors="pt")

        # Beam search decoding. Sampling-only knobs (temperature, top_k,
        # top_p) are intentionally not passed: they have no effect unless
        # do_sample=True is set.
        out = model.generate(
            **inputs,
            max_length=100,          # allow longer descriptions
            num_beams=7,             # wider beam search
            no_repeat_ngram_size=3,  # suppress repeated phrases
        )
        caption = processor.decode(out[0], skip_special_tokens=True).strip()

        # The model captions in English; translate only when asked to.
        if language != "English":
            translator = Translator(to_lang=lang_code_map[language])
            caption = translator.translate(caption)

        return caption
    except Exception as e:
        # Top-level UI boundary: report the failure as text rather than
        # letting the exception propagate to the caller.
        return f"描述生成失敗: {e}"
63
+
64
def change_style(image, style):
    """Return *image* restyled according to the selected *style* label.

    Args:
        image: PIL image to transform, or ``None`` when no image was given.
        style: One of ``"黑白"`` (grayscale), ``"模糊"`` (blur), ``"銳化"``
            (sharpen), ``"邊緣增強"`` (edge enhance), ``"反轉顏色"`` (invert
            colours) or ``"懷舊"`` (faded colours). Any other value,
            including ``"原始"`` and ``None``, leaves the image untouched.

    Returns:
        The transformed image, the input itself for unrecognised styles, or
        ``None`` when *image* is ``None``.
    """
    # Nothing to transform; avoid an AttributeError on ``None``.
    if image is None:
        return None

    if style == "黑白":
        return image.convert("L")
    if style == "模糊":
        return image.filter(ImageFilter.BLUR)
    if style == "銳化":
        return image.filter(ImageFilter.SHARPEN)
    if style == "邊緣增強":
        return image.filter(ImageFilter.EDGE_ENHANCE)
    if style == "反轉顏色":
        # Convert to RGB first, as the original code does before inverting.
        return ImageOps.invert(image.convert("RGB"))
    if style == "懷舊":
        # Reduce colour saturation to 30% for a faded look.
        color_enhancer = ImageEnhance.Color(image.convert("RGB"))
        return color_enhancer.enhance(0.3)
    return image
79
+
80
def process_image(image, style, language):
    """Caption *image* and restyle it in a single call.

    The caption is produced first, from the unmodified input image; the
    style transformation is applied afterwards.

    Returns:
        A ``(caption, styled_image)`` tuple.
    """
    description = generate_caption(image, language)
    return description, change_style(image, style)
84
+
85
# Configure the Gradio interface.
import gradio as gr

# Both radio groups get an explicit default so the callback always receives
# a concrete choice rather than None when the user does not touch them.
interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="上傳圖片或使用攝像頭"),
        gr.Radio(
            ["原始", "黑白", "模糊", "銳化", "邊緣增強", "反轉顏色", "懷舊"],
            value="原始",
            label="選擇風格",
        ),
        gr.Radio(
            ["中文", "English", "法文", "德文", "西班牙文", "日文", "阿拉伯文"],
            value="中文",
            label="選擇語言",
        ),
    ],
    outputs=[
        gr.Textbox(label="圖片描述"),
        gr.Image(type="pil", label="變換畫風後的圖像"),
    ],
    title="圖片描述與畫風變換(更具體描述)",
)

# Start the interface.
interface.launch()