"""Image captioning (BLIP) + style filters + translation, served via Gradio.

Workflow: optionally capture a webcam photo (Colab only), load the BLIP
captioning model once at import time, then expose a Gradio UI that returns
a (possibly translated) caption and a style-filtered copy of the image.
"""

from PIL import Image, ImageEnhance, ImageFilter, ImageOps
from transformers import BlipProcessor, BlipForConditionalGeneration
from translate import Translator

# --- Optional: Colab webcam capture ------------------------------------------
# `take_photo` and `display` are helpers injected by the Colab notebook
# environment; everything here is best-effort and must not break other runs,
# so the IPython import lives inside the try as well.
try:
    # Alias the IPython Image class so it does not shadow PIL.Image above
    # (the original imported both under the same name, breaking PIL usage).
    from IPython.display import Image as IPyImage

    filename = take_photo()  # noqa: F821 - defined by the Colab camera snippet
    print('Saved to {}'.format(filename))
    # Show the image which was just taken.
    display(IPyImage(filename))  # noqa: F821 - IPython notebook builtin
except Exception as err:
    # Errors will be thrown if the user does not have a webcam or if they do
    # not grant the page permission to access it.
    print(str(err))

# Load the BLIP model and processor once at module import.
# NOTE(review): slow on first run — downloads the model weights.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# UI language label -> target language code for the `translate` package.
_LANG_CODE_MAP = {
    "中文": "zh-tw",
    "法文": "fr",
    "德文": "de",
    "西班牙文": "es",
    "日文": "ja",
    "阿拉伯文": "ar",
}


def generate_caption(image, language="中文"):
    """Caption ``image`` with BLIP, then translate to ``language``.

    Args:
        image: a ``PIL.Image`` instance.
        language: one of the UI labels; ``"English"`` (and any unknown
            label) skips translation.

    Returns:
        The caption string, or an error message prefixed with
        ``描述生成失敗`` if anything fails.
    """
    try:
        # Convert the image into tensors the model understands.
        inputs = processor(image, return_tensors="pt")

        # Beam search for a longer, less repetitive caption.
        # NOTE(review): the original passed temperature=200 / top_k / top_p,
        # which are sampling parameters and are ignored (with a warning) when
        # do_sample is False, so they are dropped. It also built a long
        # English "describe in detail" prompt that was never passed to the
        # processor or model — dead code, removed.
        out = model.generate(
            **inputs,
            max_length=100,          # allow longer descriptions
            num_beams=7,             # beam search for higher quality
            no_repeat_ngram_size=3,  # reduce repetition
        )
        caption = processor.decode(out[0], skip_special_tokens=True).strip()

        # Translate unless English was requested; unknown labels fall
        # through untranslated instead of raising KeyError.
        if language != "English":
            target = _LANG_CODE_MAP.get(language)
            if target:
                caption = Translator(to_lang=target).translate(caption)
        return caption
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return f"描述生成失敗: {str(e)}"


def change_style(image, style):
    """Return ``image`` with the selected style filter applied.

    Unrecognized styles (e.g. "原始") return the image unchanged.
    """
    if style == "黑白":
        image = image.convert("L")
    elif style == "模糊":
        image = image.filter(ImageFilter.BLUR)
    elif style == "銳化":
        image = image.filter(ImageFilter.SHARPEN)
    elif style == "邊緣增強":
        image = image.filter(ImageFilter.EDGE_ENHANCE)
    elif style == "反轉顏色":
        # invert() requires a non-palette mode, hence the RGB conversion.
        image = ImageOps.invert(image.convert("RGB"))
    elif style == "懷舊":
        # Desaturate toward sepia-like tones.
        image = ImageEnhance.Color(image.convert("RGB")).enhance(0.3)
    return image


def process_image(image, style, language):
    """Gradio callback: produce ``(caption, styled_image)`` for the inputs.

    Guards against ``image is None`` (no upload yet), which previously
    raised AttributeError inside the captioning pipeline.
    """
    if image is None:
        return "請先上傳圖片", None
    caption = generate_caption(image, language)
    styled_image = change_style(image, style)
    return caption, styled_image


# --- Gradio UI ----------------------------------------------------------------
import gradio as gr

interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="上傳圖片或使用攝像頭"),
        gr.Radio(["原始", "黑白", "模糊", "銳化", "邊緣增強", "反轉顏色", "懷舊"], label="選擇風格"),
        gr.Radio(["中文", "English", "法文", "德文", "西班牙文", "日文", "阿拉伯文"], label="選擇語言"),
    ],
    outputs=[
        gr.Textbox(label="圖片描述"),
        gr.Image(type="pil", label="變換畫風後的圖像"),
    ],
    title="圖片描述與畫風變換(更具體描述)",
)

# Launch the web UI (blocks until the server is stopped).
interface.launch()