chanhua committed on
Commit
ef3d2c0
1 Parent(s): fd24d5c
Files changed (3)
  1. app.py +39 -0
  2. requirements.txt +6 -0
  3. testMch.py +118 -0
app.py ADDED
@@ -0,0 +1,39 @@
+ import gradio as gr
+ import testMch as func
+
+
+ def work(imgurl):
+     # Earlier experiments (direct BLIP call, Windows path munging), kept for reference:
+     # output = testMch.describeImage2(img)
+     # imgurl = imgurl.replace("\\\\\\", "\\")
+     # Ask the LangChain agent to describe the image, then translate the answer.
+     out = func.agent(f"Please describe the following image:\n{imgurl}")
+     answer = func.toChinese(out['output'])  # English caption -> Chinese
+     return answer
+
+
+ # Unused gr.Blocks variant. Labels: "请选择一张图片" = "please select an image",
+ # "请描述您的问题" = "please describe your question", "答案" = "answer", "提问" = "ask".
+ # (Note: it wires two inputs into the one-argument work().)
+ # with gr.Blocks() as demo:
+ #     image_url = gr.Image(type="filepath", label="请选择一张图片")
+ #     question = gr.Textbox(label='请描述您的问题', placeholder="", lines=1)
+ #     output = gr.Textbox(label='答案', placeholder="", lines=2, interactive=False)
+ #     submit = gr.Button('提问', variant="primary")
+ #     submit.click(work, inputs=[image_url, question], outputs=output)
+ #     demo.launch()
+
+ # Active UI: one uploaded image in, one caption out. Title "识别图片" =
+ # "describe the image"; input label "请上传图片" = "please upload an image";
+ # output label "识别结果" = "result".
+ demo = gr.Interface(title="识别图片",
+                     css="",
+                     fn=work,
+                     inputs=[gr.Image(type="filepath", label="请上传图片")],
+                     outputs=[gr.Textbox(lines=3, label="识别结果")])
+
+ # demo = gr.Interface(fn=work, inputs="image,text", outputs="text")  # discarded variant
+ demo.launch()
+
+ # interface = gr.Interface(fn=agent, inputs="image", outputs="text")
+ # interface.launch()
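
Since testMch.py reads its Azure OpenAI settings from the environment at import time, app.py only starts if those variables exist. A minimal launch sketch (the values are placeholders, not part of the commit):

    import os

    # Placeholders -- substitute real Azure OpenAI settings before running.
    os.environ.setdefault("OPENAI_API_KEY", "<azure-openai-key>")
    os.environ.setdefault("OPENAI_API_BASE", "<https://your-resource.openai.azure.com/>")
    os.environ.setdefault("DEPLOYMENT_NAME", "<deployment-name>")

    import app  # importing app loads BLIP, builds the agent, and calls demo.launch()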
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ torch
+ langchain
+ openai
+ gradio
+ pillow
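
The imports in testMch.py (langchain.chat_models.AzureChatOpenAI, langchain.chains.LLMChain, initialize_agent) follow the pre-0.1 LangChain API, and the openai_api_base/openai_api_key keyword style matches the pre-1.0 openai SDK, so an unpinned install from this file will likely break on newer releases. A hedged sketch with example pins (the exact version numbers are assumptions, not taken from the commit; requests is imported directly by testMch.py but only arrives transitively here):

    transformers
    torch
    langchain==0.0.190  # example pin from the 0.0.x line that still ships langchain.chat_models
    openai==0.27.8      # example pin from the pre-1.0 SDK
    gradio
    pillow
    requests            # direct import in testMch.py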
testMch.py ADDED
@@ -0,0 +1,118 @@
+ # from langchain.agents import load_tools
+ import requests
+ from PIL import Image
+ from langchain.agents import initialize_agent
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+ # from langchain.agents import AgentType
+ # from langchain.llms import OpenAI
+ # from langchain.chat_models import ChatOpenAI
+ from langchain.chat_models import AzureChatOpenAI
+ from langchain.tools import BaseTool
+ import os
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from langchain import PromptTemplate
+ from langchain.chains import LLMChain
+
+ # Azure OpenAI credentials; all three must be set or the import fails with KeyError.
+ OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
+ OPENAI_API_BASE = os.environ['OPENAI_API_BASE']
+ DEPLOYMENT_NAME = os.environ['DEPLOYMENT_NAME']
+
+ llm = AzureChatOpenAI(deployment_name=DEPLOYMENT_NAME, openai_api_base=OPENAI_API_BASE,
+                       openai_api_key=OPENAI_API_KEY, openai_api_version="2023-03-15-preview",
+                       model_name="gpt-3.5-turbo")
+
+ # Non-Azure alternative, kept for reference:
+ # llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo')
+
+ # BLIP image-captioning model, forced onto CPU (no CUDA check).
+ image_to_text_model = "Salesforce/blip-image-captioning-large"
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ device = 'cpu'
+
+ processor = BlipProcessor.from_pretrained(image_to_text_model)
+ model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
+
+
+ def describeImage3(url):
+     """Caption an image fetched from a remote URL."""
+     image_object = Image.open(requests.get(url, stream=True).raw).convert('RGB')
+     inputs = processor(image_object, return_tensors="pt").to(device)
+     outputs = model.generate(**inputs)
+     return processor.decode(outputs[0], skip_special_tokens=True)
+
+
+ def describeImage(image_url):
+     """Caption an image from a local file path."""
+     image_obj = Image.open(image_url).convert('RGB')
+     inputs = processor(image_obj, return_tensors='pt').to(device)
+     outputs = model.generate(**inputs)
+     return processor.decode(outputs[0], skip_special_tokens=True)
+
+
+ def describeImage2(image_object):
+     """Caption an already-opened PIL image."""
+     inputs = processor(image_object, return_tensors="pt").to(device)
+     outputs = model.generate(**inputs)
+     return processor.decode(outputs[0], skip_special_tokens=True)
+
+
+ def toChinese(en: str):
+     """Translate an English sentence into Chinese via the LLM."""
+     # Prompt "将下面的语句翻译成中文" = "Translate the following sentence into Chinese".
+     pp = "将下面的语句翻译成中文\n{en}"
+     prompt = PromptTemplate(
+         input_variables=["en"],
+         template=pp
+     )
+     llchain = LLMChain(llm=llm, prompt=prompt)
+     return llchain.run(en)
+
+
+ # description = describeImage('https://images.unsplash.com/photo-1673207520321-c27d09eb0955?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1035&q=80')
+ # description = describeImage('https://alifei03.cfp.cn/creative/vcg/800/new/VCG21gic13601846.jpg')
+
+
+ class DescribeImageTool(BaseTool):
+     name = "Describe Image Tool"
+     description = 'use this tool to describe an image.'
+
+     def _run(self, url: str):
+         return describeImage(url)
+
+     def _arun(self, query: str):
+         raise NotImplementedError("Async operation not supported yet")
+
+
+ tools = [DescribeImageTool()]
+
+ # Conversational ReAct agent with a 5-turn window memory.
+ agent = initialize_agent(
+     agent='chat-conversational-react-description',
+     tools=tools,
+     llm=llm,
+     verbose=True,
+     max_iterations=3,
+     early_stopping_method='generate',
+     memory=ConversationBufferWindowMemory(
+         memory_key='chat_history',
+         k=5,
+         return_messages=True
+     )
+ )
+
+ # Example invocations, kept for reference:
+ # image_url = 'https://images.unsplash.com/photo-1673207520321-c27d09eb0955?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1035&q=80'
+ # image_url = 'https://alifei03.cfp.cn/creative/vcg/800/new/VCG21gic13601846.jpg'
+ # agent(f"Describe the following image:\n{image_url}")
+ # agent(f"What is the brand of car in the following image:\n{image_url}")
+ # agent(f"Please describe the following image:\n{image_url}")
+ # agent.memory.buffer  # inspect conversation history
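
For reference, a minimal way to exercise this module outside Gradio, mirroring the commented-out calls above (the image path is a placeholder, and the three environment variables must be set before import):

    import testMch as func

    result = func.agent("Please describe the following image:\n/path/to/local.jpg")
    print(result['output'])                  # English caption produced via the BLIP tool
    print(func.toChinese(result['output']))  # the same caption translated to Chinese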