# huaapp / testMch.py
# chanhua's picture
# 621up
# ef3d2c0
# from langchain.agents import load_tools
import requests
from PIL import Image
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
# from langchain.agents import AgentType
# from langchain.llms import OpenAI
# from langchain.chat_models import ChatOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.tools import BaseTool
# from transformers.models.oneformer.modeling_oneformer import OneFormerModelOutput
import os
from transformers import BlipProcessor, BlipForConditionalGeneration
from langchain import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain
# --- Azure OpenAI configuration ---
# Read from the environment; os.environ[...] raises KeyError at import time
# if any of these are unset, which fails fast before any model is loaded.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
OPENAI_API_BASE = os.environ['OPENAI_API_BASE']
DEPLOYMENT_NAME = os.environ['DEPLOYMENT_NAME']
# Chat LLM backed by an Azure OpenAI deployment (gpt-3.5-turbo).
llm = AzureChatOpenAI(deployment_name=DEPLOYMENT_NAME, openai_api_base=OPENAI_API_BASE,
openai_api_key=OPENAI_API_KEY, openai_api_version="2023-03-15-preview",
model_name="gpt-3.5-turbo")
# OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or 'Your OPENAI API Key'
# OPENAI_API_KEY = "123"
# llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo')
# --- BLIP image-captioning model (used by the describeImage* helpers below) ---
image_to_text_model = "Salesforce/blip-image-captioning-large"
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# CPU-only inference; flip to the commented line above to use CUDA when available.
device = 'cpu'
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
def describeImage3(url):
    """Download an image from *url* and return a BLIP-generated caption.

    Args:
        url: HTTP(S) URL of the image to caption.

    Returns:
        The caption string decoded from the model output.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    # Fix: the original call had no timeout (a dead server would hang the app
    # forever) and no status check (an HTML error page would be fed to PIL).
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    image_object = Image.open(response.raw).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
def describeImage(image_url):
    """Caption a locally available image (path or file-like object) with BLIP.

    Args:
        image_url: anything ``PIL.Image.open`` accepts (file path, file object).

    Returns:
        The decoded caption string.
    """
    img = Image.open(image_url).convert('RGB')
    encoded = processor(img, return_tensors='pt').to(device)
    token_ids = model.generate(**encoded)
    caption = processor.decode(token_ids[0], skip_special_tokens=True)
    return caption
def describeImage2(image_object):
    """Caption an already-loaded PIL image with BLIP.

    Args:
        image_object: a ``PIL.Image`` instance (assumed RGB-convertible).

    Returns:
        The decoded caption string.
    """
    encoded = processor(image_object, return_tensors="pt").to(device)
    token_ids = model.generate(**encoded)
    caption = processor.decode(token_ids[0], skip_special_tokens=True)
    return caption
def toChinese(en: str):
    """Translate an English sentence into Chinese via the Azure chat model.

    Args:
        en: the English text to translate.

    Returns:
        The model's Chinese translation as a string.
    """
    translate_prompt = PromptTemplate(
        input_variables=["en"],
        template="将下面的语句翻译成中文\n{en}",
    )
    chain = LLMChain(llm=llm, prompt=translate_prompt)
    return chain.run(en)
# description = describeImage('https://images.unsplash.com/photo-1673207520321-c27d09eb0955?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1035&q=80')
# description = describeImage('https://alifei03.cfp.cn/creative/vcg/800/new/VCG21gic13601846.jpg')
# description
class DescribeImageTool(BaseTool):
    """LangChain tool wrapping the module-level BLIP captioning helper."""

    name = "Describe Image Tool"
    description = 'use this tool to describe an image.'

    def _run(self, url: str):
        # Delegate straight to the captioning helper; no extra processing.
        return describeImage(url)

    def _arun(self, query: str):
        # No async implementation is provided.
        raise NotImplementedError("Async operation not supported yet")
# Single-tool toolbox exposed to the agent.
tools = [DescribeImageTool()]
# Conversational ReAct agent: at most 3 tool iterations, then the LLM
# generates a final answer ('generate' early stopping); keeps the last
# 5 exchanges of chat history as message objects for context.
agent = initialize_agent(
agent='chat-conversational-react-description',
tools=tools,
llm=llm,
verbose=True,
max_iterations=3,
early_stopping_method='generate',
memory=ConversationBufferWindowMemory(
memory_key='chat_history',
k=5,
return_messages=True
)
)
# image_url = 'https://images.unsplash.com/photo-1673207520321-c27d09eb0955?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1035&q=80'
# image_url = 'https://alifei03.cfp.cn/creative/vcg/800/new/VCG21gic13601846.jpg'
# agent(f"Describe the following image:\n{image_url}")
# agent(f"What is the brand of car in the following image:\n{image_url}")
# image_url = 'https://alifei03.cfp.cn/creative/vcg/800/new/VCG21gic13601846.jpg'
# agent(f"Please describe the following image:\n{image_url}")
# agent.memory.buffer