import os

from langchain.agents import initialize_agent, AgentType
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
DEP_NAME = os.getenv("deployment_name")

llm = AzureChatOpenAI(
    deployment_name=DEP_NAME,
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
    openai_api_version="2023-03-15-preview",
    model_name="gpt-3.5-turbo",
)
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

image_to_text_model = "Salesforce/blip-image-captioning-large"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
import requests
from PIL import Image

def describeImage(image_url):
    # Accept either a local file path or an http(s) URL.
    if image_url.startswith("http"):
        image_object = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
    else:
        image_object = Image.open(image_url).convert("RGB")
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
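# Quick sanity check of the captioner outside the app (hypothetical paths;
# any local image file or http(s) URL should work):
#
#     print(describeImage("sample.jpg"))
#     print(describeImage("https://example.com/photo.jpg"))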
from langchain.tools import BaseTool

class DescribeImageTool(BaseTool):
    name = "Describe Image Tool"
    description = "use this tool to describe an image."

    def _run(self, url: str) -> str:
        return describeImage(url)

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
tools = [DescribeImageTool()]

agent = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method="generate",
    memory=ConversationBufferWindowMemory(
        memory_key="chat_history",
        k=5,
        return_messages=True,
    ),
)
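# The agent can also be driven directly, without the Gradio UI defined below
# (hypothetical image path):
#
#     result = agent("What is in this image?:\n/path/to/sample.jpg")
#     print(result["output"])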
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def enToChinese(english):
    prompt = PromptTemplate(
        input_variables=["english"],
        template="Please translate the following sentence from English to Chinese: {english}",
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(english)

def chToEnglish(chinese):
    prompt = PromptTemplate(
        input_variables=["chinese"],
        template="Please translate the following sentence from Chinese to English: {chinese}",
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(chinese)
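# Usage sketch for the translation helpers:
#
#     enToChinese("How many dogs are in the picture?")
#     chToEnglish("图片里有几只狗?")  # "How many dogs are in the picture?"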
import gradio as gr

def image_to_txt(image_url, user_input):
    # Translate the Chinese question to English, ask the agent about the
    # image, then translate the answer back to Chinese.
    user_input = chToEnglish(user_input)
    return enToChinese(agent(f"{user_input}:\n{image_url}")["output"])

with gr.Blocks() as demo:
    image_url = gr.Image(type="filepath", label="请选择一张图片")  # "Please select an image"
    user_input = gr.Textbox(placeholder="请输入问题..", lines=1, label="问题")  # "Question"
    submit_btn = gr.Button("确认", variant="primary")  # "Confirm"
    output = gr.Textbox(placeholder="", lines=1, label="回答")  # "Answer"
    submit_btn.click(image_to_txt, inputs=[image_url, user_input], outputs=output)

demo.launch()
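# To expose a temporary public URL instead of localhost only, Gradio supports:
#
#     demo.launch(share=True)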