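# Image question-answering demo: a BLIP captioning model describes an uploaded
# image, a LangChain conversational agent (backed by Azure OpenAI gpt-3.5-turbo)
# uses that description to answer the user's question, and the answer is
# translated to Chinese before being shown in a Gradio web UI.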
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain.chat_models import AzureChatOpenAI
from langchain.llms import OpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import os
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
#llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo',openai_api_base=OPENAI_API_BASE)
llm = AzureChatOpenAI(
    deployment_name="bitservice_chat_35",
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
    openai_api_version="2023-03-15-preview",
    model_name="gpt-3.5-turbo",
)
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
image_to_text_model = "Salesforce/blip-image-captioning-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
import requests
from PIL import Image
def describeImage(image_url):
    """Generate an English caption for the image at the given local path."""
    image_object = Image.open(image_url).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
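# Example (hypothetical local path):
#   describeImage("./photos/dog.jpg")  # -> e.g. "a dog sitting on a couch"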
from langchain.tools import BaseTool

class DescribeImageTool(BaseTool):
    name = "Describe Image Tool"
    description = 'use this tool to describe an image.'

    def _run(self, url: str):
        description = describeImage(url)
        return description

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
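# The agent invokes the tool itself; for a quick manual check, something like
#   DescribeImageTool().run("./photos/dog.jpg")   # hypothetical path
# should return the same caption as describeImage().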
tools = [DescribeImageTool()]

# Conversational ReAct agent that can call the image-description tool and keeps
# the last 5 exchanges in a sliding-window conversation memory.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)
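# Example query mirroring how the Gradio handler below calls the agent
# (hypothetical image path):
#   agent("What is in this picture?:\n./photos/dog.jpg")['output']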
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def enToChinese(english):
    """Translate an English sentence to Chinese with a simple LLM chain."""
    pp = "Please translate the following sentence from English to Chinese:{english}"
    prompt = PromptTemplate(
        input_variables=["english"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(english)
def chToEnglish(chinese):
    """Translate a Chinese sentence to English with a simple LLM chain."""
    pp = "Please translate the following sentence from Chinese to English:{chinese}"
    prompt = PromptTemplate(
        input_variables=["chinese"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(chinese)
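# Example (hypothetical sentence):
#   enToChinese("a dog sitting on a couch")  # -> the Chinese translation of the sentence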
import gradio as gr

def segment(image, text):
    """Gradio handler: describe the uploaded image and answer the question in Chinese."""
    print(image)
    image_url = image  # gr.Image(type="filepath") hands the handler a local file path
    # text = chToEnglish(text)  # optionally translate a Chinese question to English first
    print(text)
    # The agent receives "<question>:\n<image path>", calls the describe tool as needed,
    # and its English answer is translated back to Chinese for display.
    return enToChinese(agent(f"{text}:\n{image_url}").get('output'))
demo = gr.Interface(
    fn=segment,
    inputs=[
        gr.Image(type="filepath", shape=(200, 200), label="Please select an image"),
        gr.components.Textbox(label="Please enter a question"),
    ],
    outputs=[gr.components.Textbox(label="Answer", lines=4)],
)
demo.launch()
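# launch() serves the app locally (http://127.0.0.1:7860 by default); pass
# share=True to demo.launch() if a temporary public link is needed.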