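# Hugging Face Space: a LangChain agent that describes images with BLIP
# and answers questions about them, behind a Chinese-language Gradio UI.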
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.chat_models import AzureChatOpenAI
from langchain.llms import OpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import os

# Azure OpenAI credentials are read from environment variables.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
DEP_NAME = os.getenv("deployment_name")
# Chat LLM served from an Azure OpenAI deployment.
llm = AzureChatOpenAI(deployment_name=DEP_NAME, openai_api_base=OPENAI_API_BASE,
                      openai_api_key=OPENAI_API_KEY, openai_api_version="2023-03-15-preview",
                      model_name="gpt-3.5-turbo")
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
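# Load the BLIP image-captioning model, on GPU when one is available.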
image_to_text_model = "Salesforce/blip-image-captioning-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
from PIL import Image
def describeImage(image_url):
    # Despite the parameter name, Gradio supplies a local file path here.
    image_object = Image.open(image_url).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
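# Illustrative usage (path and caption are hypothetical; output varies by image):
#   describeImage("photos/dog.jpg")  ->  "a dog sitting on a couch"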
from langchain.tools import BaseTool

class DescribeImageTool(BaseTool):
    # The name and description tell the agent when to call this tool.
    name: str = "Describe Image Tool"
    description: str = 'use this tool to describe an image.'

    def _run(self, url: str):
        return describeImage(url)

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
tools = [DescribeImageTool()]
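# Conversational ReAct agent: at most 3 tool iterations, 5-turn windowed memory.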
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)
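# Illustrative call (the question and path are hypothetical): the agent sees
# "question:\n<image path>" and invokes DescribeImageTool on the path, e.g.
#   agent("What is in this picture?:\n/tmp/cat.jpg")['output']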
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
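# Translation helpers: one-prompt LLMChains over the same Azure LLM.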
def enToChinese(english):
    # Translate an English sentence to Chinese.
    pp = "Please translate the following sentence from English to Chinese:{english}"
    prompt = PromptTemplate(
        input_variables=["english"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(english)
def chToEnglish(chinese):
    # Translate a Chinese question to English so the agent reasons in English.
    pp = "Please translate the following sentence from Chinese to English:{chinese}"
    prompt = PromptTemplate(
        input_variables=["chinese"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(chinese)
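# Illustrative round trip (actual model output may differ):
#   chToEnglish("这是什么？")  ->  "What is this?"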
import gradio as gr
def image_to_txt(image_url, user_input):
    # Translate the Chinese question to English, run the agent on the
    # question plus the image path, then translate the answer back.
    user_input = chToEnglish(user_input)
    return enToChinese(agent(f"{user_input}:\n{image_url}")['output'])
with gr.Blocks() as demo:
    image_url = gr.Image(type="filepath", label="请选择一张图片")  # "Please select an image"
    user_input = gr.Textbox(placeholder="请输入问题..", lines=1, label="问题")  # "Enter a question.." / "Question"
    submit_btn = gr.Button('确认', variant="primary")  # "Confirm"
    output = gr.Textbox(placeholder="", lines=1, label="回答")  # "Answer"
    submit_btn.click(image_to_txt, inputs=[image_url, user_input], outputs=output)

demo.launch()
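# demo.launch() starts the Gradio server; the three environment variables
# read at the top must be set before the app starts.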