|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
from langchain import ConversationChain, LLMChain |
|
|
|
from langchain.agents import load_tools, initialize_agent, Tool |
|
from langchain.chains.conversation.memory import ConversationBufferMemory |
|
from langchain.llms import OpenAI |
|
from langchain.utilities import ImunAPIWrapper |
|
|
|
# Cap the completion length for every LLM call made by this demo.
MAX_TOKENS = 512

# temperature=0 keeps the agent deterministic so the demo is reproducible.
llm = OpenAI(
    temperature=0,
    max_tokens=MAX_TOKENS,
)

# Built-in tools: program-aided math ("pal-math") plus image understanding ("imun").
tool_names = ["pal-math", "imun"]
tools = load_tools(tool_names, llm=llm)

# Conversation buffer shared by the agents constructed below.
memory = ConversationBufferMemory(memory_key="chat_history")
|
|
|
|
|
|
|
|
|
|
|
|
|
# Both recognizers hit the same Azure Form Recognizer resource and share one
# subscription key, read once from the environment.
_OCR_API_VERSION = "api-version=2022-08-31"
_OCR_KEY = os.environ["IMUN_OCR_SUBSCRIPTION_KEY"]

# General text/handwriting OCR (prebuilt "read" model).
imun_read = ImunAPIWrapper(
    imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-read:analyze",
    params=_OCR_API_VERSION,
    imun_subscription_key=_OCR_KEY,
)

# Receipt-specific OCR (prebuilt "receipt" model): items, prices, totals.
imun_ocr = ImunAPIWrapper(
    imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-receipt:analyze",
    params=_OCR_API_VERSION,
    imun_subscription_key=_OCR_KEY,
)
|
|
|
|
|
# Extra OCR tools exposed to the agent. The `description` text is what the LLM
# reads when deciding which tool to invoke, so its wording matters.
#
# Fix: the implicitly concatenated description strings were missing trailing
# spaces, producing fused sentences like "...image tags.This tool can..." in
# the prompt. Each sentence now ends with a separating space.
extra_tools = [
    Tool(
        name="OCR Understanding",
        func=imun_read.run,
        description=(
            "A wrapper around OCR Understanding (Optical Character Recognition). "
            "Useful after Image Understanding tool has found text or handwriting is present in the image tags. "
            "This tool can find the actual text, written name, or product name. "
            "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
        ),
    ),
    Tool(
        name="Receipt Understanding",
        func=imun_ocr.run,
        description=(
            "A wrapper receipt understanding. "
            "Useful after Image Understanding tool has recognized a receipt in the image tags. "
            "This tool can find the actual receipt text, prices and detailed items. "
            "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
        ),
    ),
]
|
|
|
# Conversational ReAct agent combining the built-in tools with the OCR tools;
# verbose=True echoes the agent's intermediate reasoning steps.
chain = initialize_agent(
    tools + extra_tools,
    llm,
    agent="conversational-react-description",
    verbose=True,
    memory=memory,
)
|
|
|
|
|
|
|
# --- Demo 1: receipt OCR followed by a question about the total. ---
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/receipt1.png")
print(output)

output = chain.run("how much is the total cost?")
print(output)

# --- Demo 2: medicine-label OCR and follow-up questions. ---
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/pillbottlefront.png")
print(output)

output = chain.run("what does the medicine label say?")
print(output)

# Fix: this result was previously discarded; print it like every other turn.
output = chain.run("Is this medicine good for heart?")
print(output)

# --- Demo 3: street-sign OCR. ---
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/sign1.png")
print(output)

output = chain.run("what does the sign read?")
print(output)

# Fix: removed a stray `bz` token that raised NameError and aborted the script here.
|
|
|
|
|
# Rebuild the agent (same tool set, same shared memory buffer) for the math demo.
chain = initialize_agent(
    tools + extra_tools,
    llm,
    agent="conversational-react-description",
    verbose=True,
    memory=memory,
)

# OCR the handwritten-math image, then ask the agent (via pal-math) to evaluate it.
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/math1.png")
print(output)

output = chain.run("what is the result of the first text in that image")
print(output)
|
|
|
|
|
|
|
|
|
|
|
# Tagging demo: register an image under an explicit name ("Image_1"),
# then ask about the tags the imun tool found in it.
answer = chain.run(input="Image_1 https://i.ibb.co/61hS5kh/noodle-ball.jpg")
print(answer)

answer = chain.run(input="what tags are in the image")
print(answer)

# Counting demo against a football photo.
answer = chain.run(input="https://tinyurl.com/footballtestimun1")
print(answer)

answer = chain.run("how many players are in the image")
print(answer)
|
|
|
|
|
# Exercise the default image-understanding wrapper directly, bypassing the agent.
imun = ImunAPIWrapper()
print(imun.run("https://i.ibb.co/61hS5kh/noodle-ball.jpg"))
|
|
|
|
|
# Text-only turns: a plain chat question, a pal-math word problem, and an
# image-summarization request with the URL embedded in the prompt.
# Fix: all three results were computed and silently discarded; they are now
# printed, consistent with every other demo turn in this script.
text = "what is the meaning of life"
output = chain.run(input=text)
print(output)

text = "if I have two red balls and a blue ball, with blue balls half as heavy as the red balls. How many more blue balls do I need to have equal weight blue and red balls"
output = chain.run(input=text)
print(output)

text = "summarize what you see in this image https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Football_in_Bloomington%2C_Indiana%2C_1996.jpg/1920px-Football_in_Bloomington%2C_Indiana%2C_1996.jpg"
output = chain.run(input=text)
print(output)
|
|