File size: 4,947 Bytes
acc4ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# example call script
# https://dev.azure.com/visionbio/objectdetection/_git/objectdetection?path=/verify/langimg.py&version=GBehazar/langchain&_a=contents
#
# Demo driver: builds a conversational langchain agent wired to Azure image
# understanding (imun) and Azure Form Recognizer OCR/receipt endpoints, then
# walks it through a series of image + follow-up-question prompts.

### Set up environment variables:
#
# export IMUN_URL="https://cognitivewudev.azure-api.net/computervision/imageanalysis:analyze"
# export IMUN_SUBSCRIPTION_KEY=a*
# export IMUN_OCR_SUBSCRIPTION_KEY=a*      # used by the Form Recognizer wrappers below
# export IMUN_PARAMS="api-version=2023-02-01-preview&model-version=latest&features=denseCaptions,Tags"
# export OPENAI_API_KEY=sk-*
#

import os
from langchain import ConversationChain, LLMChain

from langchain.agents import load_tools, initialize_agent, Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.utilities import ImunAPIWrapper

# Completion-length cap applied to every LLM call in this demo.
MAX_TOKENS = 512

# temperature=0 keeps the agent's tool-selection deterministic for the demo.
llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)

# Built-in tools: PAL math solver + the default imun image-understanding tool
# (configured via the IMUN_* environment variables documented above).
tool_names = ['pal-math', 'imun']
tools = load_tools(tool_names, llm=llm)

# Shared conversation memory so follow-up questions can reference prior turns.
memory = ConversationBufferMemory(memory_key="chat_history")

# Alternative read endpoint (Cognitive Services image-analysis Read feature),
# kept for reference:
# imun_read = ImunAPIWrapper(
#     imun_url="https://cognitivewudev.azure-api.net/computervision/imageanalysis:analyze",
#     params="api-version=2023-02-01-preview&model-version=latest&features=Read",
#     imun_subscription_key=os.environ["IMUN_SUBSCRIPTION_KEY2"])

# General OCR via the Form Recognizer prebuilt-read model.
imun_read = ImunAPIWrapper(
    imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-read:analyze",
    params="api-version=2022-08-31",
    imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

# Receipt-specific extraction via the Form Recognizer prebuilt-receipt model.
imun_ocr = ImunAPIWrapper(
    imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-receipt:analyze",
    params="api-version=2022-08-31",
    imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

# these tools should not step on each other's toes
extra_tools = [
    Tool(
        name = "OCR Understanding",
        func=imun_read.run,
        description=(
        "A wrapper around OCR Understanding (Optical Character Recognition). "
        "Useful after Image Understanding tool has found text or handwriting is present in the image tags."
        "This tool can find the actual text, written name, or product name."
        "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
        )
    ),
    Tool(
        name = "Receipt Understanding",
        func=imun_ocr.run,
        description=(
        "A wrapper receipt understanding. "
        "Useful after Image Understanding tool has recognized a receipt in the image tags."
        "This tool can find the actual receipt text, prices and detailed items."
        "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
        )
    ),
]

chain = initialize_agent(tools + extra_tools, llm, agent="conversational-react-description", verbose=True, memory=memory)

# Receipt demo: image first, then a follow-up grounded in the OCR result.
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/receipt1.png")
print(output)
output = chain.run("how much is the total cost?")
print(output)

# Medicine-label demo.
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/pillbottlefront.png")
print(output)
output = chain.run("what does the medicine label say?")
print(output)
output = chain.run("Is this medicine good for heart?")
print(output)  # was silently dropped before — print for consistency with the other runs


# Street-sign demo.
output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/sign1.png")
print(output)
output = chain.run("what does the sign read?")
print(output)

# NOTE: a stray `bz` token sat here and raised NameError, aborting the script
# before the math demo below ever ran. Removed.

# Re-initialize the agent with the same (shared) memory for the math demo.
chain = initialize_agent(tools + extra_tools, llm, agent="conversational-react-description", verbose=True, memory=memory)

output = chain.run("/data/images/langchain_demo/DemoTest/DemoTest/math1.png")
print(output)

output = chain.run("what is the result of the first text in that image")
print(output)

# output = chain.run(input="what is the meaning of life")

# output = chain.run(input="if I have two red balls and a blue ball, with blue balls half as heavy as the red balls. How many more blue balls do I need to have equal weight blue and red balls")

output = chain.run(input="Image_1 https://i.ibb.co/61hS5kh/noodle-ball.jpg")
print(output)

output = chain.run(input="what tags are in the image")
print(output)

output = chain.run(input="https://tinyurl.com/footballtestimun1")
print(output)

output = chain.run("how many players are in the image")
print(output)

# To run imun as a tool (standalone, outside the agent)
imun = ImunAPIWrapper()
print(imun.run("https://i.ibb.co/61hS5kh/noodle-ball.jpg"))


# Free-form text prompts (no image) exercising the agent's plain-LLM path.
text = "what is the meaning of life"
output = chain.run(input=text)
print(output)

text = "if I have two red balls and a blue ball, with blue balls half as heavy as the red balls. How many more blue balls do I need to have equal weight blue and red balls"
output = chain.run(input=text)
print(output)

text = "summarize what you see in this image https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Football_in_Bloomington%2C_Indiana%2C_1996.jpg/1920px-Football_in_Bloomington%2C_Indiana%2C_1996.jpg"
output = chain.run(input=text)
print(output)