JunJiaGuo commited on
Commit
dd5b9d2
1 Parent(s): 7d723fe

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/demo.jpeg filter=lfs diff=lfs merge=lfs -text
37
+ assets/langloc.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Python script
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.9'
20
+
21
+ - name: Install Gradio
22
+ run: python -m pip install gradio
23
+
24
+ - name: Log in to Hugging Face
25
+ run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
+
27
+ - name: Deploy to Spaces
28
+ run: gradio deploy
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.pt
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
111
+ .pdm.toml
112
+ .pdm-python
113
+ .pdm-build/
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+
165
+ # Gradio
166
+ .gradio
.pre-commit-config.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://pre-commit.com for more information
2
+ # See https://pre-commit.com/hooks.html for more hooks
3
+ repos:
4
+ - repo: https://github.com/pre-commit/pre-commit-hooks
5
+ rev: v3.2.0
6
+ hooks:
7
+ - id: trailing-whitespace
8
+ - id: end-of-file-fixer
9
+ - id: check-yaml
10
+ - id: check-added-large-files
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,58 @@
1
  ---
2
- title: Lang Ground
3
- emoji: 💻
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.8.0
8
- app_file: app.py
9
- pinned: false
10
  ---
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: lang-ground
3
+ app_file: app_langloc.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.8.0
 
 
6
  ---
7
+ # Language Grounding
8
 
9
+ Localizing and continuously tracking objects from a natural
10
+ language specification is appealing, but it remains challenging due to the scarcity of large-scale annotated datasets.
11
+
12
+ This repository provides a practical solution for identifying relevant objects in view and continuously tracking them, which is particularly beneficial for robotics applications.
13
+
14
+ We offer several demos that showcase the following functionalities:
15
+
16
+ ## Language Localize
17
+ Identify and locate objects based on natural language queries.
18
+
19
+ Try our demo to experience an interesting use case: When analyzing food items, the model demonstrates contextual understanding:
20
+ - For food nearing expiration: Suggests storing in the cabinet
21
+ - For expired food: Recommends disposal in the trash can
22
+
23
+ ![langloc](/assets/langloc.jpg)
24
+
25
+
26
+
27
+
28
+ ## **Lang Ground (Localize + Track):**
29
+
30
+ Not only finds but also continuously tracks objects of interest.
31
+
32
+ We prioritize making this project highly accessible and customizable:
33
+
34
+ - **Open Box Design:** All components are modular and well-documented for easy understanding
35
+ - **Customizable Pipeline:** Easily adapt the system for different use cases
36
+ - **Extensible Framework:** Simple integration with other vision or language models
37
+
38
+
39
+ ## 🛠️ Install
40
+
41
+ ```bash
42
+ git clone https://github.com/jing-bi/lang-ground.git && cd lang-ground
43
+
44
+ mamba create -n lang-ground python=3.11
45
+ mamba activate lang-ground
46
+
47
+ pip install -e .
48
+ ```
49
+ ## Acknowledgments
50
+
51
+ This project is built upon and inspired by the following repositories:
52
+
53
+ - [Segment-Anything](https://github.com/facebookresearch/segment-anything-2)
54
+ - [Supervision](https://github.com/roboflow/supervision)
55
+
56
+ ## License
57
+
58
+ This project is licensed under the Apache 2.0 License
app_langloc.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langground import LangGround, text_palette
3
+
4
+
5
+ state = {"loc_model": None, "llm_model": None, "model": None}
6
+
7
+
8
def load_model(loc_model: str, llm_model: str) -> LangGround:
    """Return a LangGround pipeline for the requested model pair, reusing the cached one when possible.

    The module-level ``state`` dict remembers the last (loc_model, llm_model)
    pair and the pipeline built for it; a new pipeline is constructed only
    when the requested pair differs from the remembered one.

    Args:
        loc_model: Name of the localization model (forwarded to LangGround).
        llm_model: Name of the language model (forwarded to LangGround).

    Returns:
        The pipeline stored in ``state["model"]`` after any reload.
    """
    requested = (loc_model, llm_model)
    cached = (state["loc_model"], state["llm_model"])
    if requested != cached:
        gr.Info("Loading models...", duration=5)
        pipeline = LangGround(loc_model=loc_model, llm_model=llm_model)
        # State is only touched once construction has succeeded.
        state.update({"model": pipeline, "loc_model": loc_model, "llm_model": llm_model})
        gr.Info("Models loaded!", duration=2.5)
    return state["model"]
14
+
15
+
16
def predict(frame, question: str, threshold: float, loc_model: str, llm_model: str):
    """Gradio click handler: run language-guided localization on one frame.

    Args:
        frame: The uploaded image, or None when nothing was uploaded.
        question: Free-text query; blank or None is treated as missing.
        threshold: Detection threshold forwarded to ``model.localize``.
        loc_model: Localization model name, forwarded to ``load_model``.
        llm_model: Language model name, forwarded to ``load_model``.

    Returns:
        ``("", None, None)`` after showing a warning when an input is missing;
        otherwise whatever ``model.localize`` returns.
    """
    # Compare against None explicitly: truth-testing an array-like image
    # (e.g. a NumPy frame) raises instead of answering "is it missing?".
    has_question = bool((question or "").strip())
    if frame is None or not has_question:
        gr.Warning("Please provide both an image and a question")
        return "", None, None

    model = load_model(loc_model, llm_model)
    return model.localize(frame, question, threshold=threshold)
23
+
24
+
25
# HTML banner rendered at the top of the demo page.
# The bold element is closed with </b>; the original repeated the opening
# tag, which left the element unterminated.
title = """
<center>

<h1> 🔍 Language Localization </h1>
<b> Upload an image and ask questions to find objects in it. </b>

</center>
"""

# Custom CSS handed to gr.Blocks: size cap for ".my-group" and flex centering for ".my-column".
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
36
+
37
+ with gr.Blocks(css=css) as demo:
38
+ gr.HTML(title)
39
+
40
+ with gr.Row():
41
+
42
+ with gr.Column(scale=1):
43
+ frame_input = gr.Image(type="pil", label="Upload Frame")
44
+
45
+ with gr.Column(scale=1):
46
+ with gr.Row():
47
+ with gr.Column(scale=1):
48
+
49
+ loc_model_input = gr.Dropdown(
50
+ choices=["yolo", "owl"],
51
+ value="yolo",
52
+ label="Localization Model",
53
+ )
54
+ with gr.Column(scale=2):
55
+
56
+ llm_model_input = gr.Dropdown(
57
+ choices=[
58
+ "Qwen/Qwen2.5-7B-Instruct",
59
+ "OpenGVLab/InternVL2_5-8B",
60
+ "OpenGVLab/InternVL2_5-4B",
61
+ "OpenGVLab/InternVL2_5-2B",
62
+ "OpenGVLab/InternVL2_5-1B",
63
+ ],
64
+ value="Qwen/Qwen2.5-7B-Instruct",
65
+ label="LLM Model",
66
+ )
67
+ threshold_input = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Threshold")
68
+ question_input = gr.Textbox(lines=2, placeholder="Enter your question here", label="Question")
69
+ objs = gr.Highlightedtext(show_legend=False, show_inline_category=False, color_map=text_palette, label="Objects Found")
70
+ submit_btn = gr.Button("Submit")
71
+
72
+ with gr.Row():
73
+ all_bbox_image = gr.Image(label="Found Objects")
74
+ llm_bbox_image = gr.Image(label="Selected Objects")
75
+
76
+ submit_btn.click(
77
+ fn=predict,
78
+ inputs=[frame_input, question_input, threshold_input, loc_model_input, llm_model_input],
79
+ outputs=[objs, all_bbox_image, llm_bbox_image],
80
+ )
81
+ examples = gr.Examples(
82
+ examples=[
83
+ ["assets/demo.jpeg", "I'm thirsty"],
84
+ ["assets/kitchen.webp", "The food has expired and is no longer safe to eat."],
85
+ ["assets/kitchen.webp", "The food is about to expire."],
86
+ ],
87
+ inputs=[frame_input, question_input],
88
+ )
89
+ if __name__ == "__main__":
90
+ demo.launch()
assets/demo.jpeg ADDED

Git LFS Details

  • SHA256: 77c27ad998c49397d866bbe926600336d9dc96c081905389fe35948764b38d20
  • Pointer size: 132 Bytes
  • Size of remote file: 4.59 MB
assets/kitchen.webp ADDED
assets/langloc.jpg ADDED
assets/langloc.png ADDED

Git LFS Details

  • SHA256: ee4bb0da865a2be4a136d37b721f09df63b2d1a3c840087973b1ebb9f69c93c5
  • Pointer size: 132 Bytes
  • Size of remote file: 4.9 MB
example.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from langground import LangGround
import cv2

# Minimal usage example: load one image and ask the pipeline to localize
# objects matching a natural-language request.
image = cv2.imread('./assets/demo.jpeg')
# cv2.imread reports failure by returning None instead of raising, so fail
# loudly here rather than passing None into the model.
if image is None:
    raise FileNotFoundError("Could not read './assets/demo.jpeg'; run this script from the repository root")
lg = LangGround()
lg.localize(image, "i'm thirsty")
langground/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .orch import LangGround
2
+ from .utils import text_palette
langground/llm.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import tenacity
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
5
+
6
+
7
class LLM:
    """Wrapper around a Hugging Face chat model that picks which candidate objects fit a query.

    The loading and generation path is chosen by substring match on
    ``model_id``: ``"meta-llama"``, then ``"InternVL"`` (loaded with
    ``trust_remote_code=True``), and otherwise a default
    ``AutoModelForCausalLM`` path.
    """

    # A ```python ... ``` fenced block. After the language tag this accepts
    # either real whitespace/newlines or a literal backslash-n; the previous
    # pattern only matched the literal two-character form.
    _FENCED_LIST = re.compile(r"`{3}python\s*(?:\\n)?(.*?)`{3}", re.DOTALL)
    # Fallback: the first [...] span anywhere in the response.
    _BRACKETED_LIST = re.compile(r"\[(.*?)\]", re.DOTALL)
    # Translation table that deletes "_" and "-" from returned option names.
    _DROP_SEPARATORS = str.maketrans("", "", "_-")

    def __init__(self, model_id="Qwen/Qwen2.5-7B-Instruct"):
        """Load the tokenizer and model for ``model_id``.

        Args:
            model_id: Hugging Face model identifier.
        """
        self.model_id = model_id
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the model and tokenizer based on the model_id
        if "meta-llama" in self.model_id:
            self.tokenizer = AutoTokenizer.from_pretrained(model_id)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,
                device_map="auto",
            )
        elif "InternVL" in self.model_id:
            self.model = AutoModel.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
                device_map="auto",
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, use_fast=False)
        else:
            self.model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype="auto",
                device_map="auto",
            )
            self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def _chat(self, messages):
        """Apply the chat template to ``messages``, generate, and decode only the new tokens."""
        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
        generated_ids = self.model.generate(model_inputs.input_ids, max_new_tokens=512)
        # Drop the echoed prompt tokens so only the completion is decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    @torch.no_grad()
    def generate(self, query):
        """Return the model's text response to ``query``.

        Args:
            query: Prompt text sent as the user message.

        Returns:
            The decoded response string.
        """
        if "meta-llama" in self.model_id:
            messages = [
                {"role": "user", "content": [
                    {"type": "text", "text": f"{query}"}
                ]}
            ]
            return self._chat(messages)
        if "InternVL" in self.model_id:
            generation_config = dict(max_new_tokens=1024, do_sample=True)
            return self.model.chat(self.tokenizer, None, query, generation_config, history=None, return_history=False)
        messages = [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": query}]
        return self._chat(messages)

    @classmethod
    def _parse_options(cls, response):
        """Extract the list of option names from a model response.

        Looks for a fenced ``python`` block first, then for the first
        bracketed span. The list literal is parsed with ``ast.literal_eval``
        because the text is model-generated and must not be executed.

        Raises:
            ValueError: If no list-like span is found, or the literal is
                malformed (``SyntaxError`` is also possible for the latter).
        """
        import ast  # local import so the module's import block is untouched

        fenced = cls._FENCED_LIST.search(response)
        if fenced:
            literal = fenced.group(1)
        else:
            bracketed = cls._BRACKETED_LIST.search(response)
            if not bracketed:
                raise ValueError(f"Failed to parse response: {response}")
            literal = bracketed.group(0)  # keep the brackets so it parses as a list
        options = ast.literal_eval(literal.strip())
        return [option.translate(cls._DROP_SEPARATORS) for option in options]

    @tenacity.retry(stop=tenacity.stop_after_delay(10))
    def answer(self, query, objects):
        """Ask the model which of ``objects`` satisfy ``query``.

        Any exception (including a parse failure) triggers a retry until the
        10-second budget in the decorator is exhausted.

        Args:
            query: The user's request or intent.
            objects: Iterable of candidate object names (strings).

        Returns:
            List of selected option names with "_" and "-" removed.
        """
        query = f"""
Extract the object that satisfies the intent of the query or determine the tool that aligns with the purpose of {query}.
pick the best option from the following: {', '.join(objects)},
Please return a list of all suitable options as long as they make sense in the format of a Python list in the following format: ```python\n['option1', 'option2', ...]```"""
        res = self.generate(query)
        return self._parse_options(res)
langground/localizer.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from pathlib import Path
3
+ import torch
4
+ from transformers import Owlv2Processor, Owlv2ForObjectDetection
5
+ from PIL import Image
6
+ from pathlib import Path
7
+ from ultralytics import YOLO
8
+
9
+
10
def build_localizer(model_name):
    """Instantiate the localizer registered under ``model_name``.

    Args:
        model_name: ``"owl"`` or ``"yolo"``.

    Returns:
        A new ``OWL`` or ``YOLO11`` instance.

    Raises:
        ValueError: For any other name.
    """
    # Reject unknown names up front, then pick between the two known backends.
    if model_name != "owl" and model_name != "yolo":
        raise ValueError(f"Unknown model name: {model_name}")
    if model_name == "owl":
        return OWL()
    return YOLO11()
17
+
18
+
19
class OWL:
    """Open-vocabulary detector backed by OWLv2, queried with the labels listed in objs/owl.txt."""

    def __init__(self):
        """Load the OWLv2 processor/model and the label list.

        The model is placed on CUDA when available and on CPU otherwise, so
        construction no longer fails on hosts without a GPU.
        """
        model_name = "google/owlv2-large-patch14-ensemble"
        # Decide the device before it is first used.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.processor = Owlv2Processor.from_pretrained(model_name)
        self.model = Owlv2ForObjectDetection.from_pretrained(model_name).to(self.device)
        self.model.eval()
        self.objects_f = Path(__file__).parent / "objs" / "owl.txt"
        # Context manager so the label file handle is closed deterministically.
        with self.objects_f.open() as fh:
            self.objects = [line.strip() for line in fh]

    def localize(self, image, threshold=0.5):
        """Detect every known label in ``image``.

        Args:
            image: A PIL image, or an array accepted by ``Image.fromarray``.
            threshold: Score threshold passed to the processor's
                post-processing step.

        Returns:
            ``defaultdict(list)`` mapping label text to the list of boxes
            returned for that label.
        """
        # Accept PIL images as-is; only array input needs conversion.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        final = defaultdict(list)
        with torch.inference_mode():
            inputs = self.processor(text=self.objects, images=[image], return_tensors="pt").to(self.device)
            outputs = self.model(**inputs)
            # PIL reports (width, height); reversed here before being handed to post-processing.
            target_sizes = torch.Tensor([image.size[::-1]]).to(self.device)
            result = self.processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=threshold)[0]

        boxes, scores, labels = result["boxes"], result["scores"], result["labels"]
        for box, score, label in zip(boxes, scores, labels):
            final[self.objects[label]].append(box)
        return final
43
+
44
+
45
class YOLO11:
    """Closed-vocabulary detector backed by an Ultralytics YOLO checkpoint, with class names read from objs/yolo.txt."""

    def __init__(self):
        """Load the ``yolo11m.pt`` checkpoint and the class-name list."""
        model_name = "yolo11m.pt"
        self.model = YOLO(model_name)
        self.objects_f = Path(__file__).parent / "objs" / "yolo.txt"
        # Context manager so the label file handle is closed deterministically.
        with self.objects_f.open() as fh:
            self.objects = [line.strip() for line in fh]

    def localize(self, image, threshold=0.5):
        """Detect objects in ``image`` and group their boxes by class name.

        Args:
            image: Input forwarded unchanged to the YOLO model.
            threshold: Confidence threshold passed as ``conf``.

        Returns:
            ``defaultdict(list)`` mapping class name to a list of boxes taken
            from ``boxes.xyxy`` (as NumPy rows).
        """
        result = self.model(image, conf=threshold)[0]
        boxes = result.boxes
        class_ids = boxes.cls.cpu().numpy().astype(int)
        boxes_xyxy = boxes.xyxy.cpu().numpy()
        final = defaultdict(list)
        for class_id, box in zip(class_ids, boxes_xyxy):
            final[self.objects[class_id]].append(box)
        return final
langground/objs/owl.txt ADDED
@@ -0,0 +1,1680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wine glass
2
+ dumpster
3
+ lip balm
4
+ barge
5
+ sock
6
+ tricycle
7
+ napkin
8
+ bubble gum
9
+ speed limit sign
10
+ milestone
11
+ headphones
12
+ knocker (on a door)
13
+ urn
14
+ candy bar
15
+ flash
16
+ noddles
17
+ mint candy
18
+ nosebag (for animals)
19
+ ladder
20
+ boat
21
+ chickpea
22
+ razorblade
23
+ keycard
24
+ chisel
25
+ coaster
26
+ ice skate
27
+ sweater
28
+ onion
29
+ gift wrap
30
+ chocolate cake
31
+ woodpecker
32
+ bob
33
+ serving tray
34
+ flute
35
+ prune
36
+ thread
37
+ potato
38
+ cincture
39
+ crowbar
40
+ bottle cap
41
+ can
42
+ chain mail
43
+ table teniis paddle
44
+ boy
45
+ cassette
46
+ nutcracker
47
+ electric drill
48
+ typewriter
49
+ fish (food)
50
+ notebook
51
+ duck
52
+ studio couch
53
+ kite
54
+ baseball
55
+ trench coat
56
+ streetlight
57
+ coffeemaker
58
+ soup
59
+ muffin
60
+ humidifier
61
+ soap
62
+ dinning table
63
+ egg tart
64
+ fruit juice
65
+ mashed potato
66
+ target
67
+ lifejacket
68
+ bobby pin
69
+ honeycomb
70
+ harpsichord
71
+ taxi
72
+ refrigerator
73
+ signboard
74
+ hummus
75
+ matchbox
76
+ medicine
77
+ cast
78
+ carnivore
79
+ frog
80
+ newsstand
81
+ french toast
82
+ dishwasher
83
+ file cabinet
84
+ motor
85
+ puffer (fish)
86
+ apple
87
+ water tower
88
+ bonnet
89
+ headlight
90
+ trash bin can
91
+ wheelchair
92
+ roller skate
93
+ pan (metal container)
94
+ propeller
95
+ baked goods
96
+ hairnet
97
+ tire
98
+ smoothie
99
+ milk
100
+ recorder
101
+ hinge
102
+ winter melon
103
+ hiking equipment
104
+ bowling ball
105
+ curtain
106
+ cornbread
107
+ coffeepot
108
+ wallet/purse
109
+ license plate
110
+ baseball base
111
+ pomegranate
112
+ cabinet/shelf
113
+ fishbowl
114
+ carton
115
+ boot
116
+ thumbtack
117
+ harmonica
118
+ handle
119
+ combination lock
120
+ pea (food)
121
+ wooden leg
122
+ olive oil
123
+ plant
124
+ corded phone
125
+ tea pot
126
+ popsicle
127
+ pineapple
128
+ hummingbird
129
+ porch
130
+ kettle
131
+ pennant
132
+ statue (sculpture)
133
+ mammal
134
+ cleaning products
135
+ sugar bowl
136
+ limousine
137
+ reamer (juicer)
138
+ vacuum cleaner
139
+ cooler (for food)
140
+ ladle
141
+ fire hydrant
142
+ telephoto lens
143
+ bird
144
+ microphone
145
+ pig
146
+ game board
147
+ squirrel
148
+ trumpet
149
+ punching bag
150
+ notepad
151
+ clutch bag
152
+ billiard table
153
+ hotair ballon
154
+ wagon wheel
155
+ ambulance
156
+ maracas
157
+ drink
158
+ hoverboard
159
+ steak
160
+ cucumber
161
+ hair spray
162
+ fire truck
163
+ scorpion
164
+ violin
165
+ turnip
166
+ walking stick
167
+ cupboard
168
+ green bean
169
+ crabmeat
170
+ parachute
171
+ shot glass
172
+ sail
173
+ skirt
174
+ cayenne (spice)
175
+ water faucet
176
+ train (railroad vehicle)
177
+ wristlet
178
+ sushi
179
+ forklift
180
+ safety pin
181
+ iron (for clothing)
182
+ magnet
183
+ camera lens
184
+ fighter jet
185
+ strawberry
186
+ pouch
187
+ skull
188
+ bust
189
+ bronze sculpture
190
+ radish
191
+ platter
192
+ animal
193
+ footstool
194
+ toast (food)
195
+ date (fruit)
196
+ curling
197
+ chandelier
198
+ slow cooker
199
+ goldfish
200
+ handsaw
201
+ nightshirt
202
+ truffle (chocolate)
203
+ saddle (on an animal)
204
+ duct tape
205
+ water gun
206
+ fan
207
+ cosmetics
208
+ ladybug
209
+ celery
210
+ box
211
+ birdbath
212
+ hat
213
+ lime
214
+ person
215
+ tinsel
216
+ heart
217
+ golf ball
218
+ wine rack
219
+ yoke (animal equipment)
220
+ billards
221
+ gravy boat
222
+ hamster
223
+ sharpie
224
+ picture/frame
225
+ booklet
226
+ scooter
227
+ nest
228
+ arctic (type of shoe)
229
+ swimwear
230
+ flying disc
231
+ ruler
232
+ foal
233
+ grater
234
+ zebra
235
+ glove
236
+ elk
237
+ garbage truck
238
+ mushroon
239
+ tortilla
240
+ dairy
241
+ poncho
242
+ fudge
243
+ coat hanger
244
+ wet suit
245
+ human leg
246
+ hand towel
247
+ elevator car
248
+ tart
249
+ croissant
250
+ hairbrush
251
+ beeper
252
+ lion
253
+ flap
254
+ personal care
255
+ cornice
256
+ handkerchief
257
+ bean curd
258
+ cutting board
259
+ extractor
260
+ tree house
261
+ headset
262
+ seal
263
+ toothbrush
264
+ whisk
265
+ fire extinguisher
266
+ telephone
267
+ antelope
268
+ car
269
+ cider
270
+ pole
271
+ gelatin
272
+ measuring stick
273
+ shoulder bag
274
+ packet
275
+ sparkler (fireworks)
276
+ invertebrate
277
+ deer
278
+ hornet
279
+ drone
280
+ window blind
281
+ horizontal bar
282
+ fume hood
283
+ barrow
284
+ automatic washer
285
+ trombone
286
+ puncher
287
+ hamper
288
+ bathroom accessory
289
+ penny (coin)
290
+ mixer
291
+ apricot
292
+ inhaler
293
+ turtleneck (clothing)
294
+ computer monitor
295
+ belt
296
+ monkey
297
+ bullet train
298
+ tartan
299
+ cube
300
+ flute glass
301
+ dispenser
302
+ pickup truck
303
+ rabbit
304
+ icecream
305
+ skullcap
306
+ sunglasses
307
+ boom microphone
308
+ glasses
309
+ oboe
310
+ lollipop
311
+ musical instrument
312
+ bunk bed
313
+ handcart
314
+ flipper (footwear)
315
+ motor vehicle
316
+ shampoo
317
+ hourglass
318
+ road map
319
+ sword
320
+ walrus
321
+ gourd
322
+ glass (drink container)
323
+ traffic cone
324
+ cd
325
+ ram (animal)
326
+ teddy bear
327
+ bomb
328
+ mechanical fan
329
+ water ski
330
+ fedora
331
+ barrel
332
+ cosmetics brush/eyeliner pencil
333
+ bull
334
+ mouse
335
+ carnation
336
+ calendar
337
+ pitcher
338
+ clothes hamper
339
+ buttefly
340
+ armoire
341
+ cleat (for securing rope)
342
+ cougar
343
+ electric chair
344
+ tortoise
345
+ mammoth
346
+ tongs
347
+ chips
348
+ chaise longue
349
+ webcam
350
+ spider
351
+ hand dryer
352
+ canoe
353
+ tank top (clothing)
354
+ blender
355
+ taillight
356
+ leopard
357
+ stool
358
+ helicopter
359
+ keg
360
+ sausage
361
+ wristband
362
+ thermometer
363
+ stepladder
364
+ bolt
365
+ rearview mirror
366
+ axe
367
+ baseball glove
368
+ furniture
369
+ carrot
370
+ highchair
371
+ rib (food)
372
+ burrito
373
+ passenger ship
374
+ railcar (part of a train)
375
+ short pants
376
+ bowl
377
+ earphone
378
+ cabinet
379
+ light switch
380
+ washing machine/drying machine
381
+ domestic ass
382
+ golfcart
383
+ ostrich
384
+ mixing bowl
385
+ pop (soda)
386
+ flag
387
+ toilet
388
+ suv
389
+ medal
390
+ jeans
391
+ chopstick
392
+ ceiling fan
393
+ tape measure
394
+ reflector
395
+ bulletin board
396
+ turkey (food)
397
+ mat (gym equipment)
398
+ tow truck
399
+ cruise ship
400
+ sports uniform
401
+ clothing
402
+ control
403
+ bench
404
+ orange juice
405
+ clippers (for plants)
406
+ chopsticks
407
+ table
408
+ pressure cooker
409
+ doughnut
410
+ blackboard/whiteboard
411
+ pepper mill
412
+ trophy
413
+ cowbell
414
+ sandal (type of shoe)
415
+ hamimelon
416
+ corkboard
417
+ file (tool)
418
+ nailfile
419
+ cab (taxi)
420
+ head phone
421
+ pinwheel
422
+ briefcase
423
+ birthday card
424
+ barbell
425
+ necklace
426
+ baseball cap
427
+ river boat
428
+ tux
429
+ book
430
+ cistern
431
+ cigar/cigarette
432
+ tennis
433
+ pencil sharpener
434
+ tableware
435
+ turkey
436
+ string cheese
437
+ musical keyboard
438
+ flowerpot
439
+ choker
440
+ rag doll
441
+ dropper
442
+ deck chair
443
+ horse
444
+ bandanna
445
+ lettuce
446
+ steering wheel
447
+ log
448
+ snowplow
449
+ television
450
+ bamboo
451
+ cork (bottle plug)
452
+ cocktail shaker
453
+ chinaware
454
+ corset
455
+ tripod
456
+ reptile
457
+ radio receiver
458
+ peeler (tool for fruit and vegetables)
459
+ diving board
460
+ kitchen appliance
461
+ banana
462
+ calf
463
+ coconut
464
+ filing cabinet
465
+ poker (fire stirring tool)
466
+ birdcage
467
+ green onion
468
+ beanie
469
+ birthday cake
470
+ tea bag
471
+ tarp
472
+ lamb (animal)
473
+ wedding ring
474
+ baby buggy
475
+ laptop
476
+ tree
477
+ palm tree
478
+ wind chime
479
+ segway
480
+ step stool
481
+ bicycle
482
+ hockey stick
483
+ pita (bread)
484
+ crab
485
+ saxophone
486
+ gazelle
487
+ bathtub
488
+ robe
489
+ brake light
490
+ painting
491
+ giraffe
492
+ postbox (public)
493
+ gull
494
+ sunhat
495
+ shaver (electric)
496
+ melon
497
+ bathrobe
498
+ soccer ball
499
+ barrel/bucket
500
+ doorknob
501
+ curling iron
502
+ newspaper
503
+ comb
504
+ soup bowl
505
+ dress hat
506
+ scale (measuring instrument)
507
+ human nose
508
+ postcard
509
+ convenience store
510
+ pantyhose
511
+ nail
512
+ koala
513
+ stationary bicycle
514
+ chap
515
+ seafood
516
+ ski
517
+ chicken
518
+ knob
519
+ rose
520
+ gargoyle
521
+ red panda
522
+ toothpaste
523
+ legging (clothing)
524
+ bulldog
525
+ amplifier
526
+ colander
527
+ drill
528
+ suit (clothing)
529
+ tap
530
+ hair curler
531
+ cover
532
+ washbasin
533
+ ratchet
534
+ tablet
535
+ cream pitcher
536
+ dog bed
537
+ subwoofer
538
+ hamburger
539
+ microwave
540
+ lipstick
541
+ picture frame
542
+ scoreboard
543
+ centipede
544
+ asparagus
545
+ dog
546
+ building
547
+ spatula
548
+ cone
549
+ dental floss
550
+ container
551
+ medical equipment
552
+ funnel
553
+ car (automobile)
554
+ training bench
555
+ pocket watch
556
+ megaphone
557
+ bathroom cabinet
558
+ thermos bottle
559
+ crucifix
560
+ shotgun
561
+ insect
562
+ rhinoceros
563
+ popcorn
564
+ crisp (potato chip)
565
+ kitchen sink
566
+ wrench
567
+ bedpan
568
+ cricket ball
569
+ folding chair
570
+ coin
571
+ rubber band
572
+ minivan
573
+ sherbert
574
+ bakset
575
+ cutting/chopping board
576
+ cigarette
577
+ woman
578
+ suit
579
+ pliers
580
+ yogurt
581
+ crape
582
+ beef (food)
583
+ cream
584
+ jumpsuit
585
+ cornet
586
+ steak (food)
587
+ gameboard
588
+ bath mat
589
+ bidet
590
+ door handle
591
+ desk
592
+ campel
593
+ common fig
594
+ wedding cake
595
+ bib
596
+ kitchen knife
597
+ basketball backboard
598
+ skating and skiing shoes
599
+ swimsuit
600
+ tong
601
+ chime
602
+ thermostat
603
+ tablet computer
604
+ passport
605
+ cowboy hat
606
+ whipped cream
607
+ frisbee
608
+ basket
609
+ blazer
610
+ lemonade
611
+ ship
612
+ bouquet
613
+ stretcher
614
+ red cabbage
615
+ other balls
616
+ shaving cream
617
+ battery
618
+ piano
619
+ peanut butter
620
+ gorilla
621
+ falcon
622
+ fashion accessory
623
+ buoy
624
+ ski boot
625
+ ballon
626
+ pendulum
627
+ windshield wiper
628
+ drinking straw
629
+ gargle
630
+ pen/pencil
631
+ eggbeater
632
+ artichoke
633
+ scallop
634
+ cattle
635
+ high heels
636
+ strainer
637
+ race car
638
+ formula 1
639
+ yak
640
+ earplug
641
+ noseband (for animals)
642
+ nuts
643
+ toiletry
644
+ bow tie
645
+ bread
646
+ juice
647
+ faucet
648
+ pizza
649
+ dollhouse
650
+ air conditioner
651
+ nightstand
652
+ overalls (clothing)
653
+ cello
654
+ chair
655
+ easel
656
+ swan
657
+ footwear
658
+ paper towel
659
+ bow and arrow
660
+ chicken (animal)
661
+ futon
662
+ router (computer equipment)
663
+ sofa bed
664
+ orange/tangerine
665
+ salt and pepper shakers
666
+ strap
667
+ toaster
668
+ pew (church bench)
669
+ worm
670
+ beaker
671
+ wolf
672
+ sneakers
673
+ map
674
+ jar
675
+ stirrup
676
+ ferry
677
+ luggage and bags
678
+ canteen
679
+ toilet paper
680
+ candle holder
681
+ blackboard
682
+ cheetah
683
+ card
684
+ batter (food)
685
+ thimble
686
+ bat
687
+ lighthouse
688
+ slipper (footwear)
689
+ cooker
690
+ brassiere
691
+ starfish
692
+ human head
693
+ coleslaw
694
+ parchment
695
+ shoe
696
+ bus (vehicle)
697
+ polo shirt
698
+ pigeon
699
+ mule
700
+ ice cream
701
+ lizard
702
+ parking meter
703
+ fast food
704
+ grinder
705
+ soya milk
706
+ pitchfork
707
+ gasmask
708
+ pitcher (vessel for liquid)
709
+ human beard
710
+ visor
711
+ telephone pole
712
+ jewelry
713
+ chocolate milk
714
+ salami
715
+ candy cane
716
+ bicycle wheel
717
+ grill
718
+ waffle iron
719
+ shelf
720
+ jean
721
+ grapefruit
722
+ pipe bowl
723
+ bell pepper
724
+ human foot
725
+ perfume
726
+ carriage
727
+ fruit
728
+ checkbook
729
+ orange
730
+ tequila
731
+ hatbox
732
+ magpie
733
+ cornmeal
734
+ police cruiser
735
+ stroller
736
+ joystick
737
+ neckerchief
738
+ computer keyboard
739
+ swing
740
+ compass
741
+ bullhorn
742
+ chili (vegetable)
743
+ alligator
744
+ silo
745
+ shirt
746
+ corn
747
+ shower cap
748
+ racket
749
+ camel
750
+ flower
751
+ cocoa (beverage)
752
+ dollar
753
+ mailbox (at home)
754
+ tool
755
+ lamp
756
+ mousepad
757
+ spectacles
758
+ handgun
759
+ lifesaver
760
+ maple
761
+ sink
762
+ pan (for cooking)
763
+ rocking chair
764
+ vulture
765
+ sandal
766
+ school bus
767
+ cap (headwear)
768
+ scarf
769
+ handbag/satchel
770
+ convertible (automobile)
771
+ igniter
772
+ rays and skates
773
+ isopod
774
+ dinosaur
775
+ hardback book
776
+ telephone booth
777
+ cosmetics mirror
778
+ cleansing agent
779
+ rice
780
+ power shovel
781
+ projector
782
+ crutch
783
+ toolbox
784
+ puffin
785
+ cloak
786
+ mandarin orange
787
+ windmill
788
+ softball
789
+ checkerboard
790
+ squid
791
+ armor
792
+ treadmill
793
+ vinegar
794
+ puppy
795
+ paintbrush
796
+ wrench
797
+ tie
798
+ watch
799
+ weathervane
800
+ urinal
801
+ fork
802
+ turban
803
+ headstall (for horses)
804
+ rollerblade
805
+ volleyball
806
+ salsa
807
+ boots
808
+ ring binder
809
+ kilt
810
+ black sheep
811
+ ice pack
812
+ clothespin
813
+ flamingo
814
+ water cooler
815
+ fig (fruit)
816
+ stylus
817
+ tape (sticky cloth or paper)
818
+ crown
819
+ cupcake
820
+ raincoat
821
+ cracker
822
+ cocktail
823
+ ham
824
+ tray
825
+ tambourine
826
+ lightning rod
827
+ missile
828
+ bowl/basin
829
+ towel rack
830
+ crib
831
+ cabin car
832
+ can opener
833
+ magazine
834
+ hair dryer
835
+ tin can
836
+ human ear
837
+ dinghy
838
+ human hand
839
+ zucchini
840
+ ice maker
841
+ van
842
+ plumbing fixture
843
+ bell
844
+ sombrero
845
+ mound (baseball)
846
+ water jug
847
+ egg roll
848
+ notepaper
849
+ fox
850
+ remote control
851
+ straw (for drinking)
852
+ egg yolk
853
+ quiche
854
+ ginger
855
+ motor scooter
856
+ raccoon
857
+ toothpick
858
+ measuring cup
859
+ cape
860
+ rocket
861
+ washing machine
862
+ scissors
863
+ shawl
864
+ food
865
+ sailboat
866
+ street sign
867
+ canned
868
+ clip
869
+ lobster
870
+ coil
871
+ stew
872
+ waste container
873
+ duckling
874
+ hippopotamus
875
+ bobbin
876
+ squash
877
+ cannon
878
+ jug
879
+ cauliflower
880
+ timer
881
+ honey
882
+ whiteboard
883
+ baguet
884
+ pirate flag
885
+ dog collar
886
+ castle
887
+ butter
888
+ goat
889
+ brownie
890
+ television set
891
+ solar array
892
+ paper plate
893
+ ashtray
894
+ lampshade
895
+ trash can
896
+ cake
897
+ paint brush
898
+ dumbbell
899
+ armchair
900
+ fish
901
+ ironing board
902
+ deadbolt
903
+ awning
904
+ banjo
905
+ coffee cup
906
+ eagle
907
+ bridal gown
908
+ sun hat
909
+ girl
910
+ shorts
911
+ crate
912
+ root beer
913
+ elephant
914
+ bracelet
915
+ computer mouse
916
+ table tennis racket
917
+ apron
918
+ airplane
919
+ hookah
920
+ beer can
921
+ green vegetables
922
+ tractor (farm equipment)
923
+ flower arrangement
924
+ bun
925
+ showerhead
926
+ band-aid
927
+ bagel
928
+ pancake
929
+ countertop
930
+ fleece
931
+ human hair
932
+ digital clock
933
+ anklet
934
+ dumpling
935
+ kiwi fruit
936
+ vending machine
937
+ sandwich
938
+ cub (animal)
939
+ manger
940
+ skateboard
941
+ beachball
942
+ watercraft
943
+ salmon (fish)
944
+ bolo tie
945
+ shredder (for paper)
946
+ die
947
+ bow (weapon)
948
+ trophy cup
949
+ hot-air balloon
950
+ sweat pants
951
+ adhesive tape
952
+ atomizer
953
+ parrot
954
+ suspenders
955
+ mallard
956
+ salmon (food)
957
+ paper cutter
958
+ saucer
959
+ moths and butterflies
960
+ snake
961
+ cabinetry
962
+ board eraser
963
+ brass plaque
964
+ wooden spoon
965
+ tennis racket
966
+ vehicle registration plate
967
+ shepherd dog
968
+ boiled egg
969
+ soupspoon
970
+ tinfoil
971
+ chocolate mousse
972
+ mop
973
+ hot sauce
974
+ wallet
975
+ projectile (weapon)
976
+ otter
977
+ egg
978
+ kitten
979
+ cantaloupe
980
+ mouse (computer equipment)
981
+ pin (non jewelry)
982
+ fire alarm
983
+ brush
984
+ lego
985
+ waffle
986
+ land vehicle
987
+ wok
988
+ tissue
989
+ bass horn
990
+ gag
991
+ mascot
992
+ tennis ball
993
+ towel
994
+ antenna
995
+ pajamas
996
+ raven
997
+ shield
998
+ eggplant
999
+ mast
1000
+ bandage
1001
+ cooking spray
1002
+ drum
1003
+ alarm clock
1004
+ martini
1005
+ cabbage
1006
+ saucepan
1007
+ pen
1008
+ auto part
1009
+ lanyard
1010
+ wig
1011
+ syringe
1012
+ vegetable
1013
+ shopping cart
1014
+ avocado
1015
+ pumpkin
1016
+ wardrobe
1017
+ puppet
1018
+ palette
1019
+ wall socket
1020
+ chopping board
1021
+ pretzel
1022
+ window box (for plants)
1023
+ fountain
1024
+ american football
1025
+ pepper
1026
+ bucket
1027
+ harmonium
1028
+ brown bear
1029
+ marker
1030
+ salad plate
1031
+ underdrawers
1032
+ eel
1033
+ earring
1034
+ parakeet
1035
+ blue jay
1036
+ mango
1037
+ accordion
1038
+ coffee table
1039
+ fireplace
1040
+ mallet
1041
+ clock tower
1042
+ passenger car (part of a train)
1043
+ stuffed toy
1044
+ bagpipe
1045
+ infant bed
1046
+ key
1047
+ aircraft
1048
+ lynx
1049
+ jacuzzi
1050
+ cookie
1051
+ dish
1052
+ cigar box
1053
+ induction cooker
1054
+ jacket
1055
+ trailer truck
1056
+ stirrer
1057
+ hammer
1058
+ wine bottle
1059
+ basketball
1060
+ stove
1061
+ caterpillar
1062
+ human body
1063
+ horse carriage
1064
+ garlic
1065
+ door
1066
+ ant
1067
+ pudding
1068
+ penguin
1069
+ human mouth
1070
+ kitchenware
1071
+ sleeping bag
1072
+ eraser
1073
+ drumstick
1074
+ dining table
1075
+ playpen
1076
+ picnic basket
1077
+ knee pad
1078
+ beer
1079
+ microwave oven
1080
+ almond
1081
+ crocodile
1082
+ cymbal
1083
+ banner
1084
+ houseboat
1085
+ jelly bean
1086
+ cheese
1087
+ power plugs and sockets
1088
+ houseplant
1089
+ padlock
1090
+ runner (carpet)
1091
+ office supplies
1092
+ vase
1093
+ canary
1094
+ umbrella
1095
+ crescent roll
1096
+ tower
1097
+ snail
1098
+ radiator
1099
+ blinker
1100
+ skewer
1101
+ dress
1102
+ gun
1103
+ toy
1104
+ tablecloth
1105
+ meat ball
1106
+ gas stove
1107
+ truck
1108
+ pizza cutter
1109
+ dress suit
1110
+ latch
1111
+ gemstone
1112
+ wild bird
1113
+ other fish
1114
+ baseball bat
1115
+ garden hose
1116
+ trousers
1117
+ place mat
1118
+ applesauce
1119
+ ballet skirt
1120
+ remote
1121
+ traffic sign
1122
+ envelope
1123
+ billboard
1124
+ flask
1125
+ octopus (animal)
1126
+ poker card
1127
+ boxing glove
1128
+ binoculars
1129
+ crock pot
1130
+ paperback book
1131
+ swim cap
1132
+ bulletproof vest
1133
+ duffel bag
1134
+ cooking utensil
1135
+ crumb
1136
+ bookcase
1137
+ headboard
1138
+ nut
1139
+ hammock
1140
+ pillow
1141
+ baboon
1142
+ tachometer
1143
+ tiara
1144
+ snowman
1145
+ tag
1146
+ marine mammal
1147
+ teakettle
1148
+ parasail (sports)
1149
+ pastry
1150
+ tent
1151
+ golf club
1152
+ ferret
1153
+ salad
1154
+ power outlet
1155
+ blimp
1156
+ coat
1157
+ sports car
1158
+ candle
1159
+ bow (decorative ribbons)
1160
+ broom
1161
+ snack
1162
+ flagpole
1163
+ side table
1164
+ trampoline
1165
+ bowler hat
1166
+ indoor rower
1167
+ vehicle
1168
+ shellfish
1169
+ table tennis
1170
+ cargo ship
1171
+ pencil
1172
+ freshener
1173
+ heron
1174
+ fire engine
1175
+ persimmon
1176
+ pocketknife
1177
+ jet ski
1178
+ casserole
1179
+ ottoman
1180
+ machinery vehicle
1181
+ coloring material
1182
+ mobile phone
1183
+ goose
1184
+ tape
1185
+ record player
1186
+ bed
1187
+ phonograph record
1188
+ bible
1189
+ pug-dog
1190
+ spotlight
1191
+ chessboard
1192
+ bow-tie
1193
+ griddle
1194
+ armband
1195
+ cart
1196
+ leather shoes
1197
+ cat
1198
+ cock
1199
+ snowboard
1200
+ loveseat
1201
+ rat
1202
+ tick
1203
+ prawn
1204
+ walking cane
1205
+ teacup
1206
+ bead
1207
+ marine invertebrates
1208
+ handcuff
1209
+ bicycle helmet
1210
+ teapot
1211
+ table lamp
1212
+ canister
1213
+ rugby ball
1214
+ surveillance camera
1215
+ pacifier
1216
+ french fries
1217
+ cockroach
1218
+ comic book
1219
+ detergent
1220
+ cookies
1221
+ patty (food)
1222
+ ping-pong ball
1223
+ phonebook
1224
+ shopping bag
1225
+ cigarette case
1226
+ donkey
1227
+ coatrack
1228
+ coffee
1229
+ oil lamp
1230
+ raspberry
1231
+ stairs
1232
+ water bottle
1233
+ shaker
1234
+ cherry
1235
+ grits
1236
+ rifle
1237
+ poster
1238
+ cellular telephone
1239
+ ski parka
1240
+ surfboard
1241
+ pelican
1242
+ saddle blanket
1243
+ microscope
1244
+ willow
1245
+ blanket
1246
+ organ
1247
+ sweet potato
1248
+ wineglass
1249
+ oyster
1250
+ cushion
1251
+ radar
1252
+ whistle
1253
+ blouse
1254
+ lightbulb
1255
+ soap dispenser
1256
+ pie
1257
+ shower head
1258
+ dalmatian
1259
+ milk can
1260
+ gloves
1261
+ handbag
1262
+ seaplane
1263
+ scale
1264
+ fireplug
1265
+ crayon
1266
+ halter top
1267
+ award
1268
+ sea lion
1269
+ calculator
1270
+ street lights
1271
+ camper (vehicle)
1272
+ tapestry
1273
+ traffic light
1274
+ pencil case
1275
+ plume
1276
+ jam
1277
+ bait
1278
+ vat
1279
+ pet
1280
+ first-aid kit
1281
+ tobacco pipe
1282
+ sportswear
1283
+ bus
1284
+ parasol
1285
+ sponge
1286
+ french
1287
+ recliner
1288
+ wheel
1289
+ bottle opener
1290
+ armadillo
1291
+ shrimp
1292
+ bat (animal)
1293
+ aquarium
1294
+ knitting needle
1295
+ dartboard
1296
+ jellyfish
1297
+ milkshake
1298
+ cantaloup
1299
+ cell phone
1300
+ chalice
1301
+ dove
1302
+ life jacket
1303
+ dirt bike
1304
+ horse buggy
1305
+ baozi
1306
+ watermelon
1307
+ crosswalk sign
1308
+ edible corn
1309
+ pot
1310
+ mitten
1311
+ polar bear
1312
+ clock
1313
+ chocolate bar
1314
+ kitchen & dining room table
1315
+ papaya
1316
+ flip-flop (sandal)
1317
+ gondola (boat)
1318
+ wine bucket
1319
+ converter
1320
+ mug
1321
+ butterfly
1322
+ dresser
1323
+ pencil box
1324
+ army tank
1325
+ cash register
1326
+ rolling pin
1327
+ parka
1328
+ scrubbing brush
1329
+ tea
1330
+ bulldozer
1331
+ carpet
1332
+ vodka
1333
+ donut
1334
+ human face
1335
+ whale
1336
+ jeep
1337
+ lemon
1338
+ office building
1339
+ football (american)
1340
+ breechcloth
1341
+ stereo (sound system)
1342
+ crane
1343
+ aerosol can
1344
+ pasta
1345
+ green beans
1346
+ vent
1347
+ speaker (stero equipment)
1348
+ jaguar
1349
+ facial tissue holder
1350
+ blueberry
1351
+ wood-burning stove
1352
+ ball
1353
+ generator
1354
+ lab coat
1355
+ beer bottle
1356
+ necktie
1357
+ shark
1358
+ lavender
1359
+ blackberry
1360
+ hurdle
1361
+ bedspread
1362
+ plum
1363
+ keyboard
1364
+ grape
1365
+ cylinder
1366
+ roller skates
1367
+ tabasco sauce
1368
+ home plate (baseball)
1369
+ guitar
1370
+ rickshaw
1371
+ sports equipment
1372
+ extention cord
1373
+ orange (fruit)
1374
+ bottle
1375
+ human eye
1376
+ trunk
1377
+ skyscraper
1378
+ needle
1379
+ jewel
1380
+ shower
1381
+ sandals
1382
+ pool table
1383
+ man
1384
+ closet
1385
+ gravestone
1386
+ coverall
1387
+ sweatband
1388
+ horn
1389
+ globe
1390
+ seahorse
1391
+ helmet
1392
+ kitchen table
1393
+ balloon
1394
+ music stool
1395
+ lasagna
1396
+ guacamole
1397
+ spice rack
1398
+ legume
1399
+ sling (bandage)
1400
+ diaper
1401
+ coffee maker
1402
+ lamppost
1403
+ pinecone
1404
+ gondola
1405
+ owl
1406
+ luggage
1407
+ diary
1408
+ dessert
1409
+ okra
1410
+ crow
1411
+ tomato
1412
+ peach
1413
+ bear
1414
+ bee
1415
+ sewing machine
1416
+ shower curtain
1417
+ birdfeeder
1418
+ sweatshirt
1419
+ table-tennis table
1420
+ submarine
1421
+ water heater
1422
+ oar
1423
+ drawer
1424
+ balance beam
1425
+ slippers
1426
+ liquor
1427
+ dishtowel
1428
+ stapler (stapling machine)
1429
+ jersey
1430
+ wreath
1431
+ tape measur/ ruler
1432
+ trolley
1433
+ light bulb
1434
+ lantern
1435
+ raft
1436
+ identity card
1437
+ car battery
1438
+ sawhorse
1439
+ coffee machine
1440
+ camera
1441
+ triangle (musical instrument)
1442
+ oven
1443
+ rodent
1444
+ durian
1445
+ weapon
1446
+ doormat
1447
+ belt buckle
1448
+ figurine
1449
+ chest of drawers
1450
+ heavy truck
1451
+ storage box
1452
+ skiboard
1453
+ submarine sandwich
1454
+ tank
1455
+ mirror
1456
+ pickle
1457
+ ski pole
1458
+ bath towel
1459
+ wine
1460
+ window
1461
+ golf cart
1462
+ underwear
1463
+ water scooter
1464
+ stethoscope
1465
+ sea turtle
1466
+ alpaca
1467
+ seashell
1468
+ hanger
1469
+ alcohol
1470
+ clipboard
1471
+ sled
1472
+ plastic bag
1473
+ other shoes
1474
+ pipe
1475
+ dishwasher detergent
1476
+ chainsaw
1477
+ fire hose
1478
+ tuba
1479
+ manatee
1480
+ pottery
1481
+ camcorder
1482
+ swimming pool
1483
+ couch
1484
+ scarecrow
1485
+ bowling equipment
1486
+ lighter
1487
+ heater
1488
+ kayak
1489
+ leather
1490
+ folder
1491
+ lily
1492
+ flannel
1493
+ scraper
1494
+ crawfish
1495
+ knife
1496
+ ipod
1497
+ slide
1498
+ pistol
1499
+ tiger
1500
+ freight car
1501
+ fishing rod
1502
+ mattress
1503
+ kimono
1504
+ speaker
1505
+ costume
1506
+ rice cooker
1507
+ watering can
1508
+ goggles
1509
+ drum (musical instrument)
1510
+ pear
1511
+ wall clock
1512
+ sour cream
1513
+ pegboard
1514
+ masher
1515
+ hook
1516
+ printer
1517
+ sugarcane (plant)
1518
+ torch
1519
+ blinder (for horses)
1520
+ cufflink
1521
+ vest
1522
+ dishrag
1523
+ headscarf
1524
+ earrings
1525
+ brussels sprouts
1526
+ dragonfly
1527
+ porcupine
1528
+ hand glass
1529
+ mushroom
1530
+ cake stand
1531
+ christmas tree
1532
+ sparrow
1533
+ lamb-chop
1534
+ machine gun
1535
+ motorcycle
1536
+ unicycle
1537
+ broccoli
1538
+ saltshaker
1539
+ saddlebag
1540
+ hedgehog
1541
+ grocery bag
1542
+ pad
1543
+ seat belt
1544
+ squid (food)
1545
+ panda
1546
+ computer box
1547
+ spring rolls
1548
+ soccer
1549
+ miniskirt
1550
+ stop sign
1551
+ potted plant
1552
+ kennel
1553
+ stagecoach
1554
+ dustpan
1555
+ lawn mower
1556
+ giant panda
1557
+ beanbag
1558
+ mail slot
1559
+ satchel
1560
+ street light
1561
+ suitcase
1562
+ seabird
1563
+ beetle
1564
+ corkscrew
1565
+ crab (animal)
1566
+ escargot
1567
+ steak knife
1568
+ binder
1569
+ tights (clothing)
1570
+ food processor
1571
+ plate
1572
+ quilt
1573
+ button
1574
+ cow
1575
+ shovel
1576
+ plow (farm equipment)
1577
+ cat furniture
1578
+ toaster oven
1579
+ mask
1580
+ barrette
1581
+ stapler
1582
+ paddle
1583
+ jet plane
1584
+ mixer (kitchen tool)
1585
+ train
1586
+ locker
1587
+ ferris wheel
1588
+ television camera
1589
+ birdhouse
1590
+ flashlight
1591
+ grizzly
1592
+ dagger
1593
+ tassel
1594
+ sheep
1595
+ fax
1596
+ condiment
1597
+ cappuccino
1598
+ dixie cup
1599
+ candy
1600
+ sculpture
1601
+ kitchen utensil
1602
+ crossbar
1603
+ sofa
1604
+ quesadilla
1605
+ beret
1606
+ cue
1607
+ bread-bin
1608
+ frying pan
1609
+ beehive
1610
+ clasp
1611
+ spear
1612
+ turtle
1613
+ football
1614
+ taco
1615
+ clementine
1616
+ ring
1617
+ doll
1618
+ tank (storage vessel)
1619
+ space shuttle
1620
+ manhole
1621
+ octopus (food)
1622
+ hotplate
1623
+ eclair
1624
+ videotape
1625
+ kangaroo
1626
+ piggy bank
1627
+ sunflower
1628
+ pony
1629
+ paperweight
1630
+ cup
1631
+ band aid
1632
+ clarinet
1633
+ garbage
1634
+ hog
1635
+ yacht
1636
+ meatball
1637
+ inkpad
1638
+ eyepatch
1639
+ face powder
1640
+ house
1641
+ hose
1642
+ tote bag
1643
+ cardigan
1644
+ veil
1645
+ wagon
1646
+ headband
1647
+ harbor seal
1648
+ cassette deck
1649
+ snowmobile
1650
+ screwdriver
1651
+ cd player
1652
+ dice
1653
+ skunk
1654
+ omelet
1655
+ router/modem
1656
+ bookmark
1657
+ backpack
1658
+ cabana
1659
+ laptop computer
1660
+ poker chip
1661
+ life buoy
1662
+ potholder
1663
+ human arm
1664
+ football helmet
1665
+ broach
1666
+ sharpener
1667
+ hot dog
1668
+ harp
1669
+ hairpin
1670
+ windsock
1671
+ crouton
1672
+ receipt
1673
+ business card
1674
+ dolphin
1675
+ spoon
1676
+ shears
1677
+ dish antenna
1678
+ money
1679
+ home appliance
1680
+ ax
langground/objs/yolo.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ person
2
+ bicycle
3
+ car
4
+ motorcycle
5
+ airplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ couch
59
+ potted plant
60
+ bed
61
+ dining table
62
+ toilet
63
+ tv
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
langground/orch.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .localizer import build_localizer
2
+ from .llm import LLM
3
+ from .utils import image_w_box
4
+ import numpy as np
5
+
6
class LangGround:
    """Couples an object localizer with an LLM that selects detections relevant to a question."""

    def __init__(self, loc_model="owl", llm_model="Qwen/Qwen2.5-7B-Instruct"):
        """Build the localizer named ``loc_model`` and the LLM wrapper for ``llm_model``."""
        self.loc = build_localizer(loc_model)
        self.llm = LLM(llm_model)

    def localize(self, frame, question, **kwargs):
        """Detect objects in ``frame`` and keep the ones the LLM returns for ``question``.

        Args:
            frame: Image-like object; it is converted with ``np.array``.
            question: Passed unchanged to ``self.llm.answer``.
            **kwargs: Only ``threshold`` is read (default ``0.5``) and forwarded
                to the localizer.

        Returns:
            A tuple ``(texts, all_box_image, llm_box_image)`` where ``texts`` is a
            list of ``(label, str(position))`` pairs for the LLM-selected labels,
            ``all_box_image`` annotates every detection and ``llm_box_image``
            annotates only the selected ones.
        """
        pixels = np.array(frame)
        threshold = kwargs.get("threshold", 0.5)

        # The localizer result is used as a mapping (``.keys()`` / ``.items()``).
        detections = self.loc.localize(pixels, threshold)
        chosen = self.llm.answer(question, detections.keys())

        kept = {}
        for name, boxes in detections.items():
            if name in chosen:
                kept[name] = boxes

        all_box_image = image_w_box(pixels, detections)
        llm_box_image = image_w_box(pixels, kept)

        # NOTE(review): the index is the position in the LLM answer, not the
        # class id image_w_box derives from sorted labels -- confirm the colours
        # are meant to line up.
        texts = [(name, str(pos)) for pos, name in enumerate(chosen)]
        return texts, all_box_image, llm_box_image
langground/utils.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import supervision as sv
3
+ import numpy as np
4
+ from torch import tensor
5
+ import cv2
6
+
7
# Ten pastel hex colours, repeated once so the palette is built from 20 entries.
colors = sv.ColorPalette.from_hex(
    [
        "#a1c9f4",
        "#ffb482",
        "#8de5a1",
        "#ff9f9b",
        "#d0bbff",
        "#debb9b",
        "#fab0e4",
        "#cfcfcf",
        "#fffea3",
        "#b9f2f0",
    ]
    * 2
)

# Maps "0".."49" to a hex colour string from the palette.
# NOTE(review): presumably by_idx wraps around past the palette length -- confirm.
text_palette = dict((str(i), colors.by_idx(i).as_hex()) for i in range(50))
33
+
34
+
35
def image_w_box(image,objxbox):
    """Draw corner boxes, labels and translucent masks for every detection.

    Args:
        image: RGB image. An ``np.ndarray`` is used as-is; anything else
            (e.g. a ``PIL.Image``) is converted with ``np.asarray``.
        objxbox: Mapping ``label -> iterable of boxes``. Each box is an
            ``(x1, y1, x2, y2)`` tensor, array or plain sequence in pixels.

    Returns:
        The (array form of the) input image when there is nothing to draw,
        otherwise a new annotated RGB ``np.ndarray``.
    """
    image = np.asarray(image)

    # One label per box, in the same order the boxes are flattened below.
    labels = [label for label, boxes in objxbox.items() for _ in boxes]
    if not labels:
        # Nothing to annotate; bail out before allocating masks or annotators.
        return image

    xyxys = np.array(
        [
            box.tolist() if hasattr(box, "tolist") else list(box)
            for boxes in objxbox.values()
            for box in boxes
        ],
        dtype=float,
    )

    # Class id = position of the label among the sorted labels (dict lookup
    # instead of a repeated list.index scan).
    class_index = {label: idx for idx, label in enumerate(sorted(objxbox.keys()))}
    class_id = [class_index[label] for label in labels]

    # Filled rectangular mask per box. Negative coordinates are clamped to 0,
    # otherwise they would be interpreted as "from the end" slice indices.
    masks = np.zeros((len(xyxys), image.shape[0], image.shape[1]), dtype=bool)
    for i, (x1, y1, x2, y2) in enumerate(xyxys):
        masks[i, max(int(y1), 0):max(int(y2), 0), max(int(x1), 0):max(int(x2), 0)] = True

    detections = sv.Detections(
        xyxy=xyxys,
        mask=masks,
        class_id=np.array(class_id),
    )

    box_annotator = sv.BoxCornerAnnotator(thickness=10, corner_length=30, color=colors)
    label_annotator = sv.LabelAnnotator(color=colors)
    mask_annotator = sv.MaskAnnotator(opacity=0.2, color=colors)

    # Convert RGB to BGR for annotation
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    annotated_image = box_annotator.annotate(scene=image_bgr.copy(), detections=detections)
    annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)
    annotated_image = mask_annotator.annotate(scene=annotated_image, detections=detections)

    # After annotation, convert back to RGB
    return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
66
+
67
+
68
def image_w_box_cv2(image, objxbox):
    """Draw plain OpenCV rectangles and text labels for every detection.

    Args:
        image: ``np.ndarray`` image; it is copied, never modified in place.
        objxbox: Mapping ``label -> iterable of boxes``. Each box is an
            ``(x1, y1, x2, y2)`` tensor, array or plain sequence in pixels.

    Returns:
        A copy of ``image`` with the boxes and labels drawn on it.

    Raises:
        ValueError: If ``image`` is not a NumPy array.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError("Input image must be a NumPy array.")

    image_copy = image.copy()

    font = cv2.FONT_HERSHEY_SIMPLEX

    # Only the first two dimensions are needed, so 2-D images work too.
    height, width = image.shape[:2]
    font_scale = max(0.5, min(width, height) / 1000)
    font_thickness = max(1, int(font_scale * 2))

    for label, boxes in objxbox.items():
        label_text = f"{label}"
        (text_width, text_height), baseline = cv2.getTextSize(
            label_text, font, font_scale, font_thickness
        )

        for box in boxes:
            coords = box.tolist() if hasattr(box, "tolist") else box
            x1, y1, x2, y2 = map(int, coords)

            cv2.rectangle(image_copy, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            # The label sits directly above the box; when there is no room
            # above (box touches the top edge) it is drawn just inside instead.
            label_top = y1 - text_height - baseline
            if label_top < 0:
                label_top = y1
            label_bottom = label_top + text_height + baseline

            cv2.rectangle(
                image_copy,
                (x1, label_top),
                (x1 + text_width, label_bottom),
                color=(255, 255, 255),
                thickness=-1,
            )

            cv2.putText(
                image_copy,
                label_text,
                (x1, label_bottom - baseline),
                font,
                font_scale,
                color=(0, 0, 255),
                thickness=font_thickness,
                lineType=cv2.LINE_AA,
            )

    return image_copy
113
+
114
if __name__ == '__main__':
    # Manual smoke test: annotate the demo picture with a fixed set of detections.
    # image_w_box reads ``image.shape`` and returns an ndarray, so the PIL image
    # is converted to an array going in and back to PIL for display.
    image = np.array(Image.open("assets/demo.jpeg").convert("RGB"))
    # CPU tensors: the sample boxes do not need a GPU to be drawn.
    objxbox = {
        'computer monitor': [tensor([ 169.5367, 301.8970, 3045.2866, 2145.4736])],
        'lamp': [tensor([3400.5979, 981.1383, 4102.7178, 2417.0103])],
        'kettle': [tensor([4435.6953, 1981.3882, 5318.8530, 2972.8535])],
        'table': [tensor([3108.2896, 2602.6494, 5795.3037, 4201.5000])],
        'business card': [tensor([ 751.5681, 2817.4629, 945.1781, 2976.9883])],
        'dog': [
            tensor([2155.5217, 2504.7114, 2562.2791, 3173.9731]),
            tensor([1013.7704, 2669.0864, 1560.3319, 3452.0579]),
        ],
        'inkpad': [tensor([ 755.5402, 2983.9380, 962.8440, 3176.2158])],
        'mouse': [tensor([2752.5286, 3038.9062, 3046.8740, 3297.1704])],
        'tray': [tensor([3314.1667, 2722.6509, 4805.7476, 3684.2314])],
        'computer keyboard': [tensor([ 203.7615, 2907.8442, 737.0474, 3416.8616])],
        'laptop': [tensor([ 525.8097, 2439.1343, 2882.1917, 4261.9614])],
        'keyboard': [tensor([ 659.9836, 3511.1763, 2828.9368, 4271.0059])],
        'cookie': [tensor([4638.1128, 3625.8831, 5082.5796, 4013.4021])],
    }
    Image.fromarray(image_w_box(image, objxbox)).show()
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "langground"
7
+ version = "0.1.0"
8
+ description = "Use natural language to ground relevant things."
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "Jing Bi", email = "jbi5@ur.rochester.edu"},
12
+ {name = "Guangyu Sun", email = "guangyu@ucf.edu"}
13
+ ]
14
+ requires-python = ">=3.8"
15
+ dynamic = ["dependencies"]
16
+
17
+ [tool.setuptools.dynamic]
18
+ dependencies = {file = ["requirements.txt"]}
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["."]
22
+ include = ["langground", "langground.*"]
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ opencv-python
4
+ tenacity
5
+ accelerate
6
+ pillow
7
+ scipy
8
+ gradio
9
+ supervision
10
+ ultralytics
11
+ einops
12
+ timm