Mileena taesiri committed on
Commit
5b0b9f1
·
0 Parent(s):

Duplicate from taesiri/CLIPxGamePhysics

Browse files

Co-authored-by: taesiri <taesiri@users.noreply.huggingface.co>

Files changed (5) hide show
  1. .gitattributes +27 -0
  2. README.md +39 -0
  3. SimSearch.py +46 -0
  4. app.py +253 -0
  5. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CLIP GamePhysics
3
+ emoji: 🚚
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.0.5
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: taesiri/CLIPxGamePhysics
11
+ ---
12
+
13
+ # Configuration
14
+
15
+ `title`: _string_
16
+ Display title for the Space
17
+
18
+ `emoji`: _string_
19
+ Space emoji (only emoji characters are allowed)
20
+
21
+ `colorFrom`: _string_
22
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
23
+
24
+ `colorTo`: _string_
25
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
26
+
27
+ `sdk`: _string_
28
+ Can be either `gradio` or `streamlit`
29
+
30
+ `sdk_version`: _string_
31
+ Only applicable for `streamlit` SDK.
32
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
33
+
34
+ `app_file`: _string_
35
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
36
+ Path is relative to the root of the repository.
37
+
38
+ `pinned`: _boolean_
39
+ Whether the Space stays on top of your list.
SimSearch.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+
4
class FaissNeighbors:
    """Exact L2 nearest-neighbour lookup backed by a flat FAISS index.

    Every indexed vector carries a label; queries return the labels of the
    closest indexed vectors.
    """

    def __init__(self):
        self.index = None  # FAISS index, created by fit()
        self.y = None  # labels, aligned row-for-row with the indexed vectors

    def fit(self, X, y):
        """Index the rows of ``X`` and remember their labels ``y``.

        Args:
            X: 2-D array with one vector per row (converted to float32).
            y: One label per row of ``X``.
        """
        self.index = faiss.IndexFlatL2(X.shape[1])
        self.index.add(X.astype(np.float32))
        # Kept as an array so it can be fancy-indexed with the 2-D index
        # matrix returned by search(), even when the caller passes a list.
        self.y = np.asarray(y)

    def _search(self, X, top_K):
        """Run the index search, never asking for more rows than are indexed."""
        if self.index is None:
            raise RuntimeError("fit() must be called before searching")
        # FAISS pads the result with index -1 when fewer than k vectors exist;
        # indexing self.y with -1 would silently return the *last* label.
        k = min(int(top_K), self.index.ntotal)
        return self.index.search(X.astype(np.float32), k=k)

    def get_distances_and_indices(self, X, top_K=1000):
        """Return ``(distances, indices, labels)`` for the nearest neighbours.

        Args:
            X: 2-D array of query vectors, one per row.
            top_K: Maximum number of neighbours per query; capped at the
                number of indexed vectors.

        Returns:
            Three arrays with one row per query.
        """
        distances, indices = self._search(X, top_K)
        return distances, indices, self.y[indices]

    def get_nearest_labels(self, X, top_K=1000):
        """Return only the labels of the nearest neighbours of each query row."""
        _, indices = self._search(X, top_K)
        return self.y[indices]
21
+
22
+
23
class FaissCosineNeighbors:
    """Cosine-similarity nearest-neighbour lookup backed by a flat FAISS index.

    Vectors are L2-normalised before being indexed or queried, so the inner
    product computed by the index equals the cosine similarity. The
    "distances" returned by the query methods are therefore similarities.
    """

    def __init__(self):
        self.cindex = None  # FAISS inner-product index, created by fit()
        self.y = None  # labels, aligned row-for-row with the indexed vectors

    @staticmethod
    def _unit_rows(vectors):
        """Return a fresh float32, C-contiguous copy of ``vectors`` with unit rows."""
        # Cast *before* normalising: faiss.normalize_L2 works in place on
        # float32 data, and the caller's array must not be modified.
        unit = np.array(vectors, dtype=np.float32, order="C")
        faiss.normalize_L2(unit)
        return unit

    def fit(self, X, y):
        """Index the normalised rows of ``X`` and remember their labels ``y``.

        Args:
            X: 2-D array with one vector per row.
            y: One label per row of ``X``.
        """
        self.cindex = faiss.index_factory(X.shape[1], "Flat", faiss.METRIC_INNER_PRODUCT)
        self.cindex.add(self._unit_rows(X))
        # Kept as an array so it can be fancy-indexed with the 2-D index
        # matrix returned by search(), even when the caller passes a list.
        self.y = np.asarray(y)

    def _search(self, Q, topK):
        """Normalise ``Q`` and search, never asking for more rows than are indexed."""
        if self.cindex is None:
            raise RuntimeError("fit() must be called before searching")
        # FAISS pads the result with index -1 when fewer than k vectors exist;
        # indexing self.y with -1 would silently return the *last* label.
        k = min(int(topK), self.cindex.ntotal)
        return self.cindex.search(self._unit_rows(Q), k=k)

    def get_distances_and_indices(self, Q, topK=1000):
        """Return ``(similarities, indices, labels)`` for the nearest neighbours.

        Args:
            Q: 2-D array of query vectors, one per row.
            topK: Maximum number of neighbours per query; capped at the
                number of indexed vectors.

        Returns:
            Three arrays with one row per query.
        """
        distances, indices = self._search(Q, topK)
        return distances, indices, self.y[indices]

    def get_nearest_labels(self, Q, topK=1000):
        """Return only the labels of the nearest neighbours of each query row."""
        _, indices = self._search(Q, topK)
        return self.y[indices]
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+ import pickle
4
+ import random
5
+ import sys
6
+ from collections import Counter
7
+ from glob import glob
8
+
9
+ import clip
10
+ import gdown
11
+ import gradio as gr
12
+ import numpy as np
13
+ import psutil
14
+ import torch
15
+ import torchvision
16
+ from datasets import load_dataset
17
+ from tqdm import tqdm
18
+
19
+ from SimSearch import FaissCosineNeighbors
20
+
21
# Lift the csv module's per-field size cap to the largest value it accepts here.
csv.field_size_limit(sys.maxsize)

# Fetch the embeddings archive; the download is cached and checked against its MD5.
gdown.cached_download(
    url="https://huggingface.co/datasets/taesiri/GTA_V_CLIP_Embeddings/resolve/main/mini-GTA-V-Embeddings.zip",
    path="./GTA-V-Embeddings.zip",
    md5="b1228503d5a89eef7e35e2cbf86b2fc0",
    quiet=False,
)

# Unpack the archive next to the app; the zip itself is kept on disk.
torchvision.datasets.utils.extract_archive(
    from_path="GTA-V-Embeddings.zip",
    to_path="Embeddings/VIT32/",
    remove_finished=False,
)

# Load the video dataset and map each video's base file name (without its
# extension) to the path of that video file.
gta_v_videos = load_dataset("taesiri/GamePhysics_Grand_Theft_Auto_V")
post_id_to_video_path = dict(
    (os.path.splitext(os.path.basename(x))[0], x)
    for x in gta_v_videos["Grand_Theft_Auto_V"][:]["video_file_path"]
)

# Queries the model names CLIP knows about; the return value is not used.
clip.available_models()
46
+
47
+
48
+ # Log runtime environment info
49
def log_runtime_information():
    """Print the CPU count, virtual-memory and swap-memory figures reported by psutil."""
    probes = (
        ("CPU Count", psutil.cpu_count),
        ("Virtual Memory", psutil.virtual_memory),
        ("Swap Memory", psutil.swap_memory),
    )
    # Each probe is called right before its line is printed, one after another.
    for label, probe in probes:
        print(f"{label}: {probe()}")
53
+
54
+
55
+ # # Searcher
56
class GamePhysicsSearcher:
    """Text-to-video search over the pre-computed CLIP embeddings of one game.

    Every ``*.npy`` file under ``<EMBEDDING_PATH><GAME_NAME>/`` holds the
    embeddings of one video (one row per embedded item - presumably a video
    frame; confirm against the embedding generator). All rows are put into a
    cosine-similarity index and labelled with the id of the file they came
    from, so a text query can be mapped back to videos.
    """

    SEARCH_MODES = ("Majority", "Top-K")

    def __init__(self, CLIP_MODEL, GAME_NAME, EMBEDDING_PATH="./Embeddings/VIT32/"):
        """Collect the embedding files of ``GAME_NAME`` and build the index.

        Args:
            CLIP_MODEL: Loaded CLIP model used to embed text queries.
            GAME_NAME: Name of the sub-directory holding the game's embeddings.
            EMBEDDING_PATH: Directory prefix (with trailing separator) that
                contains one sub-directory per game.
        """
        self.CLIP_MODEL = CLIP_MODEL
        self.GAME_NAME = GAME_NAME
        self.simsearcher = FaissCosineNeighbors()

        self.all_embeddings = glob(f"{EMBEDDING_PATH}{self.GAME_NAME}/*.npy")

        # The position of a file in all_embeddings is its class id.
        self.filenames = [os.path.basename(x) for x in self.all_embeddings]
        self.file_to_class_id = {x: i for i, x in enumerate(self.filenames)}
        self.class_id_to_file = dict(enumerate(self.filenames))
        self.build_index()

    def read_features(self, file_path):
        """Load the embedding matrix stored in ``file_path``."""
        # NOTE(review): despite the .npy suffix these files are read with
        # pickle. Unpickling executes arbitrary code, so only load archives
        # from a trusted, integrity-checked source.
        with open(file_path, "rb") as f:
            return pickle.load(f)

    def read_all_features(self):
        """Load every embedding file into one feature matrix.

        Returns:
            ``(X_train, y_train)``: the stacked embedding rows and, for each
            row, the class id (index into ``all_embeddings``) of its file.

        Raises:
            FileNotFoundError: If no embedding file was found for the game.
        """
        if not self.all_embeddings:
            raise FileNotFoundError(
                f"No embedding files (*.npy) found for game {self.GAME_NAME!r}"
            )

        feature_parts = []
        label_parts = []
        for class_id, vfile in enumerate(tqdm(self.all_embeddings)):
            vfeatures = np.asarray(self.read_features(vfile))
            feature_parts.append(vfeatures)
            label_parts.append(np.full(vfeatures.shape[0], class_id))

        # One concatenation instead of growing a Python list row by row.
        return np.concatenate(feature_parts), np.concatenate(label_parts)

    def build_index(self):
        """Fit the similarity index on all embeddings of the game."""
        X_train, y_train = self.read_all_features()
        self.simsearcher.fit(X_train, y_train)

    def text_to_vector(self, query):
        """Embed ``query`` with CLIP and return the unit-length vector(s) on the CPU."""
        # clip.load() places the model on CUDA when it is available, so the
        # tokens have to follow the model to whichever device it lives on.
        device = next(self.CLIP_MODEL.parameters()).device
        text_tokens = clip.tokenize(query).to(device)
        with torch.no_grad():
            text_features = self.CLIP_MODEL.encode_text(text_tokens).float()
            text_features /= text_features.norm(dim=-1, keepdim=True)
        # The index works on NumPy arrays, which need CPU memory.
        return text_features.cpu()

    def f7(self, seq):
        """Return the items of ``seq`` without duplicates, keeping first-seen order."""
        return list(dict.fromkeys(seq))

    def search_top_k(self, q, k=5, pool_size=1000, search_mod="Majority"):
        """Return the paths of the ``k`` videos that best match the text ``q``.

        Args:
            q: Text query.
            k: Maximum number of videos to return.
            pool_size: Number of nearest embeddings to retrieve before they
                are aggregated into videos.
            search_mod: ``"Majority"`` ranks videos by how many of their
                embeddings are in the pool; ``"Top-K"`` takes the first ``k``
                distinct videos in the order the index returned them.

        Returns:
            List of video file paths, best match first.

        Raises:
            ValueError: If ``search_mod`` is not a supported aggregator.
        """
        # Fail fast: previously an unknown (or unselected, i.e. None)
        # aggregator surfaced as an UnboundLocalError after the search.
        if search_mod not in self.SEARCH_MODES:
            raise ValueError(
                f"Unknown search_mod {search_mod!r}; expected one of {self.SEARCH_MODES}"
            )

        q = self.text_to_vector(q)
        nearest_labels = self.simsearcher.get_nearest_labels(q, int(pool_size))[0]

        if search_mod == "Majority":
            topKs = [label for label, _ in Counter(nearest_labels).most_common(k)]
        else:
            topKs = self.f7(nearest_labels)[:k]

        # Keys of post_id_to_video_path are file names without extension.
        return [
            post_id_to_video_path[os.path.splitext(self.class_id_to_file[x])[0]]
            for x in topKs
        ]
125
+
126
+
127
+ ################ SEARCH CORE ################
128
# Searchers built so far, keyed by "<game name>_<model name>".
saved_searchers = dict()

# Create the CLIP model and switch it to evaluation mode.
vit_model, vit_preprocess = clip.load("ViT-B/32")
vit_model.eval()
133
+
134
+
135
def gradio_search(query, game_name, selected_model, aggregator, pool_size, k=6):
    """Search the videos of ``game_name`` for ``query`` and format the hits for the UI.

    Args:
        query: Text query.
        game_name: Game whose embeddings are searched.
        selected_model: Name of the CLIP model; only ``"ViT-B/32"`` is supported.
        aggregator: Aggregation mode handed to the searcher as ``search_mod``.
        pool_size: Number of nearest embeddings retrieved before aggregation.
        k: Maximum number of videos to return. Callers that render a fixed
            number of result slots should pass a matching value.

    Returns:
        A flat list: the comma-joined search parameters, followed by a
        ``video path, HTML link`` pair for every video found.

    Raises:
        ValueError: If ``selected_model`` is not supported.
    """
    # A bare `raise` here used to fail with "No active exception to reraise".
    if selected_model != "ViT-B/32":
        raise ValueError(f"Unsupported model: {selected_model!r}")

    # Building a searcher loads and indexes every embedding, so reuse it.
    cache_key = f"{game_name}_{selected_model}"
    searcher = saved_searchers.get(cache_key)
    if searcher is None:
        searcher = GamePhysicsSearcher(CLIP_MODEL=vit_model, GAME_NAME=game_name)
        saved_searchers[cache_key] = searcher

    relevant_videos = searcher.search_top_k(
        query, k=k, pool_size=pool_size, search_mod=aggregator
    )

    params = ", ".join(
        map(str, [query, game_name, selected_model, aggregator, pool_size])
    )
    results = [params]

    for v in relevant_videos:
        results.append(v)
        # The video's base file name is used as the Reddit post id.
        sid = v.split("/")[-1].split(".")[0]
        results.append(
            f'<a href="https://www.reddit.com/r/GamePhysics/comments/{sid}/" target="_blank">Link to the post</a>'
        )

    # Count videos only; `results` also holds the params string and the links.
    print(f"found {len(relevant_videos)} results")
    return results
167
+
168
+
169
def main():
    """Build the Gradio demo around ``gradio_search`` and launch it."""
    list_of_games = ["Grand Theft Auto V"]
    model_name = "ViT-B/32"
    aggregators = ["Majority", "Top-K"]
    num_results = 5  # number of (video, link) slots shown in the UI

    def search(query, game_name, selected_model, aggregator, pool_size):
        # gradio_search defaults to k=6, which yields more values than there
        # are output components; request exactly as many as are rendered.
        return gradio_search(
            query, game_name, selected_model, aggregator, pool_size, k=num_results
        )

    outputs = [gr.Textbox(type="auto", label="Search Params")]
    for rank in range(1, num_results + 1):
        outputs.append(gr.Video(type="mp4", label=f"Result {rank}"))
        outputs.append(gr.Markdown())

    example_queries = [
        ("A red car", "Top-K"),
        ("A person wearing pink", "Top-K"),
        ("A car flying in the air", "Majority"),
        ("A person flying in the air", "Majority"),
        ("A car in vertical position", "Majority"),
        ("A bike inside a car", "Majority"),
        ("A bike on a wall", "Majority"),
        ("A car stuck in a rock", "Majority"),
        ("A car stuck in a tree", "Majority"),
    ]
    examples = [
        [text, list_of_games[0], model_name, aggregator, 1000]
        for text, aggregator in example_queries
    ]

    # GRADIO APP
    # The radios get a preselected value so that submitting the untouched
    # form does not pass None for the game, model or aggregator.
    demo = gr.Interface(
        fn=search,
        inputs=[
            gr.Textbox(
                lines=1,
                placeholder="Search Query",
                value="A person flying in the air",
                label="Query",
            ),
            gr.Radio(list_of_games, value=list_of_games[0], label="Game To Search"),
            gr.Radio([model_name], value=model_name, label="MODEL"),
            gr.Radio(aggregators, value=aggregators[0], label="Aggregator"),
            gr.Slider(300, 2000, label="Pool Size", value=1000),
        ],
        outputs=outputs,
        examples=examples,
    )

    blocks = gr.Blocks()
    with blocks:
        gr.Markdown(
            """
            # CLIP + GamePhysics - Searching dataset of Gameplay bugs

            This demo shows how to use the CLIP model to search for gameplay bugs in a video game.

            Enter your query and select the game you want to search for.
            """
        )

        gr.Markdown(
            """
            [Website](https://asgaardlab.github.io/CLIPxGamePhysics/) - [Paper](https://arxiv.org/abs/2203.11096)
            """
        )

        gr.TabbedInterface([demo], ["GTA V Demo"])

    blocks.launch(
        debug=True,
        enable_queue=True,
    )


if __name__ == "__main__":
    log_runtime_information()
    main()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ tqdm
4
+ gdown
5
+ pandas
6
+ clip @ git+https://github.com/openai/CLIP.git@573315e83f07b53a61ff5098757e8fc885f1703e
7
+ faiss-cpu==1.7.2
8
+ psutil
9
+ numpy