taesiri committed on
Commit
f5b5741
1 Parent(s): b0bbd84

using gdown as a downloader

Browse files
Files changed (2) hide show
  1. app.py +194 -150
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,97 +1,112 @@
1
- import numpy as np
2
- import torch
3
- from tqdm import tqdm
4
- import clip
5
- from glob import glob
6
- import gradio as gr
7
  import os
8
- import torchvision
9
  import pickle
10
  from collections import Counter
 
 
 
 
 
 
 
 
 
11
 
12
  from SimSearch import FaissCosineNeighbors
13
 
14
  # DOWNLOAD THE DATASET and Files
15
-
16
- torchvision.datasets.utils.download_file_from_google_drive('1kB1vNdVaNS1OGZ3K8BspBUKkPACCsnrG', '.', 'GTAV-Videos.zip')
17
- torchvision.datasets.utils.download_file_from_google_drive('1IF1ljcoFd31C-PA2SO8F5fEblDYf0Bw6', '.', 'GTAV-embedding-vit32.zip')
 
 
 
 
 
18
 
19
  # EXTRACT
20
- torchvision.datasets.utils.extract_archive(from_path='GTAV-embedding-vit32.zip', to_path='Embeddings/VIT32/', remove_finished=False)
21
- torchvision.datasets.utils.extract_archive(from_path='GTAV-Videos.zip', to_path='Videos/', remove_finished=False)
 
 
 
 
 
 
22
 
23
  # Initialize CLIP model
24
  clip.available_models()
25
 
26
  # # Searcher
27
-
28
  class GamePhysicsSearcher:
29
- def __init__(self, CLIP_MODEL, GAME_NAME, EMBEDDING_PATH='./Embeddings/VIT32/'):
30
- self.CLIP_MODEL = CLIP_MODEL
31
- self.GAME_NAME = GAME_NAME
32
- self.simsearcher = FaissCosineNeighbors()
33
-
34
- self.all_embeddings = glob(f'{EMBEDDING_PATH}{self.GAME_NAME}/*.npy')
35
-
36
- self.filenames = [os.path.basename(x) for x in self.all_embeddings]
37
- self.file_to_class_id = {x:i for i, x in enumerate(self.filenames)}
38
- self.class_id_to_file = {i:x for i, x in enumerate(self.filenames)}
39
- self.build_index()
40
-
41
- def read_features(self, file_path):
42
- with open(file_path, 'rb') as f:
43
- video_features = pickle.load(f)
44
- return video_features
45
-
46
- def read_all_features(self):
47
- features = {}
48
- filenames_extended = []
49
-
50
- X_train = []
51
- y_train = []
52
-
53
- for i, vfile in enumerate(tqdm(self.all_embeddings)):
54
- vfeatures = self.read_features(vfile)
55
- features[vfile.split('/')[-1]] = vfeatures
56
- X_train.extend(vfeatures)
57
- y_train.extend([i]*vfeatures.shape[0])
58
- filenames_extended.extend(vfeatures.shape[0]*[vfile.split('/')[-1]])
59
-
60
- X_train = np.asarray(X_train)
61
- y_train = np.asarray(y_train)
62
-
63
- return X_train, y_train
64
-
65
- def build_index(self):
66
- X_train, y_train = self.read_all_features()
67
- self.simsearcher.fit(X_train, y_train)
68
-
69
- def text_to_vector(self, query):
70
- text_tokens = clip.tokenize(query)
71
- with torch.no_grad():
72
- text_features = self.CLIP_MODEL.encode_text(text_tokens).float()
73
- text_features /= text_features.norm(dim=-1, keepdim=True)
74
- return text_features
75
-
76
- # Source: https://stackoverflow.com/a/480227
77
- def f7(self, seq):
78
- seen = set()
79
- seen_add = seen.add # This is for performance improvement, don't remove
80
- return [x for x in seq if not (x in seen or seen_add(x))]
81
-
82
- def search_top_k(self, q, k=5, pool_size=1000, search_mod='Majority'):
83
- q = self.text_to_vector(q)
84
- nearest_data_points = self.simsearcher.get_nearest_labels(q, pool_size)
85
-
86
- if search_mod == 'Majority':
87
- topKs = [x[0] for x in Counter(nearest_data_points[0]).most_common(k)]
88
- elif search_mod == 'Top-K':
89
- topKs = list(self.f7(nearest_data_points[0]))[:k]
90
-
91
- video_filename = [f'./Videos/{self.GAME_NAME}/' + self.class_id_to_file[x].replace('npy', 'mp4') for x in topKs]
92
-
93
- return video_filename
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
  ################ SEARCH CORE ################
@@ -100,77 +115,106 @@ vit_model, vit_preprocess = clip.load("ViT-B/32")
100
  vit_model.eval()
101
 
102
  saved_searchers = {}
 
 
103
  def gradio_search(query, game_name, selected_model, aggregator, pool_size, k=6):
104
- # print(query, game_name, selected_model, aggregator, pool_size)
105
- if f'{game_name}_{selected_model}' in saved_searchers.keys():
106
- searcher = saved_searchers[f'{game_name}_{selected_model}']
107
- else:
108
- if selected_model == 'ViT-B/32':
109
- model = vit_model
110
- searcher = GamePhysicsSearcher(CLIP_MODEL=model, GAME_NAME=game_name)
111
  else:
112
- raise
113
-
114
- saved_searchers[f'{game_name}_{selected_model}'] = searcher
115
-
116
- results = []
117
- relevant_videos = searcher.search_top_k(query, k=k, pool_size=pool_size, search_mod=aggregator)
118
-
119
-
120
- params = ', '.join(map(str, [query, game_name, selected_model, aggregator, pool_size]))
121
- results.append(params)
122
-
123
- for v in relevant_videos:
124
- results.append(v)
125
- sid = v.split('/')[-1].split('.')[0]
126
- results.append(f"https://www.reddit.com/r/GamePhysics/comments/{sid}/")
127
- return results
128
-
129
- list_of_games = ['Grand Theft Auto V']
130
-
131
-
132
- title = "CLIP + GamePhysics - Searching dataset of Gameplay bugs"
133
- description = "Enter your query and select the game you want to search. The results will be displayed in the console."
134
- article = """
135
- This demo shows how to use the CLIP model to search for gameplay bugs in a video game.
136
- """
137
-
138
- # GRADIO APP
139
- iface = gr.Interface(fn=gradio_search,
140
- inputs =[ gr.inputs.Textbox(lines=1, placeholder='Search Query', default="A person flying in the air", label=None),
141
- gr.inputs.Radio(list_of_games, label="Game To Search"),
142
- gr.inputs.Radio(['ViT-B/32'], label="MODEL"),
143
- gr.inputs.Radio(['Majority', 'Top-K'], label="Aggregator"),
144
- gr.inputs.Slider(300, 2000, label="Pool Size", default=1000),
145
- ],
146
- outputs=[
147
- gr.outputs.Textbox(type="auto", label='Search Params'),
148
- gr.outputs.Video(type='mp4', label='Result 1'),
149
- gr.outputs.Textbox(type="auto", label='Submission URL - Result 1'),
150
- gr.outputs.Video(type='mp4', label='Result 2'),
151
- gr.outputs.Textbox(type="auto", label='Submission URL - Result 2'),
152
- gr.outputs.Video(type='mp4', label='Result 3'),
153
- gr.outputs.Textbox(type="auto", label='Submission URL - Result 3'),
154
- gr.outputs.Video(type='mp4', label='Result 4'),
155
- gr.outputs.Textbox(type="auto", label='Submission URL - Result 4'),
156
- gr.outputs.Video(type='mp4', label='Result 5'),
157
- gr.outputs.Textbox(type="auto", label='Submission URL - Result 5'),
158
- ],
159
- examples=[
160
- ["A red car", list_of_games[0], 'ViT-B/32', 'Top-K', 1000],
161
- ["A person wearing pink", list_of_games[0], 'ViT-B/32', 'Top-K', 1000],
162
- ["A car flying in the air", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
163
- ["A person flying in the air", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
164
- ["A car in vertical position", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
165
- ["A bike inside a car", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
166
- ["A bike on a wall", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
167
- ["A car stuck in a rock", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
168
- ["A car stuck in a tree", list_of_games[0], 'ViT-B/32', 'Majority', 1000],
169
- ],
170
- title=title,
171
- description=description,
172
- article=article,
173
- enable_queue=True,
174
- )
175
-
176
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import pickle
3
  from collections import Counter
4
+ from glob import glob
5
+
6
+ import clip
7
+ import gdown
8
+ import gradio as gr
9
+ import numpy as np
10
+ import torch
11
+ import torchvision
12
+ from tqdm import tqdm
13
 
14
  from SimSearch import FaissCosineNeighbors
15
 
16
  # DOWNLOAD THE DATASET and Files
17
+ gdown.download(
18
+ id="1kB1vNdVaNS1OGZ3K8BspBUKkPACCsnrG", output="GTAV-Videos.zip", quiet=False
19
+ )
20
+ gdown.download(
21
+ id="1IF1ljcoFd31C-PA2SO8F5fEblDYf0Bw6",
22
+ output="GTAV-embedding-vit32.zip",
23
+ quiet=False,
24
+ )
25
 
26
  # EXTRACT
27
+ torchvision.datasets.utils.extract_archive(
28
+ from_path="GTAV-embedding-vit32.zip",
29
+ to_path="Embeddings/VIT32/",
30
+ remove_finished=False,
31
+ )
32
+ torchvision.datasets.utils.extract_archive(
33
+ from_path="GTAV-Videos.zip", to_path="Videos/", remove_finished=False
34
+ )
35
 
36
  # Initialize CLIP model
37
  clip.available_models()
38
 
39
  # # Searcher
 
40
  class GamePhysicsSearcher:
41
+ def __init__(self, CLIP_MODEL, GAME_NAME, EMBEDDING_PATH="./Embeddings/VIT32/"):
42
+ self.CLIP_MODEL = CLIP_MODEL
43
+ self.GAME_NAME = GAME_NAME
44
+ self.simsearcher = FaissCosineNeighbors()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ self.all_embeddings = glob(f"{EMBEDDING_PATH}{self.GAME_NAME}/*.npy")
47
+
48
+ self.filenames = [os.path.basename(x) for x in self.all_embeddings]
49
+ self.file_to_class_id = {x: i for i, x in enumerate(self.filenames)}
50
+ self.class_id_to_file = {i: x for i, x in enumerate(self.filenames)}
51
+ self.build_index()
52
+
53
+ def read_features(self, file_path):
54
+ with open(file_path, "rb") as f:
55
+ video_features = pickle.load(f)
56
+ return video_features
57
+
58
+ def read_all_features(self):
59
+ features = {}
60
+ filenames_extended = []
61
+
62
+ X_train = []
63
+ y_train = []
64
+
65
+ for i, vfile in enumerate(tqdm(self.all_embeddings)):
66
+ vfeatures = self.read_features(vfile)
67
+ features[vfile.split("/")[-1]] = vfeatures
68
+ X_train.extend(vfeatures)
69
+ y_train.extend([i] * vfeatures.shape[0])
70
+ filenames_extended.extend(vfeatures.shape[0] * [vfile.split("/")[-1]])
71
+
72
+ X_train = np.asarray(X_train)
73
+ y_train = np.asarray(y_train)
74
+
75
+ return X_train, y_train
76
+
77
+ def build_index(self):
78
+ X_train, y_train = self.read_all_features()
79
+ self.simsearcher.fit(X_train, y_train)
80
+
81
+ def text_to_vector(self, query):
82
+ text_tokens = clip.tokenize(query)
83
+ with torch.no_grad():
84
+ text_features = self.CLIP_MODEL.encode_text(text_tokens).float()
85
+ text_features /= text_features.norm(dim=-1, keepdim=True)
86
+ return text_features
87
+
88
+ # Source: https://stackoverflow.com/a/480227
89
+ def f7(self, seq):
90
+ seen = set()
91
+ seen_add = seen.add # This is for performance improvement, don't remove
92
+ return [x for x in seq if not (x in seen or seen_add(x))]
93
+
94
+ def search_top_k(self, q, k=5, pool_size=1000, search_mod="Majority"):
95
+ q = self.text_to_vector(q)
96
+ nearest_data_points = self.simsearcher.get_nearest_labels(q, pool_size)
97
+
98
+ if search_mod == "Majority":
99
+ topKs = [x[0] for x in Counter(nearest_data_points[0]).most_common(k)]
100
+ elif search_mod == "Top-K":
101
+ topKs = list(self.f7(nearest_data_points[0]))[:k]
102
+
103
+ video_filename = [
104
+ f"./Videos/{self.GAME_NAME}/"
105
+ + self.class_id_to_file[x].replace("npy", "mp4")
106
+ for x in topKs
107
+ ]
108
+
109
+ return video_filename
110
 
111
 
112
  ################ SEARCH CORE ################
115
  vit_model.eval()
116
 
117
  saved_searchers = {}
118
+
119
+
120
  def gradio_search(query, game_name, selected_model, aggregator, pool_size, k=6):
121
+ # print(query, game_name, selected_model, aggregator, pool_size)
122
+ if f"{game_name}_{selected_model}" in saved_searchers.keys():
123
+ searcher = saved_searchers[f"{game_name}_{selected_model}"]
 
 
 
 
124
  else:
125
+ if selected_model == "ViT-B/32":
126
+ model = vit_model
127
+ searcher = GamePhysicsSearcher(CLIP_MODEL=model, GAME_NAME=game_name)
128
+ else:
129
+ raise
130
+
131
+ saved_searchers[f"{game_name}_{selected_model}"] = searcher
132
+
133
+ results = []
134
+ relevant_videos = searcher.search_top_k(
135
+ query, k=k, pool_size=pool_size, search_mod=aggregator
136
+ )
137
+
138
+ params = ", ".join(
139
+ map(str, [query, game_name, selected_model, aggregator, pool_size])
140
+ )
141
+ results.append(params)
142
+
143
+ for v in relevant_videos:
144
+ results.append(v)
145
+ sid = v.split("/")[-1].split(".")[0]
146
+ results.append(f"https://www.reddit.com/r/GamePhysics/comments/{sid}/")
147
+ return results
148
+
149
+
150
+ def main():
151
+ list_of_games = ["Grand Theft Auto V"]
152
+
153
+ title = "CLIP + GamePhysics - Searching dataset of Gameplay bugs"
154
+ description = "Enter your query and select the game you want to search. The results will be displayed in the console."
155
+ article = """
156
+ This demo shows how to use the CLIP model to search for gameplay bugs in a video game.
157
+ """
158
+
159
+ # GRADIO APP
160
+ iface = gr.Interface(
161
+ fn=gradio_search,
162
+ inputs=[
163
+ gr.inputs.Textbox(
164
+ lines=1,
165
+ placeholder="Search Query",
166
+ default="A person flying in the air",
167
+ label=None,
168
+ ),
169
+ gr.inputs.Radio(list_of_games, label="Game To Search"),
170
+ gr.inputs.Radio(["ViT-B/32"], label="MODEL"),
171
+ gr.inputs.Radio(["Majority", "Top-K"], label="Aggregator"),
172
+ gr.inputs.Slider(300, 2000, label="Pool Size", default=1000),
173
+ ],
174
+ outputs=[
175
+ gr.outputs.Textbox(type="auto", label="Search Params"),
176
+ gr.outputs.Video(type="mp4", label="Result 1"),
177
+ gr.outputs.Textbox(type="auto", label="Submission URL - Result 1"),
178
+ gr.outputs.Video(type="mp4", label="Result 2"),
179
+ gr.outputs.Textbox(type="auto", label="Submission URL - Result 2"),
180
+ gr.outputs.Video(type="mp4", label="Result 3"),
181
+ gr.outputs.Textbox(type="auto", label="Submission URL - Result 3"),
182
+ gr.outputs.Video(type="mp4", label="Result 4"),
183
+ gr.outputs.Textbox(type="auto", label="Submission URL - Result 4"),
184
+ gr.outputs.Video(type="mp4", label="Result 5"),
185
+ gr.outputs.Textbox(type="auto", label="Submission URL - Result 5"),
186
+ ],
187
+ examples=[
188
+ ["A red car", list_of_games[0], "ViT-B/32", "Top-K", 1000],
189
+ ["A person wearing pink", list_of_games[0], "ViT-B/32", "Top-K", 1000],
190
+ ["A car flying in the air", list_of_games[0], "ViT-B/32", "Majority", 1000],
191
+ [
192
+ "A person flying in the air",
193
+ list_of_games[0],
194
+ "ViT-B/32",
195
+ "Majority",
196
+ 1000,
197
+ ],
198
+ [
199
+ "A car in vertical position",
200
+ list_of_games[0],
201
+ "ViT-B/32",
202
+ "Majority",
203
+ 1000,
204
+ ],
205
+ ["A bike inside a car", list_of_games[0], "ViT-B/32", "Majority", 1000],
206
+ ["A bike on a wall", list_of_games[0], "ViT-B/32", "Majority", 1000],
207
+ ["A car stuck in a rock", list_of_games[0], "ViT-B/32", "Majority", 1000],
208
+ ["A car stuck in a tree", list_of_games[0], "ViT-B/32", "Majority", 1000],
209
+ ],
210
+ title=title,
211
+ description=description,
212
+ article=article,
213
+ enable_queue=True,
214
+ )
215
+
216
+ iface.launch()
217
+
218
+
219
+ if __name__ == "__main__":
220
+ main()
requirements.txt CHANGED
@@ -6,4 +6,5 @@ scikit-image
6
  gdown
7
  torchvision
8
  git+https://github.com/openai/CLIP.git
9
- faiss-cpu
 
6
  gdown
7
  torchvision
8
  git+https://github.com/openai/CLIP.git
9
+ faiss-cpu
10
+ gdown