DmitriiKhizbullin committed on
Commit
b25fb44
·
1 Parent(s): a59a803

Sync with the main repo

Browse files
apps/common/auto_zip.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+ import json
15
+ import os
16
+ import zipfile
17
+
18
+
19
class AutoZip:
    """Iterate over the JSON members of a zip archive.

    The archive is opened for reading on construction and stays open until
    ``close()`` is called (or the ``with`` block using this object exits).
    Only members whose file name ends with ``ext`` are exposed.

    Iterating yields each selected member parsed with ``json.loads``;
    ``len()`` gives the number of selected members.

    Args:
        zip_path (str): Path to the zip archive to read.
        ext (str): File name suffix a member must have to be included.
    """

    def __init__(self, zip_path: str, ext: str = ".json"):
        self.zip_path = zip_path
        self.zip = zipfile.ZipFile(zip_path, "r")
        self.fl = [f for f in self.zip.filelist if f.filename.endswith(ext)]
        # Initialise the cursor here so that next() works on a fresh
        # instance; previously it only existed after iter() had been called.
        self.index = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying zip archive."""
        self.zip.close()

    def __next__(self):
        """Return the next selected member parsed as JSON.

        Raises:
            StopIteration: When every selected member has been returned.
        """
        if self.index >= len(self.fl):
            raise StopIteration
        finfo = self.fl[self.index]
        with self.zip.open(finfo) as f:
            raw_json = json.loads(f.read().decode("utf-8"))
        # Advance only after a successful read and parse.
        self.index += 1
        return raw_json

    def __len__(self):
        """Return the number of members matching the extension."""
        return len(self.fl)

    def __iter__(self):
        """Restart iteration from the first selected member."""
        self.index = 0
        return self

    def as_dict(self, include_zip_name: bool = False):
        """Return the raw text of every selected member keyed by its name.

        Args:
            include_zip_name (bool): When true, each key is prefixed with
                the archive's base file name and a ``/``.

        Returns:
            dict: Mapping from member name to its UTF-8 decoded contents
                (the text is not parsed as JSON).
        """
        prefix = ""
        if include_zip_name:
            prefix = os.path.basename(self.zip_path) + "/"
        d = dict()
        for finfo in self.fl:
            with self.zip.open(finfo) as f:
                d[prefix + finfo.filename] = f.read().decode("utf-8")
        return d
apps/data_explorer/data_explorer.py CHANGED
@@ -1,3 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Gradio-based web UI to explore the Camel dataset.
3
  """
@@ -39,7 +52,8 @@ def parse_arguments():
39
  return args
40
 
41
 
42
- def construct_ui(blocks, datasets: Datasets, default_dataset: str = None):
 
43
  """ Build Gradio UI and populate with chat data from JSONs.
44
 
45
  Args:
@@ -213,7 +227,7 @@ def construct_ui(blocks, datasets: Datasets, default_dataset: str = None):
213
  Returns:
214
  List[Tuple]: Chat history in chatbot UI element format.
215
  """
216
- history = []
217
  curr_qa = (None, None)
218
  for k in sorted(messages.keys()):
219
  msg = messages[k]
 
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
  """
15
  Gradio-based web UI to explore the Camel dataset.
16
  """
 
52
  return args
53
 
54
 
55
+ def construct_ui(blocks, datasets: Datasets,
56
+ default_dataset: Optional[str] = None):
57
  """ Build Gradio UI and populate with chat data from JSONs.
58
 
59
  Args:
 
227
  Returns:
228
  List[Tuple]: Chat history in chatbot UI element format.
229
  """
230
+ history: List[Tuple] = []
231
  curr_qa = (None, None)
232
  for k in sorted(messages.keys()):
233
  msg = messages[k]
apps/data_explorer/downloader.py CHANGED
@@ -1,7 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import urllib.request
3
 
4
  from huggingface_hub import hf_hub_download
 
5
 
6
  REPO_ROOT = os.path.realpath(
7
  os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
@@ -23,7 +37,7 @@ def download_data():
23
  hf_hub_download(repo_id="camel-ai/code", repo_type="dataset",
24
  filename="code_chat.zip", local_dir=data_dir,
25
  local_dir_use_symlinks=False)
26
- except:
27
  for name in ("ai_society_chat.zip", "code_chat.zip"):
28
  data_url = ("https://storage.googleapis.com/"
29
  f"camel-bucket/datasets/private/{name}")
 
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
  import os
15
  import urllib.request
16
 
17
  from huggingface_hub import hf_hub_download
18
+ from huggingface_hub.utils._errors import RepositoryNotFoundError
19
 
20
  REPO_ROOT = os.path.realpath(
21
  os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
 
37
  hf_hub_download(repo_id="camel-ai/code", repo_type="dataset",
38
  filename="code_chat.zip", local_dir=data_dir,
39
  local_dir_use_symlinks=False)
40
+ except RepositoryNotFoundError:
41
  for name in ("ai_society_chat.zip", "code_chat.zip"):
42
  data_url = ("https://storage.googleapis.com/"
43
  f"camel-bucket/datasets/private/{name}")
apps/data_explorer/loader.py CHANGED
@@ -1,16 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Everything related to parsing the data JSONs into UI-compatible format.
3
  """
4
 
5
  import glob
6
- import json
7
  import os
8
  import re
9
- import zipfile
10
- from typing import Any, Dict, List, Optional, Tuple, Union
11
 
12
  from tqdm import tqdm
13
 
 
 
14
  ChatHistory = Dict[str, Any]
15
  ParsedChatHistory = Dict[str, Any]
16
  AllChats = Dict[str, Any]
@@ -20,30 +33,6 @@ REPO_ROOT = os.path.realpath(
20
  os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
21
 
22
 
23
- class AutoZip:
24
- def __init__(self, zip_path: str, ext: str = ".json"):
25
- self.zip_path = zip_path
26
- self.zip = zipfile.ZipFile(zip_path, "r")
27
- self.fl = [f for f in self.zip.filelist if f.filename.endswith(ext)]
28
-
29
- def __next__(self):
30
- if self.index >= len(self.fl):
31
- raise StopIteration
32
- else:
33
- finfo = self.fl[self.index]
34
- with self.zip.open(finfo) as f:
35
- raw_json = json.loads(f.read().decode("utf-8"))
36
- self.index += 1
37
- return raw_json
38
-
39
- def __len__(self):
40
- return len(self.fl)
41
-
42
- def __iter__(self):
43
- self.index = 0
44
- return self
45
-
46
-
47
  def parse(raw_chat: ChatHistory) -> Union[ParsedChatHistory, None]:
48
  """ Gets the JSON raw chat data, validates it and transforms
49
  into an easy to work with form.
@@ -122,17 +111,17 @@ def load_zip(zip_path: str) -> AllChats:
122
  continue
123
  parsed_list.append(parsed)
124
 
125
- assistant_roles = set()
126
- user_roles = set()
127
  for parsed in parsed_list:
128
- assistant_roles.add(parsed['assistant_role'])
129
- user_roles.add(parsed['user_role'])
130
- assistant_roles = list(sorted(assistant_roles))
131
- user_roles = list(sorted(user_roles))
132
- matrix: Dict[Tuple[str, str], List[Dict]] = dict()
133
  for parsed in parsed_list:
134
  key = (parsed['assistant_role'], parsed['user_role'])
135
- original_task = parsed['original_task']
136
  new_item = {
137
  k: v
138
  for k, v in parsed.items()
 
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
  """
15
  Everything related to parsing the data JSONs into UI-compatible format.
16
  """
17
 
18
  import glob
 
19
  import os
20
  import re
21
+ from typing import Any, Dict, Optional, Tuple, Union
 
22
 
23
  from tqdm import tqdm
24
 
25
+ from apps.common.auto_zip import AutoZip
26
+
27
  ChatHistory = Dict[str, Any]
28
  ParsedChatHistory = Dict[str, Any]
29
  AllChats = Dict[str, Any]
 
33
  os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def parse(raw_chat: ChatHistory) -> Union[ParsedChatHistory, None]:
37
  """ Gets the JSON raw chat data, validates it and transforms
38
  into an easy to work with form.
 
111
  continue
112
  parsed_list.append(parsed)
113
 
114
+ assistant_roles_set = set()
115
+ user_roles_set = set()
116
  for parsed in parsed_list:
117
+ assistant_roles_set.add(parsed['assistant_role'])
118
+ user_roles_set.add(parsed['user_role'])
119
+ assistant_roles = list(sorted(assistant_roles_set))
120
+ user_roles = list(sorted(user_roles_set))
121
+ matrix: Dict[Tuple[str, str], Dict[str, Dict]] = dict()
122
  for parsed in parsed_list:
123
  key = (parsed['assistant_role'], parsed['user_role'])
124
+ original_task: str = parsed['original_task']
125
  new_item = {
126
  k: v
127
  for k, v in parsed.items()
sync.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Copy the data explorer sources from the main CAMEL repository into this
# Hugging Face repository checkout (the directory the script is run from).
#
# Stop at the first failing command: without this, a failed clone or `cd`
# would let the find/rsync steps run against the wrong directory.
set -eu

TMP_DIR=/tmp/camel_hf_tmp
echo "$TMP_DIR"
HF_REPO_DIR=$(realpath .)
echo "$HF_REPO_DIR"

# Remove the scratch clone on exit, including when a step above fails.
trap 'rm -rf "$TMP_DIR"' EXIT

# A leftover directory from an interrupted run would make `git clone` fail.
rm -rf "$TMP_DIR"
mkdir -p "$TMP_DIR"
git clone -b hf_spaces_2 https://github.com/lightaime/camel.git "$TMP_DIR"
cd "$TMP_DIR"

# Copy every non-test Python file; `rsync -R` recreates the relative path
# (apps/...) under the destination. `-n 1` was dropped because xargs treats
# it as mutually exclusive with `-I`.
for src_dir in apps/data_explorer apps/common; do
    find "$src_dir" -name "*.py" | grep -v test | xargs -I {} rsync -R {} "$HF_REPO_DIR"
done

echo Done