Weiyun1025 committed on
Commit
2abfccb
1 Parent(s): 215456d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +25 -0
  2. .gitignore +8 -0
  3. .gitlab-ci.yml +24 -0
  4. INSTALL +2 -0
  5. README.md +304 -0
  6. conf/ceph.conf +24 -0
  7. conf/fake_client.conf +17 -0
  8. conf/keyring +2 -0
  9. conf/petreloss.conf +88 -0
  10. dev.sh +4 -0
  11. examples/pytorch/MyDataset.py +50 -0
  12. examples/pytorch/MyTest.py +61 -0
  13. examples/pytorch/faces/face_landmarks.csv +3 -0
  14. examples/pytorch/log/MyTest.log +3 -0
  15. petrel-sdk-H.tar.gz +3 -0
  16. petrel-sdk/petrel-oss-cpp-sdk/.gitignore +5 -0
  17. petrel-sdk/petrel-oss-cpp-sdk/.gitmodules +3 -0
  18. petrel-sdk/petrel-oss-cpp-sdk/README.md +6 -0
  19. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-common.so +0 -0
  20. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-common.so.1.0.0 +0 -0
  21. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so +0 -0
  22. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so.0unstable +0 -0
  23. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so.1.0.0 +0 -0
  24. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so +0 -0
  25. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so.0unstable +0 -0
  26. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so.1.0.0 +0 -0
  27. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-cognito-identity.so +3 -0
  28. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-config.so +3 -0
  29. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-core.so +3 -0
  30. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-identity-management.so +0 -0
  31. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-s3.so +3 -0
  32. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-sts.so +0 -0
  33. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-sts_duplicate_2023-06-05-17-36-57.so +0 -0
  34. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-transfer.so +0 -0
  35. petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-transfer_duplicate_2023-06-05-17-36-57.so +0 -0
  36. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpp +0 -0
  37. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-310-x86_64-linux-gnu.so +0 -0
  38. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-36-x86_64-linux-gnu.so +3 -0
  39. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-37-x86_64-linux-gnu.so +0 -0
  40. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-38-x86_64-linux-gnu.so +3 -0
  41. petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-39-x86_64-linux-gnu.so +0 -0
  42. petrel-sdk/petrel-oss-cpp-sdk/pys3client.pyx +91 -0
  43. petrel-sdk/petrel-oss-cpp-sdk/s3client.cpp +323 -0
  44. petrel-sdk/petrel-oss-cpp-sdk/s3client.h +38 -0
  45. petrel-sdk/petrel-oss-cpp-sdk/s3client.pxd +33 -0
  46. petrel-sdk/petrel-oss-cpp-sdk/setup.py +24 -0
  47. petrel-sdk/petrel-oss-cpp-sdk/setup.py.bak +28 -0
  48. petrel-sdk/petrel-oss-python-sdk/.gitignore +8 -0
  49. petrel-sdk/petrel-oss-python-sdk/.gitlab-ci.yml +24 -0
  50. petrel-sdk/petrel-oss-python-sdk/INSTALL +2 -0
.gitattributes CHANGED
@@ -33,3 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/pytorch/faces/face_landmarks.csv filter=lfs diff=lfs merge=lfs -text
37
+ petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-cognito-identity.so filter=lfs diff=lfs merge=lfs -text
38
+ petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-config.so filter=lfs diff=lfs merge=lfs -text
39
+ petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-core.so filter=lfs diff=lfs merge=lfs -text
40
+ petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-s3.so filter=lfs diff=lfs merge=lfs -text
41
+ petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-36-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
42
+ petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
+ petrel-sdk/petrel-oss-python-sdk/examples/pytorch/faces/face_landmarks.csv filter=lfs diff=lfs merge=lfs -text
44
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/cache/mc/petrel_pymc.so filter=lfs diff=lfs merge=lfs -text
45
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/librgw/rados.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
46
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/librgw/rgw.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
47
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/libs/libaws-cpp-sdk-core.so filter=lfs diff=lfs merge=lfs -text
48
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/libs/libaws-cpp-sdk-s3.so filter=lfs diff=lfs merge=lfs -text
49
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/pys3client.cpython-36-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
50
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/pys3client.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
51
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/pys3client.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
52
+ petrel-sdk/petrel-oss-python-sdk/petrel_client/ceph/s3cpp/pys3client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
53
+ petrel_client/cache/mc/petrel_pymc.so filter=lfs diff=lfs merge=lfs -text
54
+ petrel_client/ceph/librgw/rados.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
55
+ petrel_client/ceph/librgw/rgw.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
56
+ petrel_client/ceph/s3cpp/libs/libaws-cpp-sdk-core.so filter=lfs diff=lfs merge=lfs -text
57
+ petrel_client/ceph/s3cpp/libs/libaws-cpp-sdk-s3.so filter=lfs diff=lfs merge=lfs -text
58
+ petrel_client/ceph/s3cpp/pys3client.cpython-36m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
59
+ petrel_client/ceph/s3cpp/pys3client.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
60
+ petrel_client/ceph/s3cpp/pys3client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ petrel_client/version.py
2
+ .vscode
3
+ **/__pycache__
4
+ **/*.pyc
5
+ **/*.egg-info
6
+ venv
7
+ dist
8
+ build
.gitlab-ci.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image: registry.sensetime.com/library/python:3.6
2
+
3
+
4
+ before_script:
5
+ - python -V
6
+ - python -m venv venv
7
+ - source ./venv/bin/activate
8
+ - python setup.py sdist
9
+ - pip install dist/*
10
+
11
+ stages:
12
+ - flake8
13
+ - unit_test
14
+
15
+ flake8:
16
+ stage: flake8
17
+ script:
18
+ - pip install flake8
19
+ - flake8 --ignore E501 --exclude '.git,.tox,*.egg-info,venv,scripts,tests,examples' . # todo remove scripts,tests,examples
20
+
21
+ test:
22
+ stage: unit_test
23
+ script:
24
+ - python tests/run_test.py
INSTALL ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ python setup.py install
2
+
README.md ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Petrel OSS SDK 2.0
2
+ ===
3
+
4
+ 注意:该版本SDK需要python3.6环境
5
+
6
+ 若之前安装过旧版本,请先运行
7
+
8
+ ```bash
9
+ $ pip uninstall pycephs3client
10
+ $ rm -rf ~/.local/lib/python3.6/site-packages/petrel_client
11
+ ```
12
+
13
+ ## 建议在安装之前先升级 pip
14
+
15
+ ```bash
16
+ source /mnt/lustre/share/platform/env/<pat... or pt...> # 请根据实际情况确定是否需要 source
17
+ python3 -m pip install --upgrade pip # 请根据实际情况确定是否需要 `sudo` 或添加 `--user` 参数
18
+ ```
19
+
20
+ ## 训练集群环境上安装
21
+
22
+ ```bash
23
+ $ source /mnt/lustre/share/platform/env/<pat... or pt...>
24
+ $ python setup.py sdist
25
+ $ pip install --user dist/*
26
+ ```
27
+
28
+ ## 通过修改 PYTHONPATH 安装
29
+
30
+ ```bash
31
+ $ source /mnt/lustre/share/platform/env/<pat... or pt...>
32
+
33
+ # 安装SDK依赖
34
+ $ python setup.py egg_info
35
+ $ pip install -r *.egg-info/requires.txt
36
+
37
+ # 将SDK编译到 ./build 目录
38
+ $ python setup.py build
39
+
40
+ # 修改 PYTHONPATH 环境变量
41
+ $ export PYTHONPATH=<path_to_sdk>/build/lib:$PYTHONPATH
42
+ ```
43
+
44
+ ## venv环境上安装
45
+
46
+ ```bash
47
+ $ python3 -m venv your_venv_name # 若已创建venv环境则无需执行
48
+ $ source your_venv_name/bin/activate
49
+ $ python setup.py sdist
50
+ $ pip install dist/*
51
+ ```
52
+
53
+ ## 系统环境上安装
54
+
55
+ ```bash
56
+ $ python3 setup.py sdist
57
+ $ python3 -m pip install dist/* # 请根据实际情况确定是否需要 `sudo` 或添加 `--user` 参数
58
+ ```
59
+
60
+ ## 使用
61
+
62
+ SDK 提供 `get` 和 `put` 接口,使用方式为
63
+
64
+ ```python
65
+ data = client.get(url) # 默认情况由配置文件决定是否使用 MC
66
+ data = client.get(url, no_cache=True) # 本次 get 直接从 ceph 读取
67
+ data = client.get(url, update_cache=True) # 本次 get 直接从 ceph 读取,并将数据缓存至 MC
68
+ ```
69
+
70
+ ```python
71
+ client.put(url, data) # 默认 put 不会更新 MC
72
+ client.put(url, data, update_cache=True) # 本次 put 将数据存入 ceph 之后并更新 MC
73
+ ```
74
+
75
+ ``注意:``若配置文件中没有启用 `MC` ,则 `no_cache` 和 `update_cache` 参数将被忽略
76
+
77
+ 以下为使用 SDK 读取图片、进行图片处理后并保存图片的简单例子
78
+
79
+ ```python
80
+ import cv2
81
+ import numpy as np
82
+ from os.path import splitext
83
+ from petrel_client.client import Client
84
+
85
+ conf_path = '~/petreloss.conf'
86
+ client = Client(conf_path) # 若不指定 conf_path ,则从 '~/petreloss.conf' 读取配置文件
87
+ img_url = 's3://bucket1/image.jpeg'
88
+ img_gray_url = 's3://bucket1/image_gray.jpeg'
89
+ img_ext = splitext(img_gray_url)[-1]
90
+
91
+ # 图片读取
92
+ img_bytes = client.get(img_url)
93
+ assert(img_bytes is not None)
94
+ img_mem_view = memoryview(img_bytes)
95
+ img_array = np.frombuffer(img_mem_view, np.uint8)
96
+ img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
97
+
98
+ # 图片处理
99
+ img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
100
+
101
+ # 图片存储
102
+ success, img_gray_array = cv2.imencode(img_ext, img_gray)
103
+ assert(success)
104
+ img_gray_bytes = img_gray_array.tobytes()
105
+ client.put(img_gray_url, img_gray_bytes)
106
+ ```
107
+
108
+ 配置文件请参考 [petreloss.conf](./conf/petreloss.conf)
109
+
110
+ ``请注意:配置文件中 `key = value` 的 key 前面不能有空格,否则该行视为上一行配置项 value 的一部分``
111
+
112
+ 使用样例请参考 [multi_cluster_test.py](./tests/multi_cluster_test.py)
113
+
114
+ ## `Tensor` 和 `Json` 数据保存与读取
115
+ 使用样例 [tensor_json_test.py](./tests/tensor_json_test.py)
116
+
117
+ ## 数据过大无法上传,则需要分片上传
118
+ 使用样例 [multipart_test.py](./tests/multipart_test.py)
119
+
120
+ ## 创建 Bucket
121
+ ```python
122
+ client.create_bucket('s3://mybucket')
123
+ ```
124
+
125
+ ## 顺序的读取某个前缀的数据
126
+ ```python
127
+ cluster = 'cluster1'
128
+ files = client.get_file_iterator('cluster1:s3://lili1.test2/test3')
129
+ for p, k in files:
130
+ key = '{0}:s3://{1}'.format(cluster, p)
131
+ data = client.get(key)
132
+ ```
133
+ ## 使用 anonymous 账户访问数据
134
+ 若在配置文件中不设置 `access_key` 和 `secret_key`,将以 `anonymous` 账户访问数据。
135
+
136
+
137
+ ## McKeySizeExceed 错误
138
+
139
+ 默认情况下,`MC` 所支持 `key` 的最大长度为250个字节。如果路径过长,将会出现 `McKeySizeExceed` 错误。
140
+ 此时需要用户定义 `key` 的转换规则来避免该错误。
141
+
142
+ ``注意:``中文字符对应多个字节。
143
+
144
+ 例如:
145
+
146
+ ```python
147
+ def trim_key(key):
148
+ if isinstance(key, str):
149
+ key = key.encode('utf-8')
150
+ else:
151
+ assert isinstance(key, bytes)
152
+
153
+ return key[-249:]
154
+
155
+ client = Client('~/petreloss.conf', mc_key_cb=trim_key)
156
+ ```
157
+
158
+ 此外,可使用内置函数 `md5`、`sha256` 等,例如:
159
+
160
+ ```python
161
+ client = Client('~/petreloss.conf', mc_key_cb='sha256')
162
+ ```
163
+
164
+ 或在配置文件中指定:
165
+
166
+ ```conf
167
+ [mc]
168
+ mc_key_cb = sha512
169
+ ```
170
+
171
+ ``请注意``
172
+
173
+ - 用户需要保证转换规则结果的唯一性,内置转换函数也有可能发生哈希碰撞。
174
+ - 如果 `key` 为 `str` 类型且其中出现中文字符,请务必用 `encode('utf-8')` 对其进行编码。
175
+
176
+
177
+ ## 使用伪客户端
178
+
179
+ 在对应客户端添加如下配置:
180
+
181
+ ```conf
182
+ fake = True
183
+ ```
184
+
185
+ 配置文件请参考 [fake_client.conf](./conf/fake_client.conf)
186
+
187
+ 使用样例请参考 [fake_client_test.py](./tests/fake_client_test.py)
188
+
189
+ ## 强制更新MC
190
+
191
+ 使用 `get_and_update` 接口或在 `get` 中传入 `update_cache=True` 参数将直接从存储系统读取数据并更新MC。
192
+
193
+ ## IO 统计信息
194
+
195
+ IO 统计信息可通过以下三种方式修改其`log`输出频度:
196
+ - 由环境变量 `count_disp` 设置
197
+ - 由配置文件 `count_disp` 设置 (若已设置环境变量,则该方式无效)
198
+ - 调用 `client.set_count_disp(count_disp)` (该方式将覆盖上述两种方式),但限于`parrots`和`pytorch`的运行机制,在某些使用场景下可能无法有效修改。
199
+
200
+ 若 `count_disp` 为 `0` ,则将关闭 IO 统计信息打印。
201
+
202
+ 若需要在 `console` 中打印 IO 统计信息,则需要设置 `console_log_level` 为 `INFO` 或更低级别,且 `count_disp` 需大于 `0`。
203
+
204
+
205
+ ## DataLoader
206
+
207
+ `SDK` 提供的 `DataLoader` 额外支持如下参数:
208
+
209
+ - `prefetch_factor`,默认2。每个 `worker` 预读 `batch` 数目。
210
+ - `persistent_workers`,默认 `False`。如果为 `True`,则每轮 `epoch` 迭代完毕后 `worker` 进程将不会关闭,下轮 `epoch` 将复用该 `worker` 进程。
211
+
212
+ 用例:
213
+
214
+ ```python
215
+ from petrel_client.utils.data import DataLoader
216
+ dataloader = DataLoader(dataset=xxx, ..., prefetch_factor=4, persistent_workers=True)
217
+ ```
218
+
219
+ ## SSL 验证
220
+
221
+ 使用 `https` 协议时默认不会对 `SSL` 进行验证。若需要开启验证,请在配置文件中进行如下设置
222
+ ```conf
223
+ verify_ssl = True
224
+ ```
225
+
226
+ ## Presigned URL,生成签名链接
227
+
228
+ ```python
229
+ presigned_url = client.generate_presigned_url(url, client_method ='get_object', expires_in=3600)
230
+ ```
231
+
232
+ `client_method` 取值为 `get_object` (默认值) 或 `put_object`
233
+
234
+ `expires_in` 单位为秒,默认值为 3600
235
+
236
+ ## Presigned POST,生成签名 POST
237
+
238
+ ```python
239
+ presigned_post = client.generate_presigned_post(url, fields=None, conditions=None, expires_in=3600)
240
+ ```
241
+
242
+ 参数及返回值详见 [generate_presigned_post](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.generate_presigned_post),其中参数 bucket 和 key 从 url 中提取。
243
+
244
+ ## 以流的形式读取数据
245
+ ```python
246
+ stream = client.get(url, enable_stream=True)
247
+ ```
248
+ 返回的 `stream` 为 `StreamingBody`,使用方法详见
249
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/response.html
250
+
251
+ ## 判断对象是否存在
252
+ ```python
253
+ exists = client.contains(url)
254
+ ```
255
+
256
+ ## 删除对象
257
+ ```python
258
+ client.delete(url)
259
+ ```
260
+
261
+ ## 列出当前路径包含的对象或目录
262
+ ```python
263
+ contents = client.list(url)
264
+ for content in contents:
265
+ if content.endswith('/'):
266
+ print('directory:', content)
267
+ else:
268
+ print('object:', content)
269
+ ```
270
+
271
+ ## 判断目录是否存在
272
+ ```python
273
+ client.isdir(url)
274
+ ```
275
+
276
+ 注意:`Ceph`中没有目录的概念,本函数返回`True`时代表存在以该`url`作为前缀的对象,其他情况返回`False`。
277
+
278
+
279
+ ## 使用 `/mnt/cache` 目录下的 `Python` 环境
280
+ 相对于 `/mnt/lustre` 目录,在 `/mnt/cache` 目录执行 `Python` 有一定的性能提升。
281
+ 使用方式如下:
282
+ - `source` `/mnt/cache` 目录下的 `Python` 环境
283
+ ```bash
284
+ ### 例如 pt1.3v1
285
+ source /mnt/cache/share/platform/env/pt1.3v1
286
+ ### 或 s0.3.3
287
+ source /mnt/cache/share/spring/s0.3.3
288
+ ```
289
+
290
+ - 检查 `Python` 路径是否正确
291
+ ```bash
292
+ which python
293
+ ### 结果应为 /mnt/cache/...
294
+ ```
295
+
296
+ - 设定 `PYTHONUSERBASE` 环境变量
297
+ ```bash
298
+ export PYTHONUSERBASE=/mnt/cache/<username>/.local
299
+ ```
300
+
301
+ - 重新安装相关依赖库(仅需首次使用时执行)
302
+ ```
303
+ python -m pip install --user <packages>
304
+ ```
conf/ceph.conf ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [global]
2
+ fsid = 85e75c85-ab98-426d-81cf-9daa534887f9
3
+ mon_initial_members = SZ-OFFICE3-172-30-1-75
4
+ mon_host = 172.30.1.75
5
+ auth_cluster_required = cephx
6
+ auth_service_required = cephx
7
+ auth_client_required = cephx
8
+
9
+
10
+ public_network=172.30.1.0/20
11
+ cluster_network=172.30.1.0/20
12
+
13
+ mon_allow_pool_delete = True
14
+
15
+ debug_mon = 20/20
16
+ debug_client = 20/20
17
+
18
+ rgw_enable_gc_threads = False
19
+ rgw_enable_lc_threads = False
20
+ rgw_enable_quota_threads = False
21
+ rgw_run_sync_thread = False
22
+ rgw enable ops log = False
23
+ rgw enable usage log = False
24
+ admin socket = ""
conf/fake_client.conf ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [DEFAULT]
2
+ default_cluster = cluster1
3
+
4
+ [dfs]
5
+ fake = True
6
+ enable_mc = True
7
+
8
+ [mc]
9
+ fake = True
10
+
11
+ [cluster1]
12
+ fake = True
13
+ enable_mc = True
14
+
15
+ [cluster2]
16
+ fake = True
17
+ enable_mc = False
conf/keyring ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [client.rgw.train]
2
+ key = AQBiiqVeaPzSFRAAT1Vc+z8wPI5BkCroB6W/jQ==
conf/petreloss.conf ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 注释以 ‘#’ 或 ‘;’ 开头,单独占一行,不能和配置内容在同一行
2
+ # `key = value` 的 key 前面不能有空格,否则该行视为上一行配置项 value 的一部分
3
+
4
+ [DEFAULT]
5
+
6
+ # 启用 Memcached, 默认 False
7
+ # enable_mc = True
8
+
9
+ # Memcached 相关配置,默认情况下无需设置
10
+ # mc_server_list_path = /mnt/lustre/share/memcached_client/server_list.conf
11
+ # mc_client_config_path = /mnt/lustre/share/memcached_client/client.conf
12
+
13
+ # console log 级别,默认 WARNING, 选项有 CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
14
+ # 若需要在 console 输出 IO 统计信息,需要设置级别为 INFO
15
+ # console_log_level = WARNING
16
+
17
+ # file log 级别,默认 DEBUG, 选项有 CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
18
+ # file_log_level = DEBUG
19
+
20
+ # log 文件路径,默认 无 ,即不输出 log 文件
21
+ # 若已配置 log 文件路径,则训练运行之前需要确保 log_file_path 指定的目录已被创建
22
+ # log_file_path = /mnt/lustre/<username>/petrel_log_dir
23
+
24
+ # log 文件最大长度,默认 1GB
25
+ # file_log_max_bytes = 1073741824
26
+
27
+ # log 文件备份数目,默认 1
28
+ # file_log_backup_count = 1
29
+
30
+ # 每隔 count_disp 次 get 操作后,日志记录 IO 统计信息。默认值 5000
31
+ # 如果 IO 统计信息输出过于频繁,可将该数值增大
32
+ # 如果需要关闭 IO 统计信,可将该数值设置为 0
33
+ # count_disp = 5000
34
+
35
+ # 内存统计,默认关闭
36
+ # enable_mem_trace = False
37
+
38
+ # get 操作失败后,允许重试的次数,默认 10
39
+ # get_retry_max = 10
40
+
41
+ # 默认 cluster,即当访问 Ceph 没有指定 cluster 时,从 default_cluster 获取数据
42
+ default_cluster = cluster1
43
+
44
+ [mc]
45
+ # 若访问的路径过长(超过250个字节),mc 将出现 McKeySizeExceed 错误。
46
+ # 配置 mc_key_cb 可将传给 mc 的路径进行转换,可选的参数有:
47
+ # blake2b, blake2s, md5, pbkdf2_hmac, sha1, sha224,
48
+ # sha256, sha384, sha3_224, sha3_256, sha3_384,
49
+ # sha3_512, sha512, shake_128, shake_256
50
+
51
+ # mc_key_cb = sha256
52
+
53
+
54
+ # 是否输出 mc debug log,默认 True
55
+ # 注意最终是否输出到 console 和 file 分别还需要由 console_log_level 和 file_log_level 决定
56
+ # debug_mc = True
57
+
58
+
59
+ [dfs]
60
+ enable_mc = True
61
+
62
+ # 至少需要配置一个 cluster ,否则将出现 ConfigSectionNotFoundError
63
+ [cluster1]
64
+ # 对于每个集群的具体配置,如果没有指定,则以[DEFAULT]作为取值
65
+ # 例如在此处设置 ‘enable_mc = False’ 将覆盖默认配置
66
+ enable_mc = True
67
+
68
+ # 启用 S3 boto,默认 True
69
+ # boto = c++ 将启用 c++ 版本实现的 S3
70
+ boto = True
71
+
72
+ # 若不设置 access_key 和 secret_key,将以 anonymous 账户访问数据
73
+ access_key = lili1
74
+ secret_key = lili1
75
+
76
+ host_base = http://127.0.0.1:7480
77
+
78
+ # 若 boto = False ,则需要增加以下配置
79
+ # conf = conf/ceph.conf
80
+ # keyring = conf/keyring
81
+ # name = client.rgw.train
82
+ # cluster = ceph
83
+
84
+ [cluster2]
85
+
86
+ access_key = lili1
87
+ secret_key = lili1
88
+ host_base = http://127.0.0.1:7480
dev.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ python3 -m venv venv
2
+ source venv/bin/activate
3
+ pip install -U autopep8
4
+ pip install -e .
examples/pytorch/MyDataset.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function, division
2
+ import os
3
+ import torch
4
+ import pandas as pd
5
+ from skimage import io, transform
6
+ import matplotlib.pyplot as plt
7
+ from torch.utils.data import Dataset, DataLoader
8
+ from torchvision import transforms, utils
9
+
10
+ import cv2
11
+ import numpy as np
12
+ import ceph
13
+
14
+ class MyDataset(Dataset):
15
+ def __init__(self, csv_file, root_dir):
16
+ """
17
+ Args:
18
+ csv_file (string): Path to the csv file with annotations.
19
+ root_dir (string): Bucket with all the images, such as s3://faces/
20
+ """
21
+ self.landmarks_frame = pd.read_csv(csv_file)
22
+ self.root_dir = root_dir
23
+
24
+ def __len__(self):
25
+ return len(self.landmarks_frame)
26
+
27
+ def __getitem__(self, idx):
28
+ img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
29
+ s3client = ceph.S3Client()
30
+ value = s3client.get(img_name)
31
+ if not value:
32
+ """
33
+ Picture doesn't exist in ceph, your code here to handle error
34
+ """
35
+ return None
36
+ img_array = np.fromstring(value, np.uint8)
37
+ # load image
38
+ #img = cvb.img_from_bytes(value)
39
+
40
+
41
+ string_data = img_array.tostring()
42
+ #print(string_data)
43
+ #print(value)
44
+ #image = cv2.imdecode(img_array, cv2.CV_LOAD_IMAGE_COLOR)
45
+
46
+ landmarks = self.landmarks_frame.iloc[idx, 1:].as_matrix()
47
+ landmarks = landmarks.astype('float').reshape(-1, 2)
48
+ sample = {'image': img_array, 'landmarks': landmarks}
49
+
50
+ return sample, string_data
examples/pytorch/MyTest.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import print_function, division
4
+ import threading
5
+ import logging
6
+
7
+ from MyDataset import MyDataset
8
+
9
+ import ceph
10
+
11
+ class testThread(threading.Thread):
12
+ def __init__(self, threadid):
13
+ threading.Thread.__init__(self)
14
+ self.threadid = threadid
15
+
16
+ def run(self):
17
+ self.do_tasks()
18
+
19
+ def do_tasks(self):
20
+ face_dataset = MyDataset(csv_file='faces/face_landmarks.csv',root_dir='s3://yijianliang.test/train-copy0/')
21
+
22
+ for i in range(len(face_dataset)):
23
+ sample = face_dataset[i]
24
+ #print(i, sample['image'].shape, sample['landmarks'].shape)
25
+ logging.info('{0} {1} {2}'.format(i, sample['image'].shape, sample['landmarks'].shape))
26
+
27
+ '''
28
+ if __name__ == '__main__':
29
+ logging.basicConfig(level=logging.INFO,
30
+ format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s',
31
+ datefmt='[%Y-%m_%d %H:%M:%S]',
32
+ filename='log/MyTest.log',
33
+ filemode='a')
34
+ threads = []
35
+ for i in range(0,1):
36
+ threads.append(testThread(threadid=i))
37
+ for thread in threads:
38
+ thread.start()
39
+ for thread in threads:
40
+ thread.join()
41
+ '''
42
+
43
+ if __name__ == '__main__':
44
+ logging.basicConfig(level=logging.INFO,
45
+ format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s',
46
+ datefmt='[%Y-%m_%d %H:%M:%S]')
47
+ #filename='log/MyTest.log',
48
+ #filemode='a')
49
+
50
+ face_dataset = MyDataset(csv_file='faces/face_landmarks.csv',root_dir='s3://yijianliang.ssd.qos/train')
51
+ for i in range(len(face_dataset)):
52
+ sample, string_data = face_dataset[i]
53
+ #print(i, sample['image'].shape, sample['landmarks'].shape)
54
+ object_name = str(i)
55
+ if sample and string_data:
56
+ #s3client = ceph.S3Client()
57
+ s3client = ceph.S3Client(access_key = "DWD2LKXJHJLGYKRDED7T", secret_key = "tzJ2a0g26deZZux3bLOd29YV9zJlaLM400Fu5tdn")
58
+ ret = s3client.save_from_string('s3://sensestudytest/save_from_string/', object_name, string_data)
59
+ if ret:
60
+ logging.info('Save {0}: {1} bytes'.format(object_name, ret))
61
+ logging.info('{0} {1} {2}'.format(i, sample['image'].shape, sample['landmarks'].shape))
examples/pytorch/faces/face_landmarks.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82bd06902826e8a79658a5946187a4f441b9d21f8edceec4dfa8723ffcfcbad0
3
+ size 63189398
examples/pytorch/log/MyTest.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [2018-04_18 13:45:16] MyTest.py[line:22] INFO 0 (250, 250, 3) (3, 2)
2
+ [2018-04_18 13:45:16] MyTest.py[line:22] INFO 0 (250, 250, 3) (3, 2)
3
+ [2018-04_18 13:45:16] MyTest.py[line:22] INFO 0 (250, 250, 3) (3, 2)
petrel-sdk-H.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4bd0ab2f7ca22aa16c83496358025e136ee2eea3fde591d0574c5b3a6f82d41
3
+ size 21200532
petrel-sdk/petrel-oss-cpp-sdk/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ build
2
+ venv
3
+ pys3client.cpp
4
+ pys3client.cpython-36m-x86_64-linux-gnu.so
5
+ pys3client.so
petrel-sdk/petrel-oss-cpp-sdk/.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "aws-sdk-cpp"]
2
+ path = aws-sdk-cpp
3
+ url = http://gitlab.bj.sensetime.com/platform/StorageSystem/aws-sdk-cpp.git
petrel-sdk/petrel-oss-cpp-sdk/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # petrel-oss-cpp-sdk
2
+
3
+ ```bash
4
+ scl enable devtoolset-8 -- bash
5
+ python3 setup.py build_ext --inplace
6
+ ```
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-common.so ADDED
Binary file (241 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-common.so.1.0.0 ADDED
Binary file (241 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so ADDED
Binary file (36.8 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so.0unstable ADDED
Binary file (36.8 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-c-event-stream.so.1.0.0 ADDED
Binary file (36.8 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so ADDED
Binary file (54.6 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so.0unstable ADDED
Binary file (54.6 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-checksums.so.1.0.0 ADDED
Binary file (54.6 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-cognito-identity.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0525ae1cf99e3f99352f67cb1dbc5cc3f45e007474e71646e445aff283c167
3
+ size 1203624
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-config.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d91cc9c68c489ba0a16e63eddd8354fc9b8de53bb991d5f2e7a911fcc38f81a
3
+ size 4489616
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-core.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eed1dcb18c4481516fabbbe5d1fe11559af4a7efa196227f75da4f38e8428a1
3
+ size 1465968
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-identity-management.so ADDED
Binary file (202 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-s3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde1e1ba070640c53b51d885ca4ffd12fcedb031ca3a6231507f499c385d39ef
3
+ size 4416312
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-sts.so ADDED
Binary file (492 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-sts_duplicate_2023-06-05-17-36-57.so ADDED
Binary file (492 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-transfer.so ADDED
Binary file (304 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/libs/libaws-cpp-sdk-transfer_duplicate_2023-06-05-17-36-57.so ADDED
Binary file (304 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpp ADDED
The diff for this file is too large to render. See raw diff
 
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-310-x86_64-linux-gnu.so ADDED
Binary file (233 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-36-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8484af53fea398ba08cd78502a0400b61aeae73315ffed5c2900d17abbc067d
3
+ size 1109320
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-37-x86_64-linux-gnu.so ADDED
Binary file (246 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:629201bfbedb0b1cddadbe6ec2f982fab941ee1cb04697d2407e596b2fda3cc5
3
+ size 1145912
petrel-sdk/petrel-oss-cpp-sdk/pys3client.cpython-39-x86_64-linux-gnu.so ADDED
Binary file (234 kB). View file
 
petrel-sdk/petrel-oss-cpp-sdk/pys3client.pyx ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # distutils: language = c++
2
+
3
+ from libcpp.string cimport string
4
+ from libcpp.vector cimport vector
5
+ from s3client cimport S3Client, get_error_list
6
+ from s3client cimport init_api as _init_api
7
+ from s3client cimport shutdown_api as _shutdown_api
8
+
9
+
10
+ ERROR_LIST = get_error_list()
11
+ ERROR_MAP = {k:v.decode('utf-8') for k,v in ERROR_LIST}
12
+
13
+ def init_api(log_level):
14
+ log_level = log_level.lower()
15
+ _init_api(log_level.encode('utf-8'))
16
+
17
+ def shutdown_api():
18
+ _shutdown_api()
19
+
20
+ class S3Error(Exception):
21
+ def __init__(self, error_name, error_message):
22
+ self.error_name = error_name
23
+ self.error_message = error_message
24
+
25
+ cdef class PyS3Client:
26
+ cdef S3Client *client
27
+
28
+ def __cinit__(self, string ak, string sk, string endpoint, bint verify_ssl, bint enable_https, bint use_dual_stack, int threads_num):
29
+ self.client = new S3Client(ak, sk, endpoint, verify_ssl, enable_https, use_dual_stack, threads_num)
30
+
31
+ def __dealloc__(self):
32
+ del self.client
33
+
34
+ def get_object(self, string bucket, string key, string range):
35
+ cdef string error_message, result
36
+ cdef int error_type
37
+ ret = self.client.get_object(bucket, key, error_type, error_message, result, range)
38
+ if ret == 0:
39
+ return result
40
+ else:
41
+ error_name = ERROR_MAP.get(error_type, 'Undefined')
42
+ raise S3Error(error_name, error_message)
43
+
44
+ def multipart_download_concurrency(self, string bucket, string key, string filename):
45
+ cdef string error_message, result
46
+ cdef int error_type
47
+ ret = self.client.multipart_download_concurrency(bucket, key, filename, error_type, error_message)
48
+ if ret == 0:
49
+ return ret
50
+ else:
51
+ error_name = ERROR_MAP.get(error_type, 'Undefined')
52
+ raise S3Error(error_name, error_message)
53
+
54
+ def put_object(self, string bucket, string key, string data):
55
+ cdef string error_message, result
56
+ cdef int error_type
57
+ ret = self.client.put_object(bucket, key, data, error_type, error_message)
58
+ if ret == 0:
59
+ return data.size()
60
+ else:
61
+ error_name = ERROR_MAP.get(error_type, 'Undefined')
62
+ raise S3Error(error_name, error_message)
63
+
64
+
65
+ def multipart_upload_concurrency(self, string bucket, string key, string filename):
66
+ cdef string error_message, result
67
+ cdef int error_type
68
+ ret = self.client.multipart_upload_concurrency(bucket, key, filename, error_type, error_message)
69
+ if ret == 0:
70
+ return ret
71
+ else:
72
+ error_name = ERROR_MAP.get(error_type, 'Undefined')
73
+ raise S3Error(error_name, error_message)
74
+
75
+ def delete(self, string bucket, string key):
76
+ cdef string error_message, result
77
+ cdef int error_type
78
+ ret = self.client.delete_obj(bucket, key, error_type, error_message)
79
+ return ret
80
+
81
+ def contains(self, string bucket, string key):
82
+ cdef string error_message, result
83
+ cdef int error_type
84
+ ret = self.client.contains(bucket, key, error_type, error_message)
85
+ return ret
86
+
87
+ def list(self, string bucket, string key):
88
+ cdef string error_message, result
89
+ cdef int error_type
90
+ ret = self.client.list(bucket, key, error_type, error_message)
91
+ return ret
petrel-sdk/petrel-oss-cpp-sdk/s3client.cpp ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <aws/core/Aws.h>
2
+ #include <aws/s3/S3Client.h>
3
+ #include <aws/s3/model/Bucket.h>
4
+ #include <aws/core/auth/AWSCredentialsProvider.h>
5
+ #include <aws/s3/model/GetObjectRequest.h>
6
+ #include <aws/s3/model/PutObjectRequest.h>
7
+ #include <aws/s3/model/ListObjectsRequest.h>
8
+ #include <aws/s3/model/Object.h>
9
+ #include <aws/s3/S3Errors.h>
10
+ #include <string>
11
+ #include <list>
12
+ #include <fstream>
13
+ #include "s3client.h"
14
+
15
+ #include <aws/s3/model/CreateMultipartUploadRequest.h>
16
+ #include <aws/s3/model/CompletedPart.h>
17
+ #include <aws/s3/model/UploadPartRequest.h>
18
+ #include <aws/s3/model/CompleteMultipartUploadRequest.h>
19
+ #include <aws/s3/model/CompletedMultipartUpload.h>
20
+ #include <aws/s3/model/AbortMultipartUploadRequest.h>
21
+
22
+
23
+ #include <iostream>
24
+ #include <vector>
25
+
26
+ // multipart-related headers
27
+ #include <aws/core/utils/threading/Executor.h>
28
+ #include <aws/transfer/TransferManager.h>
29
+ #include <aws/transfer/TransferHandle.h>
30
+ #include <aws/core/utils/memory/AWSMemory.h>
31
+ #include <aws/core/utils/memory/stl/AWSStreamFwd.h>
32
+ #include <aws/core/utils/stream/PreallocatedStreamBuf.h>
33
+ #include <aws/core/utils/StringUtils.h>
34
+
35
+ #include <aws/s3/model/DeleteObjectRequest.h>
36
+ #include <aws/s3/model/HeadObjectRequest.h>
37
+ #include <aws/s3/model/ListObjectsV2Request.h>
38
+
39
+ #define ERROR_ITEM(error_name) \
40
+ { \
41
+ static_cast<int>(Aws::S3::S3Errors::error_name), #error_name \
42
+ }
43
+
44
+ static std::list<std::pair<int, std::string>> ERROR_LIST = {
45
+ ERROR_ITEM(INCOMPLETE_SIGNATURE),
46
+ ERROR_ITEM(INTERNAL_FAILURE),
47
+ ERROR_ITEM(INVALID_ACTION),
48
+ ERROR_ITEM(INVALID_CLIENT_TOKEN_ID),
49
+ ERROR_ITEM(INVALID_PARAMETER_COMBINATION),
50
+ ERROR_ITEM(INVALID_QUERY_PARAMETER),
51
+ ERROR_ITEM(INVALID_PARAMETER_VALUE),
52
+ ERROR_ITEM(MISSING_ACTION),
53
+ ERROR_ITEM(MISSING_AUTHENTICATION_TOKEN),
54
+ ERROR_ITEM(MISSING_PARAMETER),
55
+ ERROR_ITEM(OPT_IN_REQUIRED),
56
+ ERROR_ITEM(REQUEST_EXPIRED),
57
+ ERROR_ITEM(SERVICE_UNAVAILABLE),
58
+ ERROR_ITEM(THROTTLING),
59
+ ERROR_ITEM(VALIDATION),
60
+ ERROR_ITEM(ACCESS_DENIED),
61
+ ERROR_ITEM(RESOURCE_NOT_FOUND),
62
+ ERROR_ITEM(UNRECOGNIZED_CLIENT),
63
+ ERROR_ITEM(MALFORMED_QUERY_STRING),
64
+ ERROR_ITEM(SLOW_DOWN),
65
+ ERROR_ITEM(REQUEST_TIME_TOO_SKEWED),
66
+ ERROR_ITEM(INVALID_SIGNATURE),
67
+ ERROR_ITEM(SIGNATURE_DOES_NOT_MATCH),
68
+ ERROR_ITEM(INVALID_ACCESS_KEY_ID),
69
+ ERROR_ITEM(REQUEST_TIMEOUT),
70
+ ERROR_ITEM(NETWORK_CONNECTION),
71
+ ERROR_ITEM(UNKNOWN),
72
+ ERROR_ITEM(BUCKET_ALREADY_EXISTS),
73
+ ERROR_ITEM(BUCKET_ALREADY_OWNED_BY_YOU),
74
+ ERROR_ITEM(NO_SUCH_BUCKET),
75
+ ERROR_ITEM(NO_SUCH_KEY),
76
+ ERROR_ITEM(NO_SUCH_UPLOAD),
77
+ ERROR_ITEM(OBJECT_ALREADY_IN_ACTIVE_TIER),
78
+ ERROR_ITEM(OBJECT_NOT_IN_ACTIVE_TIER),
79
+ };
80
+
81
+ std::list<std::pair<int, std::string>> get_error_list()
82
+ {
83
+ return ERROR_LIST;
84
+ }
85
+ static Aws::SDKOptions options;
86
+
87
+ static std::unordered_map<std::string, Aws::Utils::Logging::LogLevel> log_level_map = {
88
+ {"off", Aws::Utils::Logging::LogLevel::Off},
89
+ {"fatal", Aws::Utils::Logging::LogLevel::Fatal},
90
+ {"error", Aws::Utils::Logging::LogLevel::Error},
91
+ {"warn", Aws::Utils::Logging::LogLevel::Warn},
92
+ {"info", Aws::Utils::Logging::LogLevel::Info},
93
+ {"debug", Aws::Utils::Logging::LogLevel::Debug},
94
+ {"trace", Aws::Utils::Logging::LogLevel::Trace},
95
+ };
96
+
97
+ void init_api(const std::string &level)
98
+ {
99
+ auto itr = log_level_map.find(level);
100
+ if (itr != log_level_map.end())
101
+ {
102
+ options.loggingOptions.logLevel = itr->second;
103
+ }
104
+ Aws::InitAPI(options);
105
+ }
106
+
107
+ void shutdown_api()
108
+ {
109
+ Aws::ShutdownAPI(options);
110
+ }
111
+
112
+ S3Client::S3Client(const std::string &ak, const std::string &sk, const std::string &endpoint, bool verify_ssl, bool enable_https, bool use_dual_stack, int threads_num)
113
+ {
114
+ Aws::Client::ClientConfiguration config;
115
+ config.endpointOverride = endpoint.c_str();
116
+ config.verifySSL = verify_ssl;
117
+ if (enable_https)
118
+ {
119
+ config.scheme = Aws::Http::Scheme::HTTPS;
120
+ }
121
+ else
122
+ {
123
+ config.scheme = Aws::Http::Scheme::HTTP;
124
+ }
125
+
126
+ config.useDualStack = use_dual_stack;
127
+ Aws::Auth::AWSCredentials cred = Aws::Auth::AWSCredentials(ak.c_str(), sk.c_str());
128
+ // this->client = new Aws::S3::S3Client(cred, config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
129
+ this->client = Aws::MakeShared<Aws::S3::S3Client>("S3Client", cred, config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false);
130
+
131
+ // multipart使用
132
+ this->threads_num = threads_num;
133
+
134
+ return;
135
+ }
136
+
137
+ S3Client::~S3Client()
138
+ {
139
+ // delete this->client;
140
+ }
141
+
142
+ int S3Client::get_object(const std::string &bucket, const std::string &key, int &error_type, std::string &error_message, std::string &result, std::string &range)
143
+ {
144
+ Aws::S3::Model::GetObjectRequest object_request;
145
+ object_request.SetBucket(bucket.c_str());
146
+ object_request.SetKey(key.c_str());
147
+ if ("" != range)
148
+ object_request.SetRange(std::string("bytes="+range).c_str());
149
+
150
+ /* 这里真正请求数据 */
151
+ auto get_object_outcome = this->client->GetObject(object_request);
152
+
153
+ if (get_object_outcome.IsSuccess())
154
+ {
155
+ auto &&get_result = get_object_outcome.GetResultWithOwnership();
156
+ auto content_length = get_result.GetContentLength();
157
+ auto &retrieved_object = get_result.GetBody();
158
+
159
+ result.resize(content_length);
160
+ long read_offset = 0;
161
+ while (true)
162
+ {
163
+ retrieved_object.read(&result[read_offset], content_length - read_offset);
164
+ auto read_count = retrieved_object.gcount();
165
+ read_offset += read_count;
166
+ if (read_offset >= content_length || read_count == 0)
167
+ {
168
+ break;
169
+ }
170
+ }
171
+ if (read_offset != content_length)
172
+ {
173
+ //todo
174
+ }
175
+
176
+ return 0;
177
+ }
178
+ else
179
+ {
180
+ auto error = get_object_outcome.GetError();
181
+ auto message = error.GetMessage();
182
+ if (!message.empty())
183
+ {
184
+ error_message = message.c_str();
185
+ }
186
+ error_type = static_cast<int>(error.GetErrorType());
187
+ return -1;
188
+ }
189
+ return 0;
190
+ }
191
+
192
+ int S3Client::put_object(const std::string &bucket, const std::string &key, const std::string &data, int &error_type, std::string &error_message)
193
+ {
194
+ const std::shared_ptr<Aws::IOStream> input_data = Aws::MakeShared<Aws::StringStream>("");
195
+ *input_data << data;
196
+
197
+ Aws::S3::Model::PutObjectRequest request;
198
+ request.SetBucket(bucket.c_str());
199
+ request.SetKey(key.c_str());
200
+ request.SetBody(input_data);
201
+ auto outcome = this->client->PutObject(request);
202
+ if (outcome.IsSuccess())
203
+ {
204
+ return 0;
205
+ }
206
+ else
207
+ {
208
+ auto error = outcome.GetError();
209
+ auto message = error.GetMessage();
210
+ if (!message.empty())
211
+ {
212
+ error_message = message.c_str();
213
+ }
214
+ error_type = static_cast<int>(error.GetErrorType());
215
+ return -1;
216
+ }
217
+ }
218
+
219
+
220
+ int S3Client::multipart_download_concurrency(const std::string &bucket, const std::string &key, const std::string &filename, int &error_type, std::string &error_message)
221
+ {
222
+ auto executor = Aws::MakeShared<Aws::Utils::Threading::PooledThreadExecutor>("executor", this->threads_num);
223
+ Aws::Transfer::TransferManagerConfiguration transfer_config(executor.get());
224
+ transfer_config.s3Client = this->client;
225
+ auto transfer_manager = Aws::Transfer::TransferManager::Create(transfer_config);
226
+
227
+ auto downloadHandle = transfer_manager->DownloadFile(bucket.c_str(),
228
+ key.c_str(),
229
+ [=](){
230
+ return Aws::New<Aws::FStream>("S3_DOWNLOAD", filename.c_str(), std::ios_base::out | std::ios_base::binary);
231
+ });
232
+
233
+ downloadHandle->WaitUntilFinished();// Block calling thread until download is complete.
234
+ auto downStat = downloadHandle->GetStatus();
235
+ if (downStat != Aws::Transfer::TransferStatus::COMPLETED)
236
+ {
237
+ auto error = downloadHandle->GetLastError();
238
+ error_message = error.GetMessage().c_str();
239
+ error_type = static_cast<int>(error.GetErrorType());
240
+ return -1;
241
+ }
242
+
243
+ return 0;
244
+ }
245
+
246
+ int S3Client::multipart_upload_concurrency(const std::string bucket, const std::string key, const std::string filename, int &error_type, std::string &error_message)
247
+ {
248
+ auto executor = Aws::MakeShared<Aws::Utils::Threading::PooledThreadExecutor>("executor", this->threads_num);
249
+ Aws::Transfer::TransferManagerConfiguration transfer_config(executor.get());
250
+ transfer_config.s3Client = this->client;
251
+ auto transfer_manager = Aws::Transfer::TransferManager::Create(transfer_config);
252
+
253
+ auto uploadHandle = transfer_manager->UploadFile(filename.c_str(), bucket.c_str(), key.c_str(), "text/plain", Aws::Map<Aws::String, Aws::String>());
254
+ uploadHandle->WaitUntilFinished();
255
+ bool success = uploadHandle->GetStatus() == Aws::Transfer::TransferStatus::COMPLETED;
256
+
257
+ if (!success)
258
+ {
259
+ auto error = uploadHandle->GetLastError();
260
+ error_message = error.GetMessage().c_str();
261
+ error_type = static_cast<int>(error.GetErrorType());
262
+ return -1;
263
+ }
264
+ else
265
+ {
266
+ return 0;
267
+ }
268
+ }
269
+
270
+ int S3Client::delete_obj(const std::string &bucket, const std::string &key, int error_type, std::string &error_message)
271
+ {
272
+ Aws::S3::Model::DeleteObjectRequest request;
273
+ request.WithBucket(bucket.c_str()).WithKey(key.c_str());
274
+ auto outcome = this->client->DeleteObject(request);
275
+
276
+ return outcome.IsSuccess();
277
+ }
278
+
279
+ int S3Client::contains(const std::string &bucket, const std::string &key, int error_type, std::string &error_message)
280
+ {
281
+ Aws::S3::Model::HeadObjectRequest request;
282
+ request.WithBucket(bucket.c_str()).WithKey(key.c_str());
283
+ const auto response = client->HeadObject(request);
284
+ auto outcome = this->client->HeadObject(request);
285
+
286
+ return outcome.IsSuccess();
287
+ }
288
+
289
+ std::vector<std::string> S3Client::list(const std::string &bucket, const std::string &key, int error_type, std::string &error_message)
290
+ {
291
+ Aws::S3::Model::ListObjectsRequest request;
292
+ request.WithBucket(bucket.c_str()).WithPrefix(key.c_str());
293
+ std::vector<std::string> res;
294
+ auto outcome = this->client->ListObjects(request);
295
+
296
+ if (!outcome.IsSuccess()) {
297
+ auto error = outcome.GetError();
298
+ auto message = error.GetMessage();
299
+ if (!message.empty())
300
+ {
301
+ error_message = message.c_str();
302
+ }
303
+ error_type = static_cast<int>(error.GetErrorType());
304
+ return res;
305
+ }
306
+ else {
307
+ Aws::Vector<Aws::S3::Model::Object> objects =
308
+ outcome.GetResult().GetContents();
309
+
310
+ for (Aws::S3::Model::Object &object: objects) {
311
+ std::string full_path = object.GetKey().c_str();
312
+ int pos = full_path.find('/', key.size());
313
+ int len = pos - key.size();
314
+ if (-1 != pos) {
315
+ len += 1;
316
+ }
317
+ std::string first_path = full_path.substr(key.size(), len);
318
+ res.push_back(first_path);
319
+ }
320
+ }
321
+
322
+ return res;
323
+ }
petrel-sdk/petrel-oss-cpp-sdk/s3client.h ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <aws/core/auth/AWSCredentialsProvider.h>
2
+ #include <aws/s3/model/GetObjectRequest.h>
3
+ #include <aws/s3/model/ListObjectsRequest.h>
4
+ #include <aws/s3/model/Object.h>
5
+ #include <aws/s3/S3Client.h>
6
+ #include <aws/core/Aws.h>
7
+ #include <chrono>
8
+ #include <list>
9
+ #include <vector>
10
+ #include <string>
11
+ #include <fstream>
12
+ #include <aws/s3/model/CompletedPart.h>
13
+ #include <aws/transfer/TransferManager.h>
14
+
15
+ class S3Client
16
+ {
17
+ public:
18
+ S3Client(const std::string &ak, const std::string &sk, const std::string &endpoint, bool verify_ssl, bool enable_https, bool use_dual_stack, int threads_num);
19
+ ~S3Client();
20
+
21
+ int get_object(const std::string &bucket, const std::string &key, int &error_type, std::string &error_message, std::string &result, std::string &range);
22
+ int multipart_download_concurrency(const std::string &bucket, const std::string &key, const std::string &filename, int &error_type, std::string &error_message);
23
+ int put_object(const std::string &bucket, const std::string &key, const std::string &data, int &error_type, std::string &error_message);
24
+ int multipart_upload_concurrency(const std::string bucket, const std::string key, const std::string filename, int &error_type, std::string &error_message);
25
+ int delete_obj(const std::string &bucket, const std::string &key, int error_type, std::string &error_message);
26
+ int contains(const std::string &bucket, const std::string &key, int error_type, std::string &error_message);
27
+ std::vector<std::string> list(const std::string &bucket, const std::string &key, int error_type, std::string &error_message);
28
+
29
+ private:
30
+ std::shared_ptr<Aws::S3::S3Client> client;
31
+ int threads_num;
32
+ };
33
+
34
// Initialises the AWS SDK; `level` is a log-level name such as "off" or
// "debug".
void init_api(const std::string &level);

// Shuts the AWS SDK down.
void shutdown_api();

// Returns the table of (error code, error name) pairs.
std::list<std::pair<int, std::string>> get_error_list();
petrel-sdk/petrel-oss-cpp-sdk/s3client.pxd ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from libcpp.string cimport string
2
+ from libcpp.list cimport list
3
+ from libcpp.pair cimport pair
4
+ from libcpp.vector cimport vector
5
+
6
+
7
# Cython declarations for the C++ API in s3client.h.
cdef extern from "s3client.h":
    cdef cppclass S3Client:
        S3Client(
            const string &ak,
            const string &sk,
            const string &endpoint,
            bint verify_ssl,
            bint enable_https,
            bint use_dual_stack,
            int threads_num,
        ) except +

        # error_type / error_message are out-parameters in these four.
        int get_object(const string &bucket, const string &key, int &error_type, string &error_message, string &result, string &range) nogil
        int multipart_download_concurrency(const string &bucket, const string &key, const string &filename, int &error_type, string &error_message) nogil
        int put_object(const string &bucket, const string &key, const string &data, int &error_type, string &error_message) nogil
        int multipart_upload_concurrency(const string &bucket, const string &key, const string &filename, int &error_type, string &error_message) nogil

        # error_type is passed by value here; error_message is a mutable
        # reference, matching the C++ header.
        int delete_obj(const string &bucket, const string &key, int error_type, string &error_message) nogil
        int contains(const string &bucket, const string &key, int error_type, string &error_message) nogil
        vector[string] list(const string &bucket, const string &key, int error_type, string &error_message) nogil

    # The argument is the log-level name, as in the C++ declaration.
    cdef void init_api(const string &level)

    cdef void shutdown_api()

    cdef list[pair[int, string]] get_error_list()
petrel-sdk/petrel-oss-cpp-sdk/setup.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup
2
+ from Cython.Build import cythonize
3
+ from distutils.extension import Extension
4
+ from Cython.Distutils import build_ext
5
+ from glob import glob
6
+ from os.path import basename
7
+
8
# One include directory per AWS SDK component found under aws-sdk-cpp/.
include_dirs = glob('aws-sdk-cpp/aws-cpp-sdk-*/include')

# Every bundled AWS shared object, referenced by file name with a leading
# ':' (e.g. ":libaws-cpp-sdk-s3.so").
libraries = [':' + basename(shared_object) for shared_object in glob('libs/libaws-*.so*')]

# The extension is built from the Cython wrapper plus the C++ client and is
# linked against ./libs, which is also recorded as the runtime search path.
pys3client_extension = Extension(
    'pys3client',
    ['pys3client.pyx', 's3client.cpp'],
    language="c++",
    libraries=libraries,
    extra_link_args=["-Wl,-rpath,./libs", "-L./libs"],
)

setup(
    cmdclass={'build_ext': build_ext},
    include_dirs=include_dirs,
    ext_modules=[pys3client_extension],
)
petrel-sdk/petrel-oss-cpp-sdk/setup.py.bak ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup
2
+ from Cython.Build import cythonize
3
+ from distutils.extension import Extension
4
+ from Cython.Distutils import build_ext
5
+ from glob import glob
6
+ from os.path import basename
7
+
8
# One include directory per AWS SDK component found under aws-sdk-cpp/.
include_dirs = glob('aws-sdk-cpp/aws-cpp-sdk-*/include')

# Every bundled AWS shared object, referenced by file name with a leading
# ':' (e.g. ":libaws-cpp-sdk-s3.so").
libraries = [':' + basename(shared_object) for shared_object in glob('libs/libaws-*.so*')]

# The extension is built from the Cython wrapper plus the C++ client and is
# linked against ./libs, which is also recorded as the runtime search path.
pys3client_extension = Extension(
    'pys3client',
    ['pys3client.pyx', 's3client.cpp'],
    language="c++",
    libraries=libraries,
    extra_link_args=["-Wl,-rpath,./libs", "-L./libs"],
)

setup(
    cmdclass={'build_ext': build_ext},
    include_dirs=include_dirs,
    ext_modules=[pys3client_extension],
)
petrel-sdk/petrel-oss-python-sdk/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ petrel_client/version.py
2
+ .vscode
3
+ **/__pycache__
4
+ **/*.pyc
5
+ **/*.egg-info
6
+ venv
7
+ dist
8
+ build
petrel-sdk/petrel-oss-python-sdk/.gitlab-ci.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image: registry.sensetime.com/library/python:3.6
2
+
3
+
4
+ before_script:
5
+ - python -V
6
+ - python -m venv venv
7
+ - source ./venv/bin/activate
8
+ - python setup.py sdist
9
+ - pip install dist/*
10
+
11
+ stages:
12
+ - flake8
13
+ - unit_test
14
+
15
+ flake8:
16
+ stage: flake8
17
+ script:
18
+ - pip install flake8
19
+ - flake8 --ignore E501 --exclude '.git,.tox,*.egg-info,venv,scripts,tests,examples' . # todo remove scripts,tests,examples
20
+
21
+ test:
22
+ stage: unit_test
23
+ script:
24
+ - python tests/run_test.py
petrel-sdk/petrel-oss-python-sdk/INSTALL ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ python setup.py install
2
+