Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' Github Action
Browse files
- .editorconfig +25 -0
- app.py +60 -26
- poetry.lock +80 -1
- pyproject.toml +3 -0
.editorconfig
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# see http://editorconfig.org
|
2 |
+
|
3 |
+
# Top-level config
|
4 |
+
root = true
|
5 |
+
|
6 |
+
|
7 |
+
# All files
|
8 |
+
[*]
|
9 |
+
charset = utf-8
|
10 |
+
indent_style = space
|
11 |
+
indent_size = 4
|
12 |
+
end_of_line = lf
|
13 |
+
insert_final_newline = true
|
14 |
+
trim_trailing_whitespace = true
|
15 |
+
max_line_length = 100
|
16 |
+
|
17 |
+
|
18 |
+
# YAML files
|
19 |
+
[*.{yml,yaml}]
|
20 |
+
indent_size = 2
|
21 |
+
|
22 |
+
|
23 |
+
# Windows scripts
|
24 |
+
[*.{bat,cmd,ps1}]
|
25 |
+
end_of_line = crlf
|
app.py
CHANGED
@@ -46,7 +46,7 @@ def generate_iscc(file):
|
|
46 |
thumbnail = Image.open(io.BytesIO(data))
|
47 |
metadata = imeta.dict(exclude_unset=False, by_alias=True)
|
48 |
if metadata.get("thumbnail"):
|
49 |
-
del metadata[
|
50 |
return imeta.iscc, thumbnail, metadata
|
51 |
|
52 |
|
@@ -72,31 +72,48 @@ def generate_text_code(text, chunk_size):
|
|
72 |
end = start + size
|
73 |
chunks.append(no_nl(cleaned[start:end]))
|
74 |
start = end
|
75 |
-
result = [
|
|
|
|
|
76 |
idk.sdk_opts.text_avg_chunk_size = original_chunk_size
|
77 |
return result
|
78 |
|
79 |
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
82 |
## π ISCC-CODE Generator - The DNA of digital content
|
83 |
-
"""
|
|
|
84 |
with gr.Row():
|
85 |
with gr.Column(scale=2):
|
86 |
in_file = gr.File(label="Media File")
|
87 |
with gr.Column(scale=1):
|
88 |
-
out_thumbnail = gr.Image(
|
|
|
|
|
89 |
with gr.Row():
|
90 |
out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
|
91 |
with gr.Row():
|
92 |
out_meta = gr.Json(label="Metadata")
|
93 |
-
in_file.change(
|
|
|
|
|
94 |
|
|
|
|
|
|
|
95 |
|
96 |
-
with gr.Blocks(
|
97 |
-
gr.Markdown(
|
|
|
98 |
## π A Codec for Self-Describing Compact Binary Codes
|
99 |
-
"""
|
|
|
100 |
with gr.Row():
|
101 |
with gr.Column():
|
102 |
in_iscc = gr.Text(
|
@@ -139,40 +156,57 @@ with gr.Blocks("ENCODING") as demo_decode:
|
|
139 |
info="BASE58-BTC",
|
140 |
show_copy_button=True,
|
141 |
)
|
142 |
-
in_iscc.change(
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
## π Content Defined Chunking for Shift-Resistant Text and Data Segmentation
|
152 |
-
"""
|
|
|
153 |
with gr.Row():
|
154 |
with gr.Column():
|
155 |
in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
|
156 |
in_chunksize = gr.Slider(
|
157 |
label="Chunk Size",
|
158 |
info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
|
159 |
-
minimum=32,
|
160 |
-
|
|
|
|
|
|
|
161 |
|
162 |
out_text = gr.HighlightedText(
|
163 |
label="Chunked Text Output",
|
164 |
interactive=False,
|
165 |
elem_id="chunked-text",
|
166 |
)
|
167 |
-
in_text.change(
|
168 |
-
|
|
|
|
|
|
|
|
|
169 |
|
170 |
demo = gr.TabbedInterface(
|
171 |
title="▶️ ISCC Playground",
|
172 |
-
interface_list=[demo_generate, demo_decode,
|
173 |
tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
|
174 |
css=custom_css,
|
175 |
)
|
176 |
|
177 |
-
if __name__ ==
|
178 |
demo.launch()
|
|
|
46 |
thumbnail = Image.open(io.BytesIO(data))
|
47 |
metadata = imeta.dict(exclude_unset=False, by_alias=True)
|
48 |
if metadata.get("thumbnail"):
|
49 |
+
del metadata["thumbnail"]
|
50 |
return imeta.iscc, thumbnail, metadata
|
51 |
|
52 |
|
|
|
72 |
end = start + size
|
73 |
chunks.append(no_nl(cleaned[start:end]))
|
74 |
start = end
|
75 |
+
result = [
|
76 |
+
(chunk, f"{size}:{feat}") for chunk, size, feat in zip(chunks, sizes, features)
|
77 |
+
]
|
78 |
idk.sdk_opts.text_avg_chunk_size = original_chunk_size
|
79 |
return result
|
80 |
|
81 |
|
82 |
+
####################################################################################################
|
83 |
+
# TAB ISCC-CODE #
|
84 |
+
####################################################################################################
|
85 |
+
|
86 |
+
with gr.Blocks() as demo_generate:
|
87 |
+
gr.Markdown(
|
88 |
+
"""
|
89 |
## π ISCC-CODE Generator - The DNA of digital content
|
90 |
+
"""
|
91 |
+
)
|
92 |
with gr.Row():
|
93 |
with gr.Column(scale=2):
|
94 |
in_file = gr.File(label="Media File")
|
95 |
with gr.Column(scale=1):
|
96 |
+
out_thumbnail = gr.Image(
|
97 |
+
label="Extracted Thumbnail", elem_classes=["fixed-height"]
|
98 |
+
)
|
99 |
with gr.Row():
|
100 |
out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
|
101 |
with gr.Row():
|
102 |
out_meta = gr.Json(label="Metadata")
|
103 |
+
in_file.change(
|
104 |
+
generate_iscc, inputs=[in_file], outputs=[out_iscc, out_thumbnail, out_meta]
|
105 |
+
)
|
106 |
|
107 |
+
####################################################################################################
|
108 |
+
# TAB ENCODING #
|
109 |
+
####################################################################################################
|
110 |
|
111 |
+
with gr.Blocks() as demo_decode:
|
112 |
+
gr.Markdown(
|
113 |
+
"""
|
114 |
## π A Codec for Self-Describing Compact Binary Codes
|
115 |
+
"""
|
116 |
+
)
|
117 |
with gr.Row():
|
118 |
with gr.Column():
|
119 |
in_iscc = gr.Text(
|
|
|
156 |
info="BASE58-BTC",
|
157 |
show_copy_button=True,
|
158 |
)
|
159 |
+
in_iscc.change(
|
160 |
+
explain_iscc,
|
161 |
+
inputs=[in_iscc],
|
162 |
+
outputs=[
|
163 |
+
out_canonical,
|
164 |
+
out_human,
|
165 |
+
out_decomposed,
|
166 |
+
out_multiformat,
|
167 |
+
],
|
168 |
+
)
|
169 |
+
|
170 |
+
####################################################################################################
|
171 |
+
# CHUNKING #
|
172 |
+
####################################################################################################
|
173 |
+
|
174 |
+
with gr.Blocks() as demo_chunking:
|
175 |
+
gr.Markdown(
|
176 |
+
"""
|
177 |
## π Content Defined Chunking for Shift-Resistant Text and Data Segmentation
|
178 |
+
"""
|
179 |
+
)
|
180 |
with gr.Row():
|
181 |
with gr.Column():
|
182 |
in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
|
183 |
in_chunksize = gr.Slider(
|
184 |
label="Chunk Size",
|
185 |
info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
|
186 |
+
minimum=32,
|
187 |
+
maximum=2048,
|
188 |
+
step=32,
|
189 |
+
value=64,
|
190 |
+
)
|
191 |
|
192 |
out_text = gr.HighlightedText(
|
193 |
label="Chunked Text Output",
|
194 |
interactive=False,
|
195 |
elem_id="chunked-text",
|
196 |
)
|
197 |
+
in_text.change(
|
198 |
+
generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
|
199 |
+
)
|
200 |
+
in_chunksize.change(
|
201 |
+
generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
|
202 |
+
)
|
203 |
|
204 |
demo = gr.TabbedInterface(
|
205 |
title="▶️ ISCC Playground",
|
206 |
+
interface_list=[demo_generate, demo_decode, demo_chunking],
|
207 |
tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
|
208 |
css=custom_css,
|
209 |
)
|
210 |
|
211 |
+
if __name__ == "__main__":
|
212 |
demo.launch()
|
poetry.lock
CHANGED
@@ -232,6 +232,52 @@ files = [
|
|
232 |
{file = "bitarray-2.9.2.tar.gz", hash = "sha256:a8f286a51a32323715d77755ed959f94bef13972e9a2fe71b609e40e6d27957e"},
|
233 |
]
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
[[package]]
|
236 |
name = "blake3"
|
237 |
version = "0.3.4"
|
@@ -1470,6 +1516,17 @@ files = [
|
|
1470 |
{file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"},
|
1471 |
]
|
1472 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1473 |
[[package]]
|
1474 |
name = "numpy"
|
1475 |
version = "1.26.3"
|
@@ -1657,6 +1714,17 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
|
|
1657 |
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
|
1658 |
xml = ["lxml (>=4.9.2)"]
|
1659 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1660 |
[[package]]
|
1661 |
name = "pillow"
|
1662 |
version = "10.2.0"
|
@@ -2688,6 +2756,17 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""
|
|
2688 |
[package.extras]
|
2689 |
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
|
2690 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2691 |
[[package]]
|
2692 |
name = "tomlkit"
|
2693 |
version = "0.12.0"
|
@@ -3050,4 +3129,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|
3050 |
[metadata]
|
3051 |
lock-version = "2.0"
|
3052 |
python-versions = "^3.9"
|
3053 |
-
content-hash = "
|
|
|
232 |
{file = "bitarray-2.9.2.tar.gz", hash = "sha256:a8f286a51a32323715d77755ed959f94bef13972e9a2fe71b609e40e6d27957e"},
|
233 |
]
|
234 |
|
235 |
+
[[package]]
|
236 |
+
name = "black"
|
237 |
+
version = "23.12.1"
|
238 |
+
description = "The uncompromising code formatter."
|
239 |
+
optional = false
|
240 |
+
python-versions = ">=3.8"
|
241 |
+
files = [
|
242 |
+
{file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"},
|
243 |
+
{file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"},
|
244 |
+
{file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"},
|
245 |
+
{file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"},
|
246 |
+
{file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"},
|
247 |
+
{file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"},
|
248 |
+
{file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"},
|
249 |
+
{file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"},
|
250 |
+
{file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"},
|
251 |
+
{file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"},
|
252 |
+
{file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"},
|
253 |
+
{file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"},
|
254 |
+
{file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"},
|
255 |
+
{file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"},
|
256 |
+
{file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"},
|
257 |
+
{file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"},
|
258 |
+
{file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"},
|
259 |
+
{file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"},
|
260 |
+
{file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"},
|
261 |
+
{file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"},
|
262 |
+
{file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"},
|
263 |
+
{file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"},
|
264 |
+
]
|
265 |
+
|
266 |
+
[package.dependencies]
|
267 |
+
click = ">=8.0.0"
|
268 |
+
mypy-extensions = ">=0.4.3"
|
269 |
+
packaging = ">=22.0"
|
270 |
+
pathspec = ">=0.9.0"
|
271 |
+
platformdirs = ">=2"
|
272 |
+
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
273 |
+
typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
|
274 |
+
|
275 |
+
[package.extras]
|
276 |
+
colorama = ["colorama (>=0.4.3)"]
|
277 |
+
d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
|
278 |
+
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
279 |
+
uvloop = ["uvloop (>=0.15.2)"]
|
280 |
+
|
281 |
[[package]]
|
282 |
name = "blake3"
|
283 |
version = "0.3.4"
|
|
|
1516 |
{file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"},
|
1517 |
]
|
1518 |
|
1519 |
+
[[package]]
|
1520 |
+
name = "mypy-extensions"
|
1521 |
+
version = "1.0.0"
|
1522 |
+
description = "Type system extensions for programs checked with the mypy type checker."
|
1523 |
+
optional = false
|
1524 |
+
python-versions = ">=3.5"
|
1525 |
+
files = [
|
1526 |
+
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
|
1527 |
+
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
1528 |
+
]
|
1529 |
+
|
1530 |
[[package]]
|
1531 |
name = "numpy"
|
1532 |
version = "1.26.3"
|
|
|
1714 |
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
|
1715 |
xml = ["lxml (>=4.9.2)"]
|
1716 |
|
1717 |
+
[[package]]
|
1718 |
+
name = "pathspec"
|
1719 |
+
version = "0.12.1"
|
1720 |
+
description = "Utility library for gitignore style pattern matching of file paths."
|
1721 |
+
optional = false
|
1722 |
+
python-versions = ">=3.8"
|
1723 |
+
files = [
|
1724 |
+
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
|
1725 |
+
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
|
1726 |
+
]
|
1727 |
+
|
1728 |
[[package]]
|
1729 |
name = "pillow"
|
1730 |
version = "10.2.0"
|
|
|
2756 |
[package.extras]
|
2757 |
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
|
2758 |
|
2759 |
+
[[package]]
|
2760 |
+
name = "tomli"
|
2761 |
+
version = "2.0.1"
|
2762 |
+
description = "A lil' TOML parser"
|
2763 |
+
optional = false
|
2764 |
+
python-versions = ">=3.7"
|
2765 |
+
files = [
|
2766 |
+
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
2767 |
+
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
2768 |
+
]
|
2769 |
+
|
2770 |
[[package]]
|
2771 |
name = "tomlkit"
|
2772 |
version = "0.12.0"
|
|
|
3129 |
[metadata]
|
3130 |
lock-version = "2.0"
|
3131 |
python-versions = "^3.9"
|
3132 |
+
content-hash = "003507cb038ece514b8afaf4896b719885450817f0a63852ab74a09357be2894"
|
pyproject.toml
CHANGED
@@ -11,6 +11,9 @@ python = "^3.9"
|
|
11 |
gradio = "^4.12.0"
|
12 |
iscc-sdk = "^0.6.0"
|
13 |
|
|
|
|
|
|
|
14 |
[build-system]
|
15 |
requires = ["poetry-core"]
|
16 |
build-backend = "poetry.core.masonry.api"
|
|
|
11 |
gradio = "^4.12.0"
|
12 |
iscc-sdk = "^0.6.0"
|
13 |
|
14 |
+
[tool.poetry.group.dev.dependencies]
|
15 |
+
black = "^23.12.1"
|
16 |
+
|
17 |
[build-system]
|
18 |
requires = ["poetry-core"]
|
19 |
build-backend = "poetry.core.masonry.api"
|