titusz committed on
Commit
a728009
•
1 Parent(s): 909172d

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (4) hide show
  1. .editorconfig +25 -0
  2. app.py +60 -26
  3. poetry.lock +80 -1
  4. pyproject.toml +3 -0
.editorconfig ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # see http://editorconfig.org
2
+
3
+ # Top-level config
4
+ root = true
5
+
6
+
7
+ # All files
8
+ [*]
9
+ charset = utf-8
10
+ indent_style = space
11
+ indent_size = 4
12
+ end_of_line = lf
13
+ insert_final_newline = true
14
+ trim_trailing_whitespace = true
15
+ max_line_length = 100
16
+
17
+
18
+ # YAML files
19
+ [*.{yml,yaml}]
20
+ indent_size = 2
21
+
22
+
23
+ # Windows scripts
24
+ [*.{bat,cmd,ps1}]
25
+ end_of_line = crlf
app.py CHANGED
@@ -46,7 +46,7 @@ def generate_iscc(file):
46
  thumbnail = Image.open(io.BytesIO(data))
47
  metadata = imeta.dict(exclude_unset=False, by_alias=True)
48
  if metadata.get("thumbnail"):
49
- del metadata['thumbnail']
50
  return imeta.iscc, thumbnail, metadata
51
 
52
 
@@ -72,31 +72,48 @@ def generate_text_code(text, chunk_size):
72
  end = start + size
73
  chunks.append(no_nl(cleaned[start:end]))
74
  start = end
75
- result = [(chunk, f"{size}:{feat}") for chunk, size, feat in zip(chunks, sizes, features)]
 
 
76
  idk.sdk_opts.text_avg_chunk_size = original_chunk_size
77
  return result
78
 
79
 
80
- with gr.Blocks(title="ISCC-CODE") as demo_generate:
81
- gr.Markdown("""
 
 
 
 
 
82
  ## 🌟 ISCC-CODE Generator - The DNA of digital content
83
- """)
 
84
  with gr.Row():
85
  with gr.Column(scale=2):
86
  in_file = gr.File(label="Media File")
87
  with gr.Column(scale=1):
88
- out_thumbnail = gr.Image(label="Extracted Thumbnail", elem_classes=["fixed-height"])
 
 
89
  with gr.Row():
90
  out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
91
  with gr.Row():
92
  out_meta = gr.Json(label="Metadata")
93
- in_file.change(generate_iscc, inputs=[in_file], outputs=[out_iscc, out_thumbnail, out_meta])
 
 
94
 
 
 
 
95
 
96
- with gr.Blocks("ENCODING") as demo_decode:
97
- gr.Markdown("""
 
98
  ## 🌟 A Codec for Self-Describing Compact Binary Codes
99
- """)
 
100
  with gr.Row():
101
  with gr.Column():
102
  in_iscc = gr.Text(
@@ -139,40 +156,57 @@ with gr.Blocks("ENCODING") as demo_decode:
139
  info="BASE58-BTC",
140
  show_copy_button=True,
141
  )
142
- in_iscc.change(explain_iscc, inputs=[in_iscc], outputs=[
143
- out_canonical,
144
- out_human,
145
- out_decomposed,
146
- out_multiformat,
147
- ])
148
-
149
- with gr.Blocks(title="CHUNKING") as demo_text_code:
150
- gr.Markdown("""
 
 
 
 
 
 
 
 
 
151
  ## 🌟 Content Defined Chunking for Shift-Resistant Text and Data Segmentation
152
- """)
 
153
  with gr.Row():
154
  with gr.Column():
155
  in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
156
  in_chunksize = gr.Slider(
157
  label="Chunk Size",
158
  info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
159
- minimum=32, maximum=2048, step=32,
160
- value=64)
 
 
 
161
 
162
  out_text = gr.HighlightedText(
163
  label="Chunked Text Output",
164
  interactive=False,
165
  elem_id="chunked-text",
166
  )
167
- in_text.change(generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text])
168
- in_chunksize.change(generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text])
 
 
 
 
169
 
170
  demo = gr.TabbedInterface(
171
  title="▢️ ISCC Playground",
172
- interface_list=[demo_generate, demo_decode, demo_text_code],
173
  tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
174
  css=custom_css,
175
  )
176
 
177
- if __name__ == '__main__':
178
  demo.launch()
 
46
  thumbnail = Image.open(io.BytesIO(data))
47
  metadata = imeta.dict(exclude_unset=False, by_alias=True)
48
  if metadata.get("thumbnail"):
49
+ del metadata["thumbnail"]
50
  return imeta.iscc, thumbnail, metadata
51
 
52
 
 
72
  end = start + size
73
  chunks.append(no_nl(cleaned[start:end]))
74
  start = end
75
+ result = [
76
+ (chunk, f"{size}:{feat}") for chunk, size, feat in zip(chunks, sizes, features)
77
+ ]
78
  idk.sdk_opts.text_avg_chunk_size = original_chunk_size
79
  return result
80
 
81
 
82
+ ####################################################################################################
83
+ # TAB ISCC-CODE #
84
+ ####################################################################################################
85
+
86
+ with gr.Blocks() as demo_generate:
87
+ gr.Markdown(
88
+ """
89
  ## 🌟 ISCC-CODE Generator - The DNA of digital content
90
+ """
91
+ )
92
  with gr.Row():
93
  with gr.Column(scale=2):
94
  in_file = gr.File(label="Media File")
95
  with gr.Column(scale=1):
96
+ out_thumbnail = gr.Image(
97
+ label="Extracted Thumbnail", elem_classes=["fixed-height"]
98
+ )
99
  with gr.Row():
100
  out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
101
  with gr.Row():
102
  out_meta = gr.Json(label="Metadata")
103
+ in_file.change(
104
+ generate_iscc, inputs=[in_file], outputs=[out_iscc, out_thumbnail, out_meta]
105
+ )
106
 
107
+ ####################################################################################################
108
+ # TAB ENCODING #
109
+ ####################################################################################################
110
 
111
+ with gr.Blocks() as demo_decode:
112
+ gr.Markdown(
113
+ """
114
  ## 🌟 A Codec for Self-Describing Compact Binary Codes
115
+ """
116
+ )
117
  with gr.Row():
118
  with gr.Column():
119
  in_iscc = gr.Text(
 
156
  info="BASE58-BTC",
157
  show_copy_button=True,
158
  )
159
+ in_iscc.change(
160
+ explain_iscc,
161
+ inputs=[in_iscc],
162
+ outputs=[
163
+ out_canonical,
164
+ out_human,
165
+ out_decomposed,
166
+ out_multiformat,
167
+ ],
168
+ )
169
+
170
+ ####################################################################################################
171
+ # CHUNKING #
172
+ ####################################################################################################
173
+
174
+ with gr.Blocks() as demo_chunking:
175
+ gr.Markdown(
176
+ """
177
  ## 🌟 Content Defined Chunking for Shift-Resistant Text and Data Segmentation
178
+ """
179
+ )
180
  with gr.Row():
181
  with gr.Column():
182
  in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
183
  in_chunksize = gr.Slider(
184
  label="Chunk Size",
185
  info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
186
+ minimum=32,
187
+ maximum=2048,
188
+ step=32,
189
+ value=64,
190
+ )
191
 
192
  out_text = gr.HighlightedText(
193
  label="Chunked Text Output",
194
  interactive=False,
195
  elem_id="chunked-text",
196
  )
197
+ in_text.change(
198
+ generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
199
+ )
200
+ in_chunksize.change(
201
+ generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
202
+ )
203
 
204
  demo = gr.TabbedInterface(
205
  title="▢️ ISCC Playground",
206
+ interface_list=[demo_generate, demo_decode, demo_chunking],
207
  tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
208
  css=custom_css,
209
  )
210
 
211
+ if __name__ == "__main__":
212
  demo.launch()
poetry.lock CHANGED
@@ -232,6 +232,52 @@ files = [
232
  {file = "bitarray-2.9.2.tar.gz", hash = "sha256:a8f286a51a32323715d77755ed959f94bef13972e9a2fe71b609e40e6d27957e"},
233
  ]
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  [[package]]
236
  name = "blake3"
237
  version = "0.3.4"
@@ -1470,6 +1516,17 @@ files = [
1470
  {file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"},
1471
  ]
1472
 
 
 
 
 
 
 
 
 
 
 
 
1473
  [[package]]
1474
  name = "numpy"
1475
  version = "1.26.3"
@@ -1657,6 +1714,17 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
1657
  test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
1658
  xml = ["lxml (>=4.9.2)"]
1659
 
 
 
 
 
 
 
 
 
 
 
 
1660
  [[package]]
1661
  name = "pillow"
1662
  version = "10.2.0"
@@ -2688,6 +2756,17 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""
2688
  [package.extras]
2689
  full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
2690
 
 
 
 
 
 
 
 
 
 
 
 
2691
  [[package]]
2692
  name = "tomlkit"
2693
  version = "0.12.0"
@@ -3050,4 +3129,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
3050
  [metadata]
3051
  lock-version = "2.0"
3052
  python-versions = "^3.9"
3053
- content-hash = "7c4666b5f04855126c0fbd9dc39c1b52b3ca2551d338100033f52bb811f46ae7"
 
232
  {file = "bitarray-2.9.2.tar.gz", hash = "sha256:a8f286a51a32323715d77755ed959f94bef13972e9a2fe71b609e40e6d27957e"},
233
  ]
234
 
235
+ [[package]]
236
+ name = "black"
237
+ version = "23.12.1"
238
+ description = "The uncompromising code formatter."
239
+ optional = false
240
+ python-versions = ">=3.8"
241
+ files = [
242
+ {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"},
243
+ {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"},
244
+ {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"},
245
+ {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"},
246
+ {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"},
247
+ {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"},
248
+ {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"},
249
+ {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"},
250
+ {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"},
251
+ {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"},
252
+ {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"},
253
+ {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"},
254
+ {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"},
255
+ {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"},
256
+ {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"},
257
+ {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"},
258
+ {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"},
259
+ {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"},
260
+ {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"},
261
+ {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"},
262
+ {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"},
263
+ {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"},
264
+ ]
265
+
266
+ [package.dependencies]
267
+ click = ">=8.0.0"
268
+ mypy-extensions = ">=0.4.3"
269
+ packaging = ">=22.0"
270
+ pathspec = ">=0.9.0"
271
+ platformdirs = ">=2"
272
+ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
273
+ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
274
+
275
+ [package.extras]
276
+ colorama = ["colorama (>=0.4.3)"]
277
+ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
278
+ jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
279
+ uvloop = ["uvloop (>=0.15.2)"]
280
+
281
  [[package]]
282
  name = "blake3"
283
  version = "0.3.4"
 
1516
  {file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"},
1517
  ]
1518
 
1519
+ [[package]]
1520
+ name = "mypy-extensions"
1521
+ version = "1.0.0"
1522
+ description = "Type system extensions for programs checked with the mypy type checker."
1523
+ optional = false
1524
+ python-versions = ">=3.5"
1525
+ files = [
1526
+ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
1527
+ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
1528
+ ]
1529
+
1530
  [[package]]
1531
  name = "numpy"
1532
  version = "1.26.3"
 
1714
  test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
1715
  xml = ["lxml (>=4.9.2)"]
1716
 
1717
+ [[package]]
1718
+ name = "pathspec"
1719
+ version = "0.12.1"
1720
+ description = "Utility library for gitignore style pattern matching of file paths."
1721
+ optional = false
1722
+ python-versions = ">=3.8"
1723
+ files = [
1724
+ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
1725
+ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
1726
+ ]
1727
+
1728
  [[package]]
1729
  name = "pillow"
1730
  version = "10.2.0"
 
2756
  [package.extras]
2757
  full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
2758
 
2759
+ [[package]]
2760
+ name = "tomli"
2761
+ version = "2.0.1"
2762
+ description = "A lil' TOML parser"
2763
+ optional = false
2764
+ python-versions = ">=3.7"
2765
+ files = [
2766
+ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
2767
+ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
2768
+ ]
2769
+
2770
  [[package]]
2771
  name = "tomlkit"
2772
  version = "0.12.0"
 
3129
  [metadata]
3130
  lock-version = "2.0"
3131
  python-versions = "^3.9"
3132
+ content-hash = "003507cb038ece514b8afaf4896b719885450817f0a63852ab74a09357be2894"
pyproject.toml CHANGED
@@ -11,6 +11,9 @@ python = "^3.9"
11
  gradio = "^4.12.0"
12
  iscc-sdk = "^0.6.0"
13
 
 
 
 
14
  [build-system]
15
  requires = ["poetry-core"]
16
  build-backend = "poetry.core.masonry.api"
 
11
  gradio = "^4.12.0"
12
  iscc-sdk = "^0.6.0"
13
 
14
+ [tool.poetry.group.dev.dependencies]
15
+ black = "^23.12.1"
16
+
17
  [build-system]
18
  requires = ["poetry-core"]
19
  build-backend = "poetry.core.masonry.api"