Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' Github Action
Browse files- app.py +27 -191
- demos/__init__.py +0 -0
- demos/chunker.py +144 -0
- demos/compare.py +287 -0
- demos/generate.py +91 -0
- demos/images1/pope1.jpg +0 -0
- demos/images1/pope1b.jpg +0 -0
- demos/images1/ukbench00000.jpg +0 -0
- demos/images1/ukbench00016.jpg +0 -0
- demos/images1/ukbench00044.jpg +0 -0
- demos/images1/ukbench00052.jpg +0 -0
- demos/images2/pope2.jpg +0 -0
- demos/images2/pope2b.jpg +0 -0
- demos/images2/ukbench00002.jpg +0 -0
- demos/images2/ukbench00017.jpg +0 -0
- demos/images2/ukbench00046.jpg +0 -0
- demos/images2/ukbench00053.jpg +0 -0
- demos/inspect_.py +89 -0
- demos/samples/sample.txt +7 -0
- poetry.lock +0 -0
- pyproject.toml +4 -2
- requirements.txt +4 -1
app.py
CHANGED
@@ -1,212 +1,48 @@
|
|
1 |
-
import io
|
2 |
-
import base64
|
3 |
import gradio as gr
|
4 |
-
import
|
5 |
-
import
|
6 |
-
from
|
7 |
-
|
8 |
-
idk.sdk_opts.image_thumbnail_size = 265
|
9 |
-
idk.sdk_opts.image_thumbnail_quality = 80
|
10 |
-
idk.sdk_opts.granular = True
|
11 |
-
|
12 |
|
13 |
custom_css = """
|
14 |
-
.fixed-height
|
15 |
-
height:
|
16 |
object-fit: contain; /* Scale the image to fit within the element */
|
17 |
}
|
18 |
#chunked-text span.label {
|
19 |
text-transform: none !important;
|
20 |
}
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
"\u000a": "⏎", # Line Feed - Represented by the 'Return' symbol
|
25 |
-
"\u000b": "↨", # Vertical Tab - Represented by the 'Up Down Arrow' symbol
|
26 |
-
"\u000c": "␌", # Form Feed - Unicode Control Pictures representation
|
27 |
-
"\u000d": "↵", # Carriage Return - 'Downwards Arrow with Corner Leftwards' symbol
|
28 |
-
"\u0085": "⤓", # Next Line - 'Downwards Arrow with Double Stroke' symbol
|
29 |
-
"\u2028": "↲", # Line Separator - 'Downwards Arrow with Tip Leftwards' symbol
|
30 |
-
"\u2029": "¶", # Paragraph Separator - Represented by the 'Pilcrow' symbol
|
31 |
}
|
32 |
|
|
|
|
|
|
|
|
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
def generate_iscc(file):
|
41 |
-
imeta = idk.code_iscc(file.name)
|
42 |
-
thumbnail = None
|
43 |
-
if imeta.thumbnail:
|
44 |
-
header, encoded = imeta.thumbnail.split(",", 1)
|
45 |
-
data = base64.b64decode(encoded)
|
46 |
-
thumbnail = Image.open(io.BytesIO(data))
|
47 |
-
metadata = imeta.dict(exclude_unset=False, by_alias=True)
|
48 |
-
if metadata.get("thumbnail"):
|
49 |
-
del metadata["thumbnail"]
|
50 |
-
return imeta.iscc, thumbnail, metadata
|
51 |
-
|
52 |
-
|
53 |
-
def explain_iscc(code):
|
54 |
-
canonical = ic.iscc_normalize(code)
|
55 |
-
human = " - ".join(ic.iscc_explain(code).split("-"))
|
56 |
-
code_obj = ic.Code(canonical)
|
57 |
-
decomposed = " - ".join(ic.iscc_decompose(canonical))
|
58 |
-
multiformat = code_obj.mf_base58btc
|
59 |
-
return canonical, human, decomposed, multiformat
|
60 |
-
|
61 |
-
|
62 |
-
def generate_text_code(text, chunk_size):
|
63 |
-
original_chunk_size = idk.sdk_opts.text_avg_chunk_size
|
64 |
-
idk.sdk_opts.text_avg_chunk_size = chunk_size
|
65 |
-
cleaned = ic.text_clean(text)
|
66 |
-
processed = idk.text_features(cleaned)
|
67 |
-
features = processed["features"]
|
68 |
-
sizes = processed["sizes"]
|
69 |
-
start = 0
|
70 |
-
chunks = []
|
71 |
-
for size in sizes:
|
72 |
-
end = start + size
|
73 |
-
chunks.append(no_nl(cleaned[start:end]))
|
74 |
-
start = end
|
75 |
-
result = [
|
76 |
-
(chunk, f"{size}:{feat}") for chunk, size, feat in zip(chunks, sizes, features)
|
77 |
-
]
|
78 |
-
idk.sdk_opts.text_avg_chunk_size = original_chunk_size
|
79 |
-
return result
|
80 |
-
|
81 |
-
|
82 |
-
####################################################################################################
|
83 |
-
# TAB ISCC-CODE #
|
84 |
-
####################################################################################################
|
85 |
-
|
86 |
-
with gr.Blocks() as demo_generate:
|
87 |
-
gr.Markdown(
|
88 |
-
"""
|
89 |
-
## 🌟 ISCC-CODE Generator - The DNA of digital content
|
90 |
-
"""
|
91 |
-
)
|
92 |
-
with gr.Row():
|
93 |
-
with gr.Column(scale=2):
|
94 |
-
in_file = gr.File(label="Media File")
|
95 |
-
with gr.Column(scale=1):
|
96 |
-
out_thumbnail = gr.Image(
|
97 |
-
label="Extracted Thumbnail", elem_classes=["fixed-height"]
|
98 |
-
)
|
99 |
-
with gr.Row():
|
100 |
-
out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
|
101 |
-
with gr.Row():
|
102 |
-
out_meta = gr.Json(label="Metadata")
|
103 |
-
in_file.change(
|
104 |
-
generate_iscc, inputs=[in_file], outputs=[out_iscc, out_thumbnail, out_meta]
|
105 |
-
)
|
106 |
-
|
107 |
-
####################################################################################################
|
108 |
-
# TAB ENCODING #
|
109 |
-
####################################################################################################
|
110 |
-
|
111 |
-
with gr.Blocks() as demo_decode:
|
112 |
-
gr.Markdown(
|
113 |
-
"""
|
114 |
-
## 🌟 A Codec for Self-Describing Compact Binary Codes
|
115 |
-
"""
|
116 |
-
)
|
117 |
-
with gr.Row():
|
118 |
-
with gr.Column():
|
119 |
-
in_iscc = gr.Text(
|
120 |
-
label="ISCC",
|
121 |
-
info="INPUT ANY VALID ISCC-CODE OR ISCC-UNIT",
|
122 |
-
autofocus=True,
|
123 |
-
)
|
124 |
-
examples = [
|
125 |
-
"ISCC:AAAWN77F727NXSUS", # Meta-Code
|
126 |
-
"bzqaqaal5rvp72lx2thvq", # Multiformat
|
127 |
-
"ISCC:EAASKDNZNYGUUF5A", # Text-Code
|
128 |
-
"ISCC:GABW5LUBVP23N3DOD7PPINHT5JKBI", # Data-Code 128 bits
|
129 |
-
"ISCC:KUAG5LUBVP23N3DOHCHWIYGXVN7ZS", # ISCC-SUM
|
130 |
-
"ISCC:KAA2Y5NUST7BFD5NN2XIDK7VW3WG4OEPMRQNPK37TE", # ISCC-CDI
|
131 |
-
"z36hVxiqoF8AAmDpZV958hn3tsv2i7v1NfCrSzpq", # ISCC-CDI multiformats
|
132 |
-
"ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI",
|
133 |
-
]
|
134 |
-
gr.Examples(label="Example ISCCs", examples=examples, inputs=[in_iscc])
|
135 |
-
|
136 |
-
gr.Markdown("## Different Encodings:")
|
137 |
-
with gr.Row():
|
138 |
-
with gr.Column():
|
139 |
-
out_canonical = gr.Text(
|
140 |
-
label="Canonical",
|
141 |
-
info="NORMALIZED STANDARD REPRESENTATION",
|
142 |
-
show_copy_button=True,
|
143 |
-
)
|
144 |
-
out_human = gr.Text(
|
145 |
-
label="Human Readable",
|
146 |
-
info="MAINTYPE - SUBTYPE - VERSION - LENGTH - BODY",
|
147 |
-
show_copy_button=True,
|
148 |
-
)
|
149 |
-
out_decomposed = gr.Text(
|
150 |
-
label="Decomposed",
|
151 |
-
info="ISCC-UNITS",
|
152 |
-
show_copy_button=True,
|
153 |
-
)
|
154 |
-
out_multiformat = gr.Text(
|
155 |
-
label="Multiformat",
|
156 |
-
info="BASE58-BTC",
|
157 |
-
show_copy_button=True,
|
158 |
-
)
|
159 |
-
in_iscc.change(
|
160 |
-
explain_iscc,
|
161 |
-
inputs=[in_iscc],
|
162 |
-
outputs=[
|
163 |
-
out_canonical,
|
164 |
-
out_human,
|
165 |
-
out_decomposed,
|
166 |
-
out_multiformat,
|
167 |
-
],
|
168 |
-
)
|
169 |
|
170 |
-
####################################################################################################
|
171 |
-
# CHUNKING #
|
172 |
-
####################################################################################################
|
173 |
|
174 |
-
|
175 |
-
gr.
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
)
|
180 |
-
with gr.Row():
|
181 |
-
with gr.Column():
|
182 |
-
in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
|
183 |
-
in_chunksize = gr.Slider(
|
184 |
-
label="Chunk Size",
|
185 |
-
info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
|
186 |
-
minimum=32,
|
187 |
-
maximum=2048,
|
188 |
-
step=32,
|
189 |
-
value=64,
|
190 |
-
)
|
191 |
|
192 |
-
out_text = gr.HighlightedText(
|
193 |
-
label="Chunked Text Output",
|
194 |
-
interactive=False,
|
195 |
-
elem_id="chunked-text",
|
196 |
-
)
|
197 |
-
in_text.change(
|
198 |
-
generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
|
199 |
-
)
|
200 |
-
in_chunksize.change(
|
201 |
-
generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text]
|
202 |
-
)
|
203 |
|
204 |
demo = gr.TabbedInterface(
|
205 |
-
title="▶️ ISCC Playground",
|
206 |
-
interface_list=[demo_generate,
|
207 |
-
tab_names=["
|
208 |
css=custom_css,
|
|
|
209 |
)
|
210 |
|
|
|
211 |
if __name__ == "__main__":
|
212 |
demo.launch()
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from demos.generate import demo as demo_generate
|
3 |
+
from demos.compare import demo as demo_compare
|
4 |
+
from demos.inspect_ import demo as demo_inspect
|
5 |
+
from demos.chunker import demo as demo_chunker
|
|
|
|
|
|
|
|
|
6 |
|
7 |
custom_css = """
|
8 |
+
.fixed-height {
|
9 |
+
height: 240px; /* Fixed height */
|
10 |
object-fit: contain; /* Scale the image to fit within the element */
|
11 |
}
|
12 |
#chunked-text span.label {
|
13 |
text-transform: none !important;
|
14 |
}
|
15 |
+
.json-holder {
|
16 |
+
word-wrap: break-word;
|
17 |
+
white-space: pre-wrap;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
|
20 |
+
#examples-a, #examples-b {
|
21 |
+
height: 140px; /* Fixed height */
|
22 |
+
object-fit: contain; /* Scale the image to fit within the element */
|
23 |
+
}
|
24 |
|
25 |
+
textarea {
|
26 |
+
font-family: JetBrains Mono;
|
27 |
+
}
|
28 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
30 |
|
31 |
+
iscc_theme = gr.themes.Default(
|
32 |
+
font=gr.themes.GoogleFont("Readex Pro"),
|
33 |
+
font_mono=gr.themes.GoogleFont("JetBrains Mono"),
|
34 |
+
radius_size=gr.themes.sizes.radius_none,
|
35 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
demo = gr.TabbedInterface(
|
39 |
+
title="▶️ ISCC Playground - The DNA of your digital content",
|
40 |
+
interface_list=[demo_generate, demo_compare, demo_inspect, demo_chunker],
|
41 |
+
tab_names=["GENERATE", "COMPARE", "INSPECT", "CHUNKER"],
|
42 |
css=custom_css,
|
43 |
+
theme=iscc_theme,
|
44 |
)
|
45 |
|
46 |
+
|
47 |
if __name__ == "__main__":
|
48 |
demo.launch()
|
demos/__init__.py
ADDED
File without changes
|
demos/chunker.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import iscc_core as ic
|
3 |
+
import iscc_sdk as idk
|
4 |
+
import pathlib
|
5 |
+
|
6 |
+
|
7 |
+
HERE = pathlib.Path(__file__).parent.absolute()
|
8 |
+
SAMPLE_FILEPATH = HERE / "samples/sample.txt"
|
9 |
+
sample_text = open(SAMPLE_FILEPATH, "rt", encoding="utf-8").read()
|
10 |
+
|
11 |
+
newline_symbols = {
|
12 |
+
"\u000a": "⏎", # Line Feed - Represented by the 'Return' symbol
|
13 |
+
"\u000b": "↨", # Vertical Tab - Represented by the 'Up Down Arrow' symbol
|
14 |
+
"\u000c": "␌", # Form Feed - Unicode Control Pictures representation
|
15 |
+
"\u000d": "↵", # Carriage Return - 'Downwards Arrow with Corner Leftwards' symbol
|
16 |
+
"\u0085": "⤓", # Next Line - 'Downwards Arrow with Double Stroke' symbol
|
17 |
+
"\u2028": "↲", # Line Separator - 'Downwards Arrow with Tip Leftwards' symbol
|
18 |
+
"\u2029": "¶", # Paragraph Separator - Represented by the 'Pilcrow' symbol
|
19 |
+
}
|
20 |
+
|
21 |
+
custom_css = """
|
22 |
+
#chunked-text span.label {
|
23 |
+
text-transform: none !important;
|
24 |
+
}
|
25 |
+
"""
|
26 |
+
|
27 |
+
|
28 |
+
def no_nl(text):
    """Return `text` with newline-like characters swapped for visible symbols.

    Each character that is a key of the module-level `newline_symbols` table is
    replaced by the printable symbol stored under that key; all other
    characters are left untouched.
    """
    # All keys are single characters, so one translation table covers them all.
    return text.translate(str.maketrans(newline_symbols))
|
33 |
+
|
34 |
+
|
35 |
+
def chunk_text(text, chunk_size):
    """Split `text` into chunks and label each chunk for the highlighted view.

    The SDK option `text_avg_chunk_size` is temporarily set to `chunk_size`
    while the cleaned text is processed, and is always restored afterwards,
    even when cleaning or feature extraction raises.

    :param text: Raw input text; empty or None input yields an empty result.
    :param chunk_size: Value assigned to `idk.sdk_opts.text_avg_chunk_size`.
    :return: List of `(chunk, "size:feature")` tuples, one per chunk, where
        newline-like characters in the chunk are made visible via `no_nl`.
    """
    # Nothing to chunk (e.g. after the input was cleared) - skip the SDK call.
    if not text:
        return []

    original_chunk_size = idk.sdk_opts.text_avg_chunk_size
    idk.sdk_opts.text_avg_chunk_size = chunk_size
    try:
        cleaned = ic.text_clean(text)
        processed = idk.text_features(cleaned)
    finally:
        # The option is process-global; never leave it modified on failure.
        idk.sdk_opts.text_avg_chunk_size = original_chunk_size

    features = processed["features"]
    sizes = processed["sizes"]

    # Sizes are consecutive lengths into the cleaned text.
    result = []
    start = 0
    for size, feat in zip(sizes, features):
        end = start + size
        result.append((no_nl(cleaned[start:end]), f"{size}:{feat}"))
        start = end
    return result
|
53 |
+
|
54 |
+
|
55 |
+
with gr.Blocks(css=custom_css) as demo:
|
56 |
+
with gr.Row(variant="panel"):
|
57 |
+
gr.Markdown(
|
58 |
+
"""
|
59 |
+
## ✂️ ISCC Chunker
|
60 |
+
Demo of Content-Defined Variable-Length Chunking for Shift-Resistant Text and Data Segmentation
|
61 |
+
""",
|
62 |
+
)
|
63 |
+
with gr.Row(variant="panel"):
|
64 |
+
with gr.Column(variant="panel"):
|
65 |
+
in_text = gr.TextArea(
|
66 |
+
label="Text Chunker",
|
67 |
+
placeholder="Paste your text here",
|
68 |
+
lines=12,
|
69 |
+
max_lines=12,
|
70 |
+
)
|
71 |
+
in_chunksize = gr.Slider(
|
72 |
+
label="Chunk Size",
|
73 |
+
info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
|
74 |
+
minimum=64,
|
75 |
+
maximum=2048,
|
76 |
+
step=32,
|
77 |
+
value=64,
|
78 |
+
)
|
79 |
+
gr.Examples(label="Sample Text", examples=[sample_text], inputs=[in_text])
|
80 |
+
|
81 |
+
out_text = gr.HighlightedText(
|
82 |
+
label="Chunked Text Output",
|
83 |
+
interactive=False,
|
84 |
+
elem_id="chunked-text",
|
85 |
+
)
|
86 |
+
with gr.Row():
|
87 |
+
gr.ClearButton(components=[in_text, in_chunksize, out_text])
|
88 |
+
with gr.Row(variant="panel"):
|
89 |
+
gr.Markdown(
|
90 |
+
"""
|
91 |
+
## 📖 Help & Instructions
|
92 |
+
|
93 |
+
This Demo showcases ISCC's shift-resistant chunking algorithm. Here's how to use it:
|
94 |
+
|
95 |
+
A) **Paste your text** into the "Text Chunker" field or select the sample below.
|
96 |
+
|
97 |
+
The **"Chunked Text Output"** will display the results, highlighting each chunk and its
|
98 |
+
number of characters and associated similarity hash.
|
99 |
+
|
100 |
+
B) **Edit the text** in the "Text Chunker" field
|
101 |
+
|
102 |
+
Observe how most chunks stay the same (same length and same hash) even if you make edits
|
103 |
+
in the beginning of the text.
|
104 |
+
|
105 |
+
C) **Adjust the "Chunk Size"** slider to control the average number of characters per chunk.
|
106 |
+
|
107 |
+
Observe how the chunks get smaller/larger on average. Smaller sizes result in more,
|
108 |
+
finer-grained chunks, while larger sizes produce fewer, larger chunks on average.
|
109 |
+
|
110 |
+
D) Use the **Clear Button** to start over.
|
111 |
+
|
112 |
+
For more information about ISCC chunking, please visit: https://core.iscc.codes/algorithms/cdc/
|
113 |
+
""",
|
114 |
+
)
|
115 |
+
|
116 |
+
gr.Markdown(
|
117 |
+
"""
|
118 |
+
## What is Content-Defined Chunking?
|
119 |
+
|
120 |
+
This method segments text (or data) into chunks using a content-defined approach, which is
|
121 |
+
resilient to shifts in the text. It ensures that changes in the beginning of the text have
|
122 |
+
minimal impact on the chunk boundaries further in the text, making it ideal for version
|
123 |
+
control, data deduplication, and similar applications where detecting content changes
|
124 |
+
efficiently is crucial.
|
125 |
+
|
126 |
+
## How does ISCC use Content-Defined Chunking?
|
127 |
+
|
128 |
+
The [Data-Code](https://github.com/iscc/iscc-core/blob/main/iscc_core/code_data.py) is
|
129 |
+
generated by chunking the raw file bitstream with an average chunk size of 1024 bytes.
|
130 |
+
The chunks are hashed with `xxhash` and processed with a `minhash` algorithm.
|
131 |
+
|
132 |
+
It is also used by the [iscc-sdk](https://github.com/iscc/iscc-sdk) to generate granular
|
133 |
+
syntactic similarity hashes for textual content with an average chunk size of 1024
|
134 |
+
characters. When activated the granular chunk hashes are attached to the generated ISCC
|
135 |
+
Metadata.
|
136 |
+
"""
|
137 |
+
)
|
138 |
+
|
139 |
+
in_text.change(chunk_text, inputs=[in_text, in_chunksize], outputs=[out_text])
|
140 |
+
in_chunksize.change(chunk_text, inputs=[in_text, in_chunksize], outputs=[out_text])
|
141 |
+
|
142 |
+
|
143 |
+
if __name__ == "__main__":
|
144 |
+
demo.launch()
|
demos/compare.py
ADDED
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import io
|
3 |
+
from loguru import logger as log
|
4 |
+
from pathlib import Path
|
5 |
+
import gradio as gr
|
6 |
+
from PIL import Image
|
7 |
+
import iscc_core as ic
|
8 |
+
import iscc_sdk as idk
|
9 |
+
import iscc_sci as sci
|
10 |
+
import plotly.graph_objects as go
|
11 |
+
import pandas as pd
|
12 |
+
|
13 |
+
|
14 |
+
idk.sdk_opts.image_thumbnail_size = 265
|
15 |
+
idk.sdk_opts.image_thumbnail_quality = 80
|
16 |
+
|
17 |
+
|
18 |
+
HERE = Path(__file__).parent.absolute()
|
19 |
+
IMAGES1 = HERE / "images1"
|
20 |
+
IMAGES2 = HERE / "images2"
|
21 |
+
|
22 |
+
|
23 |
+
custom_css = """
|
24 |
+
.fixed-height {
|
25 |
+
height: 240px; /* Fixed height */
|
26 |
+
object-fit: contain; /* Scale the image to fit within the element */
|
27 |
+
}
|
28 |
+
|
29 |
+
#examples-a, #examples-b {
|
30 |
+
height: 140px; /* Fixed height */
|
31 |
+
object-fit: contain; /* Scale the image to fit within the element */
|
32 |
+
}
|
33 |
+
"""
|
34 |
+
|
35 |
+
|
36 |
+
def iscc_semantic(filepath: str) -> idk.IsccMeta:
    """Generate ISCC metadata for a file, adding a Semantic-Code for images.

    For any mode other than "image" the metadata from `idk.code_iscc` is
    returned unchanged. For images, a 64-bit semantic image code is appended
    to the decomposed units and the combined code replaces `imeta.iscc`.

    :param filepath: Path of the media file to process.
    :return: The (possibly extended) metadata object.
    """
    imeta = idk.code_iscc(filepath)
    if imeta.mode != "image":
        return imeta

    # Inject Semantic-Code
    semantic_unit = sci.code_image_semantic(filepath, bits=64)["iscc"]
    units = ic.iscc_decompose(imeta.iscc)
    units.append(semantic_unit)
    imeta.iscc = ic.gen_iscc_code(units)["iscc"]
    return imeta
|
47 |
+
|
48 |
+
|
49 |
+
def dist_to_sim(data, dim=64):
    """Convert a comparison result into labelled similarity values.

    Each key is shortened to the title-cased text before its first underscore.
    The "instance_match" entry becomes 1.0 when its value is exactly `True`
    and -1.0 otherwise; every other value is passed to `hamming_to_cosine`
    together with `dim`.

    :param data: Mapping of comparison keys to distances / match flag.
    :param dim: Dimension forwarded to `hamming_to_cosine`.
    :return: Dict mapping the shortened labels to similarity values.
    """
    similarities = {}
    for key, value in data.items():
        label = key.split("_")[0].title()
        if key != "instance_match":
            similarities[label] = hamming_to_cosine(value, dim)
            continue
        # Instance match is a flag, not a distance: map it to the extremes.
        similarities[label] = 1.0 if value is True else -1.0
    return similarities
|
57 |
+
|
58 |
+
|
59 |
+
def hamming_to_cosine(hamming_distance: int, dim: int) -> float:
    """Approximate the cosine similarity for a hamming distance and dimension.

    Computes `1 - (2 * hamming_distance) / dim`, so a distance of 0 gives 1.0
    and a distance equal to `dim` gives -1.0. The inputs and the result are
    written to the debug log.

    :param hamming_distance: Number of differing bits.
    :param dim: Total number of bits compared.
    :return: The approximated similarity.
    """
    similarity = 1 - (2 * hamming_distance) / dim
    log.debug(
        f"Hamming distance: {hamming_distance} - Dim: {dim} - Result: {similarity}"
    )
    return similarity
|
64 |
+
|
65 |
+
|
66 |
+
def similarity_plot(sim_data):
    # type: (dict) -> go.Figure
    """Build a horizontal bar chart of per-unit similarity values.

    :param sim_data: Mapping of category label to similarity value.
    :return: Plotly figure with one bar per entry of `sim_data`.
    """

    def bar_color(value):
        # Negative values use the reddish tone, others the greenish one;
        # the magnitude of the value is used as the alpha channel.
        if value < 0:
            return f"rgba(224,122,95,{abs(value)})"
        return f"rgba(118,185,71,{value})"

    def percent_label(pct):
        return f"{pct:.2f}%"

    # Rows are taken in reverse insertion order of the input mapping.
    frame = pd.DataFrame(reversed(sim_data.items()), columns=["Category", "Value"])
    frame["Percentage"] = frame["Value"] * 100  # Convert to percentage
    frame["Color"] = [bar_color(value) for value in frame["Value"]]

    bars = go.Bar(
        x=frame["Value"],
        y=frame["Category"],
        orientation="h",
        marker_color=frame["Color"],
        text=frame["Percentage"].apply(percent_label),
        textposition="inside",
    )

    fig = go.Figure()
    fig.add_trace(bars)

    # Layout: centered title, percent ticks, transparent background and a
    # height that grows with the number of entries.
    fig.update_layout(
        title={"text": "Approximate ISCC-UNIT Similarities", "x": 0.5},
        xaxis=dict(title="Similarity", tickformat=",.0%"),
        yaxis=dict(title=""),
        plot_bgcolor="rgba(0,0,0,0)",
        height=len(sim_data) * 70,
        showlegend=False,
        autosize=True,
        margin=dict(l=50, r=50, t=50, b=50),
    )

    # Slightly wider than [-1, 1] to accommodate percentage labels
    fig.update_xaxes(range=[-1.1, 1.1])

    return fig
|
108 |
+
|
109 |
+
|
110 |
+
with gr.Blocks(css=custom_css) as demo:
|
111 |
+
gr.Markdown("## 🖼️ ISCC Similarity Comparison")
|
112 |
+
|
113 |
+
with gr.Row(variant="default", equal_height=True):
|
114 |
+
with gr.Column(variant="compact"):
|
115 |
+
in_file_a = gr.File(
|
116 |
+
label="Media File A", type="filepath", elem_classes=["fixed-height"]
|
117 |
+
)
|
118 |
+
out_thumb_a = gr.Image(
|
119 |
+
label="Extracted Thumbnail",
|
120 |
+
visible=False,
|
121 |
+
height=240,
|
122 |
+
elem_classes=["fixed-height"],
|
123 |
+
interactive=True,
|
124 |
+
show_download_button=False,
|
125 |
+
sources=["upload"],
|
126 |
+
)
|
127 |
+
|
128 |
+
# Proxy component to patch image example selection -> gr.File
|
129 |
+
dumy_image_a = gr.Image(visible=False, type="filepath", height=240)
|
130 |
+
|
131 |
+
gr.Examples(
|
132 |
+
examples=IMAGES1.as_posix(),
|
133 |
+
cache_examples=False,
|
134 |
+
inputs=[dumy_image_a],
|
135 |
+
elem_id="examples-a",
|
136 |
+
)
|
137 |
+
|
138 |
+
out_iscc_a = gr.Text(label="ISCC")
|
139 |
+
with gr.Accordion(label="ISCC Metadata", open=False):
|
140 |
+
out_meta_a = gr.Code(language="json", label="JSON-LD")
|
141 |
+
|
142 |
+
with gr.Column(variant="compact"):
|
143 |
+
in_file_b = gr.File(
|
144 |
+
label="Media File B", type="filepath", elem_classes=["fixed-height"]
|
145 |
+
)
|
146 |
+
|
147 |
+
out_thumb_b = gr.Image(
|
148 |
+
label="Extracted Thumbnail",
|
149 |
+
visible=False,
|
150 |
+
height=240,
|
151 |
+
elem_classes=["fixed-height"],
|
152 |
+
interactive=True,
|
153 |
+
show_download_button=False,
|
154 |
+
sources=["upload"],
|
155 |
+
)
|
156 |
+
|
157 |
+
# Proxy component to patch image example selection -> gr.File
|
158 |
+
dumy_image_b = gr.Image(visible=False, type="filepath", height=240)
|
159 |
+
|
160 |
+
gr.Examples(
|
161 |
+
examples=IMAGES2.as_posix(),
|
162 |
+
cache_examples=False,
|
163 |
+
inputs=[dumy_image_b],
|
164 |
+
elem_id="examples-b",
|
165 |
+
)
|
166 |
+
|
167 |
+
out_iscc_b = gr.Text(label="ISCC")
|
168 |
+
with gr.Accordion(label="ISCC Metadata", open=False):
|
169 |
+
out_meta_b = gr.Code(language="json", label="JSON-LD")
|
170 |
+
|
171 |
+
with gr.Row(variant="panel"):
|
172 |
+
out_compare = gr.Plot(
|
173 |
+
label="Approximate ISCC-UNIT Similarities", container=False
|
174 |
+
)
|
175 |
+
|
176 |
+
def rewrite_uri(filepath, sample_set):
    # type: (str | None, str) -> str | None
    """Rewrite a temporary image path to the matching bundled sample path.

    The file name of `filepath` is looked up inside the `sample_set` folder
    next to this module (`HERE`), with "jpeg" in the name replaced by "jpg".

    :param filepath: Temporary path of the selected example image, or an
        empty value when nothing is selected.
    :param sample_set: Name of the sample folder (e.g. "images1").
    :return: POSIX path of the original sample file, or None when `filepath`
        is empty.
    """
    # Explicit guard: nothing selected means there is no path to rewrite.
    if not filepath:
        return None

    inpath = Path(filepath)
    outpath = HERE / f"{sample_set}/{inpath.name.replace('jpeg', 'jpg')}"

    log.info(filepath)
    return outpath.as_posix()
|
185 |
+
|
186 |
+
def process_upload(filepath, suffix):
    # type: (str, str) -> dict
    """Generate extended ISCC with experimental Semantic Code (for images).

    :param filepath: Path of the uploaded file; may be empty.
    :param suffix: Component group suffix used to look up the module-level
        components named `in_file_<suffix>`, `out_thumb_<suffix>`,
        `out_iscc_<suffix>` and `out_meta_<suffix>`.
    :return: Dict keyed by component with the new value for each of them.
    """
    # Map to active component group
    lookup = globals().get
    file_component = lookup(f"in_file_{suffix}")
    thumb_component = lookup(f"out_thumb_{suffix}")
    iscc_component = lookup(f"out_iscc_{suffix}")
    meta_component = lookup(f"out_meta_{suffix}")

    # Handle empty filepath
    if not filepath:
        return {file_component: None}

    imeta = iscc_semantic(filepath)

    # Pop thumbnail for preview: decode the part after the first comma
    # and drop it from the metadata.
    thumbnail = None
    if imeta.thumbnail:
        _, encoded = imeta.thumbnail.split(",", 1)
        thumbnail = Image.open(io.BytesIO(base64.b64decode(encoded)))
        imeta.thumbnail = None

    # Hide the upload widget and show the thumbnail in its place.
    return {
        file_component: gr.File(visible=False, value=None),
        thumb_component: gr.Image(visible=True, value=thumbnail),
        iscc_component: imeta.iscc,
        meta_component: imeta.json(exclude_unset=False, by_alias=True, indent=2),
    }
|
220 |
+
|
221 |
+
def iscc_compare(iscc_a, iscc_b):
    # type: (str, str) -> go.Figure | None
    """Compare two ISCCs and return their similarity plot.

    :param iscc_a: First ISCC.
    :param iscc_b: Second ISCC.
    :return: Figure produced by `similarity_plot`, or None while either
        input is still empty.
    """
    if not (iscc_a and iscc_b):
        return None
    distances = ic.iscc_compare(iscc_a, iscc_b)
    similarities = dist_to_sim(distances, dim=64)
    return similarity_plot(similarities)
|
230 |
+
|
231 |
+
# Events
|
232 |
+
in_file_a.change(
|
233 |
+
lambda file: process_upload(file, "a"),
|
234 |
+
inputs=[in_file_a],
|
235 |
+
outputs=[in_file_a, out_thumb_a, out_iscc_a, out_meta_a],
|
236 |
+
show_progress="full",
|
237 |
+
)
|
238 |
+
in_file_b.change(
|
239 |
+
lambda file: process_upload(file, "b"),
|
240 |
+
inputs=[in_file_b],
|
241 |
+
outputs=[in_file_b, out_thumb_b, out_iscc_b, out_meta_b],
|
242 |
+
show_progress="full",
|
243 |
+
)
|
244 |
+
out_thumb_a.clear(
|
245 |
+
lambda: (gr.File(visible=True), gr.Image(visible=False), "", ""),
|
246 |
+
inputs=[],
|
247 |
+
outputs=[in_file_a, out_thumb_a, out_iscc_a, out_meta_a],
|
248 |
+
show_progress="hidden",
|
249 |
+
)
|
250 |
+
|
251 |
+
out_thumb_b.clear(
|
252 |
+
lambda: (gr.File(visible=True), gr.Image(visible=False), "", ""),
|
253 |
+
inputs=[],
|
254 |
+
outputs=[in_file_b, out_thumb_b, out_iscc_b, out_meta_b],
|
255 |
+
show_progress="hidden",
|
256 |
+
)
|
257 |
+
|
258 |
+
out_iscc_a.change(
|
259 |
+
iscc_compare,
|
260 |
+
inputs=[out_iscc_a, out_iscc_b],
|
261 |
+
outputs=[out_compare],
|
262 |
+
show_progress="hidden",
|
263 |
+
)
|
264 |
+
|
265 |
+
out_iscc_b.change(
|
266 |
+
iscc_compare,
|
267 |
+
inputs=[out_iscc_a, out_iscc_b],
|
268 |
+
outputs=[out_compare],
|
269 |
+
show_progress="hidden",
|
270 |
+
)
|
271 |
+
|
272 |
+
dumy_image_a.change(
|
273 |
+
lambda file: rewrite_uri(file, "images1"),
|
274 |
+
inputs=[dumy_image_a],
|
275 |
+
outputs=[in_file_a],
|
276 |
+
show_progress="hidden",
|
277 |
+
)
|
278 |
+
dumy_image_b.change(
|
279 |
+
lambda file: rewrite_uri(file, "images2"),
|
280 |
+
inputs=[dumy_image_b],
|
281 |
+
outputs=[in_file_b],
|
282 |
+
show_progress="hidden",
|
283 |
+
)
|
284 |
+
|
285 |
+
|
286 |
+
if __name__ == "__main__":
|
287 |
+
demo.launch(debug=True)
|
demos/generate.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
import base64
|
3 |
+
import io
|
4 |
+
import gradio as gr
|
5 |
+
import iscc_core as ic
|
6 |
+
import iscc_sdk as idk
|
7 |
+
import iscc_sci as sci
|
8 |
+
import iscc_schema as iss
|
9 |
+
from PIL import Image
|
10 |
+
import json
|
11 |
+
|
12 |
+
idk.sdk_opts.image_thumbnail_size = 240
|
13 |
+
idk.sdk_opts.image_thumbnail_quality = 80
|
14 |
+
|
15 |
+
custom_css = """
|
16 |
+
.fixed-height img {
|
17 |
+
height: 240px; /* Fixed height */
|
18 |
+
object-fit: contain; /* Scale the image to fit within the element */
|
19 |
+
}
|
20 |
+
"""
|
21 |
+
|
22 |
+
|
23 |
+
def generate_iscc(file):
    """Generate an ISCC for an uploaded file and prepare the output values.

    :param file: Uploaded file object; its `name` attribute is passed to
        `idk.code_iscc`.
    :return: Tuple of (ISCC, thumbnail image or None, name, description,
        metadata as indented JSON text, None).
    """
    imeta = idk.code_iscc(file.name)

    # Decode the embedded thumbnail (the part after the first comma), if any.
    thumbnail = None
    if imeta.thumbnail:
        _, payload = imeta.thumbnail.split(",", 1)
        thumbnail = Image.open(io.BytesIO(base64.b64decode(payload)))

    # The thumbnail is shown as an image, so drop it from the JSON output.
    metadata = imeta.dict(exclude_unset=False, by_alias=True)
    if metadata.get("thumbnail"):
        metadata.pop("thumbnail")
    metadata_json = json.dumps(metadata, indent=2)

    return (
        imeta.iscc,
        thumbnail,
        imeta.name,
        imeta.description,
        metadata_json,
        None,
    )
|
41 |
+
|
42 |
+
|
43 |
+
with gr.Blocks(title="ISCC Generator", css=custom_css) as demo:
|
44 |
+
gr.Markdown("## ⚙️ ISCC Generator")
|
45 |
+
with gr.Row():
|
46 |
+
in_file = gr.File(label="Media File")
|
47 |
+
with gr.Row():
|
48 |
+
out_iscc = gr.Text(
|
49 |
+
label="ISCC",
|
50 |
+
info="GENERATED FROM MEDIA FILE",
|
51 |
+
show_copy_button=True,
|
52 |
+
show_label=True,
|
53 |
+
)
|
54 |
+
with gr.Row(variant="panel", equal_height=False):
|
55 |
+
with gr.Column():
|
56 |
+
out_thumbnail = gr.Image(
|
57 |
+
label="Extracted Thumbnail",
|
58 |
+
elem_classes=["fixed-height"],
|
59 |
+
height=240,
|
60 |
+
)
|
61 |
+
with gr.Column(scale=3):
|
62 |
+
with gr.Group():
|
63 |
+
out_name = gr.Text(label="Name", show_copy_button=True)
|
64 |
+
out_description = gr.Textbox(
|
65 |
+
label="Description", lines=4, max_lines=4, show_copy_button=True
|
66 |
+
)
|
67 |
+
|
68 |
+
with gr.Row():
|
69 |
+
with gr.Accordion(label="ISCC Metadata", open=False):
|
70 |
+
out_meta = gr.Code(language="json", label="JSON-LD")
|
71 |
+
in_file.upload(
|
72 |
+
generate_iscc,
|
73 |
+
inputs=[in_file],
|
74 |
+
outputs=[out_iscc, out_thumbnail, out_name, out_description, out_meta, in_file],
|
75 |
+
)
|
76 |
+
|
77 |
+
# Custom footer
|
78 |
+
footer = (
|
79 |
+
"https://github.com/iscc"
|
80 |
+
f" | iscc-core v{ic.__version__}"
|
81 |
+
f" | iscc-sdk v{idk.__version__}"
|
82 |
+
f" | iscc-sci v{sci.__version__}"
|
83 |
+
f" | iscc-schema v{iss.__version__}"
|
84 |
+
)
|
85 |
+
gr.Markdown(
|
86 |
+
footer,
|
87 |
+
)
|
88 |
+
|
89 |
+
|
90 |
+
if __name__ == "__main__":
|
91 |
+
demo.launch()
|
demos/images1/pope1.jpg
ADDED
demos/images1/pope1b.jpg
ADDED
demos/images1/ukbench00000.jpg
ADDED
demos/images1/ukbench00016.jpg
ADDED
demos/images1/ukbench00044.jpg
ADDED
demos/images1/ukbench00052.jpg
ADDED
demos/images2/pope2.jpg
ADDED
demos/images2/pope2b.jpg
ADDED
demos/images2/ukbench00002.jpg
ADDED
demos/images2/ukbench00017.jpg
ADDED
demos/images2/ukbench00046.jpg
ADDED
demos/images2/ukbench00053.jpg
ADDED
demos/inspect_.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
from loguru import logger as log
|
3 |
+
import gradio as gr
|
4 |
+
import iscc_core as ic
|
5 |
+
|
6 |
+
|
7 |
+
def explain_iscc(code):
    """Break an ISCC string down into the values shown by the inspector.

    Returns a 5-tuple in the order
    ``(column, canonical, human, decomposed, multiformat)``:

    - empty/falsy ``code``: a hidden column and ``None`` for every text value.
    - any exception while processing: the exception is logged, a hidden column
      is returned, the exception text is placed in the ``canonical`` slot and
      the remaining values are ``None``.
    - success: a visible column followed by the normalized code, the
      explanation with its ``-`` separators spaced out, the decomposed units
      joined by ``" - "``, and the ``mf_base58btc`` form of the code.
    """
    hidden_column = gr.Column(visible=False)

    # Nothing entered yet: keep the result panel hidden and empty.
    if not code:
        return hidden_column, None, None, None, None

    try:
        canonical = ic.iscc_normalize(code)
        # TODO Update iscc-core validation for MSCDI
        # ic.iscc_validate(canonical, strict=True)
        human = ic.iscc_explain(code).replace("-", " - ")
        code_obj = ic.Code(canonical)
        units = ic.iscc_decompose(canonical)
        decomposed = " - ".join(units)
        multiformat = code_obj.mf_base58btc
    except Exception as err:
        log.error(err)
        return hidden_column, str(err), None, None, None

    return gr.Column(visible=True), canonical, human, decomposed, multiformat
|
24 |
+
|
25 |
+
|
26 |
+
# Inspector UI: one text input (with example codes) and a result panel that
# starts hidden and is toggled by the value explain_iscc returns for out_column.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    ## 🕵️♂️ ISCC Inspector
    """
    )
    with gr.Row():
        with gr.Column():
            # Free-text input for the ISCC to inspect.
            in_iscc = gr.Text(
                label="ISCC Inspector",
                info="DECODE & EXPLAIN ISCC STRUCTURE",
                placeholder="Paste an ISCC here to break it down",
                autofocus=True,
            )
            # Sample inputs offered below the text field.
            examples = [
                "ISCC:AAAWN77F727NXSUS",  # Meta-Code
                "bzqaqaal5rvp72lx2thvq",  # Multiformat
                "ISCC:EAASKDNZNYGUUF5A",  # Text-Code
                "ISCC:GABW5LUBVP23N3DOD7PPINHT5JKBI",  # Data-Code 128 bits
                "ISCC:KUAG5LUBVP23N3DOHCHWIYGXVN7ZS",  # ISCC-SUM
                "ISCC:KAA2Y5NUST7BFD5NN2XIDK7VW3WG4OEPMRQNPK37TE",  # ISCC-CDI
                "z36hVxiqoF8AAmDpZV958hn3tsv2i7v1NfCrSzpq",  # ISCC-CDI multiformats
                "ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI",
            ]
            gr.Examples(label="Example ISCCs", examples=examples, inputs=[in_iscc])

    with gr.Row():
        # Result panel; created hidden and only shown via the handler's output.
        with gr.Column(visible=False) as out_column:
            out_canonical = gr.Text(
                label="Canonical",
                info="NORMALIZED STANDARD REPRESENTATION",
                show_copy_button=True,
                value=None,
            )
            out_human = gr.Text(
                label="Human Readable",
                info="MAINTYPE - SUBTYPE - VERSION - LENGTH - BODY",
                show_copy_button=True,
            )
            out_decomposed = gr.Text(
                label="Decomposed",
                info="ISCC-UNITS",
                show_copy_button=True,
            )
            out_multiformat = gr.Text(
                label="Multiformat",
                info="BASE58-BTC",
                show_copy_button=True,
            )
    # Re-run explain_iscc whenever the input changes. The outputs are listed in
    # the same order as the 5-tuple that explain_iscc returns.
    in_iscc.change(
        explain_iscc,
        inputs=[in_iscc],
        outputs=[
            out_column,
            out_canonical,
            out_human,
            out_decomposed,
            out_multiformat,
        ],
        show_progress="hidden",
    )
|
87 |
+
|
88 |
+
# Start the inspector app only when this module is executed directly as a script.
if __name__ == "__main__":
    demo.launch()
|
demos/samples/sample.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# What is the ISCC
|
2 |
+
|
3 |
+
The ISCC is a similarity-preserving fingerprint and identifier for digital media assets.
|
4 |
+
|
5 |
+
ISCCs are generated algorithmically from digital content, just like cryptographic hashes. However, instead of using a single cryptographic hash function to identify data only, the ISCC uses various algorithms to create a composite identifier that exhibits similarity-preserving properties (soft hash).
|
6 |
+
|
7 |
+
The component-based structure of the ISCC identifies content at multiple levels of abstraction. Each component is self-describing, modular, and can be used separately or with others to aid in various content identification tasks. The algorithmic design supports content deduplication, database synchronization, indexing, integrity verification, timestamping, versioning, data provenance, similarity clustering, anomaly detection, usage tracking, allocation of royalties, fact-checking and general digital asset management use-cases.
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -8,8 +8,10 @@ readme = "README.md"
|
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
python = "^3.9"
|
11 |
-
gradio = "
|
12 |
-
iscc-sdk = "^0.6.
|
|
|
|
|
13 |
|
14 |
[tool.poetry.group.dev.dependencies]
|
15 |
black = "^23.12.1"
|
|
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
python = "^3.9"
|
11 |
+
gradio = "*"
|
12 |
+
iscc-sdk = "^0.6.1"
|
13 |
+
iscc-sci = "^0.1.0"
|
14 |
+
plotly = "^5.18.0"
|
15 |
|
16 |
[tool.poetry.group.dev.dependencies]
|
17 |
black = "^23.12.1"
|
requirements.txt
CHANGED
@@ -1 +1,4 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
1 |
+
gradio==4.19.1
|
2 |
+
iscc-sdk==0.6.1
|
3 |
+
iscc-sci==0.1.0
|
4 |
+
plotly==5.19.0
|