Spaces:
Running
Running
import io | |
import base64 | |
import gradio as gr | |
import iscc_core as ic | |
import iscc_sdk as idk | |
from PIL import Image | |
# Process-wide ISCC SDK options, set once when the module is imported.
# 265 is the same number custom_css uses as the fixed thumbnail height (265px).
idk.sdk_opts.image_thumbnail_size = 265
# NOTE(review): presumably an encoder quality setting for the thumbnail - confirm in the iscc-sdk docs.
idk.sdk_opts.image_thumbnail_quality = 80
# NOTE(review): presumably switches on granular (per-chunk) feature output - confirm in the iscc-sdk docs.
idk.sdk_opts.granular = True
# Extra CSS passed to the tabbed interface (css=custom_css):
#   .fixed-height   - class put on the thumbnail image so it renders at a fixed height
#   #chunked-text   - id of the chunk view; stops its labels from being case-transformed
custom_css = """
.fixed-height img {
    height: 265px; /* Fixed height */
    object-fit: contain; /* Scale the image to fit within the element */
}
#chunked-text span.label {
    text-transform: none !important;
}
"""
# Maps every newline-like character to a single visible glyph so that line
# breaks stay recognisable when chunked text is displayed on one line.
# The glyphs are written as escapes so the table survives re-encoding of
# this file; the glyph itself is shown in the trailing comment.
newline_symbols = {
    "\u000a": "\u23ce",  # Line Feed -> ⏎ RETURN SYMBOL
    "\u000b": "\u21a8",  # Vertical Tab -> ↨ UP DOWN ARROW WITH BASE
    "\u000c": "\u240c",  # Form Feed -> ␌ SYMBOL FOR FORM FEED (Control Pictures)
    "\u000d": "\u21b5",  # Carriage Return -> ↵ DOWNWARDS ARROW WITH CORNER LEFTWARDS
    "\u0085": "\u2913",  # Next Line -> ⤓ DOWNWARDS ARROW TO BAR
    "\u2028": "\u21b2",  # Line Separator -> ↲ DOWNWARDS ARROW WITH TIP LEFTWARDS
    "\u2029": "\u00b6",  # Paragraph Separator -> ¶ PILCROW SIGN
}
def no_nl(text):
    """Return *text* with every newline-like character shown as a visible symbol.

    The substitutions are taken from the module-level ``newline_symbols``
    mapping; characters that are not keys of that mapping are left untouched.
    """
    # All keys are single characters and no replacement symbol contains a key,
    # so one translate() pass gives the same result as replacing key by key.
    return text.translate(str.maketrans(newline_symbols))
def generate_iscc(file):
    """Generate the ISCC-CODE, a thumbnail preview and the metadata for a file.

    Args:
        file: Value delivered by the ``gr.File`` input. Either an object that
            exposes the file path as ``.name``, a plain path string, or
            ``None`` when the upload has been cleared.

    Returns:
        A ``(iscc, thumbnail, metadata)`` tuple: the ISCC string, a PIL image
        decoded from the embedded thumbnail (``None`` if there is none) and
        the metadata dict without its ``thumbnail`` entry. All three are
        ``None`` when no file is given, which resets the output components.
    """
    # The change event also fires when the user removes the file.
    if file is None:
        return None, None, None

    path = getattr(file, "name", file)
    imeta = idk.code_iscc(path)

    thumbnail = None
    if imeta.thumbnail:
        # The thumbnail string has the form "<header>,<base64 payload>".
        _, encoded = imeta.thumbnail.split(",", 1)
        thumbnail = Image.open(io.BytesIO(base64.b64decode(encoded)))

    metadata = imeta.dict(exclude_unset=False, by_alias=True)
    # The thumbnail is shown as an image already; keep it out of the JSON view.
    if metadata.get("thumbnail"):
        del metadata["thumbnail"]
    return imeta.iscc, thumbnail, metadata
def explain_iscc(code):
    """Return several textual encodings of an ISCC-CODE or ISCC-UNIT.

    Args:
        code: The ISCC as typed by the user. Surrounding whitespace is ignored.

    Returns:
        A ``(canonical, human, decomposed, multiformat)`` tuple of strings.
        Four empty strings are returned for empty or whitespace-only input so
        that clearing the text box clears the outputs instead of raising.
    """
    if code is None or not code.strip():
        return "", "", "", ""

    canonical = ic.iscc_normalize(code.strip())
    # Derive every representation from the canonical form so they all agree.
    human = ic.iscc_explain(canonical).replace("-", " - ")
    decomposed = " - ".join(ic.iscc_decompose(canonical))
    multiformat = ic.Code(canonical).mf_base58btc
    return canonical, human, decomposed, multiformat
def generate_text_code(text, chunk_size):
    """Split *text* into content-defined chunks for the highlighted-text view.

    Args:
        text: Raw input text. ``None`` or an empty string yields no chunks.
        chunk_size: Average chunk size; temporarily written to
            ``idk.sdk_opts.text_avg_chunk_size`` while the text is processed.

    Returns:
        A list of ``(chunk, label)`` tuples in text order, where ``chunk`` is
        the chunk with newline characters made visible and ``label`` is
        ``"<size>:<feature>"``.
    """
    if not text:
        return []

    opts = idk.sdk_opts
    original_chunk_size = opts.text_avg_chunk_size
    opts.text_avg_chunk_size = chunk_size
    try:
        cleaned = ic.text_clean(text)
        processed = idk.text_features(cleaned)
    finally:
        # Restore the global option even if processing fails, so one bad
        # request cannot change the chunk size for later calls.
        opts.text_avg_chunk_size = original_chunk_size

    result = []
    start = 0
    # The sizes are consecutive slice lengths into the cleaned text.
    for size, feat in zip(processed["sizes"], processed["features"]):
        end = start + size
        result.append((no_nl(cleaned[start:end]), f"{size}:{feat}"))
        start = end
    return result
# Tab "ISCC-CODE": upload a media file and show its ISCC-CODE, thumbnail and metadata.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the call order - confirm against the intended layout.
# NOTE(review): the glyph at the start of the heading looks mis-encoded - confirm the intended emoji.
with gr.Blocks(title="ISCC-CODE") as demo_generate:
    gr.Markdown("""
    ## π ISCC-CODE Generator - The DNA of digital content
    """)

    # Upload on the left (two thirds), thumbnail preview on the right (one third).
    with gr.Row():
        with gr.Column(scale=2):
            in_file = gr.File(label="Media File")
        with gr.Column(scale=1):
            out_thumbnail = gr.Image(
                label="Extracted Thumbnail",
                elem_classes=["fixed-height"],
            )

    with gr.Row():
        out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)

    with gr.Row():
        out_meta = gr.Json(label="Metadata")

    # Recompute all three outputs whenever the selected file changes.
    in_file.change(
        fn=generate_iscc,
        inputs=[in_file],
        outputs=[out_iscc, out_thumbnail, out_meta],
    )
# Tab "ENCODING": show the different textual encodings of an ISCC.
# The tab name is passed as title=...; passed positionally it would be taken
# as the first Blocks parameter (the theme) instead of the title.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the call order - confirm against the intended layout.
# NOTE(review): the glyph at the start of the heading looks mis-encoded - confirm the intended emoji.
with gr.Blocks(title="ENCODING") as demo_decode:
    gr.Markdown("""
    ## π A Codec for Self-Describing Compact Binary Codes
    """)
    with gr.Row():
        with gr.Column():
            in_iscc = gr.Text(
                label="ISCC",
                info="INPUT ANY VALID ISCC-CODE OR ISCC-UNIT",
                autofocus=True,
            )
            examples = [
                "ISCC:AAAWN77F727NXSUS",  # Meta-Code
                "bzqaqaal5rvp72lx2thvq",  # Multiformat
                "ISCC:EAASKDNZNYGUUF5A",  # Text-Code
                "ISCC:GABW5LUBVP23N3DOD7PPINHT5JKBI",  # Data-Code 128 bits
                "ISCC:KUAG5LUBVP23N3DOHCHWIYGXVN7ZS",  # ISCC-SUM
                "ISCC:KAA2Y5NUST7BFD5NN2XIDK7VW3WG4OEPMRQNPK37TE",  # ISCC-CDI
                "z36hVxiqoF8AAmDpZV958hn3tsv2i7v1NfCrSzpq",  # ISCC-CDI multiformats
                "ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI",
            ]
            gr.Examples(label="Example ISCCs", examples=examples, inputs=[in_iscc])

    gr.Markdown("## Different Encodings:")
    with gr.Row():
        with gr.Column():
            out_canonical = gr.Text(
                label="Canonical",
                info="NORMALIZED STANDARD REPRESENTATION",
                show_copy_button=True,
            )
            out_human = gr.Text(
                label="Human Readable",
                info="MAINTYPE - SUBTYPE - VERSION - LENGTH - BODY",
                show_copy_button=True,
            )
            out_decomposed = gr.Text(
                label="Decomposed",
                info="ISCC-UNITS",
                show_copy_button=True,
            )
            out_multiformat = gr.Text(
                label="Multiformat",
                info="BASE58-BTC",
                show_copy_button=True,
            )

    # Refresh every encoding whenever the input text changes.
    in_iscc.change(explain_iscc, inputs=[in_iscc], outputs=[
        out_canonical,
        out_human,
        out_decomposed,
        out_multiformat,
    ])
# Tab "CHUNKING": split text into content-defined chunks and highlight them.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the call order - confirm against the intended layout.
# NOTE(review): the glyph at the start of the heading looks mis-encoded - confirm the intended emoji.
with gr.Blocks(title="CHUNKING") as demo_text_code:
    gr.Markdown("""
    ## π Content Defined Chunking for Shift-Resistant Text and Data Segmentation
    """)

    with gr.Row():
        with gr.Column():
            in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
            in_chunksize = gr.Slider(
                label="Chunk Size",
                info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
                minimum=32,
                maximum=2048,
                step=32,
                value=64,
            )
            out_text = gr.HighlightedText(
                label="Chunked Text Output",
                interactive=False,
                elem_id="chunked-text",
            )

    # Re-chunk when either the text or the target chunk size changes.
    in_text.change(
        fn=generate_text_code,
        inputs=[in_text, in_chunksize],
        outputs=[out_text],
    )
    in_chunksize.change(
        fn=generate_text_code,
        inputs=[in_text, in_chunksize],
        outputs=[out_text],
    )
# Top-level app: the three Blocks above shown as tabs, styled with custom_css.
demo = gr.TabbedInterface(
    # "\u25b6\ufe0f" is the play-button emoji (▶️); written as escapes so the
    # title survives re-encoding of this file.
    title="\u25b6\ufe0f ISCC Playground",
    interface_list=[demo_generate, demo_decode, demo_text_code],
    tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
    css=custom_css,
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()