File size: 6,214 Bytes
12a4666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909172d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import io
import base64
import gradio as gr
import iscc_core as ic
import iscc_sdk as idk
from PIL import Image

# Single source of truth for the thumbnail edge length: it is used both for
# the SDK thumbnail option and for the CSS height of the preview element, so
# the two values cannot drift apart.
THUMBNAIL_SIZE = 265

# Global ISCC SDK options, applied once at import time.
idk.sdk_opts.image_thumbnail_size = THUMBNAIL_SIZE
idk.sdk_opts.image_thumbnail_quality = 80
idk.sdk_opts.granular = True


# Stylesheet passed to the tabbed app. This is an f-string, so literal CSS
# braces are doubled.
custom_css = f"""
.fixed-height img {{
    height: {THUMBNAIL_SIZE}px;  /* Fixed height */
    object-fit: contain;  /* Scale the image to fit within the element */
}}
#chunked-text span.label {{
    text-transform: none !important;
}}
"""

# Visible stand-ins for line-breaking characters, keyed by the character they
# replace (see no_nl). Every replacement is exactly one glyph.
newline_symbols = {
    "\u000a": "⏎",  # Line Feed - U+23CE 'Return Symbol'
    "\u000b": "↨",  # Vertical Tab - U+21A8 'Up Down Arrow with Base'
    "\u000c": "␌",  # Form Feed - U+240C 'Symbol for Form Feed'
    "\u000d": "↵",  # Carriage Return - U+21B5 'Downwards Arrow with Corner Leftwards'
    "\u0085": "⤓",  # Next Line - U+2913 'Downwards Arrow to Bar'
    "\u2028": "↲",  # Line Separator - U+21B2 'Downwards Arrow with Tip Leftwards'
    "\u2029": "¶",  # Paragraph Separator - U+00B6 'Pilcrow Sign'
}


def no_nl(text):
    """Return *text* with each character keyed in ``newline_symbols`` swapped for its symbol."""
    # One pass over the text. This matches replacing the characters one after
    # another because no replacement symbol is itself a key of the mapping.
    translation = str.maketrans(newline_symbols)
    return text.translate(translation)


def generate_iscc(file):
    """Create the ISCC, thumbnail preview and metadata for an uploaded file.

    Args:
        file: Object whose ``name`` attribute is the path handed to
            ``idk.code_iscc``, or ``None`` when there is no file to process.

    Returns:
        A 3-tuple ``(iscc, thumbnail, metadata)``:

        * ``iscc`` - the ``iscc`` attribute of the SDK result.
        * ``thumbnail`` - a ``PIL.Image`` decoded from the base64 payload of
          the result's thumbnail string, or ``None`` if there is none.
        * ``metadata`` - the result as a dict, without a non-empty
          ``thumbnail`` entry.

        ``(None, None, None)`` is returned when *file* is ``None``.
    """
    if file is None:
        # No file to process (the handler can be invoked with None, e.g. when
        # the file input is cleared): blank all outputs instead of failing on
        # ``file.name``.
        return None, None, None

    imeta = idk.code_iscc(file.name)

    thumbnail = None
    if imeta.thumbnail:
        # The thumbnail string is "<header>,<base64 data>"; only the data
        # part is decoded.
        _header, encoded = imeta.thumbnail.split(",", 1)
        thumbnail = Image.open(io.BytesIO(base64.b64decode(encoded)))

    metadata = imeta.dict(exclude_unset=False, by_alias=True)
    # The thumbnail is returned separately as an image, so drop the long
    # encoded string from the metadata.
    if metadata.get("thumbnail"):
        del metadata["thumbnail"]
    return imeta.iscc, thumbnail, metadata


def explain_iscc(code):
    """Render an ISCC in several encodings.

    Args:
        code: ISCC string as typed by the user. Surrounding whitespace is
            ignored.

    Returns:
        A 4-tuple of strings ``(canonical, human, decomposed, multiformat)``:

        * ``canonical`` - result of ``ic.iscc_normalize``.
        * ``human`` - result of ``ic.iscc_explain`` with every ``-`` padded
          to `` - ``.
        * ``decomposed`` - items of ``ic.iscc_decompose`` joined by `` - ``.
        * ``multiformat`` - ``mf_base58btc`` of the ``ic.Code`` object.

        Four empty strings are returned when *code* is empty, whitespace-only
        or ``None``.
    """
    if not code or not code.strip():
        # Nothing to decode (e.g. the textbox was emptied): blank the outputs
        # instead of passing an empty string to the codec.
        return "", "", "", ""

    code = code.strip()
    canonical = ic.iscc_normalize(code)
    human = ic.iscc_explain(code).replace("-", " - ")
    decomposed = " - ".join(ic.iscc_decompose(canonical))
    multiformat = ic.Code(canonical).mf_base58btc
    return canonical, human, decomposed, multiformat


def generate_text_code(text, chunk_size):
    """Split *text* into chunks and label each chunk with its size and feature.

    Args:
        text: Input text. Empty or ``None`` yields an empty result.
        chunk_size: Value temporarily assigned to
            ``idk.sdk_opts.text_avg_chunk_size`` while the text is processed.

    Returns:
        A list of ``(chunk, label)`` tuples in text order. ``chunk`` is the
        slice of the cleaned text with line-breaking characters made visible
        by ``no_nl``; ``label`` is ``"<size>:<feature>"``.
    """
    if not text:
        # Nothing to chunk; return before touching the global SDK option.
        return []

    # NOTE(review): the option is process-global, so overlapping calls with
    # different chunk sizes could interfere with each other - confirm whether
    # the SDK accepts the chunk size per call.
    original_chunk_size = idk.sdk_opts.text_avg_chunk_size
    idk.sdk_opts.text_avg_chunk_size = chunk_size
    try:
        cleaned = ic.text_clean(text)
        processed = idk.text_features(cleaned)
    finally:
        # Always restore the previous value, even if processing raised.
        idk.sdk_opts.text_avg_chunk_size = original_chunk_size

    result = []
    start = 0
    # The sizes are consecutive lengths into the cleaned text; walk them to
    # slice out each chunk and pair it with its feature.
    for size, feat in zip(processed["sizes"], processed["features"]):
        end = start + size
        result.append((no_nl(cleaned[start:end]), f"{size}:{feat}"))
        start = end
    return result


# "ISCC-CODE" tab: takes a media file and shows the values returned by
# generate_iscc (ISCC string, thumbnail image, metadata).
with gr.Blocks(title="ISCC-CODE") as demo_generate:
    gr.Markdown("""
    ## 🌟 ISCC-CODE Generator - The DNA of digital content
    """)
    # First row: file input (scale 2) beside the thumbnail preview (scale 1).
    with gr.Row():
        with gr.Column(scale=2):
            in_file = gr.File(label="Media File")
        with gr.Column(scale=1):
            # "fixed-height" is the class targeted by the `.fixed-height img`
            # rule in custom_css.
            out_thumbnail = gr.Image(label="Extracted Thumbnail", elem_classes=["fixed-height"])
    with gr.Row():
        out_iscc = gr.Text(label="ISCC-CODE", show_copy_button=True)
    with gr.Row():
        out_meta = gr.Json(label="Metadata")
    # On change of the file input, call generate_iscc; its three return values
    # map to the outputs in the listed order.
    in_file.change(generate_iscc, inputs=[in_file], outputs=[out_iscc, out_thumbnail, out_meta])


# "ENCODING" tab: takes an ISCC string and shows the four encodings returned
# by explain_iscc. The name is passed as `title=` (like the sibling Blocks);
# passed positionally it would be taken as the first parameter of gr.Blocks,
# which is the theme.
with gr.Blocks(title="ENCODING") as demo_decode:
    gr.Markdown("""
    ## 🌟 A Codec for Self-Describing Compact Binary Codes
    """)
    with gr.Row():
        with gr.Column():
            in_iscc = gr.Text(
                label="ISCC",
                info="INPUT ANY VALID ISCC-CODE OR ISCC-UNIT",
                autofocus=True,
            )
            # Sample inputs offered below the text field.
            examples = [
                "ISCC:AAAWN77F727NXSUS",  # Meta-Code
                "bzqaqaal5rvp72lx2thvq",  # Multiformat
                "ISCC:EAASKDNZNYGUUF5A",  # Text-Code
                "ISCC:GABW5LUBVP23N3DOD7PPINHT5JKBI",  # Data-Code 128 bits
                "ISCC:KUAG5LUBVP23N3DOHCHWIYGXVN7ZS",  # ISCC-SUM
                "ISCC:KAA2Y5NUST7BFD5NN2XIDK7VW3WG4OEPMRQNPK37TE",  # ISCC-CDI
                "z36hVxiqoF8AAmDpZV958hn3tsv2i7v1NfCrSzpq",  # ISCC-CDI multiformats
                "ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI",
            ]
            gr.Examples(label="Example ISCCs", examples=examples, inputs=[in_iscc])

    gr.Markdown("## Different Encodings:")
    with gr.Row():
        with gr.Column():
            out_canonical = gr.Text(
                label="Canonical",
                info="NORMALIZED STANDARD REPRESENTATION",
                show_copy_button=True,
            )
            out_human = gr.Text(
                label="Human Readable",
                info="MAINTYPE - SUBTYPE - VERSION - LENGTH - BODY",
                show_copy_button=True,
            )
            out_decomposed = gr.Text(
                label="Decomposed",
                info="ISCC-UNITS",
                show_copy_button=True,
            )
            out_multiformat = gr.Text(
                label="Multiformat",
                info="BASE58-BTC",
                show_copy_button=True,
            )
    # On change of the input, call explain_iscc; its four return values map to
    # the outputs in the listed order.
    in_iscc.change(explain_iscc, inputs=[in_iscc], outputs=[
        out_canonical,
        out_human,
        out_decomposed,
        out_multiformat,
    ])

# "CHUNKING" tab: takes text and a chunk size and shows the labelled chunks
# returned by generate_text_code.
with gr.Blocks(title="CHUNKING") as demo_text_code:
    gr.Markdown("""
    ## 🌟 Content Defined Chunking for Shift-Resistant Text and Data Segmentation
    """)
    with gr.Row():
        with gr.Column():
            in_text = gr.Textbox(label="Text Input", lines=8, autofocus=True)
            # The slider value is passed to generate_text_code as chunk_size.
            in_chunksize = gr.Slider(
                label="Chunk Size",
                info="AVERAGE NUMBER OF CHARACTERS PER CHUNK",
                minimum=32, maximum=2048, step=32,
                value=64)

        # "chunked-text" is the id targeted by the `#chunked-text span.label`
        # rule in custom_css.
        out_text = gr.HighlightedText(
            label="Chunked Text Output",
            interactive=False,
            elem_id="chunked-text",
        )
    # Recompute the chunks when either the text or the chunk size changes.
    in_text.change(generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text])
    in_chunksize.change(generate_text_code, inputs=[in_text, in_chunksize], outputs=[out_text])

# Combine the three Blocks apps into one tabbed application; tab_names are
# given in the same order as interface_list.
demo = gr.TabbedInterface(
    title="▶️ ISCC Playground",
    interface_list=[demo_generate, demo_decode, demo_text_code],
    tab_names=["ISCC-CODE", "ENCODING", "CHUNKING"],
    css=custom_css,
)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()