File size: 3,690 Bytes
2a831b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# clean_app.py

# -----------------------------------
# Copy_right CC _developed By HNM
# -----------------------------------
import re
import gradio as gr
import tempfile

# STEP A
# Characters that both key families below are rewritten to, in table order.
_TARGETS = ("ٲ", "ُ", "ٚ", "ٕ", "ٛ", "ٔ", "ں")
# ASCII stand-ins: the symbol at position i is rewritten to _TARGETS[i].
_ASCII_STANDINS = (">", "<", ";", "=", ":", ".", ",")

# Insertion order matters: basic_replacements() applies the entries one after
# another, so the control-character pairs come first, then the lone control
# character, then the ASCII symbols.
mappings = {"\x04" + target: target for target in _TARGETS}
mappings["\x04"] = ""  # a stray control character 0x04 on its own is removed
mappings.update(zip(_ASCII_STANDINS, _TARGETS))
mappings["/"] = ""  # slashes are removed

def basic_replacements(text: str) -> str:
    """
    Run every substitution from the module-level ``mappings`` table over
    ``text`` (control-character 0x04 pairs and ASCII symbols), one entry
    after another in table order, and return the result.
    """
    result = text
    for needle in mappings:
        result = result.replace(needle, mappings[needle])
    return result

# STEP B
def fix_alif_combo(text: str) -> str:
    """ Collapse every 'اٲ' pair in the text down to a single 'ٲ'. """
    pair = "اٲ"
    single = "ٲ"
    # Splitting on the pair and re-joining with the single character rewrites
    # each non-overlapping occurrence, left to right.
    pieces = text.split(pair)
    return single.join(pieces)

# STEP C
def fix_question_mark(text: str) -> str:
    """
    Remove each '?' that sits between two characters and append "یٕ" after
    the character that followed it.
    E.g. "س?ت" => "ستیٕ".

    Chained marks are all handled: in "a?b?c" both '?' are rewritten, because
    the character before a '?' is only checked for, not consumed, so it can
    also be the character *after* the previous '?'.

    A '?' at the very start or very end of the text (or right next to a line
    break) has no character on one side and is left unchanged.
    """
    suffix = "یٕ"

    def _repl(m):
        # group(1) is the character after '?'; the '?' itself is dropped.
        return m.group(1) + suffix

    # Lookbehind instead of a capturing group for the preceding character:
    # a consumed character could not take part in the next match, which made
    # every second '?' in a chain like "a?b?c" get skipped.
    pattern = r"(?<=.)\?(.)"
    return re.sub(pattern, _repl, text)

# STEP D
def clean_line(line: str) -> str:
    """
    Run one line through the cleaning stages, in this order:
     1) basic_replacements (control-character 0x04 pairs, ASCII symbols),
     2) fix_alif_combo (اٲ -> ٲ),
     3) fix_question_mark (س?ت -> ستیٕ)
    and return the cleaned line.
    """
    stages = (basic_replacements, fix_alif_combo, fix_question_mark)
    for stage in stages:
        line = stage(line)
    return line

# Helper: cleans the entire string (multiple lines).
def clean_text(input_text: str) -> str:
    """
    Clean a multi-line string: break it into lines, pass each line through
    clean_line, and glue the results back together with "\\n".
    """
    return "\n".join(map(clean_line, input_text.splitlines()))


# -----------------------------------
# Gradio Interface
# -----------------------------------
def process_text(raw_text):
    """
    This function is called by Gradio when the user clicks the button.

    Args:
        raw_text: the uncleaned text from the input box. ``None`` is treated
            as an empty string.

    Returns:
        A 2-tuple:
         1) The cleaned text (for display)
         2) The path of a temporary UTF-8 ``.txt`` file containing the
            cleaned text (for download)
    """
    cleaned = clean_text("" if raw_text is None else raw_text)

    # delete=False keeps the file on disk after it is closed, so the returned
    # path is still valid when this function has finished. The `with` block
    # guarantees the handle is flushed and closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(cleaned)

    return cleaned, tmp.name


# Build the interface
# Everything created inside this `with` block becomes part of the `demo` app.
# Components are declared in the order they should appear, so statement order
# here is significant.
with gr.Blocks() as demo:
    # Page heading and a one-line usage hint.
    gr.Markdown("## Clean Text Tool")
    gr.Markdown(
        "Paste your raw/unprocessed text below, then click 'Clean Text' to get the cleaned result."
    )

    # One row holding two columns: the input box and the read-only output box.
    with gr.Row():
        with gr.Column():
            raw_text = gr.Textbox(
                label="Input (Paste uncleaned text)",
                lines=15,
                placeholder="Paste any length of text here...",
            )
        with gr.Column():
            # interactive=False: this box only displays the result.
            cleaned_output = gr.Textbox(
                label="Output (Cleaned text)",
                lines=15,
                interactive=False
            )

    # Button to trigger cleaning
    button = gr.Button("Clean Text")

    # File component for downloading the cleaned text; declared after the
    # button and outside the row above.
    download_file = gr.File(label="Download Cleaned .txt File")

    # Connect the function to the button: process_text receives the input
    # box's value and its two return values fill the output box and the
    # file component, in that order.
    button.click(
        fn=process_text,
        inputs=raw_text,
        outputs=[cleaned_output, download_file]
    )

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()