Spaces:
Running
Running
| """ | |
| Gradio web interface for Unicode adversarial attack demonstration. | |
| Uses GGUF quantized models via llama-cpp-python for CPU inference. | |
| Designed for deployment on HuggingFace Spaces (free CPU tier). | |
| Supervisor approved: Feb 9, 2026 | |
| """ | |
| import os | |
| from pathlib import Path | |
| import gradio as gr | |
| # ============================================================================= | |
| # Unicode Style Mappings | |
| # ============================================================================= | |
# Small-capital look-alikes for the Latin alphabet.  Upper- and lower-case
# input share the same glyph, and 'x'/'X' both map to a plain ASCII 'x'.
SMALL_CAPS_MAP = dict(zip(
    'abcdefghijklmnopqrstuvwxyz',
    '\u1d00\u0299\u1d04\u1d05\u1d07\ua730\u0262'  # a-g
    '\u029c\u026a\u1d0a\u1d0b\u029f\u1d0d\u0274'  # h-n
    '\u1d0f\u1d18\u01eb\u0280\ua731\u1d1b\u1d1c'  # o-u
    '\u1d20\u1d21x\u028f\u1d22',                  # v-z
))
# Mirror every lower-case entry onto its upper-case key.
SMALL_CAPS_MAP.update(
    {letter.upper(): glyph for letter, glyph in list(SMALL_CAPS_MAP.items())}
)
# Latin letter -> Canadian Aboriginal Syllabics look-alike.  Upper- and
# lower-case input share the same glyph.
# NOTE(review): 'l' and 'm' both map to U+14BB, and 'g' and 'r' both map to
# U+1550, so the transformation is not one-to-one - confirm this is intended.
CANADIAN_ABORIGINAL_MAP = dict(zip(
    'abcdefghijklmnopqrstuvwxyz',
    '\u141e\u1472\u1438\u146f\u156a\u155d\u1550'  # a-g
    '\u144b\u1403\u1489\u1420\u14bb\u14bb\u1422'  # h-n
    '\u14f1\u146d\u1574\u1550\u1506\u1466\u1421'  # o-u
    '\u142f\u1424\u157d\u153e\u1646',             # v-z
))
# Mirror every lower-case entry onto its upper-case key.
CANADIAN_ABORIGINAL_MAP.update(
    {letter.upper(): glyph
     for letter, glyph in list(CANADIAN_ABORIGINAL_MAP.items())}
)
# Circled Latin letters.  Both cases occupy contiguous code-point runs
# (a-z from U+24D0, A-Z from U+24B6), so the table is generated by offset.
CIRCLED_MAP = {
    **{chr(ord('a') + i): chr(0x24D0 + i) for i in range(26)},
    **{chr(ord('A') + i): chr(0x24B6 + i) for i in range(26)},
}
# Squared Latin capitals (contiguous run starting at U+1F130).  Lower-case
# input is mapped onto the same squared capital as its upper-case form.
SQUARED_MAP = {chr(ord('A') + i): chr(0x1F130 + i) for i in range(26)}
# Mirror every upper-case entry onto its lower-case key.
SQUARED_MAP.update(
    {letter.lower(): glyph for letter, glyph in list(SQUARED_MAP.items())}
)
# Characters that look like the input rotated by 180 degrees.  Built in three
# sections: lower-case letters, upper-case letters, then punctuation.  Letters
# without a rotated counterpart in this table map to themselves.
UPSIDE_DOWN_MAP = dict(zip(
    'abcdefghijklmnopqrstuvwxyz',
    '\u0250q\u0254p\u01dd\u025f'              # a-f
    '\u0183\u0265\u0131\u027e\u029e\u05df'    # g-l
    '\u026fuodb\u0279'                        # m-r
    's\u0287n\u028c\u028dx'                   # s-x
    '\u028ez',                                # y-z
))
UPSIDE_DOWN_MAP.update(zip(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    '\u2200q\u03fd\u15e1\u018e\u2132'         # A-F
    '\u0183HI\u017f\u029e\u02e5'              # G-L
    'WNO\u0500\u1f49\u1d1a'                   # M-R
    'S\u22a5\u2229\u039bMX'                   # S-X
    '\u028eZ',                                # Y-Z
))
# Punctuation: brackets and comma/apostrophe swap with each other.
UPSIDE_DOWN_MAP.update(zip(
    ".,'?![]()_;",
    "\u02d9',\u00bf\u00a1][)(\u203e\u061b",
))
# Mathematical script letters.  Most letters sit in two contiguous runs
# (a-z from U+1D4B6, A-Z from U+1D49C); the letters listed explicitly below
# use separate U+21xx code points instead and override the generated entry.
MATH_SCRIPT_MAP = {
    **{chr(ord('a') + i): chr(0x1D4B6 + i) for i in range(26)},
    **{chr(ord('A') + i): chr(0x1D49C + i) for i in range(26)},
    # Lower-case exceptions.
    'e': '\u212f', 'g': '\u210a', 'o': '\u2134',
    # Upper-case exceptions.
    'B': '\u212c', 'E': '\u2130', 'F': '\u2131', 'H': '\u210b',
    'I': '\u2110', 'L': '\u2112', 'M': '\u2133', 'R': '\u211b',
}
# Mathematical Fraktur letters.  Lower-case is one contiguous run from
# U+1D51E; upper-case runs from U+1D504 except for the five letters listed
# explicitly, which use separate U+21xx code points.
FRAKTUR_MAP = {
    **{chr(ord('a') + i): chr(0x1D51E + i) for i in range(26)},
    **{chr(ord('A') + i): chr(0x1D504 + i) for i in range(26)},
    # Upper-case exceptions override the generated entries above.
    'C': '\u212d', 'H': '\u210c', 'I': '\u2111', 'R': '\u211c', 'Z': '\u2128',
}
# Latin letter -> Cherokee look-alike.  Upper- and lower-case input share the
# same glyph.
CHEROKEE_MAP = dict(zip(
    'abcdefghijklmnopqrstuvwxyz',
    '\uab7a\u13fc\uab6f\uab70\uab7c\uab81\u13fd'  # a-g
    '\uab8b\uab96\uab7b\uabb6\uabae\uab87\uab91'  # h-n
    '\uab8e\uabb2\uab74\uab71\uabaa\uab72\uabbc'  # o-u
    '\uaba9\uaba4\uab82\uab79\uab93',             # v-z
))
# Mirror every lower-case entry onto its upper-case key.
CHEROKEE_MAP.update(
    {letter.upper(): glyph for letter, glyph in list(CHEROKEE_MAP.items())}
)
# Registry of available styles: style key -> (display name, character map).
# The key is what the UI passes around; the display name is shown to the user.
STYLES = dict(
    small_caps=('Small Caps', SMALL_CAPS_MAP),
    canadian_aboriginal=('Canadian Aboriginal', CANADIAN_ABORIGINAL_MAP),
    circled=('Circled Letters', CIRCLED_MAP),
    squared=('Squared Letters', SQUARED_MAP),
    upside_down=('Upside Down', UPSIDE_DOWN_MAP),
    math_script=('Math Script', MATH_SCRIPT_MAP),
    fraktur=('Fraktur', FRAKTUR_MAP),
    cherokee=('Cherokee', CHEROKEE_MAP),
)
| # ============================================================================= | |
| # Model Configuration | |
| # ============================================================================= | |
# Model registry: model key -> display name plus the HuggingFace repo and file
# of the GGUF weights.  Within this file load_model() reads only 'repo_id' and
# 'filename'; 'chat_format' is recorded here but not passed to the loader.
MODELS = {
    key: {
        'name': name,
        'repo_id': repo_id,
        'filename': filename,
        'chat_format': chat_format,
    }
    for key, name, repo_id, filename, chat_format in (
        ('gemma', 'Gemma-2-2b-it', 'bartowski/gemma-2-2b-it-GGUF',
         'gemma-2-2b-it-Q4_K_M.gguf', 'gemma'),
        ('phi', 'Phi-3-mini-4k', 'microsoft/Phi-3-mini-4k-instruct-gguf',
         'Phi-3-mini-4k-instruct-q4.gguf', 'chatml'),
        ('qwen', 'Qwen2.5-3B', 'Qwen/Qwen2.5-3B-Instruct-GGUF',
         'qwen2.5-3b-instruct-q4_k_m.gguf', 'chatml'),
    )
}

# Process-wide cache.  Only one model is kept at a time to avoid memory
# pressure; _llama_class memoises the lazily imported llama_cpp.Llama class.
_current_model, _current_model_name, _llama_class = None, None, None
| def _prepare_musl_compat(): | |
| """ | |
| Some prebuilt llama-cpp wheels expect musl runtime symbol names. | |
| On glibc-based HF Spaces, provide a local compatibility symlink. | |
| """ | |
| target_candidates = [ | |
| Path("/lib/ld-musl-x86_64.so.1"), | |
| Path("/usr/lib/ld-musl-x86_64.so.1"), | |
| Path("/lib/x86_64-linux-musl/libc.so"), | |
| Path("/usr/lib/x86_64-linux-musl/libc.so"), | |
| ] | |
| target = next((p for p in target_candidates if p.exists()), None) | |
| if target is None: | |
| return | |
| compat_dir = Path("/tmp/musl-compat") | |
| compat_dir.mkdir(parents=True, exist_ok=True) | |
| compat_lib = compat_dir / "libc.musl-x86_64.so.1" | |
| if not compat_lib.exists(): | |
| compat_lib.symlink_to(target) | |
| current = os.environ.get("LD_LIBRARY_PATH", "") | |
| if str(compat_dir) not in current.split(":"): | |
| os.environ["LD_LIBRARY_PATH"] = ( | |
| f"{compat_dir}:{current}" if current else str(compat_dir) | |
| ) | |
def _get_llama_class():
    """Return the ``llama_cpp.Llama`` class, importing it on first use.

    The import is deferred so that the musl compatibility setup runs before
    llama_cpp is loaded; the class is then memoised in ``_llama_class``.
    """
    global _llama_class
    if _llama_class is not None:
        return _llama_class

    _prepare_musl_compat()
    import llama_cpp

    _llama_class = llama_cpp.Llama
    return _llama_class
| # ============================================================================= | |
| # Core Functions | |
| # ============================================================================= | |
def transform_text(text: str, style: str) -> str:
    """Return *text* rewritten in the Unicode style named by *style*.

    Characters without an entry in the style's map are kept unchanged, and an
    unknown style key returns *text* untouched.  For ``'upside_down'`` the
    mapped string is additionally reversed.
    """
    try:
        glyphs = STYLES[style][1]
    except KeyError:
        return text

    converted = ''.join([glyphs.get(ch, ch) for ch in text])
    # Rotated text reads right-to-left, so flip the character order as well.
    return converted[::-1] if style == 'upside_down' else converted
def load_model(model_key: str):
    """Return the GGUF model for *model_key*, loading it if necessary.

    Only one model is kept at a time to avoid memory pressure: asking for the
    model that is already cached returns it directly, otherwise the cached
    model is released and the requested one is loaded with
    ``Llama.from_pretrained``.

    Args:
        model_key: A key of ``MODELS``.

    Returns:
        The loaded llama-cpp model object.

    Raises:
        KeyError: If *model_key* is not in ``MODELS``.  The currently cached
            model is left in place in that case.
    """
    global _current_model, _current_model_name
    if _current_model_name == model_key and _current_model is not None:
        return _current_model

    # Resolve everything that can fail cheaply *before* evicting the cached
    # model, so a bad key or a failed import does not throw away a good model.
    config = MODELS[model_key]
    Llama = _get_llama_class()

    # Release our reference to the previous model before allocating the next
    # one, so both are not held at the same time.
    _current_model = None
    _current_model_name = None

    _current_model = Llama.from_pretrained(
        repo_id=config['repo_id'],
        filename=config['filename'],
        n_ctx=512,
        n_threads=8,
        verbose=False,
    )
    _current_model_name = model_key
    return _current_model
def preload_default_model():
    """Load the default model ('phi') up front, logging before and after."""
    default_key = 'phi'
    print("Pre-loading default model (Phi-3-mini)...")
    load_model(default_key)
    print("Default model ready.")
def _build_prompt(text: str, task: str) -> str:
    """Return the user prompt for *task* with *text* substituted in."""
    if task == 'fact_verification':
        return f"""Classify the following claim as either 'SUPPORTS', 'REFUTES', or 'NOT_ENOUGH_INFO'.
The available classes are:
- "SUPPORTS": The claim is true or supported by common knowledge.
- "REFUTES": The claim is false or contradicts established facts.
- "NOT_ENOUGH_INFO": The claim cannot be verified with common knowledge.
### Important:
- **Only choose one class from the above-mentioned classes.**
- **Answer with just one word, no other explanations.**
Claim: {text}
Answer:"""
    return f"""Determine if the following sentence is an argument. An argument is a statement that takes a position on a topic and provides reasoning or evidence.
The available classes are:
- "ARGUMENT": The sentence is an argument (takes a stance with reasoning).
- "NOT_ARGUMENT": The sentence is not an argument (factual statement, question, or lacks clear stance).
### Important:
- **Only choose one class from the above-mentioned classes.**
- **Answer with just one word, no other explanations.**
Sentence: {text}
Answer:"""


def _extract_label(output: str, task: str) -> str:
    """Map upper-cased model output onto one of the task's class labels."""
    if task == 'fact_verification':
        # Check the negative class first: its spellings are the most specific.
        if any(marker in output
               for marker in ('NOT_ENOUGH_INFO', 'NOT ENOUGH INFO', 'NEI')):
            return 'NOT_ENOUGH_INFO'
        if 'REFUTE' in output:
            return 'REFUTES'
        if 'SUPPORT' in output:
            return 'SUPPORTS'
        return 'NOT_ENOUGH_INFO'

    # 'NOT_ARGUMENT' contains 'ARGUMENT', so the negative forms must be
    # tested before the positive one.
    if any(marker in output
           for marker in ('NOT_ARGUMENT', 'NOT ARGUMENT', 'NOT AN ARGUMENT')):
        return 'NOT_ARGUMENT'
    if output.startswith('NO'):
        return 'NOT_ARGUMENT'
    if 'ARGUMENT' in output:
        return 'ARGUMENT'
    if output.startswith('YES'):
        return 'ARGUMENT'
    return 'NOT_ARGUMENT'


def get_prediction(model, text: str, task: str, model_key: str) -> str:
    """Get model prediction for the given text and task.

    Uses the same prompt structure as the actual experiments
    (phase1_evaluation.ipynb) for consistency.

    Args:
        model: Object exposing ``create_chat_completion(messages=...,
            max_tokens=..., temperature=...)``.
        text: The claim or sentence to classify.
        task: ``'fact_verification'``; any other value is treated as the
            argument-mining task.
        model_key: Accepted for interface compatibility; not used here.

    Returns:
        ``'SUPPORTS'``, ``'REFUTES'`` or ``'NOT_ENOUGH_INFO'`` for fact
        verification, otherwise ``'ARGUMENT'`` or ``'NOT_ARGUMENT'``.
        Output that matches no label falls back to ``'NOT_ENOUGH_INFO'`` /
        ``'NOT_ARGUMENT'`` respectively.
    """
    messages = [
        {"role": "user", "content": _build_prompt(text, task)},
    ]
    response = model.create_chat_completion(
        messages=messages,
        max_tokens=8,
        temperature=0,
    )
    # Treat missing content as empty output instead of crashing on
    # None.strip(); it then falls through to the task's default label.
    content = response['choices'][0]['message']['content'] or ''
    return _extract_label(content.strip().upper(), task)
def run_attack(text: str, style: str, model_key: str, task: str, progress=gr.Progress()):
    """Run the Unicode attack and stream intermediate results to the UI.

    This is a generator: each yielded 5-tuple is
    ``(styled_text, original_prediction, styled_prediction, status, color)``.
    Intermediate tuples carry placeholder text while the model loads and runs.

    Args:
        text: The sentence to classify.
        style: Key of ``STYLES`` selecting the Unicode transformation.
        model_key: Key of ``MODELS`` selecting the model.
        task: Task name forwarded to ``get_prediction``.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Yields:
        5-tuples as described above.  The final tuple has color ``"green"``
        when the prediction changed and ``"red"`` when it did not or when an
        error occurred.
    """
    import time

    if not text or not text.strip():
        # Because this function is a generator, a `return <value>` would never
        # reach the UI - the message has to be yielded before returning.
        yield "", "", "", "Please enter some text.", ""
        return

    try:
        t_start = time.time()

        # Transform text
        progress(0.0, desc="Transforming text...")
        styled_text = transform_text(text, style)

        # Load model (shows progress)
        progress(0.1, desc="Loading model (may take a moment on first run)...")
        yield styled_text, "Loading model...", "", "Loading model (this may take a moment)...", ""
        model = load_model(model_key)

        # Get original prediction
        progress(0.4, desc="Running inference on original text...")
        yield styled_text, "Running...", "", "Getting prediction for original text...", ""
        original_pred = get_prediction(model, text, task, model_key)

        # Get styled prediction
        progress(0.7, desc="Running inference on styled text...")
        yield styled_text, original_pred, "Running...", "Getting prediction for styled text...", ""
        styled_pred = get_prediction(model, styled_text, task, model_key)

        progress(1.0, desc="Done!")
        elapsed = time.time() - t_start

        # The attack counts as successful when styling flipped the label.
        if original_pred != styled_pred:
            status = f"ATTACK SUCCEEDED: Prediction changed from {original_pred} to {styled_pred}\nTime taken: {elapsed:.1f}s"
            result_color = "green"
        else:
            status = f"Attack failed: Prediction unchanged ({original_pred})\nTime taken: {elapsed:.1f}s"
            result_color = "red"
        yield styled_text, original_pred, styled_pred, status, result_color
    except Exception as e:
        # Top-level UI boundary: surface any failure in the status box
        # instead of letting the handler crash.
        yield "", "", "", f"Error: {str(e)}", "red"
def preview_all_styles(text: str) -> str:
    """Return *text* rendered in every registered style, one section each.

    The result starts with the original text and a 50-character rule; blank
    or whitespace-only input yields a short prompt instead.
    """
    if not text.strip():
        return "Enter text to preview."

    header = [f"Original: {text}", "=" * 50]
    sections = [
        f"\n{label}:\n{transform_text(text, style_key)}"
        for style_key, (label, _glyphs) in STYLES.items()
    ]
    return '\n'.join(header + sections)
| # ============================================================================= | |
| # Sample Sentences (from actual experiment datasets) | |
| # ============================================================================= | |
| SAMPLE_SENTENCES = { | |
| 'fact_verification': [ | |
| "Sea level rise due to global warming is exaggerated.", | |
| "CO2 is increasing rapidly, and is reaching levels not seen on the earth for millions of years.", | |
| "Antarctica is too cold to lose ice.", | |
| "Greenhouse gases have been the main contributor of warming since 1970.", | |
| "The polar bear population has been growing.", | |
| "Renewables can't provide baseload power", | |
| "Sea ice has diminished much faster than scientists and climate models anticipated.", | |
| "Arctic sea ice extent was lower in the past.", | |
| "A large amount of warming is delayed, and if we don't act now we could pass tipping points.", | |
| "Clouds provide negative feedback.", | |
| ], | |
| 'argument_mining': [ | |
| "proponents of concealed carry say that criminals are less likely to attack someone they believe to be armed .", | |
| "the right to not be killed supersedes the right to not be pregnant .", | |
| "marijuana is less addictive than tobacco or alcohol , and compares favorably to those drugs on nearly every health metric .", | |
| "nuclear wastes as , or in , spent fuel are an unresolved problem .", | |
| "humans should not be turned into an experimental playground .", | |
| "higher prices lead to decreased demand , which can have a depressive effect on the economy .", | |
| "abortion is condemnable for the same reasons that slavery and genocide are .", | |
| "students can wear a variety of expressive items , such as buttons or jewlery .", | |
| "so where does all of this leave us ?", | |
| "diseases which have dogged us for generations could be wiped out due to our evolving faster than they could ever hope to .", | |
| ], | |
| } | |
def get_sample_choices(task):
    """Return a dropdown update for *task*'s samples plus the first sample.

    Unknown tasks fall back to the fact-verification samples.  Each dropdown
    entry is a ``(label, value)`` pair whose label is cut to 80 characters
    with a trailing ``"..."`` when the sentence is longer.
    """
    try:
        sentences = SAMPLE_SENTENCES[task]
    except KeyError:
        sentences = SAMPLE_SENTENCES['fact_verification']

    choices = []
    for sentence in sentences:
        if len(sentence) > 80:
            label = sentence[:80] + "..."
        else:
            label = sentence
        choices.append((label, sentence))

    first = sentences[0]
    return gr.update(choices=choices, value=first), first
def fill_from_sample(sample):
    """Return *sample* for the text box, or a no-op update when it is empty."""
    return sample if sample else gr.update()
| # ============================================================================= | |
| # Gradio Interface | |
| # ============================================================================= | |
| def create_demo(): | |
| """Create the Gradio demo interface.""" | |
# Builds a gr.Blocks app with three tabs ("Attack Demo", "Style Preview",
# "About") plus a footer note, wires the event handlers, and returns the
# Blocks object without launching it.
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the Row/Column/Tab contexts below cannot be read off the text - confirm
# against the deployed layout before restructuring.
| with gr.Blocks( | |
| title="Unicode Adversarial Attack Demo", | |
| theme=gr.themes.Soft(), | |
| ) as demo: | |
| gr.Markdown(""" | |
| # Unicode Adversarial Attack Demo | |
| Test how LLMs respond to Unicode-styled text. This demo transforms your input | |
| using special Unicode characters and compares model predictions. | |
| **Note:** This demo uses quantized models (Q4) for CPU inference. | |
| Results may differ slightly from full-precision models used in experiments. | |
| Each attack takes approximately **60–90 seconds** to complete (two inference passes on free CPU hardware). | |
| """) | |
| with gr.Tab("Attack Demo"): | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
# The initial text is the same string as the first fact_verification sample.
| text_input = gr.Textbox( | |
| label="Input Text", | |
| lines=3, | |
| placeholder="Enter a claim or statement, or pick one from the samples below...", | |
| value="Sea level rise due to global warming is exaggerated.", | |
| ) | |
# Dropdown entries are (label, value) pairs: the label is cut to 80 characters
# plus "..." for long sentences, the value is the full sentence.
| sample_dropdown = gr.Dropdown( | |
| choices=[(s[:80] + "..." if len(s) > 80 else s, s) | |
| for s in SAMPLE_SENTENCES['fact_verification']], | |
| label="Sample Sentences (from experiment datasets)", | |
| value=None, | |
| interactive=True, | |
| ) | |
| with gr.Row(): | |
# Style and model choices are (display name, key) pairs built from the STYLES
# and MODELS registries.
| style_dropdown = gr.Dropdown( | |
| choices=[(STYLES[k][0], k) for k in STYLES], | |
| label="Unicode Style", | |
| value="upside_down", | |
| ) | |
| model_dropdown = gr.Dropdown( | |
| choices=[(MODELS[k]['name'], k) for k in MODELS], | |
| label="Model", | |
| value="phi", | |
| ) | |
| task_dropdown = gr.Dropdown( | |
| choices=[ | |
| ("Fact Verification", "fact_verification"), | |
| ("Argument Mining", "argument_mining"), | |
| ], | |
| label="Task", | |
| value="fact_verification", | |
| ) | |
| run_btn = gr.Button("Run Attack", variant="primary", size="lg") | |
| with gr.Column(scale=1): | |
| styled_output = gr.Textbox(label="Styled Text", lines=3) | |
| with gr.Row(): | |
| original_pred = gr.Textbox(label="Original Prediction") | |
| styled_pred = gr.Textbox(label="Styled Prediction") | |
| status_output = gr.Textbox(label="Result", lines=2) | |
# Receives the fifth value yielded by run_attack (the result color string);
# nothing else in this function reads it.
| result_state = gr.State("") | |
| # Wire up sample dropdown → text input | |
| sample_dropdown.change( | |
| fn=fill_from_sample, | |
| inputs=[sample_dropdown], | |
| outputs=[text_input], | |
| ) | |
| # Wire up task change → update sample dropdown + text input | |
| task_dropdown.change( | |
| fn=get_sample_choices, | |
| inputs=[task_dropdown], | |
| outputs=[sample_dropdown, text_input], | |
| ) | |
# run_attack yields 5-tuples, matched positionally to the five outputs below.
| run_btn.click( | |
| fn=run_attack, | |
| inputs=[text_input, style_dropdown, model_dropdown, task_dropdown], | |
| outputs=[styled_output, original_pred, styled_pred, status_output, result_state], | |
| ) | |
| with gr.Tab("Style Preview"): | |
| gr.Markdown("### Preview Unicode Styles") | |
| gr.Markdown("See how your text looks in each of the 8 Unicode styles before running an attack.") | |
| preview_input = gr.Textbox( | |
| label="Enter text", | |
| placeholder="Type something...", | |
| value="Climate change is real", | |
| ) | |
| preview_btn = gr.Button("Preview All Styles") | |
| preview_output = gr.Textbox(label="Styled Versions", lines=25) | |
| preview_btn.click( | |
| fn=preview_all_styles, | |
| inputs=[preview_input], | |
| outputs=[preview_output], | |
| ) | |
# The About text is a regular (non-raw) string, so the \u / \U escapes in the
# style table are decoded by Python into the actual styled characters.
# NOTE(review): in the "Canadian Aboriginal" example row the first word is
# \u1466\u156a\u1506\u1424, whereas CANADIAN_ABORIGINAL_MAP applied to "text"
# gives \u1466\u156a\u157d\u1466 - the other rows match their maps; confirm
# whether this example is a typo.
| with gr.Tab("About"): | |
| gr.Markdown(""" | |
| ## About This Demo | |
| This demo accompanies the research project: | |
| **"Unicode-Based Adversarial Attacks on Large Language Models: Evaluating Robustness and Building Interactive Attack Interfaces"** | |
| ### Key Findings (Phase 1: Full-Text Transformation) | |
| | Metric | Value | | |
| |--------|-------| | |
| | Total Experiments | 48 (3 models x 2 tasks x 8 styles) | | |
| | Total Samples Tested | 118,752 | | |
| | Overall Attack Success Rate | 52.1% | | |
| | Most Vulnerable Model | Phi-3-mini (60.7% ASR) | | |
| | Most Robust Model | Gemma-2-2b (39.1% ASR) | | |
| | Most Effective Style | Upside Down (58.4% ASR) | | |
| ### Phase 2: Importance-Based Perturbation | |
| Using gradient-based word importance (Captum Saliency), we perturb words | |
| one at a time in importance order until the prediction flips. | |
| | Model | ASR | Mean Perturbation Ratio | | |
| |-------|-----|------------------------| | |
| | Gemma-2-2b | 42.8% | 13.8% of words | | |
| | Phi-3-mini | 64.5% | 24.4% of words | | |
| | Qwen2.5-3B | 65.0% | 12.8% of words | | |
| ### Models Used | |
| | Model | Parameters | Quantization | | |
| |-------|------------|--------------| | |
| | Gemma-2-2b-it | 2B | Q4_K_M | | |
| | Phi-3-mini-4k | 3.8B | Q4 | | |
| | Qwen2.5-3B | 3B | Q4_K_M | | |
| ### Unicode Styles (8 Styles) | |
| | Style | Example | Mean ASR | | |
| |-------|---------|----------| | |
| | Small Caps | \u1d1b\u1d07x\u1d1b \u029f\u026a\u1d0b\u1d07 \u1d1b\u029c\u026a\ua731 | 38.1% | | |
| | Canadian Aboriginal | \u1466\u156a\u1506\u1424 \u14bb\u1403\u1420\u156a \u1466\u144b\u1403\u1506 | 56.5% | | |
| | Circled Letters | \u24e3\u24d4\u24e7\u24e3 \u24db\u24d8\u24da\u24d4 \u24e3\u24d7\u24d8\u24e2 | 53.1% | | |
| | Squared Letters | \U0001F143\U0001F134\U0001F147\U0001F143 \U0001F13B\U0001F138\U0001F13A\U0001F134 \U0001F143\U0001F137\U0001F138\U0001F142 | 53.1% | | |
| | Upside Down | s\u0131\u0265\u0287 \u01dd\u029e\u0131\u05df \u0287x\u01dd\u0287 | 58.4% | | |
| | Math Script | \U0001d4c9\u212f\U0001d4cd\U0001d4c9 \U0001d4c1\U0001d4be\U0001d4c0\u212f \U0001d4c9\U0001d4bd\U0001d4be\U0001d4c8 | 50.5% | | |
| | Fraktur | \U0001d531\U0001d522\U0001d535\U0001d531 \U0001d529\U0001d526\U0001d528\U0001d522 \U0001d531\U0001d525\U0001d526\U0001d530 | 52.6% | | |
| | Cherokee | \uab72\uab7c\uab82\uab72 \uabae\uab96\uabb6\uab7c \uab72\uab8b\uab96\uabaa | 54.3% | | |
| --- | |
| **Student:** Endrin Hoti (King's College London) | |
| **Supervisor:** Dr. Oana Cocarascu | |
| """) | |
| gr.Markdown(""" | |
| --- | |
| *First query may be slow while the model downloads and loads (~2GB per model). | |
| Subsequent queries with the same model will be much faster.* | |
| """) | |
| return demo | |
| # ============================================================================= | |
| # Entry Point | |
| # ============================================================================= | |
if __name__ == "__main__":
    # Load the default model before the UI comes up.
    print("Starting model pre-load...")
    preload_default_model()

    demo = create_demo()
    # Queue with a default concurrency limit of 1 (a single model is cached
    # at a time), then start the server.
    demo.queue(default_concurrency_limit=1).launch()