import re, difflib, traceback
from typing import List, Tuple

import gradio as gr
from huggingface_hub import hf_hub_download
from ctransformers import AutoModelForCausalLM

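
# GGUF builds to try in order, from highest quality (Q8_0) down to the
# smallest fallback (Q4_0).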
CANDIDATES: Tuple[Tuple[str, str], ...] = (
    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q8_0.gguf"),
    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q6_K_L.gguf"),
    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q5_K_M.gguf"),
    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_0.gguf"),
)


def resolve_model_file() -> str:
    """Download the first candidate GGUF that succeeds and return its local path."""
    last_err = None
    for repo, fname in CANDIDATES:
        try:
            path = hf_hub_download(repo_id=repo, filename=fname)
            print(f"[Humanizer] Using {repo} :: {fname}")
            return path
        except Exception as e:
            last_err = e
            print(f"[Humanizer] Could not get {repo}/{fname}: {e}")
    raise RuntimeError(f"Failed to download any GGUF. Last error: {last_err}")

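
# ctransformers backend name, plus a module-level cache so the model is
# loaded at most once per process.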
MODEL_TYPE = "llama"
_llm = None


def load_model():
    """Load the GGUF the first time it is needed, then reuse it."""
    global _llm
    if _llm is None:
        file_path = resolve_model_file()
        _llm = AutoModelForCausalLM.from_pretrained(
            file_path,
            model_type=MODEL_TYPE,
            gpu_layers=0,  # CPU-only
            context_length=4096,
        )
    return _llm

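
# Spans the model must not rewrite (code, URLs, citations, numbers) are
# swapped for numbered sentinel markers before generation and swapped back
# afterwards.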
SENTINEL_OPEN, SENTINEL_CLOSE = "§§KEEP_OPEN§§", "§§KEEP_CLOSE§§"
URL_RE = re.compile(r'https?://\S+')
CODE_RE = re.compile(r'`{1,3}[\s\S]*?`{1,3}')
CITE_RE = re.compile(r'\[[^\]]+\]|\(\d{4}\)')
NUM_RE = re.compile(r'\b\d[\d,.\-/]*\b')

# One combined pattern, applied in a single pass: running the patterns
# sequentially would let NUM_RE re-wrap the numeric ids inside markers that an
# earlier pattern had already inserted.
PROTECT_RE = re.compile("|".join(p.pattern for p in (CODE_RE, URL_RE, CITE_RE, NUM_RE)))


def protect(text: str):
    protected = []
    def wrap(m):
        protected.append(m.group(0))
        return f"{SENTINEL_OPEN}{len(protected)-1}{SENTINEL_CLOSE}"
    return PROTECT_RE.sub(wrap, text), protected


def restore(text: str, protected: List[str]):
    def unwrap(m): return protected[int(m.group(1))]
    pattern = rf"{re.escape(SENTINEL_OPEN)}(\d+){re.escape(SENTINEL_CLOSE)}"
    text = re.sub(pattern, unwrap, text)
    # Drop any sentinel fragments the model mangled.
    return text.replace(SENTINEL_OPEN, "").replace(SENTINEL_CLOSE, "")

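
# System prompt: the sentinel wrapper is spelled out so the model leaves the
# protected spans untouched.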
SYSTEM = (
    "You are an expert editor. Humanize the user's text: improve flow, vary sentence length, "
    "split run-ons, replace stiff phrasing with natural alternatives, and preserve meaning. "
    "Do NOT alter anything wrapped by §§KEEP_OPEN§§<id>§§KEEP_CLOSE§§ (citations, URLs, numbers, code). "
    "Keep the requested tone and region. No em dashes; use simple punctuation."
)

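
# ctransformers is a plain completion API, so the Llama 3 chat template is
# assembled by hand: each header ends with a blank line and each turn with
# <|eot_id|>.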
def build_prompt(text: str, tone: str, region: str, level: str, intensity: int) -> str:
    user = (
        f"Tone: {tone}. Region: {region} English. Reading level: {level}. "
        f"Humanization intensity: {intensity} (10 strongest).\n\n"
        f"Rewrite this text. Keep markers intact:\n\n{text}"
    )
    return (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{SYSTEM}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{user}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

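
# SequenceMatcher ratio in [0, 1]; values near 1.0 mean the rewrite barely
# changed anything.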
def diff_ratio(a: str, b: str) -> float:
    return difflib.SequenceMatcher(None, a, b).ratio()

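
# One sampled completion; generation stops at Llama's end-of-turn token.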
def generate_once(prompt: str, temperature: float, max_new: int = 384) -> str:
    llm = load_model()
    out = llm(
        prompt,
        temperature=temperature,
        top_p=0.95,
        max_new_tokens=max_new,
        stop=["<|eot_id|>"],
    )
    return out.strip()

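
# Full pipeline: protect spans -> build prompt -> generate (retrying once at a
# higher temperature if the draft is nearly identical) -> restore spans.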
def humanize_core(text: str, tone: str, region: str, level: str, intensity: int):
    try:
        protected_text, bag = protect(text)
        prompt = build_prompt(protected_text, tone, region, level, intensity)

        draft = generate_once(prompt, temperature=0.35)
        if diff_ratio(protected_text, draft) > 0.97:
            draft = generate_once(prompt, temperature=0.9)

        draft = draft.replace("—", "-")
        final = restore(draft, bag)

        # Fallback: if the model dropped or mangled a marker, its span never
        # made it back into the text. restore() has already stripped intact
        # markers from `final`, so check the draft and re-append the span
        # rather than silently losing a citation, URL, or number.
        for i, span in enumerate(bag):
            marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
            if marker not in draft and span not in final:
                final += f" {span}"
        return final
    except Exception:
        return "ERROR:\n" + traceback.format_exc()

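
# Gradio handler: the slider delivers its value as a float, the core wants an int.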
def ui_humanize(text, tone, region, level, intensity):
    return humanize_core(text, tone, region, level, int(intensity))

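
# .queue() serializes requests so concurrent users don't contend for the single
# CPU-bound model. Example call against the REST endpoint named in the
# description below (assumes the default local port 7860):
#   curl -X POST http://127.0.0.1:7860/api/predict/ \
#        -H "Content-Type: application/json" \
#        -d '{"data": ["Some text to rewrite.", "professional", "US", "general", 6]}'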
demo = gr.Interface(
    fn=ui_humanize,
    inputs=[
        gr.Textbox(lines=12, label="Input text"),
        gr.Dropdown(["professional", "casual", "academic", "friendly", "persuasive"], value="professional", label="Tone"),
        gr.Dropdown(["US", "UK", "KE"], value="US", label="Region"),
        gr.Dropdown(["general", "simple", "advanced"], value="general", label="Reading level"),
        gr.Slider(1, 10, value=6, step=1, label="Humanization intensity"),
    ],
    outputs=gr.Textbox(label="Humanized"),
    title="NoteCraft Humanizer (Llama-3.2-3B-Instruct)",
    description="REST: POST /api/predict/ with { data: [text,tone,region,level,intensity] }",
).queue()

if __name__ == "__main__":
    demo.launch()