Spaces:
Sleeping
Sleeping
| """ | |
| Combined Humanizer V2 - Adversarial Model + StealthWriter Post-Processor | |
| Optimized for bypassing AI detectors using proven techniques. | |
| """ | |
| import gradio as gr | |
| from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| import re | |
| import random | |
| import os | |
| # StealthWriter-style post-processor | |
| class StealthPostProcessor: | |
| """Post-process text using StealthWriter's proven approach.""" | |
| CONTRACTION_EXPANSIONS = { | |
| "it's": "it is", "It's": "It is", "don't": "do not", "Don't": "Do not", | |
| "doesn't": "does not", "Doesn't": "Does not", "didn't": "did not", | |
| "won't": "will not", "wouldn't": "would not", "couldn't": "could not", | |
| "shouldn't": "should not", "can't": "cannot", "Can't": "Cannot", | |
| "I'm": "I am", "I've": "I have", "I'll": "I will", "I'd": "I would", | |
| "you're": "you are", "You're": "You are", "you've": "you have", | |
| "we're": "we are", "We're": "We are", "we've": "we have", | |
| "they're": "they are", "They're": "They are", "they've": "they have", | |
| "that's": "that is", "That's": "That is", "there's": "there is", | |
| "what's": "what is", "who's": "who is", "let's": "let us", | |
| "isn't": "is not", "aren't": "are not", "wasn't": "was not", | |
| "weren't": "were not", "haven't": "have not", "hasn't": "has not", | |
| "hadn't": "had not", "here's": "here is", "he's": "he is", | |
| "she's": "she is", "we'll": "we will", "they'll": "they will", | |
| "gotta": "got to", "gonna": "going to", "wanna": "want to", | |
| "kinda": "kind of", "sorta": "sort of", | |
| } | |
| EMPHATIC_PHRASES = [", I tell you", ", I must say", ", mind you", ", you see", ", indeed"] | |
| FORMAL_STARTERS = [ | |
| "It is almost a given that ", "One must acknowledge that ", | |
| "It goes without saying that ", "It is worth noting that ", | |
| "As it happens, ", "As a matter of fact, ", "In point of fact, ", | |
| ] | |
| SYNONYM_REPLACEMENTS = { | |
| "furry friend": "hairy companion", "pet": "animal companion", | |
| "dog": "canine", "cat": "feline", "help": "assist", "use": "utilize", | |
| "get": "obtain", "make": "create", "good": "favorable", "bad": "unfavorable", | |
| "big": "substantial", "small": "modest", "very": "quite", "really": "truly", | |
| "important": "significant", "need": "require", "want": "desire", | |
| "think": "believe", "know": "understand", "see": "observe", | |
| "find": "discover", "show": "demonstrate", "give": "provide", | |
| "start": "commence", "begin": "initiate", "end": "conclude", | |
| "try": "attempt", "keep": "maintain", "lot of": "numerous", | |
| "a lot": "considerably", "lots of": "a great many", | |
| } | |
| FILLERS_TO_REMOVE = [ | |
| "like, ", ", like,", " like ", "you know, ", ", you know,", | |
| "basically, ", ", basically,", "honestly, ", "Honestly, ", | |
| "I mean, ", ", I mean,", "pretty much ", "kind of ", "sort of ", | |
| "actually, ", ", actually,", "literally ", "just ", "really ", | |
| "so, ", "So, ", "well, ", "Well, ", "anyway, ", "Anyway, ", | |
| "right? ", "Right? ", "you know? ", "I guess ", "I gotta say, ", | |
| ] | |
| def __init__(self, intensity="high"): | |
| self.change_probability = {"low": 0.3, "medium": 0.5, "high": 0.7}.get(intensity, 0.7) | |
| def expand_contractions(self, text): | |
| for contraction, expansion in self.CONTRACTION_EXPANSIONS.items(): | |
| pattern = re.compile(r'\b' + re.escape(contraction) + r'\b') | |
| text = pattern.sub(expansion, text) | |
| return text | |
| def remove_casual_fillers(self, text): | |
| for filler in self.FILLERS_TO_REMOVE: | |
| text = text.replace(filler, " " if filler.startswith(" ") or filler.endswith(" ") else "") | |
| return re.sub(r'\s+', ' ', text).strip() | |
| def apply_synonym_replacements(self, text): | |
| for common, formal in self.SYNONYM_REPLACEMENTS.items(): | |
| if random.random() < self.change_probability: | |
| pattern = re.compile(r'\b' + re.escape(common) + r'\b', re.IGNORECASE) | |
| def replace_preserve_case(match): | |
| word = match.group(0) | |
| if word.isupper(): return formal.upper() | |
| elif word[0].isupper(): return formal.capitalize() | |
| return formal | |
| text = pattern.sub(replace_preserve_case, text) | |
| return text | |
| def add_emphatic_phrases(self, text): | |
| sentences = re.split(r'(?<=[.!])\s+', text) | |
| result = [] | |
| for sentence in sentences: | |
| # Only add emphatic phrase if sentence doesn't already have one | |
| has_emphatic = any(phrase.strip(", ") in sentence for phrase in self.EMPHATIC_PHRASES) | |
| if sentence.endswith('.') and not has_emphatic and random.random() < self.change_probability * 0.25: | |
| phrase = random.choice(self.EMPHATIC_PHRASES) | |
| sentence = sentence[:-1] + phrase + "." | |
| result.append(sentence) | |
| return ' '.join(result) | |
| def add_formal_starters(self, text): | |
| sentences = re.split(r'(?<=[.!?])\s+', text) | |
| result = [] | |
| for i, sentence in enumerate(sentences): | |
| # Only add formal starter if sentence doesn't already have one | |
| has_starter = any(starter.strip() in sentence for starter in self.FORMAL_STARTERS) | |
| if 0 < i < len(sentences) - 1 and not has_starter and random.random() < self.change_probability * 0.2: | |
| starter = random.choice(self.FORMAL_STARTERS) | |
| if sentence and sentence[0].isupper(): | |
| sentence = starter + sentence[0].lower() + sentence[1:] | |
| else: | |
| sentence = starter + sentence | |
| result.append(sentence) | |
| return ' '.join(result) | |
| def process(self, text): | |
| text = self.expand_contractions(text) | |
| text = self.remove_casual_fillers(text) | |
| text = self.apply_synonym_replacements(text) | |
| text = self.add_emphatic_phrases(text) | |
| text = self.add_formal_starters(text) | |
| return re.sub(r'\s+', ' ', text).strip() | |
| def multi_pass_process(self, text, passes=2): | |
| for _ in range(passes): | |
| text = self.process(text) | |
| return text | |
| # Load model and tokenizer from HuggingFace Hub | |
| print("Loading humanizer V3 model from HuggingFace Hub...") | |
| MODEL_PATH = "harryroger798/humanizer-model-v3" | |
| tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH) | |
| model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH) | |
| print("Model loaded!") | |
| # Initialize post-processor | |
| processor = StealthPostProcessor(intensity="high") | |
| def humanize_text(text, use_post_processor=True, post_processor_passes=2): | |
| """Combined humanizer: StealthWriter post-processor (primary) + model paraphrasing""" | |
| if not text.strip(): | |
| return "", "" | |
| # Step 1: Run through model with better generation parameters | |
| inputs = tokenizer(f"humanize: {text}", return_tensors="pt", max_length=512, truncation=True) | |
| outputs = model.generate( | |
| **inputs, | |
| max_length=512, | |
| num_beams=4, | |
| early_stopping=True, | |
| do_sample=True, | |
| temperature=0.8, | |
| top_p=0.9, | |
| repetition_penalty=2.5, | |
| no_repeat_ngram_size=3, | |
| length_penalty=1.0 | |
| ) | |
| model_output = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| # Check for repetitive output - if detected, use original text with post-processor only | |
| words = model_output.split() | |
| if len(words) > 10: | |
| # Check for excessive repetition | |
| word_counts = {} | |
| for word in words: | |
| word_counts[word] = word_counts.get(word, 0) + 1 | |
| max_repeat = max(word_counts.values()) if word_counts else 0 | |
| if max_repeat > len(words) * 0.3: # If any word appears more than 30% of the time | |
| # Fall back to using original text with post-processor | |
| model_output = text | |
| # Step 2: Apply StealthWriter post-processor (this is the key to bypassing detection) | |
| if use_post_processor: | |
| final_output = processor.multi_pass_process(model_output, passes=post_processor_passes) | |
| else: | |
| final_output = model_output | |
| return model_output, final_output | |
| def gradio_humanize(text, use_post_processor, passes): | |
| """Gradio interface function""" | |
| model_out, final_out = humanize_text(text, use_post_processor, int(passes)) | |
| return model_out, final_out | |
| # Create Gradio interface | |
| with gr.Blocks(title="Humanizer V2 - AI Detector Bypass") as demo: | |
| gr.Markdown(""" | |
| # 🔄 Humanizer V2 - AI Detector Bypass | |
| **Combined approach:** Fine-tuned T5 model (39,776 samples) + StealthWriter-style post-processor | |
| This humanizer uses techniques proven to bypass AI detectors: | |
| - Trained on 39,776 humanizer samples (combined dataset) | |
| - StealthWriter-style post-processing (expands contractions, uses formal expressions) | |
| - Multi-pass processing for better results | |
| - Achieved 0% AI detection on StealthWriter in testing | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| input_text = gr.Textbox( | |
| label="Input Text (AI-generated)", | |
| placeholder="Paste your AI-generated text here...", | |
| lines=8 | |
| ) | |
| with gr.Row(): | |
| use_post_processor = gr.Checkbox(label="Use StealthWriter Post-Processor", value=True) | |
| passes = gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Post-Processor Passes") | |
| submit_btn = gr.Button("Humanize", variant="primary") | |
| with gr.Column(): | |
| model_output = gr.Textbox(label="Model Output (before post-processing)", lines=6) | |
| final_output = gr.Textbox(label="Final Output (after post-processing)", lines=6) | |
| submit_btn.click( | |
| fn=gradio_humanize, | |
| inputs=[input_text, use_post_processor, passes], | |
| outputs=[model_output, final_output] | |
| ) | |
| gr.Markdown(""" | |
| --- | |
| **Tips for best results:** | |
| - Enable the StealthWriter post-processor for better bypass rates | |
| - Use 2-3 passes for optimal results | |
| - Test the output on StealthWriter or other AI detectors | |
| """) | |
| if __name__ == "__main__": | |
| demo.launch() |