Spaces:

Chungulus
/

Humanizer_Pro

Sleeping

App Files Files Community

Humanizer_Pro / a.py

Chungulus

Upload folder using huggingface_hub

fea8d44 verified 10 days ago

raw

history blame contribute delete

10.3 kB

	"""
	Combined Humanizer V2 - Adversarial Model + StealthWriter Post-Processor
	Optimized for bypassing AI detectors using proven techniques.
	"""

	import gradio as gr
	from transformers import T5ForConditionalGeneration, T5Tokenizer
	import re
	import random
	import os

	# StealthWriter-style post-processor
	class StealthPostProcessor:
	"""Post-process text using StealthWriter's proven approach."""

	CONTRACTION_EXPANSIONS = {
	"it's": "it is", "It's": "It is", "don't": "do not", "Don't": "Do not",
	"doesn't": "does not", "Doesn't": "Does not", "didn't": "did not",
	"won't": "will not", "wouldn't": "would not", "couldn't": "could not",
	"shouldn't": "should not", "can't": "cannot", "Can't": "Cannot",
	"I'm": "I am", "I've": "I have", "I'll": "I will", "I'd": "I would",
	"you're": "you are", "You're": "You are", "you've": "you have",
	"we're": "we are", "We're": "We are", "we've": "we have",
	"they're": "they are", "They're": "They are", "they've": "they have",
	"that's": "that is", "That's": "That is", "there's": "there is",
	"what's": "what is", "who's": "who is", "let's": "let us",
	"isn't": "is not", "aren't": "are not", "wasn't": "was not",
	"weren't": "were not", "haven't": "have not", "hasn't": "has not",
	"hadn't": "had not", "here's": "here is", "he's": "he is",
	"she's": "she is", "we'll": "we will", "they'll": "they will",
	"gotta": "got to", "gonna": "going to", "wanna": "want to",
	"kinda": "kind of", "sorta": "sort of",
	}

	EMPHATIC_PHRASES = [", I tell you", ", I must say", ", mind you", ", you see", ", indeed"]

	FORMAL_STARTERS = [
	"It is almost a given that ", "One must acknowledge that ",
	"It goes without saying that ", "It is worth noting that ",
	"As it happens, ", "As a matter of fact, ", "In point of fact, ",
	]

	SYNONYM_REPLACEMENTS = {
	"furry friend": "hairy companion", "pet": "animal companion",
	"dog": "canine", "cat": "feline", "help": "assist", "use": "utilize",
	"get": "obtain", "make": "create", "good": "favorable", "bad": "unfavorable",
	"big": "substantial", "small": "modest", "very": "quite", "really": "truly",
	"important": "significant", "need": "require", "want": "desire",
	"think": "believe", "know": "understand", "see": "observe",
	"find": "discover", "show": "demonstrate", "give": "provide",
	"start": "commence", "begin": "initiate", "end": "conclude",
	"try": "attempt", "keep": "maintain", "lot of": "numerous",
	"a lot": "considerably", "lots of": "a great many",
	}

	FILLERS_TO_REMOVE = [
	"like, ", ", like,", " like ", "you know, ", ", you know,",
	"basically, ", ", basically,", "honestly, ", "Honestly, ",
	"I mean, ", ", I mean,", "pretty much ", "kind of ", "sort of ",
	"actually, ", ", actually,", "literally ", "just ", "really ",
	"so, ", "So, ", "well, ", "Well, ", "anyway, ", "Anyway, ",
	"right? ", "Right? ", "you know? ", "I guess ", "I gotta say, ",
	]

	def __init__(self, intensity="high"):
	self.change_probability = {"low": 0.3, "medium": 0.5, "high": 0.7}.get(intensity, 0.7)

	def expand_contractions(self, text):
	for contraction, expansion in self.CONTRACTION_EXPANSIONS.items():
	pattern = re.compile(r'\b' + re.escape(contraction) + r'\b')
	text = pattern.sub(expansion, text)
	return text

	def remove_casual_fillers(self, text):
	for filler in self.FILLERS_TO_REMOVE:
	text = text.replace(filler, " " if filler.startswith(" ") or filler.endswith(" ") else "")
	return re.sub(r'\s+', ' ', text).strip()

	def apply_synonym_replacements(self, text):
	for common, formal in self.SYNONYM_REPLACEMENTS.items():
	if random.random() < self.change_probability:
	pattern = re.compile(r'\b' + re.escape(common) + r'\b', re.IGNORECASE)
	def replace_preserve_case(match):
	word = match.group(0)
	if word.isupper(): return formal.upper()
	elif word[0].isupper(): return formal.capitalize()
	return formal
	text = pattern.sub(replace_preserve_case, text)
	return text

	def add_emphatic_phrases(self, text):
	sentences = re.split(r'(?<=[.!])\s+', text)
	result = []
	for sentence in sentences:
	# Only add emphatic phrase if sentence doesn't already have one
	has_emphatic = any(phrase.strip(", ") in sentence for phrase in self.EMPHATIC_PHRASES)
	if sentence.endswith('.') and not has_emphatic and random.random() < self.change_probability * 0.25:
	phrase = random.choice(self.EMPHATIC_PHRASES)
	sentence = sentence[:-1] + phrase + "."
	result.append(sentence)
	return ' '.join(result)

	def add_formal_starters(self, text):
	sentences = re.split(r'(?<=[.!?])\s+', text)
	result = []
	for i, sentence in enumerate(sentences):
	# Only add formal starter if sentence doesn't already have one
	has_starter = any(starter.strip() in sentence for starter in self.FORMAL_STARTERS)
	if 0 < i < len(sentences) - 1 and not has_starter and random.random() < self.change_probability * 0.2:
	starter = random.choice(self.FORMAL_STARTERS)
	if sentence and sentence[0].isupper():
	sentence = starter + sentence[0].lower() + sentence[1:]
	else:
	sentence = starter + sentence
	result.append(sentence)
	return ' '.join(result)

	def process(self, text):
	text = self.expand_contractions(text)
	text = self.remove_casual_fillers(text)
	text = self.apply_synonym_replacements(text)
	text = self.add_emphatic_phrases(text)
	text = self.add_formal_starters(text)
	return re.sub(r'\s+', ' ', text).strip()

	def multi_pass_process(self, text, passes=2):
	for _ in range(passes):
	text = self.process(text)
	return text


	# Load model and tokenizer from HuggingFace Hub
	print("Loading humanizer V3 model from HuggingFace Hub...")
	MODEL_PATH = "harryroger798/humanizer-model-v3"
	tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)
	model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
	print("Model loaded!")

	# Initialize post-processor
	processor = StealthPostProcessor(intensity="high")


	def humanize_text(text, use_post_processor=True, post_processor_passes=2):
	"""Combined humanizer: StealthWriter post-processor (primary) + model paraphrasing"""
	if not text.strip():
	return "", ""

	# Step 1: Run through model with better generation parameters
	inputs = tokenizer(f"humanize: {text}", return_tensors="pt", max_length=512, truncation=True)
	outputs = model.generate(
	**inputs,
	max_length=512,
	num_beams=4,
	early_stopping=True,
	do_sample=True,
	temperature=0.8,
	top_p=0.9,
	repetition_penalty=2.5,
	no_repeat_ngram_size=3,
	length_penalty=1.0
	)
	model_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Check for repetitive output - if detected, use original text with post-processor only
	words = model_output.split()
	if len(words) > 10:
	# Check for excessive repetition
	word_counts = {}
	for word in words:
	word_counts[word] = word_counts.get(word, 0) + 1
	max_repeat = max(word_counts.values()) if word_counts else 0
	if max_repeat > len(words) * 0.3: # If any word appears more than 30% of the time
	# Fall back to using original text with post-processor
	model_output = text

	# Step 2: Apply StealthWriter post-processor (this is the key to bypassing detection)
	if use_post_processor:
	final_output = processor.multi_pass_process(model_output, passes=post_processor_passes)
	else:
	final_output = model_output

	return model_output, final_output


	def gradio_humanize(text, use_post_processor, passes):
	"""Gradio interface function"""
	model_out, final_out = humanize_text(text, use_post_processor, int(passes))
	return model_out, final_out


	# Create Gradio interface
	with gr.Blocks(title="Humanizer V2 - AI Detector Bypass") as demo:
	gr.Markdown("""
	# 🔄 Humanizer V2 - AI Detector Bypass

	Combined approach: Fine-tuned T5 model (39,776 samples) + StealthWriter-style post-processor

	This humanizer uses techniques proven to bypass AI detectors:
	- Trained on 39,776 humanizer samples (combined dataset)
	- StealthWriter-style post-processing (expands contractions, uses formal expressions)
	- Multi-pass processing for better results
	- Achieved 0% AI detection on StealthWriter in testing
	""")

	with gr.Row():
	with gr.Column():
	input_text = gr.Textbox(
	label="Input Text (AI-generated)",
	placeholder="Paste your AI-generated text here...",
	lines=8
	)

	with gr.Row():
	use_post_processor = gr.Checkbox(label="Use StealthWriter Post-Processor", value=True)
	passes = gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Post-Processor Passes")

	submit_btn = gr.Button("Humanize", variant="primary")

	with gr.Column():
	model_output = gr.Textbox(label="Model Output (before post-processing)", lines=6)
	final_output = gr.Textbox(label="Final Output (after post-processing)", lines=6)

	submit_btn.click(
	fn=gradio_humanize,
	inputs=[input_text, use_post_processor, passes],
	outputs=[model_output, final_output]
	)

	gr.Markdown("""
	---
	Tips for best results:
	- Enable the StealthWriter post-processor for better bypass rates
	- Use 2-3 passes for optimal results
	- Test the output on StealthWriter or other AI detectors
	""")


	if __name__ == "__main__":
	demo.launch()