|
import gradio as gr |
|
import requests |
|
import json |
|
|
|
class SynthIDApp:
    """Thin client around the Hugging Face Inference API used by the demo UI.

    The instance keeps the authorization headers produced by ``login`` and
    reuses them for every watermarking request.  Public methods never raise:
    they return plain strings (or a ``(text, status)`` pair) that are meant to
    be displayed directly, so failures surface as messages.
    """

    # Upper bound, in seconds, on any single HTTP call so that a stalled
    # endpoint cannot block the calling UI handler indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set the endpoint URL, start logged out, and define the watermark keys."""
        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        # ``None`` means "not logged in"; ``login`` replaces it with a dict.
        self.headers = None
        # Keys sent verbatim in the request's ``watermarking_config``.
        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

    def login(self, hf_token):
        """Store the bearer token and verify it with a one-token probe request.

        Args:
            hf_token: Hugging Face access token as entered by the user.

        Returns:
            A human-readable status string.  On any failure ``self.headers``
            is reset to ``None`` so later calls report "not initialized".
        """
        token = str(hf_token or "").strip()
        if not token:
            # Nothing to authenticate with: skip the pointless network call.
            self.headers = None
            return "Error initializing API: no Hugging Face token provided"

        try:
            self.headers = {"Authorization": f"Bearer {token}"}
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return "API connection initialized successfully!"
        except Exception as e:
            # UI boundary: report every failure as a status message.
            self.headers = None
            return f"Error initializing API: {str(e)}"

    def apply_watermark(self, text, ngram_len):
        """Ask the model to reproduce ``text`` with the watermark config attached.

        Args:
            text: The text to watermark.
            ngram_len: N-gram length forwarded (as ``int``) in the
                ``watermarking_config`` of the request.

        Returns:
            ``(watermarked_text, status)`` on success, or ``(text, error)``
            with the input returned unchanged on any failure.
        """
        if not self.headers:
            return text, "Error: API not initialized. Please login first."
        if not text or not text.strip():
            # Do not spend an API call on an empty prompt.
            return text, "Error: No input text provided."

        try:
            prompt = f"<s>[INST] Return the exact same text, with watermark applied: {text} [/INST]"
            # NOTE(review): confirm that the hosted endpoint honours
            # "watermarking_config" and that "do_sample": False is intended
            # together with it.
            params = {
                "inputs": prompt,
                "parameters": {
                    # The prompt is echoed back, which is why the response
                    # parser splits on the "[/INST]" marker.
                    "return_full_text": True,
                    "do_sample": False,
                    "temperature": 0.01,
                    "watermarking_config": {
                        "keys": self.WATERMARK_KEYS,
                        "ngram_len": int(ngram_len),
                    },
                },
            }

            # Exactly one request, always bounded by a timeout.
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return self._interpret_response(response.json(), text, ngram_len)
        except Exception as e:
            # UI boundary: report every failure as a status message.
            return text, f"Error applying watermark: {str(e)}"

    def _interpret_response(self, result, text, ngram_len):
        """Convert the decoded JSON payload into the ``(text, status)`` pair."""
        well_formed = isinstance(result, list) and result and isinstance(result[0], dict)
        if not well_formed:
            return text, f"Error: Unexpected API response format: {str(result)}"

        first = result[0]
        if 'error' in first:
            return text, f"API Error: {first['error']}"

        generated_text = first.get('generated_text', '').strip()
        # Trim surrounding spaces and periods; the final period is restored
        # below only when the input itself ended with one.
        watermarked_text = self._extract_completion(generated_text, text).strip(' .')
        if not watermarked_text:
            return text, "Error: No watermarked text generated"

        if text.strip().endswith('.'):
            watermarked_text += '.'
        return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"

    @staticmethod
    def _extract_completion(generated_text, text):
        """Return the part of ``generated_text`` that follows the echoed prompt."""
        marker = "[/INST]"
        if marker in generated_text:
            # Everything after the last instruction marker is the completion.
            return generated_text.rsplit(marker, 1)[-1].strip()

        # No marker: fall back to whatever follows the echoed input text.
        idx = generated_text.find(text)
        if idx != -1:
            return generated_text[idx + len(text):].strip()
        return generated_text

    def analyze_text(self, text):
        """Return a short report with character, word and mean word-length counts.

        Args:
            text: The text to inspect.

        Returns:
            A multi-line report string, or an ``"Error analyzing text: ..."``
            message if the input cannot be processed.
        """
        try:
            words = text.split()
            total_words = len(words)
            # Guard against division by zero for empty / whitespace-only input.
            avg_word_length = sum(map(len, words)) / total_words if total_words else 0
            char_count = len(text)

            return "\n".join([
                "Text Analysis:",
                f"- Total characters: {char_count}",
                f"- Total words: {total_words}",
                f"- Average word length: {avg_word_length:.2f}",
                "",
                "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.",
            ])
        except Exception as e:
            return f"Error analyzing text: {str(e)}"
|
|
|
|
|
# Single shared client; its bound methods are the UI event handlers below.
app_instance = SynthIDApp()

# Initial slider position.  It is interpolated into the help text so the
# documented default cannot drift away from the real one.
DEFAULT_NGRAM_LEN = 2

# Help text shown under the watermark controls.
NGRAM_HELP_MD = f"""
### N-gram Length Parameter:
- Higher values (4-5): More detectable watermark, but more brittle to changes
- Lower values (2-3): More robust to changes, but harder to detect
- Default ({DEFAULT_NGRAM_LEN}): Most robust to changes"""

# Usage instructions and caveats shown at the bottom of the page.
INSTRUCTIONS_MD = """
### Instructions:
1. Enter your Hugging Face token and click Login
2. Once connected, you can use the tabs to apply watermarks or analyze text
3. Adjust the N-gram Length slider to control watermark characteristics

### Notes:
- The watermarking process attempts to maintain the original meaning while adding the watermark
- If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
- This is an experimental feature using the Inference API
- No model download required - everything runs in the cloud
- The watermark is designed to be imperceptible to humans
- This demo only implements watermark application
- The official detector will be available in future releases
- For production use, use your own secure watermark keys
- Your token is never stored and is only used for API access
"""

# NOTE(review): the layout nesting below (which widgets sit inside which
# Row/Tab) was reconstructed from statement order — confirm against the
# intended page layout.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Mistral-7B-Instruct-v0.2 with Hugging Face Inference API")

    # --- Authentication -------------------------------------------------
    with gr.Row():
        hf_token = gr.Textbox(
            label="Enter Hugging Face Token",
            type="password",
            placeholder="hf_...",
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

    # --- Watermarking ---------------------------------------------------
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            with gr.Column(scale=3):
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=5,
                    placeholder="Enter text to watermark...",
                    value="Test Sentence: WordLift is a cutting-edge platform designed to enhance your digital content by leveraging the power of semantic technology. It transforms your website into a structured repository of knowledge, making your content more discoverable, engaging, and aligned with modern search engine algorithms. By utilizing AI-driven entity extraction and knowledge graph generation, WordLift helps you bridge the gap between your content and search intent, ensuring optimal visibility and performance.",
                )
                output_text = gr.Textbox(label="Watermarked Text", lines=5)
            with gr.Column(scale=1):
                ngram_len = gr.Slider(
                    label="N-gram Length",
                    minimum=2,
                    maximum=5,
                    step=1,
                    value=DEFAULT_NGRAM_LEN,
                    info="Controls watermark detectability (2-5)",
                )
                status = gr.Textbox(label="Status")

        gr.Markdown(NGRAM_HELP_MD)

        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            app_instance.apply_watermark,
            inputs=[input_text, ngram_len],
            outputs=[output_text, status],
        )

    # --- Basic text statistics ------------------------------------------
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                label="Text to Analyze",
                lines=5,
                placeholder="Enter text to analyze...",
            )
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

    gr.Markdown(INSTRUCTIONS_MD)


# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()