# Spaces: Runtime error  (status banner apparently captured from the hosting page along with this listing; not program code)
# Import necessary libraries
import matplotlib.pyplot as plt
import nltk
import numpy as np
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertModel, BertTokenizer
# Download NLTK tokenizer data if not already present.
# nltk.word_tokenize() (used for BLEU below) loads the 'punkt_tab' resource on
# recent NLTK releases and 'punkt' on older ones, so fetch both.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
class SentenceDetectabilityCalculator:
    """
    Calculate and analyze detectability metrics between an original sentence
    and a list of paraphrased sentences.

    For every paraphrase three similarity scores against the original are
    stored under the keys 'BLEU Score', 'Cosine Similarity' (mean-pooled BERT
    embeddings) and 'STS Score' (SentenceTransformer embeddings). The scores
    can then be min-max normalized across paraphrases and combined into a
    single root-mean-square value per paraphrase.

    Intended call order:
        calculate_all_metrics() -> normalize_metrics()
        -> calculate_combined_detectability() -> plot_metrics()
    """

    def __init__(self, original_sentence, paraphrased_sentences):
        """
        Initialize the calculator with the original sentence and a list of
        paraphrased sentences.

        Loads the 'bert-base-uncased' BERT model/tokenizer and the
        'paraphrase-MiniLM-L6-v2' SentenceTransformer, then embeds the
        original sentence once so it is not recomputed per paraphrase.

        Args:
            original_sentence: The reference sentence (str).
            paraphrased_sentences: Iterable of paraphrase strings to score.
        """
        self.original_sentence = original_sentence
        self.paraphrased_sentences = paraphrased_sentences

        # Raw scores, keyed first by metric name and then by "Sentence_<n>".
        self.metrics = {
            'BLEU Score': {},
            'Cosine Similarity': {},
            'STS Score': {}
        }
        # Same layout as self.metrics, filled by normalize_metrics().
        self.normalized_metrics = {
            'BLEU Score': {},
            'Cosine Similarity': {},
            'STS Score': {}
        }
        # "Sentence_<n>" -> RMS of that sentence's normalized metrics.
        self.combined_detectabilities = {}

        # Load pre-trained models
        self.bert_model = BertModel.from_pretrained('bert-base-uncased')
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.sts_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Calculate original embeddings
        self.original_embedding = self._get_sentence_embedding(self.original_sentence)
        self.sts_original_embedding = self.sts_model.encode(self.original_sentence)

    def calculate_all_metrics(self):
        """
        Calculate all detectability metrics for each paraphrased sentence.

        Results are written to self.metrics under the key "Sentence_<n>",
        where n is the 1-based position of the paraphrase in the input list.
        Values are stored as plain Python floats.
        """
        for idx, paraphrased_sentence in enumerate(self.paraphrased_sentences):
            key = f"Sentence_{idx + 1}"

            self.metrics['BLEU Score'][key] = float(
                self._calculate_bleu(self.original_sentence, paraphrased_sentence)
            )

            paraphrase_embedding = self._get_sentence_embedding(paraphrased_sentence)
            # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
            self.metrics['Cosine Similarity'][key] = float(
                cosine_similarity([self.original_embedding], [paraphrase_embedding])[0][0]
            )

            sts_paraphrase_embedding = self.sts_model.encode(paraphrased_sentence)
            self.metrics['STS Score'][key] = float(
                cosine_similarity([self.sts_original_embedding], [sts_paraphrase_embedding])[0][0]
            )

    def normalize_metrics(self):
        """
        Normalize all metrics to be between 0 and 1.

        Each metric is min-max scaled independently across all sentences and
        the result replaces the matching entry of self.normalized_metrics.
        """
        for metric_name, metric_dict in self.metrics.items():
            self.normalized_metrics[metric_name] = self._normalize_dict(metric_dict)

    def calculate_combined_detectability(self):
        """
        Calculate the combined detectability using the root mean square of the
        normalized metrics.

        Iterates over the sentence keys of the normalized 'BLEU Score' metric;
        every other normalized metric is expected to hold the same keys.
        """
        metric_count = len(self.normalized_metrics)
        for key in self.normalized_metrics['BLEU Score']:
            squared_sum = sum(
                self.normalized_metrics[metric][key] ** 2 for metric in self.normalized_metrics
            )
            self.combined_detectabilities[key] = float(np.sqrt(squared_sum / metric_count))

    def plot_metrics(self):
        """
        Plot each normalized metric and the combined detectability in separate graphs.

        The combined detectability figure is drawn only when
        calculate_combined_detectability() has produced a value for every
        plotted sentence. Each figure is shown with plt.show().
        """
        keys = list(self.normalized_metrics['BLEU Score'].keys())
        indices = np.arange(len(keys))

        # Prepare data for plotting: (title, y-axis label, values) per figure.
        figures = [
            (
                f'Normalized {name}',
                'Normalized Value (0-1)',
                [self.normalized_metrics[name][key] for key in keys],
            )
            for name in self.normalized_metrics
        ]
        if keys and all(key in self.combined_detectabilities for key in keys):
            figures.append((
                'Combined Detectability',
                'Combined Detectability (RMS, 0-1)',
                [self.combined_detectabilities[key] for key in keys],
            ))

        # Plot each series separately
        for title, y_label, values in figures:
            plt.figure(figsize=(12, 6))
            # A random RGB triple gives each figure its own line color.
            plt.plot(indices, values, marker='o', color=np.random.rand(3,))
            plt.xlabel('Sentence Index')
            plt.ylabel(y_label)
            plt.title(title)
            plt.grid(True)
            plt.tight_layout()
            plt.show()

    # Private methods for metric calculations
    def _calculate_bleu(self, reference, candidate):
        """
        Calculate the BLEU score between the original and paraphrased sentence
        using smoothing.

        Both sentences are tokenized with nltk.word_tokenize; SmoothingFunction
        method1 is passed as the smoothing function.
        """
        reference_tokens = nltk.word_tokenize(reference)
        candidate_tokens = nltk.word_tokenize(candidate)
        smoothing = SmoothingFunction().method1
        return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing)

    def _get_sentence_embedding(self, sentence):
        """
        Get sentence embedding using BERT.

        The sentence is tokenized (truncated to at most 512 tokens), run
        through the BERT model without gradient tracking, and the last hidden
        state is averaged over dim 1 (presumably the token axis) to give one
        vector, returned as a NumPy array.
        """
        tokens = self.bert_tokenizer(sentence, return_tensors='pt', padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.bert_model(**tokens)
        # squeeze() drops the size-1 leading dimension left after pooling.
        return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

    def _normalize_dict(self, metric_dict):
        """
        Normalize the values in a dictionary to be between 0 and 1.

        Args:
            metric_dict: Mapping of sentence key -> numeric score.

        Returns:
            A new dict with the same keys and min-max scaled float values.
            An empty input yields an empty dict; if all values are equal,
            every value maps to 0.0.
        """
        # min()/max() of an empty array raise ValueError, so short-circuit.
        if not metric_dict:
            return {}

        values = np.array(list(metric_dict.values()), dtype=float)
        min_val = values.min()
        value_range = values.max() - min_val

        # Avoid division by zero if all values are the same
        if value_range == 0:
            scaled = np.zeros_like(values)
        else:
            scaled = (values - min_val) / value_range
        return {key: float(value) for key, value in zip(metric_dict, scaled)}

    # Getter methods
    def get_normalized_metrics(self):
        """
        Get all normalized metrics as a dictionary.
        """
        return self.normalized_metrics

    def get_combined_detectabilities(self):
        """
        Get the dictionary of combined detectability values.
        """
        return self.combined_detectabilities
# Example usage
if __name__ == "__main__":
    # Reference sentence that every paraphrase below is scored against
    original_sentence = "The quick brown fox jumps over the lazy dog"

    # Paraphrased sentences: ten groups of twelve variants each; the comment
    # heading each group names the intermediate paraphrase the variants derive from.
    paraphrased_sentences = [
        # Original 1: "A swift auburn fox leaps across a sleepy canine."
        "The swift auburn fox leaps across a sleepy canine.",
        "A quick auburn fox leaps across a sleepy canine.",
        "A swift ginger fox leaps across a sleepy canine.",
        "A swift auburn fox bounds across a sleepy canine.",
        "A swift auburn fox leaps across a tired canine.",
        "Three swift auburn foxes leap across a sleepy canine.",
        "The vulpine specimen rapidly traverses over a dormant dog.",
        "Like lightning, the russet hunter soars over the drowsy guardian.",
        "Tha quick ginger fox jumps o'er the lazy hound, ye ken.",
        "One rapid Vulpes vulpes traverses the path of a quiescent canine.",
        "A swift auburn predator navigates across a lethargic pet.",
        "Subject A (fox) demonstrates velocity over Subject B (dog).",
        # Original 2: "The agile russet fox bounds over an idle hound."
        "Some agile russet foxes bound over an idle hound.",
        "The nimble russet fox bounds over an idle hound.",
        "The agile brown fox bounds over an idle hound.",
        "The agile russet fox jumps over an idle hound.",
        "The agile russet fox bounds over a lazy hound.",
        "Two agile russet foxes bound over an idle hound.",
        "A dexterous vulpine surpasses a stationary canine.",
        "Quick as thought, the copper warrior sails over the guardian.",
        "Tha nimble reddish fox jumps o'er the doggo, don't ya know.",
        "A dexterous V. vulpes exceeds the plane of an inactive canine.",
        "An agile russet hunter maneuvers above a resting hound.",
        "Test subject F-1 achieves displacement superior to subject D-1.",
        # Original 3: "A nimble mahogany vulpine vaults above a drowsy dog."
        "The nimble mahogany vulpine vaults above a drowsy dog.",
        "A swift mahogany vulpine vaults above a drowsy dog.",
        "A nimble reddish vulpine vaults above a drowsy dog.",
        "A nimble mahogany fox vaults above a drowsy dog.",
        "A nimble mahogany vulpine leaps above a drowsy dog.",
        "Four nimble mahogany vulpines vault above a drowsy dog.",
        "An agile specimen of reddish fur surpasses a somnolent canine.",
        "Fleet as wind, the earth-toned hunter soars over the sleepy guard.",
        "Tha quick brown beastie jumps o'er the tired pup, aye.",
        "Single V. vulpes demonstrates vertical traverse over C. familiaris.",
        "A nimble rust-colored predator crosses above a drowsy pet.",
        "Observed: Subject Red executes vertical motion over Subject Gray.",
        # Original 4: "The speedy copper-colored fox hops over the lethargic pup."
        "A speedy copper-colored fox hops over the lethargic pup.",
        "The quick copper-colored fox hops over the lethargic pup.",
        "The speedy bronze fox hops over the lethargic pup.",
        "The speedy copper-colored fox jumps over the lethargic pup.",
        "The speedy copper-colored fox hops over the tired pup.",
        "Multiple speedy copper-colored foxes hop over the lethargic pup.",
        "A rapid vulpine of bronze hue traverses an inactive young canine.",
        "Swift as a dart, the metallic hunter bounds over the lazy puppy.",
        "Tha fast copper beastie leaps o'er the sleepy wee dog.",
        "1 rapid V. vulpes crosses above 1 juvenile C. familiaris.",
        "A fleet copper-toned predator moves past a sluggish young dog.",
        "Field note: Adult fox subject exceeds puppy subject vertically.",
        # Original 5: "A rapid tawny fox springs over a sluggish dog."
        "The rapid tawny fox springs over a sluggish dog.",
        "A quick tawny fox springs over a sluggish dog.",
        "A rapid golden fox springs over a sluggish dog.",
        "A rapid tawny fox jumps over a sluggish dog.",
        "A rapid tawny fox springs over a lazy dog.",
        "Six rapid tawny foxes spring over a sluggish dog.",
        "An expeditious yellowish vulpine surpasses a torpid canine.",
        "Fast as a bullet, the golden hunter vaults over the idle guard.",
        "Tha swift yellowy fox jumps o'er the lazy mutt, aye.",
        "One V. vulpes displays rapid transit over one inactive C. familiaris.",
        "A speedy yellow-brown predator bypasses a motionless dog.",
        "Log entry: Vulpine subject achieves swift vertical displacement.",
        # Original 6: "The fleet-footed chestnut fox soars above an indolent canine."
        "A fleet-footed chestnut fox soars above an indolent canine.",
        "The swift chestnut fox soars above an indolent canine.",
        "The fleet-footed brown fox soars above an indolent canine.",
        "The fleet-footed chestnut fox leaps above an indolent canine.",
        "The fleet-footed chestnut fox soars above a lazy canine.",
        "Several fleet-footed chestnut foxes soar above an indolent canine.",
        "A rapid brown vulpine specimen traverses a lethargic domestic dog.",
        "Graceful as a bird, the nutbrown hunter flies over the lazy guard.",
        "Tha quick brown beastie sails o'er the sleepy hound, ken.",
        "Single agile V. vulpes achieves elevation above stationary canine.",
        "A nimble brown predator glides over an unmoving domestic animal.",
        "Research note: Brown subject displays superior vertical mobility.",
        # Original 7: "A fast ginger fox hurdles past a slothful dog."
        "The fast ginger fox hurdles past a slothful dog.",
        "A quick ginger fox hurdles past a slothful dog.",
        "A fast red fox hurdles past a slothful dog.",
        "A fast ginger fox jumps past a slothful dog.",
        "A fast ginger fox hurdles past a lazy dog.",
        "Five fast ginger foxes hurdle past a slothful dog.",
        "A rapid orange vulpine bypasses a lethargic canine.",
        "Quick as lightning, the flame-colored hunter races past the lazy guard.",
        "Tha swift ginger beastie leaps past the tired doggy, ye see.",
        "1 rapid orange V. vulpes surpasses 1 inactive C. familiaris.",
        "A speedy red-orange predator overtakes a motionless dog.",
        "Data point: Orange subject demonstrates rapid transit past Gray subject.",
        # Original 8: "The spry rusty-colored fox jumps across a dozing hound."
        "A spry rusty-colored fox jumps across a dozing hound.",
        "The agile rusty-colored fox jumps across a dozing hound.",
        "The spry reddish fox jumps across a dozing hound.",
        "The spry rusty-colored fox leaps across a dozing hound.",
        "The spry rusty-colored fox jumps across a sleeping hound.",
        "Multiple spry rusty-colored foxes jump across a dozing hound.",
        "An agile rust-toned vulpine traverses a somnolent canine.",
        "Nimble as thought, the copper hunter bounds over the resting guard.",
        "Tha lively rust-colored beastie hops o'er the snoozin' hound.",
        "Single dexterous V. vulpes crosses path of dormant C. familiaris.",
        "A lithe rust-tinted predator moves past a slumbering dog.",
        "Observation: Russet subject exhibits agility over dormant subject.",
        # Original 9: "A quick tan fox leaps over an inactive dog."
        "The quick tan fox leaps over an inactive dog.",
        "A swift tan fox leaps over an inactive dog.",
        "A quick beige fox leaps over an inactive dog.",
        "A quick tan fox jumps over an inactive dog.",
        "A quick tan fox leaps over a motionless dog.",
        "Seven quick tan foxes leap over an inactive dog.",
        "A rapid light-brown vulpine surpasses a stationary canine.",
        "Fast as wind, the sand-colored hunter soars over the still guard.",
        "Tha nimble tan beastie jumps o'er the quiet doggy, aye.",
        "One agile fawn V. vulpes traverses one immobile C. familiaris.",
        "A fleet tan-colored predator bypasses an unmoving dog.",
        "Field report: Tan subject demonstrates movement over static subject.",
        # Original 10: "The brisk auburn vulpine bounces over a listless canine."
        "Some brisk auburn vulpines bounce over a listless canine.",
        "The quick auburn vulpine bounces over a listless canine.",
        "The brisk russet vulpine bounces over a listless canine.",
        "The brisk auburn fox bounces over a listless canine.",
        "The brisk auburn vulpine jumps over a listless canine.",
        "Five brisk auburn vulpines bounce over a listless canine.",
        "The expeditious specimen supersedes a quiescent Canis lupus.",
        "Swift as wind, the russet hunter vaults over the idle guardian.",
        "Tha quick ginger beastie hops o'er the lazy mutt, aye.",
        "One V. vulpes achieves displacement over inactive C. familiaris.",
        "A high-velocity auburn predator traverses an immobile animal.",
        "Final observation: Red subject shows mobility over Gray subject.",
    ]

    # Build the calculator (loads the models and embeds the original sentence)
    detector = SentenceDetectabilityCalculator(original_sentence, paraphrased_sentences)

    # Run the pipeline in dependency order:
    # raw metrics -> normalization -> combined score -> plots
    pipeline = (
        detector.calculate_all_metrics,
        detector.normalize_metrics,
        detector.calculate_combined_detectability,
        detector.plot_metrics,
    )
    for step in pipeline:
        step()

    # Report the results
    print("Normalized Metrics:", detector.get_normalized_metrics())
    print("Combined Detectabilities:", detector.get_combined_detectabilities())