File size: 7,853 Bytes
2bffcbb 1eb5cdf 2bffcbb 1eb5cdf 2bffcbb 1eb5cdf 2bffcbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import pandas as pd
import numpy as np
# Read the product and competitor analysis workbooks into DataFrames
product_data = pd.read_excel(io="Output_File/excel/product_analysis.xlsx")
competitor_data = pd.read_excel(io="Output_File/excel/competitor_analysis.xlsx")
# Function to filter criteria based on available columns
def filter_existing_criteria(data, criteria):
    """
    Keep only the criteria that appear among the DataFrame's column labels.

    Args:
        data: DataFrame whose columns are checked.
        criteria: List of criterion names to test.

    Returns:
        New list holding the criteria found in ``data.columns``, in the
        same order as they were given.
    """
    available_columns = data.columns
    kept = []
    for name in criteria:
        if name in available_columns:
            kept.append(name)
    return kept
# Criterion names (matched against spreadsheet column labels) per category.
# They stay plain lists because they are later used as column indexers.
branding_criteria = [
    "Logo Placement",
    "Consistency",
    "Alignment",
    "Brand Colors",
    "Typography Consistency",
    "Brand Identity",
    "Template Consistency",
]
content_marketing_criteria = [
    "Content Visibility",
    "Engagement Cues",
    "Storytelling",
    "Aesthetic Coherence",
    "Content Relevance",
]
social_media_marketing_criteria = [
    "Font Size",
    "Visibility of Text",
    "Alignment",
    "Aesthetic Appeal",
    "Repetitiveness",
]
# Keep only criteria that exist as columns in BOTH workbooks: the same criteria
# list is later used to index product_data and competitor_data alike, so a
# column missing from either one would otherwise raise a KeyError.
branding_criteria_filtered = filter_existing_criteria(
    competitor_data,
    filter_existing_criteria(product_data, branding_criteria),
)
content_marketing_criteria_filtered = filter_existing_criteria(
    competitor_data,
    filter_existing_criteria(product_data, content_marketing_criteria),
)
social_media_marketing_criteria_filtered = filter_existing_criteria(
    competitor_data,
    filter_existing_criteria(product_data, social_media_marketing_criteria),
)
# Helper function to calculate the mean value for a criterion
def calculate_mean_criterion_value(product_data, competitor_data, criterion):
    """
    Calculate the mean of a criterion pooled over product and competitor data.

    Missing values (NaN) are dropped from each column before the two columns
    are pooled, so they never influence the mean.

    Args:
        product_data: DataFrame containing product data.
        competitor_data: DataFrame containing competitor data.
        criterion: The column name representing the criterion.

    Returns:
        Mean of all non-missing values of the criterion across both
        DataFrames, or NaN when neither DataFrame holds a non-missing value.

    Raises:
        KeyError: If ``criterion`` is not a column of both DataFrames.
    """
    combined_values = np.concatenate([
        product_data[criterion].dropna().to_numpy(),
        competitor_data[criterion].dropna().to_numpy(),
    ])
    # np.nanmean on an empty array emits a "Mean of empty slice"
    # RuntimeWarning; return the same NaN result without the warning.
    if combined_values.size == 0:
        return np.nan
    return np.nanmean(combined_values)
# Helper function to calculate the score differences
def calculate_score_differences(product_scores, competitor_scores, product_data, competitor_data, category_criteria):
    """
    Calculate per-criterion score differences, replacing NaN with mean values.

    The difference is ``product_scores - competitor_scores``. Wherever that
    difference is NaN, the entry is replaced by the pooled mean of the
    corresponding criterion column (see ``calculate_mean_criterion_value``).

    Args:
        product_scores: Scores of a product post for the criteria
            (array-like, same order as ``category_criteria``).
        competitor_scores: Scores of a competitor post for the criteria
            (array-like, same order as ``category_criteria``).
        product_data: DataFrame containing product data.
        competitor_data: DataFrame containing competitor data.
        category_criteria: List of criteria in the category.

    Returns:
        Float NumPy array of score differences, one entry per criterion.
    """
    # Coerce to float so that plain lists and object-dtype rows (including
    # ones that use None as the missing marker) subtract and NaN-test cleanly.
    product_arr = np.asarray(product_scores, dtype=float)
    competitor_arr = np.asarray(competitor_scores, dtype=float)
    score_diff = product_arr - competitor_arr  # new array; inputs untouched

    # Only the NaN positions need the (comparatively costly) column mean.
    for k in np.flatnonzero(np.isnan(score_diff)):
        score_diff[k] = calculate_mean_criterion_value(
            product_data, competitor_data, category_criteria[int(k)]
        )
    return score_diff
# Main function to calculate the SD comparison matrix
def calculate_sd_comparison_matrix(product_data, competitor_data, category_criteria,
                                   n_products=6, n_competitors=6):
    """
    Calculate the SD Comparison Matrix for a specific category.

    Cell (i, j) is the standard deviation of the per-criterion score
    differences between product row ``i`` and competitor row ``j``; NaN
    differences are replaced by the criterion mean before the SD is taken.

    Args:
        product_data: DataFrame containing product data (rows are read by
            position).
        competitor_data: DataFrame containing competitor data (rows are read
            by position).
        category_criteria: List of criterion column names for the category.
        n_products: Number of leading product rows to compare (default 6).
        n_competitors: Number of leading competitor rows to compare
            (default 6).

    Returns:
        DataFrame of shape (n_products, n_competitors) indexed
        ``Product_1..`` with columns ``Competitor_1..``. All zeros when
        ``category_criteria`` is empty.

    Raises:
        IndexError: If a DataFrame has fewer rows than requested and
            ``category_criteria`` is not empty.
    """
    row_labels = [f"Product_{i+1}" for i in range(n_products)]
    column_labels = [f"Competitor_{j+1}" for j in range(n_competitors)]
    sd_matrix = np.zeros((n_products, n_competitors))

    # With no criteria there is nothing to compare: keep the zero matrix
    if not category_criteria:
        return pd.DataFrame(sd_matrix, index=row_labels, columns=column_labels)

    # Extract each competitor row once instead of once per product row
    competitor_rows = [
        competitor_data.iloc[j][category_criteria].values
        for j in range(n_competitors)
    ]
    for i in range(n_products):
        product_scores = product_data.iloc[i][category_criteria].values
        for j, competitor_scores in enumerate(competitor_rows):
            score_diff = calculate_score_differences(
                product_scores, competitor_scores, product_data, competitor_data, category_criteria
            )
            # category_criteria is non-empty here, so score_diff has entries
            sd_matrix[i, j] = np.std(score_diff)

    # Convert to DataFrame for better readability
    return pd.DataFrame(sd_matrix, index=row_labels, columns=column_labels)
# Build one SD comparison matrix per category, in the order
# branding -> content marketing -> social media marketing
(
    branding_sd_matrix,
    content_marketing_sd_matrix,
    social_media_marketing_sd_matrix,
) = [
    calculate_sd_comparison_matrix(product_data, competitor_data, criteria)
    for criteria in (
        branding_criteria_filtered,
        content_marketing_criteria_filtered,
        social_media_marketing_criteria_filtered,
    )
]
# Function to find the top SD values ensuring non-repetitive product and competitor image pairs
def find_top_non_repetitive_sd(sd_matrix, product_data, competitor_data, category, top_count=3):
    """
    Find the top SD values ensuring non-repetitive product and competitor image pairs.

    Cells of ``sd_matrix`` are visited from the largest SD value to the
    smallest (NaN cells last; ties keep row-major order). A cell is accepted
    only if neither its product image nor its competitor image has already
    been used by an accepted cell.

    Args:
        sd_matrix: DataFrame representing the SD matrix; row ``i`` / column
            ``j`` correspond by position to row ``i`` of ``product_data``
            and row ``j`` of ``competitor_data``.
        product_data: DataFrame containing product data (its 'Image' column
            supplies the image names).
        competitor_data: DataFrame containing competitor data (its 'Image'
            column supplies the image names).
        category: String representing the category name.
        top_count: Number of top results to return (default is 3).

    Returns:
        List of at most ``top_count`` tuples
        ``(category, product image name, competitor image name, SD value)``,
        ordered from highest to lowest SD value.
    """
    sd_values = sd_matrix.to_numpy(dtype=float)
    n_cols = sd_values.shape[1]
    flat_values = sd_values.ravel()

    # Rank every cell by SD, largest first. NaN is mapped to -inf so it sorts
    # last, and the stable sort keeps row-major order among equal values.
    sort_keys = np.where(np.isnan(flat_values), -np.inf, flat_values)
    ranked_cells = np.argsort(-sort_keys, kind="stable")

    used_product_images = set()
    used_competitor_images = set()
    top_results = []
    for flat_index in ranked_cells:
        if len(top_results) >= top_count:
            break
        i, j = divmod(int(flat_index), n_cols)
        product_image = product_data.iloc[i]['Image']
        competitor_image = competitor_data.iloc[j]['Image']
        # Skip pairs that would reuse an image already in the results
        if product_image in used_product_images or competitor_image in used_competitor_images:
            continue
        top_results.append((category, product_image, competitor_image, sd_matrix.iloc[i, j]))
        used_product_images.add(product_image)
        used_competitor_images.add(competitor_image)
    return top_results
import os

# Select the non-repeating product/competitor pairs for each category
branding_top_3 = find_top_non_repetitive_sd(
    sd_matrix=branding_sd_matrix,
    product_data=product_data,
    competitor_data=competitor_data,
    category="Brand Marketing",
)
content_marketing_top_3 = find_top_non_repetitive_sd(
    sd_matrix=content_marketing_sd_matrix,
    product_data=product_data,
    competitor_data=competitor_data,
    category="Content Marketing",
)
social_media_marketing_top_3 = find_top_non_repetitive_sd(
    sd_matrix=social_media_marketing_sd_matrix,
    product_data=product_data,
    competitor_data=competitor_data,
    category="Social Media Marketing",
)

# Stack the per-category results into a single table
all_top_3 = [
    *branding_top_3,
    *content_marketing_top_3,
    *social_media_marketing_top_3,
]
top_3_df = pd.DataFrame(
    data=all_top_3,
    columns=['Category', 'Product_Image_Name', 'Competitor_Image_Name', 'SD_Value'],
)

# First copy: written next to the input workbooks
top_3_df.to_excel("Output_File/excel/top_3_sd_results.xlsx", index=False)

# Show the table on stdout
print("\nTop 3 SD Results DataFrame:", top_3_df, sep="\n")

# Second copy: written to the generated-output folder, created on demand
output_folder = "data/output_generated_file/Output_File/excel"
os.makedirs(output_folder, exist_ok=True)
output_file_path = os.path.join(output_folder, "top_3_sd_results.xlsx")
top_3_df.to_excel(output_file_path, index=False)

# Confirm where the second copy went
print(f"Top 3 SD Results saved in: {output_file_path}")
|