File size: 2,956 Bytes
2bffcbb 5f15c09 2bffcbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import pandas as pd
import os
import re # For sanitizing the filenames
# Excel workbooks read by this script.
product_analysis_path = "Output_File/excel/product_analysis.xlsx"
competitor_analysis_path = "Output_File/excel/competitor_analysis.xlsx"
top_3_df_path = "Output_File/excel/top_3_sd_results.xlsx"

# Load each workbook into a DataFrame using pandas' default options.
product_data = pd.read_excel(product_analysis_path)
competitor_data = pd.read_excel(competitor_analysis_path)
top_3_df = pd.read_excel(top_3_df_path)

# Directory that receives one .txt file per exported image.
# exist_ok=True creates it when missing and is a no-op when it already
# exists, avoiding the race between a separate os.path.exists() check and
# the os.makedirs() call.
output_dir = "data/top_3_images"
os.makedirs(output_dir, exist_ok=True)

# Print the column labels so a missing 'Image' or 'Raw JSON Response'
# column is easy to spot before the lookups below run.
print("Product Data Columns:", product_data.columns)
print("Competitor Data Columns:", competitor_data.columns)
def sanitize_filename(name):
    """Return *name* converted to a string that is safe to use as a filename.

    Every character that is not a word character (letter, digit,
    underscore), whitespace or a hyphen is replaced with an underscore.
    Leading and trailing whitespace is then removed, and each remaining
    whitespace character (space, tab, newline, ...) becomes an underscore.

    Args:
        name: The raw name. Non-string values are converted with ``str()``
            so numeric spreadsheet cells do not raise ``TypeError``.

    Returns:
        The sanitized name. Dots are replaced as well, so ``"photo.png"``
        becomes ``"photo_png"``.
    """
    cleaned = re.sub(r'[^\w\s-]', '_', str(name)).strip()
    # Replace every whitespace character, not just ' ': the pattern above
    # deliberately keeps all of \s, so tabs/newlines would otherwise leak
    # into the filename. One underscore per character keeps the result for
    # plain spaces identical to the previous behaviour.
    return re.sub(r'\s', '_', cleaned)
def _save_raw_responses(image_names, source_data, label, target_dir):
    """Write the raw response of each named image to a text file.

    For every entry of *image_names* the first row of *source_data* whose
    'Image' column equals that name is looked up, and its
    'Raw JSON Response' value is written to
    ``<target_dir>/<sanitized name>.txt``. A progress line is printed for
    every name (saved, empty response, or not found).

    Args:
        image_names: Iterable of image names to export.
        source_data: DataFrame with 'Image' and 'Raw JSON Response' columns.
        label: Word used in the printed messages ("Product"/"Competitor").
        target_dir: Existing directory that receives the .txt files.
    """
    for image_name in image_names:
        matches = source_data.loc[
            source_data['Image'] == image_name, 'Raw JSON Response'
        ].values
        if len(matches) == 0:
            # A missing (NaN) name never compares equal, so it ends up here
            # instead of crashing during sanitization.
            print(f"{label} image name '{image_name}' not found.")
            continue

        # Sanitize the image name to avoid invalid filename characters.
        safe_name = sanitize_filename(str(image_name))

        raw_response = matches[0]
        # Empty spreadsheet cells are read as NaN, which is truthy; test for
        # it explicitly so it is reported as empty rather than passed to
        # file.write().
        if pd.isna(raw_response) or not raw_response:
            print(f"Empty Raw JSON Response for {label} image: {safe_name}")
            continue

        # Explicit UTF-8 so non-ASCII payloads do not depend on the
        # platform's locale encoding.
        out_path = os.path.join(target_dir, f"{safe_name}.txt")
        with open(out_path, 'w', encoding='utf-8') as file:
            file.write(str(raw_response))  # Write raw response as text
        print(f"Saved Raw Text for {label} image: {safe_name}")


# NOTE(review): product and competitor files share one directory, so a
# competitor image whose sanitized name equals a product image's name
# overwrites that product's file - confirm whether this is intended.

# Process Product Image Names
_save_raw_responses(
    top_3_df['Product_Image_Name'], product_data, "Product", output_dir
)

# Process Competitor Image Names
_save_raw_responses(
    top_3_df['Competitor_Image_Name'], competitor_data, "Competitor", output_dir
)
|