|
library(tidyverse) |
|
|
|
|
|
#' Read comma-separated caption files from a directory into one long table
#'
#' Every `*.txt` file directly inside `directory_path` is read and its
#' contents are split on commas. Each caption becomes one row, numbered in
#' the order in which it appears in the file.
#'
#' @param directory_path Directory to scan for `.txt` files (non-recursive).
#' @return A tibble with columns `image_path` (the caption file's path with
#'   the `.txt` extension replaced by `.png`), `caption_order` (1-based
#'   position within the file) and `caption` (whitespace-trimmed text).
#'   Files without any caption contribute no rows; a directory without
#'   caption files yields a zero-row tibble with the same three columns.
read_captions_from_directory <- function(directory_path) {
  txt_files <- list.files(
    directory_path,
    pattern = "\\.txt$",
    full.names = TRUE
  )

  # Typed template so the result always has the three expected columns,
  # even when no caption file is found.
  empty_result <- tibble(
    image_path = character(),
    caption_order = integer(),
    caption = character()
  )

  per_file <- lapply(txt_files, function(file) {
    # Join all lines first so captions after the first line are not lost.
    raw_text <- paste(readLines(file, warn = FALSE), collapse = ",")
    captions_list <- trimws(strsplit(raw_text, ",", fixed = TRUE)[[1]])
    # Blank lines, doubled commas and empty files produce empty tokens.
    captions_list <- captions_list[nzchar(captions_list)]

    tibble(
      image_path = rep(sub("\\.txt$", ".png", file), length(captions_list)),
      caption_order = seq_along(captions_list),
      caption = captions_list
    )
  })

  bind_rows(c(list(empty_result), per_file))
}
|
|
|
#' Count how many rows carry each caption
#'
#' @param data A data frame with a `caption` column.
#' @return One row per distinct caption with its row count in `frequency`,
#'   ordered from the most to the least frequent caption.
get_caption_frequency <- function(data) {
  by_caption <- group_by(data, caption)
  tally <- summarise(by_caption, frequency = n())
  arrange(tally, desc(frequency))
}
|
|
|
#' Find the rows whose caption equals a given caption
#'
#' @param data A data frame with `caption` and `image_path` columns.
#' @param target_caption Caption text to match exactly.
#' @return The distinct matching rows of `data`. The result is still grouped
#'   by `image_path`, because the grouping is not removed before returning.
search_by_caption <- function(data, target_caption) {
  hits <- filter(data, caption == target_caption)
  hits_by_image <- group_by(hits, image_path)
  distinct(hits_by_image)
}
|
|
|
#' Remove a caption from one image and close the gap in its ordering
#'
#' All rows of `target_image_path` whose caption equals `target_caption` are
#' dropped. Every remaining caption of that image is moved up by the number
#' of removed captions that preceded it, so duplicated captions are handled
#' correctly as well. Rows of other images are left untouched.
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image whose caption should be removed.
#' @param target_caption Caption text to remove (exact match).
#' @return The updated table. If the image does not carry the caption, a
#'   note is printed and `data` is returned unchanged.
remove_caption_and_adjust_order <- function(data, target_image_path, target_caption) {
  # `%in% TRUE` turns NA comparisons into FALSE: rows with a missing path or
  # caption are never treated as a match and never break the `if` below.
  on_image <- (data$image_path == target_image_path) %in% TRUE
  to_remove <- on_image & (data$caption == target_caption) %in% TRUE

  if (!any(to_remove)) {
    cat(sprintf(
      "The caption '%s' does not exist for image '%s'.\n",
      target_caption, target_image_path
    ))
    return(data)
  }

  removed_orders <- data$caption_order[to_remove]
  data <- data[!to_remove, , drop = FALSE]
  on_image <- on_image[!to_remove]

  # Shift each surviving caption by how many removed captions came before it.
  remaining_orders <- data$caption_order[on_image]
  shift <- vapply(
    remaining_orders,
    function(position) sum(removed_orders < position, na.rm = TRUE),
    integer(1)
  )
  data$caption_order[on_image] <- remaining_orders - shift

  data
}
|
|
|
#' Remove every caption whose overall frequency is at or below a threshold
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param threshold Captions whose frequency (as reported by
#'   `get_caption_frequency()`) is `threshold` or lower are removed.
#' @return `data` after each such caption has been removed, image by image,
#'   through `remove_caption_and_adjust_order()`.
remove_low_frequency_captions <- function(data, threshold) {
  frequency_table <- get_caption_frequency(data)
  rare_rows <- filter(frequency_table, frequency <= threshold)
  rare_captions <- pull(rare_rows, caption)

  for (rare in rare_captions) {
    # Look the images up in the current `data`, which shrinks as we go.
    affected_images <- unique(data$image_path[data$caption == rare])
    for (affected in affected_images) {
      data <- remove_caption_and_adjust_order(data, affected, rare)
    }
  }

  data
}
|
|
|
#' Interactively review the images carrying a caption and optionally remove it
#'
#' For every image whose captions include `target_caption`, the image is
#' opened with the platform's opener command (`start` on Windows, `open` on
#' macOS, `xdg-open` elsewhere) and the user is asked on the console whether
#' the caption should be removed from that image.
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param target_caption Caption to review.
#' @return `data` with the caption removed from every image the user
#'   answered "yes" for. Answering "end" stops the review early; any other
#'   answer keeps the caption. Answers are case-insensitive and surrounding
#'   whitespace is ignored.
edit_captions_interactively <- function(data, target_caption) {
  # Ask once per image even when the search yields several rows for it.
  image_paths <- unique(search_by_caption(data, target_caption)$image_path)
  os_name <- Sys.info()[["sysname"]]

  for (path in image_paths) {
    # The path is always passed through shQuote() so that spaces, quotes or
    # shell metacharacters in a file name cannot alter the command.
    if (os_name == "Windows") {
      cmd <- sprintf('start "" %s', shQuote(path, type = "cmd"))
      shell(cmd, intern = TRUE)
    } else if (os_name == "Darwin") {
      system2("open", shQuote(path))
    } else {
      system2("xdg-open", shQuote(path))
    }

    cat(sprintf(
      "Do you want to remove the caption '%s' from image '%s'? (yes/no/end): ",
      target_caption, path
    ))
    response <- tolower(trimws(readline()))

    if (response == "end") {
      break
    }
    if (response == "yes") {
      data <- remove_caption_and_adjust_order(data, path, target_caption)
    }
  }

  data
}
|
|
|
#' Insert a new caption for an image at a given position
#'
#' Captions of `target_image_path` at or after the insert position are moved
#' down by one and the new caption is appended to the table as a new row
#' carrying that position.
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image that receives the caption.
#' @param target_caption Caption text to add.
#' @param target_order Position for the new caption. When `NULL`, the image's
#'   current captions are printed and the position is read from the console.
#'   A position that is missing, not a number, or outside
#'   `1 .. (current maximum + 1)` falls back to appending after the last
#'   caption.
#' @return The updated table, or `data` unchanged when the image already
#'   carries `target_caption`.
add_caption_at_order <- function(data, target_image_path, target_caption, target_order = NULL) {
  on_image <- (data$image_path == target_image_path) %in% TRUE

  # Nothing to do when the image already carries this caption.
  if (target_caption %in% data$caption[on_image]) {
    return(data)
  }

  # An image without captions has no maximum; use 0 so that the only valid
  # position is 1 (max() on an empty vector would warn and return -Inf).
  existing_orders <- data$caption_order[on_image]
  existing_orders <- existing_orders[!is.na(existing_orders)]
  max_order <- if (length(existing_orders) > 0) max(existing_orders) else 0

  if (is.null(target_order)) {
    print_image_captions_as_csv(data, target_image_path)
    cat("Enter the position (order) to insert the new caption (1 to", max_order + 1, "): ")
    # Non-numeric input becomes NA and is handled by the range check below.
    target_order <- suppressWarnings(as.numeric(readline()))
  }

  # Validate in both the interactive and the programmatic case so that the
  # ordering of the image never ends up with gaps.
  if (length(target_order) != 1 || is.na(target_order) ||
    target_order <= 0 || target_order > max_order + 1) {
    target_order <- max_order + 1
  }

  data <- data %>%
    mutate(
      caption_order = ifelse(
        image_path == target_image_path & caption_order >= target_order,
        caption_order + 1,
        caption_order
      )
    )

  new_caption <- tibble(
    image_path = target_image_path,
    caption_order = target_order,
    caption = target_caption
  )

  bind_rows(data, new_caption)
}
|
|
|
#' Move an existing caption of an image to a new position
#'
#' The caption is first removed with `remove_caption_and_adjust_order()` and
#' then re-inserted with `add_caption_at_order()` at `new_order`.
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image whose caption is moved.
#' @param target_caption Caption text to move.
#' @param new_order Position passed on to `add_caption_at_order()`.
#' @return The updated table. If the image does not carry the caption, a
#'   note is printed and `data` is returned unchanged.
move_caption_order <- function(data, target_image_path, target_caption, new_order) {
  caption_exists <- any(
    data$image_path == target_image_path & data$caption == target_caption
  )

  if (caption_exists) {
    without_caption <- remove_caption_and_adjust_order(
      data, target_image_path, target_caption
    )
    return(add_caption_at_order(
      without_caption, target_image_path, target_caption, new_order
    ))
  }

  cat(sprintf(
    "The caption '%s' does not exist for image '%s'.\n",
    target_caption, target_image_path
  ))
  data
}
|
|
|
|
|
#' Check whether an image carries a given caption
#'
#' @param data A data frame with `image_path` and `caption` columns.
#' @param target_image_path Image to look up.
#' @param target_caption Caption text to look for (exact match).
#' @return `TRUE` if at least one row matches both values, otherwise `FALSE`.
#'   Rows with a missing path or caption are ignored, so the result is never
#'   `NA` just because such rows exist in `data`.
is_caption_present <- function(data, target_image_path, target_caption) {
  matches <- data$image_path == target_image_path &
    data$caption == target_caption
  # na.rm keeps NA comparisons from turning a "no match" into NA.
  any(matches, na.rm = TRUE)
}
|
|
|
|
|
#' Print every distinct caption on a single comma-separated line
#'
#' @param data A data frame with a `caption` column.
#' @return Called for its side effect: the distinct captions, in order of
#'   first appearance, are written to the console joined by ", " and followed
#'   by a space and a newline.
print_all_unique_captions_as_csv <- function(data) {
  distinct_captions <- unique(data$caption)
  cat(toString(distinct_captions), "\n", sep = " ")
}
|
|
|
#' Print the captions of one image on a single comma-separated line
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image whose captions are printed.
#' @return Called for its side effect: the image's captions, sorted by
#'   `caption_order`, are written to the console joined by ", " and followed
#'   by a space and a newline.
print_image_captions_as_csv <- function(data, target_image_path) {
  # which() skips rows whose path comparison is NA.
  image_rows <- which(data$image_path == target_image_path)
  sorted_rows <- image_rows[order(data$caption_order[image_rows])]
  joined <- paste(data$caption[sorted_rows], collapse = ", ")

  cat(joined, "\n")
}
|
|
|
|
|
#' Collapse a set of related captions of an image onto one representative
#'
#' Every caption in `related_captions` other than `representative_caption`
#' is removed from `target_image_path` via
#' `remove_caption_and_adjust_order()`.
#'
#' @param data Caption table with `image_path`, `caption_order`, `caption`.
#' @param related_captions Captions considered equivalent to each other.
#' @param representative_caption The caption that is kept.
#' @param target_image_path Image to clean up.
#' @return The updated table. If the image does not carry the representative
#'   caption, a note is printed and `data` is returned unchanged.
remove_related_captions_except_representative <- function(data, related_captions, representative_caption, target_image_path) {
  has_representative <- any(
    data$image_path == target_image_path &
      data$caption == representative_caption
  )

  if (!has_representative) {
    cat(sprintf(
      "The representative caption '%s' is not associated with image '%s'.\n",
      representative_caption, target_image_path
    ))
    return(data)
  }

  for (candidate in related_captions) {
    # The representative itself must survive the clean-up.
    if (candidate == representative_caption) {
      next
    }
    data <- remove_caption_and_adjust_order(data, target_image_path, candidate)
  }

  data
}