cella110n committed on
Commit
f8dfd01
1 Parent(s): efccf3a
Files changed (1) hide show
  1. script.R +204 -0
script.R ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(tidyverse)
2
+
3
+ # ----- #
4
#' Read comma-separated caption files from a directory into one long table.
#'
#' Every `.txt` file directly inside `directory_path` is read, its content is
#' split on commas, and each caption becomes one row.
#'
#' @param directory_path Path of the directory to scan (not recursive).
#' @return A tibble with the columns `image_path` (the caption file's path
#'   with the `.txt` extension replaced by `.png`), `caption_order` (1-based
#'   position of the caption within its file) and `caption` (whitespace
#'   trimmed). Returns `NULL` when the directory holds no `.txt` files.
read_captions_from_directory <- function(directory_path) {
  txt_files <- list.files(
    directory_path,
    pattern = "\\.txt$",
    full.names = TRUE
  )

  # Nothing to read: keep the historical NULL result, but say so explicitly.
  if (length(txt_files) == 0) {
    return(NULL)
  }

  per_file <- lapply(txt_files, function(file) {
    lines <- readLines(file, warn = FALSE)

    # Split every line (not only the first one) so that captions spread over
    # several lines are kept; empty lines contribute nothing.
    captions <- trimws(unlist(strsplit(lines, ",", fixed = TRUE)))

    # Files without any caption are skipped; bind_rows() ignores NULL entries.
    if (length(captions) == 0) {
      return(NULL)
    }

    tibble(
      image_path = sub("\\.txt$", ".png", file),
      # seq_along() is safe for any length, unlike 1:length(x).
      caption_order = seq_along(captions),
      caption = captions
    )
  })

  bind_rows(per_file)
}
31
+
32
#' Count how often each caption occurs.
#'
#' @param data Data frame with a `caption` column.
#' @return One row per distinct caption with its count in `frequency`,
#'   sorted from most to least frequent.
get_caption_frequency <- function(data) {
  by_caption <- group_by(data, caption)
  counts <- summarise(by_caption, frequency = n())
  arrange(counts, desc(frequency))
}
38
+
39
#' Find the rows that carry a given caption.
#'
#' @param data Data frame with `image_path` and `caption` columns.
#' @param target_caption Caption text to look for (exact match).
#' @return The distinct matching rows, grouped by `image_path`.
search_by_caption <- function(data, target_caption) {
  matching_rows <- filter(data, caption == target_caption)
  per_image <- group_by(matching_rows, image_path)
  distinct(per_image)
}
45
+
46
#' Remove a caption from one image and close the gap in `caption_order`.
#'
#' @param data Data frame with the columns `image_path`, `caption_order`
#'   and `caption`.
#' @param target_image_path Image whose caption should be removed.
#' @param target_caption Caption text to remove (exact match).
#' @return `data` without the matching row(s). Every remaining caption of the
#'   same image is moved down by the number of removed captions that preceded
#'   it. If nothing matches, a message is printed and `data` is returned
#'   unchanged.
remove_caption_and_adjust_order <- function(data, target_image_path, target_caption) {
  # %in% never yields NA, so rows with a missing path or caption can neither
  # break the `if` below nor be removed by accident.
  on_image <- data$image_path %in% target_image_path
  is_target <- on_image & data$caption %in% target_caption

  if (!any(is_target)) {
    cat(sprintf("The caption '%s' does not exist for image '%s'.\n", target_caption, target_image_path))
    return(data)
  }

  # May hold several values when the caption occurs more than once.
  removed_orders <- data$caption_order[is_target]
  data <- data[!is_target, , drop = FALSE]

  # Shift each surviving caption of this image by the count of removed
  # positions in front of it (correct for one or many removals).
  rows <- which(
    data$image_path %in% target_image_path & !is.na(data$caption_order)
  )
  shift <- vapply(
    data$caption_order[rows],
    function(ord) sum(removed_orders < ord, na.rm = TRUE),
    integer(1)
  )
  data$caption_order[rows] <- data$caption_order[rows] - shift

  data
}
62
+
63
#' Drop every caption whose overall frequency is at or below a threshold.
#'
#' @param data Data frame with `image_path`, `caption_order`, `caption`.
#' @param threshold Captions occurring `threshold` times or fewer are removed.
#' @return `data` with the rare captions removed image by image through
#'   `remove_caption_and_adjust_order()`, which also renumbers the orders.
remove_low_frequency_captions <- function(data, threshold) {
  frequency_table <- get_caption_frequency(data)
  rare_rows <- filter(frequency_table, frequency <= threshold)
  rare_captions <- pull(rare_rows, caption)

  for (rare_caption in rare_captions) {
    # Images that still carry this caption at this point of the clean-up.
    holders <- unique(data$image_path[data$caption == rare_caption])
    for (holder in holders) {
      data <- remove_caption_and_adjust_order(data, holder, rare_caption)
    }
  }

  data
}
82
+
83
# Open `path` with the operating system's default viewer. The path is passed
# through shQuote() so spaces, quotes or `$` in file names cannot break or
# alter the shell command.
.open_image_in_default_viewer <- function(path) {
  os_name <- Sys.info()[["sysname"]]
  if (os_name == "Windows") {
    shell(sprintf('start "" %s', shQuote(path, type = "cmd")), intern = TRUE)
  } else if (os_name == "Darwin") { # macOS
    system(paste("open", shQuote(path)))
  } else { # Linux and other Unix-alikes
    system(paste("xdg-open", shQuote(path)))
  }
  invisible(NULL)
}

#' Review every image that carries a caption and optionally remove it.
#'
#' Each matching image is opened in the default viewer and the user is asked
#' on the console whether the caption should be removed from that image.
#'
#' @param data Data frame with `image_path`, `caption_order`, `caption`.
#' @param target_caption Caption to review.
#' @return `data` after all confirmed removals. Answering "yes" removes the
#'   caption from the current image, "end" stops the review, and any other
#'   answer keeps the caption. Answers are matched case-insensitively and
#'   ignore surrounding whitespace.
edit_captions_interactively <- function(data, target_caption) {
  # unique() so an image is never prompted for twice.
  image_paths <- unique(search_by_caption(data, target_caption)$image_path)

  for (path in image_paths) {
    .open_image_in_default_viewer(path)

    cat(sprintf("Do you want to remove the caption '%s' from image '%s'? (yes/no/end): ", target_caption, path))
    response <- tolower(trimws(readline()))

    if (response == "end") {
      break
    }
    if (response == "yes") {
      data <- remove_caption_and_adjust_order(data, path, target_caption)
    }
  }

  data
}
113
+
114
#' Insert a caption for an image at a given position.
#'
#' @param data Data frame with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image that receives the caption.
#' @param target_caption Caption text to add.
#' @param target_order Position for the new caption. When `NULL`, the current
#'   captions are printed and the position is read from the console.
#' @return `data` with the new caption appended as a row and the
#'   `caption_order` of captions at or after the position increased by one.
#'   `data` is returned unchanged if the image already has this caption. A
#'   missing, non-numeric, non-positive or too large position means "append
#'   after the last caption".
add_caption_at_order <- function(data, target_image_path, target_caption, target_order = NULL) {
  # %in% keeps rows with a missing image path out of the mask instead of
  # turning them into NA.
  on_image <- data$image_path %in% target_image_path

  # A caption may only occur once per image.
  if (target_caption %in% data$caption[on_image]) {
    return(data)
  }

  # An image without captions starts at position 1 (max() of an empty vector
  # would give -Inf with a warning).
  existing_orders <- data$caption_order[on_image]
  existing_orders <- existing_orders[!is.na(existing_orders)]
  append_position <- if (length(existing_orders) > 0) {
    max(existing_orders) + 1
  } else {
    1
  }

  if (is.null(target_order)) {
    print_image_captions_as_csv(data, target_image_path)
    cat("Enter the position (order) to insert the new caption (1 to", append_position, "): ")
    # Non-numeric input becomes NA and is handled by the check below.
    target_order <- suppressWarnings(as.numeric(readline()))
  }

  # Unusable positions fall back to appending at the end.
  if (length(target_order) != 1 || is.na(target_order) ||
    target_order <= 0 || target_order > append_position) {
    target_order <- append_position
  }

  # Make room: move captions at or after the insertion point one step back.
  shifted <- which(on_image & data$caption_order >= target_order)
  data$caption_order[shifted] <- data$caption_order[shifted] + 1

  bind_rows(
    data,
    tibble(
      image_path = target_image_path,
      caption_order = target_order,
      caption = target_caption
    )
  )
}
149
+
150
#' Move an existing caption of an image to a new position.
#'
#' @param data Data frame with `image_path`, `caption_order`, `caption`.
#' @param target_image_path Image whose caption is moved.
#' @param target_caption Caption to move.
#' @param new_order Position handed to `add_caption_at_order()`.
#' @return `data` with the caption removed and re-inserted at `new_order`;
#'   if the image does not have the caption, a message is printed and `data`
#'   is returned unchanged.
move_caption_order <- function(data, target_image_path, target_caption, new_order) {
  caption_exists <- any(data$image_path == target_image_path & data$caption == target_caption)

  if (caption_exists) {
    # Take the caption out first, then put it back at the requested position.
    without_caption <- remove_caption_and_adjust_order(data, target_image_path, target_caption)
    return(add_caption_at_order(without_caption, target_image_path, target_caption, new_order))
  }

  cat(sprintf("The caption '%s' does not exist for image '%s'.\n", target_caption, target_image_path))
  data
}
165
+
166
# Check whether a caption exists for an image (may end up unused).
#
# Returns the result of any() over the rows where both `image_path` equals
# `target_image_path` and `caption` equals `target_caption`.
is_caption_present <- function(data, target_image_path, target_caption) {
  on_image <- data$image_path == target_image_path
  has_caption <- data$caption == target_caption
  any(on_image & has_caption)
}
170
+
171
# Print every caption once, as a single comma-separated line.
#
# Captions appear in order of first occurrence in `data$caption`.
print_all_unique_captions_as_csv <- function(data) {
  # toString() joins the values with ", ".
  csv_line <- toString(unique(data$caption))
  cat(csv_line, "\n")
}
178
+
179
# Print the captions of one image as a comma-separated line, ordered by
# `caption_order`.
print_image_captions_as_csv <- function(data, target_image_path) {
  # which() drops rows whose image path is missing.
  image_rows <- which(data$image_path == target_image_path)
  ordered_rows <- image_rows[order(data$caption_order[image_rows])]

  cat(paste(data$caption[ordered_rows], collapse = ", "), "\n")
}
186
+
187
# Collapse a set of related captions onto one representative caption.
#
# For `target_image_path`, every caption listed in `related_captions` other
# than `representative_caption` is removed with
# remove_caption_and_adjust_order(). If the image does not carry the
# representative caption, a message is printed and `data` is returned
# unchanged.
remove_related_captions_except_representative <- function(data, related_captions, representative_caption, target_image_path) {
  has_representative <- any(data$image_path == target_image_path & data$caption == representative_caption)

  if (!has_representative) {
    cat(sprintf("The representative caption '%s' is not associated with image '%s'.\n", representative_caption, target_image_path))
    return(data)
  }

  for (candidate in related_captions) {
    # The representative itself always stays.
    if (candidate == representative_caption) {
      next
    }
    data <- remove_caption_and_adjust_order(data, target_image_path, candidate)
  }

  data
}