|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <config.h> |
|
|
|
|
|
#include <stdio.h> |
|
|
#include <getopt.h> |
|
|
#include <sys/types.h> |
|
|
#include "system.h" |
|
|
|
|
|
#include "assure.h" |
|
|
#include "fadvise.h" |
|
|
#include "getndelim2.h" |
|
|
|
|
|
#include "set-fields.h" |
|
|
|
|
|
|
|
|
/* Name of this program, as handed to emit_ancillary_info in usage and to
   the --version handling in main.  */
#define PROGRAM_NAME "cut"

/* Author list, as handed to the --version handling in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")
|
|
|
|
|
/* Report MESSAGE through error () and then call usage (EXIT_FAILURE).
   MESSAGE is printed through a fixed "%s" format rather than being used
   as the format itself, so a '%' that finds its way into a (translated)
   message is printed literally instead of being interpreted as a
   conversion specification.  Wrapped in do/while (0) so the macro acts
   as a single statement after an unbraced 'if'.  */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, "%s", (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Cursor into the list of selected ranges.  It is reset to 'frp' at the
   start of every line, advanced by next_item, and consulted by print_kth
   and is_range_start_index.  */
static struct field_range_pair *current_rp;

/* Buffer, and its allocated size, handed to getndelim2 when cut_fields
   has to read the whole first field before deciding what to print.
   cut_fields frees the buffer and resets the pointer when getndelim2
   reports failure.  */
static char *field_1_buffer;
static size_t field_1_bufsize;

/* Option flags.  suppress_non_delimited is set by -s: in field mode,
   lines that contain no delimiter are then not printed.  complement is
   set by --complement and is forwarded to set_fields as
   SETFLD_COMPLEMENT.  have_read_stdin is set by cut_file once "-" has
   been processed, so that main closes stdin before exiting.  */
static bool suppress_non_delimited;
static bool complement;
static bool have_read_stdin;

/* Input field delimiter: the -d argument, or TAB when -d is absent.  */
static unsigned char delim;

/* Byte that terminates an input and output line: newline by default,
   NUL after -z.  */
static unsigned char line_delim = '\n';

/* String written between selected fields (and, in byte mode, between
   selected ranges), together with its length in bytes.  Both are set by
   --output-delimiter; otherwise main points the string at
   output_delimiter_default and sets the length to 1.  */
static size_t output_delimiter_length;
static char *output_delimiter_string;

/* Holds a copy of 'delim' when no --output-delimiter was given.
   cut_bytes compares output_delimiter_string against this array to tell
   whether the user supplied an output delimiter.  */
static char output_delimiter_default[1];
|
|
|
|
|
|
|
|
|
|
|
/* Codes returned by getopt_long for the long-only options.  They start
   just above CHAR_MAX so they cannot collide with any single-character
   option code.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION = OUTPUT_DELIMITER_OPTION + 1
};
|
|
|
|
|
static struct option const longopts[] = |
|
|
{ |
|
|
{"bytes", required_argument, nullptr, 'b'}, |
|
|
{"characters", required_argument, nullptr, 'c'}, |
|
|
{"fields", required_argument, nullptr, 'f'}, |
|
|
{"delimiter", required_argument, nullptr, 'd'}, |
|
|
{"only-delimited", no_argument, nullptr, 's'}, |
|
|
{"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, |
|
|
{"complement", no_argument, nullptr, COMPLEMENT_OPTION}, |
|
|
{"zero-terminated", no_argument, nullptr, 'z'}, |
|
|
{GETOPT_HELP_OPTION_DECL}, |
|
|
{GETOPT_VERSION_OPTION_DECL}, |
|
|
{nullptr, 0, nullptr, 0} |
|
|
}; |
|
|
|
|
|
void |
|
|
usage (int status) |
|
|
{ |
|
|
if (status != EXIT_SUCCESS) |
|
|
emit_try_help (); |
|
|
else |
|
|
{ |
|
|
printf (_("\ |
|
|
Usage: %s OPTION... [FILE]...\n\ |
|
|
"), |
|
|
program_name); |
|
|
fputs (_("\ |
|
|
Print selected parts of lines from each FILE to standard output.\n\ |
|
|
"), stdout); |
|
|
|
|
|
emit_stdin_note (); |
|
|
emit_mandatory_arg_note (); |
|
|
|
|
|
fputs (_("\ |
|
|
-b, --bytes=LIST select only these bytes\n\ |
|
|
-c, --characters=LIST select only these characters\n\ |
|
|
-d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
-f, --fields=LIST select only these fields; also print any line\n\ |
|
|
that contains no delimiter character, unless\n\ |
|
|
the -s option is specified\n\ |
|
|
-n (ignored)\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
--complement complement the set of selected bytes, characters\n\ |
|
|
or fields\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
-s, --only-delimited do not print lines not containing delimiters\n\ |
|
|
--output-delimiter=STRING use STRING as the output delimiter\n\ |
|
|
the default is to use the input delimiter\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
-z, --zero-terminated line delimiter is NUL, not newline\n\ |
|
|
"), stdout); |
|
|
fputs (HELP_OPTION_DESCRIPTION, stdout); |
|
|
fputs (VERSION_OPTION_DESCRIPTION, stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ |
|
|
range, or many ranges separated by commas. Selected input is written\n\ |
|
|
in the same order that it is read, and is written exactly once.\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
Each range is one of:\n\ |
|
|
\n\ |
|
|
N N'th byte, character or field, counted from 1\n\ |
|
|
N- from N'th byte, character or field, to end of line\n\ |
|
|
N-M from N'th to M'th (included) byte, character or field\n\ |
|
|
-M from first to M'th (included) byte, character or field\n\ |
|
|
"), stdout); |
|
|
emit_ancillary_info (PROGRAM_NAME); |
|
|
} |
|
|
exit (status); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline void |
|
|
next_item (uintmax_t *item_idx) |
|
|
{ |
|
|
(*item_idx)++; |
|
|
if ((*item_idx) > current_rp->hi) |
|
|
current_rp++; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static inline bool |
|
|
print_kth (uintmax_t k) |
|
|
{ |
|
|
return current_rp->lo <= k; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static inline bool |
|
|
is_range_start_index (uintmax_t k) |
|
|
{ |
|
|
return k == current_rp->lo; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static void |
|
|
cut_bytes (FILE *stream) |
|
|
{ |
|
|
uintmax_t byte_idx; |
|
|
|
|
|
|
|
|
bool print_delimiter; |
|
|
|
|
|
byte_idx = 0; |
|
|
print_delimiter = false; |
|
|
current_rp = frp; |
|
|
while (true) |
|
|
{ |
|
|
int c; |
|
|
|
|
|
c = getc (stream); |
|
|
|
|
|
if (c == line_delim) |
|
|
{ |
|
|
if (putchar (c) < 0) |
|
|
write_error (); |
|
|
byte_idx = 0; |
|
|
print_delimiter = false; |
|
|
current_rp = frp; |
|
|
} |
|
|
else if (c == EOF) |
|
|
{ |
|
|
if (byte_idx > 0) |
|
|
{ |
|
|
if (putchar (line_delim) < 0) |
|
|
write_error (); |
|
|
} |
|
|
break; |
|
|
} |
|
|
else |
|
|
{ |
|
|
next_item (&byte_idx); |
|
|
if (print_kth (byte_idx)) |
|
|
{ |
|
|
if (output_delimiter_string != output_delimiter_default) |
|
|
{ |
|
|
if (print_delimiter && is_range_start_index (byte_idx)) |
|
|
{ |
|
|
if (fwrite (output_delimiter_string, sizeof (char), |
|
|
output_delimiter_length, stdout) |
|
|
!= output_delimiter_length) |
|
|
write_error (); |
|
|
} |
|
|
print_delimiter = true; |
|
|
} |
|
|
|
|
|
if (putchar (c) < 0) |
|
|
write_error (); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static void |
|
|
cut_fields (FILE *stream) |
|
|
{ |
|
|
int c; |
|
|
uintmax_t field_idx = 1; |
|
|
bool found_any_selected_field = false; |
|
|
bool buffer_first_field; |
|
|
|
|
|
current_rp = frp; |
|
|
|
|
|
c = getc (stream); |
|
|
if (c == EOF) |
|
|
return; |
|
|
|
|
|
ungetc (c, stream); |
|
|
c = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); |
|
|
|
|
|
while (true) |
|
|
{ |
|
|
if (field_idx == 1 && buffer_first_field) |
|
|
{ |
|
|
ssize_t len; |
|
|
size_t n_bytes; |
|
|
|
|
|
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, |
|
|
GETNLINE_NO_LIMIT, delim, line_delim, stream); |
|
|
if (len < 0) |
|
|
{ |
|
|
free (field_1_buffer); |
|
|
field_1_buffer = nullptr; |
|
|
if (ferror (stream) || feof (stream)) |
|
|
break; |
|
|
xalloc_die (); |
|
|
} |
|
|
|
|
|
n_bytes = len; |
|
|
affirm (n_bytes != 0); |
|
|
|
|
|
c = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) |
|
|
{ |
|
|
if (suppress_non_delimited) |
|
|
{ |
|
|
|
|
|
} |
|
|
else |
|
|
{ |
|
|
if (fwrite (field_1_buffer, sizeof (char), n_bytes, stdout) |
|
|
!= n_bytes) |
|
|
write_error (); |
|
|
|
|
|
if (field_1_buffer[n_bytes - 1] != line_delim) |
|
|
{ |
|
|
if (putchar (line_delim) < 0) |
|
|
write_error (); |
|
|
} |
|
|
c = line_delim; |
|
|
} |
|
|
continue; |
|
|
} |
|
|
|
|
|
if (print_kth (1)) |
|
|
{ |
|
|
|
|
|
if (fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout) |
|
|
!= n_bytes - 1) |
|
|
write_error (); |
|
|
|
|
|
|
|
|
if (delim == line_delim) |
|
|
{ |
|
|
int last_c = getc (stream); |
|
|
if (last_c != EOF) |
|
|
{ |
|
|
ungetc (last_c, stream); |
|
|
found_any_selected_field = true; |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
found_any_selected_field = true; |
|
|
} |
|
|
} |
|
|
next_item (&field_idx); |
|
|
} |
|
|
|
|
|
int prev_c = c; |
|
|
|
|
|
if (print_kth (field_idx)) |
|
|
{ |
|
|
if (found_any_selected_field) |
|
|
{ |
|
|
if (fwrite (output_delimiter_string, sizeof (char), |
|
|
output_delimiter_length, stdout) |
|
|
!= output_delimiter_length) |
|
|
write_error (); |
|
|
} |
|
|
found_any_selected_field = true; |
|
|
|
|
|
while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
|
|
{ |
|
|
if (putchar (c) < 0) |
|
|
write_error (); |
|
|
prev_c = c; |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
|
|
prev_c = c; |
|
|
} |
|
|
|
|
|
|
|
|
if (delim == line_delim && c == delim) |
|
|
{ |
|
|
int last_c = getc (stream); |
|
|
if (last_c != EOF) |
|
|
ungetc (last_c, stream); |
|
|
else |
|
|
c = last_c; |
|
|
} |
|
|
|
|
|
if (c == delim) |
|
|
next_item (&field_idx); |
|
|
else if (c == line_delim || c == EOF) |
|
|
{ |
|
|
if (found_any_selected_field |
|
|
|| !(suppress_non_delimited && field_idx == 1)) |
|
|
{ |
|
|
|
|
|
if (c == line_delim || prev_c != line_delim |
|
|
|| delim == line_delim) |
|
|
{ |
|
|
if (putchar (line_delim) < 0) |
|
|
write_error (); |
|
|
} |
|
|
} |
|
|
if (c == EOF) |
|
|
break; |
|
|
|
|
|
|
|
|
field_idx = 1; |
|
|
current_rp = frp; |
|
|
found_any_selected_field = false; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static bool |
|
|
cut_file (char const *file, void (*cut_stream) (FILE *)) |
|
|
{ |
|
|
FILE *stream; |
|
|
|
|
|
if (streq (file, "-")) |
|
|
{ |
|
|
have_read_stdin = true; |
|
|
stream = stdin; |
|
|
assume (stream); |
|
|
} |
|
|
else |
|
|
{ |
|
|
stream = fopen (file, "r"); |
|
|
if (stream == nullptr) |
|
|
{ |
|
|
error (0, errno, "%s", quotef (file)); |
|
|
return false; |
|
|
} |
|
|
} |
|
|
|
|
|
fadvise (stream, FADVISE_SEQUENTIAL); |
|
|
|
|
|
cut_stream (stream); |
|
|
|
|
|
int err = errno; |
|
|
if (!ferror (stream)) |
|
|
err = 0; |
|
|
if (streq (file, "-")) |
|
|
clearerr (stream); |
|
|
else if (fclose (stream) == EOF) |
|
|
err = errno; |
|
|
if (err) |
|
|
{ |
|
|
error (0, err, "%s", quotef (file)); |
|
|
return false; |
|
|
} |
|
|
return true; |
|
|
} |
|
|
|
|
|
int |
|
|
main (int argc, char **argv) |
|
|
{ |
|
|
int optc; |
|
|
bool ok; |
|
|
bool delim_specified = false; |
|
|
bool byte_mode = false; |
|
|
char *spec_list_string = nullptr; |
|
|
|
|
|
initialize_main (&argc, &argv); |
|
|
set_program_name (argv[0]); |
|
|
setlocale (LC_ALL, ""); |
|
|
bindtextdomain (PACKAGE, LOCALEDIR); |
|
|
textdomain (PACKAGE); |
|
|
|
|
|
atexit (close_stdout); |
|
|
|
|
|
|
|
|
suppress_non_delimited = false; |
|
|
|
|
|
delim = '\0'; |
|
|
have_read_stdin = false; |
|
|
|
|
|
while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, nullptr)) |
|
|
!= -1) |
|
|
{ |
|
|
switch (optc) |
|
|
{ |
|
|
case 'b': |
|
|
case 'c': |
|
|
|
|
|
byte_mode = true; |
|
|
FALLTHROUGH; |
|
|
case 'f': |
|
|
|
|
|
if (spec_list_string) |
|
|
FATAL_ERROR (_("only one list may be specified")); |
|
|
spec_list_string = optarg; |
|
|
break; |
|
|
|
|
|
case 'd': |
|
|
|
|
|
|
|
|
if (optarg[0] != '\0' && optarg[1] != '\0') |
|
|
FATAL_ERROR (_("the delimiter must be a single character")); |
|
|
delim = optarg[0]; |
|
|
delim_specified = true; |
|
|
break; |
|
|
|
|
|
case OUTPUT_DELIMITER_OPTION: |
|
|
|
|
|
|
|
|
output_delimiter_length = (optarg[0] == '\0' |
|
|
? 1 : strlen (optarg)); |
|
|
output_delimiter_string = optarg; |
|
|
break; |
|
|
|
|
|
case 'n': |
|
|
break; |
|
|
|
|
|
case 's': |
|
|
suppress_non_delimited = true; |
|
|
break; |
|
|
|
|
|
case 'z': |
|
|
line_delim = '\0'; |
|
|
break; |
|
|
|
|
|
case COMPLEMENT_OPTION: |
|
|
complement = true; |
|
|
break; |
|
|
|
|
|
case_GETOPT_HELP_CHAR; |
|
|
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
|
|
default: |
|
|
usage (EXIT_FAILURE); |
|
|
} |
|
|
} |
|
|
|
|
|
if (!spec_list_string) |
|
|
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); |
|
|
|
|
|
if (byte_mode) |
|
|
{ |
|
|
if (delim_specified) |
|
|
FATAL_ERROR (_("an input delimiter may be specified only\ |
|
|
when operating on fields")); |
|
|
|
|
|
if (suppress_non_delimited) |
|
|
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ |
|
|
\tonly when operating on fields")); |
|
|
} |
|
|
|
|
|
set_fields (spec_list_string, |
|
|
((byte_mode ? SETFLD_ERRMSG_USE_POS : 0) |
|
|
| (complement ? SETFLD_COMPLEMENT : 0))); |
|
|
|
|
|
if (!delim_specified) |
|
|
delim = '\t'; |
|
|
|
|
|
if (output_delimiter_string == nullptr) |
|
|
{ |
|
|
output_delimiter_default[0] = delim; |
|
|
output_delimiter_string = output_delimiter_default; |
|
|
output_delimiter_length = 1; |
|
|
} |
|
|
|
|
|
void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields; |
|
|
if (optind == argc) |
|
|
ok = cut_file ("-", cut_stream); |
|
|
else |
|
|
for (ok = true; optind < argc; optind++) |
|
|
ok &= cut_file (argv[optind], cut_stream); |
|
|
|
|
|
|
|
|
if (have_read_stdin && fclose (stdin) == EOF) |
|
|
{ |
|
|
error (0, errno, "-"); |
|
|
ok = false; |
|
|
} |
|
|
|
|
|
return ok ? EXIT_SUCCESS : EXIT_FAILURE; |
|
|
} |
|
|
|