Spaces:
Sleeping
Sleeping
#!/home/simon/prog/text-generation-webui/installer_files/env/bin/perl | |
eval 'exec /home/simon/prog/text-generation-webui/installer_files/env/bin/perl -S $0 ${1+"$@"}' | |
if 0; # ^ Run only under a shell | |
#!./perl | |
use 5.008001; | |
BEGIN { pop @INC if $INC[-1] eq '.' } | |
use strict; | |
use warnings; | |
use Encode; | |
use Getopt::Std; | |
use Carp; | |
use Encode::Guess; | |
$Getopt::Std::STANDARD_HELP_VERSION = 1; | |
my %opt; | |
getopts( "huSs:", \%opt ); | |
my @suspect_list; | |
list_valid_suspects() and exit if $opt{S}; | |
@suspect_list = split /:,/, $opt{s} if $opt{s}; | |
HELP_MESSAGE() if $opt{h}; | |
HELP_MESSAGE() unless @ARGV; | |
do_guess($_) for @ARGV; | |
sub read_file { | |
my $filename = shift; | |
local $/; | |
open my $fh, '<:raw', $filename or croak "$filename:$!"; | |
my $content = <$fh>; | |
close $fh; | |
return $content; | |
} | |
sub do_guess { | |
my $filename = shift; | |
my $data = read_file($filename); | |
my $enc = guess_encoding( $data, @suspect_list ); | |
if ( !ref($enc) && $opt{u} ) { | |
return 1; | |
} | |
print "$filename\t"; | |
if ( ref($enc) ) { | |
print $enc->mime_name(); | |
} | |
else { | |
print "unknown"; | |
} | |
print "\n"; | |
return 1; | |
} | |
sub list_valid_suspects { | |
print join( "\n", Encode->encodings(":all") ); | |
print "\n"; | |
return 1; | |
} | |
sub HELP_MESSAGE { | |
exec 'pod2usage', $0 or die "pod2usage: $!" | |
} | |
__END__ | |
=head1 NAME | |
encguess - guess character encodings of files | |
=head1 VERSION | |
$Id: encguess,v 0.3 2020/12/02 01:28:17 dankogai Exp dankogai $ | |
=head1 SYNOPSIS | |
encguess [switches] filename... | |
=head2 SWITCHES | |
=over 2 | |
=item -h | |
show this message and exit. | |
=item -s | |
specify a list of "suspect encoding types" to test, | |
separated by either C<:> or C<,> | |
=item -S | |
output a list of all acceptable encoding types that can be used with | |
the -s param | |
=item -u | |
suppress display of unidentified types | |
=back | |
=head2 EXAMPLES: | |
=over 2 | |
=item * | |
Guess encoding of a file named C<test.txt>, using only the default | |
suspect types. | |
encguess test.txt | |
=item * | |
Guess the encoding type of a file named C<test.txt>, using the suspect | |
types C<euc-jp,shiftjis,7bit-jis>. | |
encguess -s euc-jp,shiftjis,7bit-jis test.txt | |
encguess -s euc-jp:shiftjis:7bit-jis test.txt | |
=item * | |
Guess the encoding type of several files, do not display results for | |
unidentified files. | |
encguess -us euc-jp,shiftjis,7bit-jis test*.txt | |
=back | |
=head1 DESCRIPTION | |
The encoding identification is done by checking one encoding type at a | |
time until all but the right type are eliminated. The set of encoding | |
types to try is defined by the -s parameter and defaults to ascii, | |
utf8 and UTF-16/32 with BOM. This can be overridden by passing one or | |
more encoding types via the -s parameter. If you need to pass in | |
multiple suspect encoding types, use a quoted string with the a space | |
separating each value. | |
=head1 SEE ALSO | |
L<Encode::Guess>, L<Encode::Detect> | |
=head1 LICENSE AND COPYRIGHT | |
Copyright 2015 Michael LaGrasta and Dan Kogai. | |
This program is free software; you can redistribute it and/or modify it | |
under the terms of the the Artistic License (2.0). You may obtain a | |
copy of the full license at: | |
L<http://www.perlfoundation.org/artistic_license_2_0> | |
=cut | |