######################################################################
#
# make_CaseFolding.pl - make fc() table to update Char.pm
#
# Copyright (c) 2014, 2015 INABA Hitoshi <ina@cpan.org>
#
######################################################################

use 5.00503;
use strict;

unless (@ARGV) {
    die <<END;
usage:

1. download "CaseFolding.txt" from ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt

2. $^X $0 CaseFolding.txt > fc_table.txt

3. update Char.pm using fc_table.txt

END
}

my $casefolding_file = shift @ARGV;

binmode(STDOUT);

my @script_utf8_code = ();
my @script_utf8_mapping = ();
my @script_name = ();
my %done = ();
open(CASEFOLDING,$casefolding_file) || die "Can't open file: $casefolding_file\n";
while (<CASEFOLDING>) {
    next if /^#/;
    chomp;
    if (my($code,$status,$mapping,$name) = /^([^;]+);\s*([^;]+);\s*([^;]+);\s*(#.*)$/) {

        # Usage:
        #  A. To do a simple case folding, use the mappings with status C + S.
        #  B. To do a full case folding, use the mappings with status C + F.

        if ($status =~ /[CF]/) {
            if ($done{$code}) {
                die <<END;
code $code was done.\n";
$done{$code}
$_
END
            }
            $done{$code} = $_;
            my $utf8_code = utf8($code);
            my $utf8_mapping = join('', map {utf8($_)} split(/ /,$mapping));
            push @script_utf8_code, qq{"$utf8_code"};
            push @script_utf8_mapping, qq{"$utf8_mapping",};
            push @script_name, $name;
        }
    }
}
close(CASEFOLDING);

my($length1) = sort {$b <=> $a} map {length($_)} @script_utf8_code;
my($length2) = sort {$b <=> $a} map {length($_)} @script_utf8_mapping;
while (@script_utf8_code) {
    printf(qq{        %-${length1}s => %-${length2}s %s\n},
        shift @script_utf8_code,
        shift @script_utf8_mapping,
        shift @script_name,
    );
}

sub utf8 {
    my($unicode) = @_;
    my $bin = join('',
        map {{qw(
            0 0000
            1 0001
            2 0010
            3 0011
            4 0100
            5 0101
            6 0110
            7 0111
            8 1000
            9 1001
            A 1010
            B 1011
            C 1100
            D 1101
            E 1110
            F 1111
        )}->{$_}} split(//,uc($unicode))
    );
    my $bin24 = substr(('0' x 24) . $bin, -24, 24);

    if (0) {
    } #                0123 4567 8901 2345 6789 0123
    elsif ($bin24 =~ /^0000 0000 0000 0000 0... ....$/x) { # 0..7F
        return join('', map {"\\x\U$_"}
            unpack('H2',pack('B*', '0'.substr($bin24,17,7))),
        );
    } #                0123 4567 8901 2345 6789 0123
    elsif ($bin24 =~ /^0000 0000 0000 0... .... ....$/x) { #  ..7FF
        return join('', map {"\\x\U$_"}
            unpack('H2',pack('B*', '110'.substr($bin24,13,5))),
            unpack('H2',pack('B*', '10' .substr($bin24,18,6))),
        );
    } #                0123 4567 8901 2345 6789 0123
    elsif ($bin24 =~ /^0000 0000 .... .... .... ....$/x) { #  ..FFFF
        return join('', map {"\\x\U$_"}
            unpack('H2',pack('B*', '1110'.substr($bin24, 8,4))),
            unpack('H2',pack('B*', '10'  .substr($bin24,12,6))),
            unpack('H2',pack('B*', '10'  .substr($bin24,18,6))),
        );
    } #                0123 4567 8901 2345 6789 0123
    elsif ($bin24 =~ /^000. .... .... .... .... ....$/x) { #  ..1FFFFF
        return join('', map {"\\x\U$_"}
            unpack('H2',pack('B*', '11110'.substr($bin24, 3,3))),
            unpack('H2',pack('B*', '10'   .substr($bin24, 6,6))),
            unpack('H2',pack('B*', '10'   .substr($bin24,12,6))),
            unpack('H2',pack('B*', '10'   .substr($bin24,18,6))),
        );
    }
    else {
        die "Can't encode ($unicode) into UTF-8.\n";
    }
}

__END__

=pod

=head1 NAME

make_CaseFolding.pl - make fc() table to update Char.pm

=head1 SYNOPSIS

  1. download "CaseFolding.txt" from ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
  2. perl make_CaseFolding.pl CaseFolding.txt > fc_table.txt
  3. update Char.pm using fc_table.txt

=head1 DEPENDENCIES

This software requires perl5.00503 or later.

=head1 AUTHOR

INABA Hitoshi E<lt>ina@cpan.orgE<gt>

This project was originated by INABA Hitoshi.

=head1 LICENSE AND COPYRIGHT

This software is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

This software is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=head1 SEE ALSO

 CPAN Directory INABA Hitoshi
 http://search.cpan.org/~ina/

 BackPAN
 http://backpan.perl.org/authors/id/I/IN/INA/

 Recent Perl packages by "INABA Hitoshi"
 http://code.activestate.com/ppm/author:INABA-Hitoshi/

=cut

