@@ -17,17 +17,16 @@ use File::Spec;
1717
1818no warnings ' utf8' ;
1919
20- our $VERSION = ' 1.19 ' ;
20+ our $VERSION = ' 1.25 ' ;
2121our $PACKAGE = __PACKAGE__ ;
2222
2323# ## begin XS only ###
24- require DynaLoader;
25- our @ISA = qw( DynaLoader) ;
26- bootstrap Unicode::Collate $VERSION ;
24+ use XSLoader ();
25+ XSLoader::load(' Unicode::Collate' , $VERSION );
2726# ## end XS only ###
2827
2928my @Path = qw( Unicode Collate) ;
30- my $KeyFile = " allkeys.txt" ;
29+ my $KeyFile = ' allkeys.txt' ;
3130
3231# Perl's boolean
3332use constant TRUE => 1;
@@ -89,9 +88,9 @@ my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
8988my $HighestVCE = pack (VCE_TEMPLATE, 0, 0xFFFE, 0x20, 0x5, 0xFFFF);
9089my $minimalVCE = pack (VCE_TEMPLATE, 0, 1, 0x20, 0x5, 0xFFFE);
9190
92- sub UCA_Version { " 32 " }
91+ sub UCA_Version { ' 34 ' }
9392
94- sub Base_Unicode_Version { " 8 .0.0" }
93+ sub Base_Unicode_Version { ' 9 .0.0' }
9594
9695# #####
9796
@@ -190,11 +189,13 @@ my %DerivCode = (
190189 28 => \&_derivCE_24, # 28 == 24
191190 30 => \&_derivCE_24, # 30 == 24
192191 32 => \&_derivCE_32,
192+ 34 => \&_derivCE_34,
193+ 36 => \&_derivCE_36,
193194);
194195
195196sub checkCollator {
196197 my $self = shift ;
197- _checkLevel($self -> {level }, " level" );
198+ _checkLevel($self -> {level }, ' level' );
198199
199200 $self -> {derivCode } = $DerivCode { $self -> {UCA_Version } }
200201 or croak " Illegal UCA version (passed $self ->{UCA_Version})." ;
@@ -208,13 +209,13 @@ sub checkCollator {
208209 if (! defined $self -> {backwards }) {
209210 $self -> {backwardsFlag } = 0;
210211 } elsif (! ref $self -> {backwards }) {
211- _checkLevel($self -> {backwards }, " backwards" );
212+ _checkLevel($self -> {backwards }, ' backwards' );
212213 $self -> {backwardsFlag } = 1 << $self -> {backwards };
213214 } else {
214215 my %level ;
215216 $self -> {backwardsFlag } = 0;
216217 for my $b (@{ $self -> {backwards } }) {
217- _checkLevel($b , " backwards" );
218+ _checkLevel($b , ' backwards' );
218219 $level {$b } = 1;
219220 }
220221 for my $v (sort keys %level ) {
@@ -439,13 +440,17 @@ sub parseEntry
439440 # if and only if "all" CEs are [.0000.0000.0000].
440441 }
441442
443+ # mapping: the value must be an array ref, or the key must not exist (a false value is never stored)
442444 $self -> {mapping }{$entry } = $is_L3_ignorable ? [] : \@key ;
443445
446+ # maxlength: the value must be greater than 1, or the key must not exist (a false value is never stored)
444447 if (@uv > 1) {
445448 if (!$self -> {maxlength }{$uv [0]} || $self -> {maxlength }{$uv [0]} < @uv ) {
446449 $self -> {maxlength }{$uv [0]} = @uv ;
447450 }
448451 }
452+
453+ # contraction: the value must be 1, or the key must not exist (a false value is never stored)
449454 while (@uv > 2) {
450455 pop @uv ;
451456 my $fake_entry = join (CODE_SEP, @uv ); # in JCPS
@@ -514,7 +519,7 @@ sub splitEnt
514519 if ($vers <= 20 && _isIllegal($src [$i ])) {
515520 $src [$i ] = undef ;
516521 } elsif ($ver9 ) {
517- $src [$i ] = undef if $map -> { $src [$i ] }
522+ $src [$i ] = undef if exists $map -> { $src [$i ] }
518523 ? @{ $map -> { $src [$i ] } } == 0
519524 : $uXS && _ignorable_simple($src [$i ]); # ## XS only
520525 }
@@ -534,7 +539,7 @@ sub splitEnt
534539 my $i_orig = $i ;
535540
536541 # find contraction
537- if ($max -> {$jcps }) {
542+ if (exists $max -> {$jcps }) {
538543 my $temp_jcps = $jcps ;
539544 my $jcpsLen = 1;
540545 my $maxLen = $max -> {$jcps };
@@ -543,7 +548,7 @@ sub splitEnt
543548 next if ! defined $src [$p ];
544549 $temp_jcps .= CODE_SEP . $src [$p ];
545550 $jcpsLen ++;
546- if ($map -> {$temp_jcps }) {
551+ if (exists $map -> {$temp_jcps }) {
547552 $jcps = $temp_jcps ;
548553 $i = $p ;
549554 }
@@ -570,7 +575,7 @@ sub splitEnt
570575 last unless $curCC ;
571576 my $tail = CODE_SEP . $src [$p ];
572577
573- if ($preCC != $curCC && $map -> {$jcps .$tail }) {
578+ if ($preCC != $curCC && exists $map -> {$jcps .$tail }) {
574579 $jcps .= $tail ;
575580 push @out , $p ;
576581 } else {
@@ -579,16 +584,17 @@ sub splitEnt
579584
580585 next if !$long ;
581586
582- if ($preCC_uc != $curCC && ($map -> {$jcps_uc .$tail } ||
583- $cont -> {$jcps_uc .$tail })) {
587+ if ($preCC_uc != $curCC &&
588+ (exists $map -> {$jcps_uc .$tail } ||
589+ exists $cont -> {$jcps_uc .$tail })) {
584590 $jcps_uc .= $tail ;
585591 push @out_uc , $p ;
586592 } else {
587593 $preCC_uc = $curCC ;
588594 }
589595 }
590596
591- if (@out_uc && $map -> {$jcps_uc }) {
597+ if (@out_uc && exists $map -> {$jcps_uc }) {
592598 $jcps = $jcps_uc ;
593599 $src [$_ ] = undef for @out_uc ;
594600 } else {
@@ -598,7 +604,7 @@ sub splitEnt
598604 }
599605
600606 # skip completely ignorable
601- if ($map -> {$jcps } ? @{ $map -> {$jcps } } == 0 :
607+ if (exists $map -> {$jcps } ? @{ $map -> {$jcps } } == 0 :
602608 $uXS && $jcps !~ / ;/ && _ignorable_simple($jcps )) { # ## XS only
603609 if ($wLen && @buf ) {
604610 $buf [-1][2] = $i + 1;
@@ -647,7 +653,7 @@ sub getWt
647653 $u = 0xFFFD if $u !~ / ;/ && 0x10FFFF < $u && !$out ;
648654
649655 my @ce ;
650- if ($map -> {$u }) {
656+ if (exists $map -> {$u }) {
651657 @ce = @{ $map -> {$u } }; # $u may be a contraction
652658# ## begin XS only ###
653659 } elsif ($uXS && _exists_simple($u )) {
@@ -665,27 +671,27 @@ sub getWt
665671
666672 if (@decH == 2) {
667673 my $contract = join (CODE_SEP, @decH );
668- @decH = ($contract ) if $map -> {$contract };
674+ @decH = ($contract ) if exists $map -> {$contract };
669675 } else { # must be <@decH == 3>
670- if ($max -> {$decH [0]}) {
676+ if (exists $max -> {$decH [0]}) {
671677 my $contract = join (CODE_SEP, @decH );
672- if ($map -> {$contract }) {
678+ if (exists $map -> {$contract }) {
673679 @decH = ($contract );
674680 } else {
675681 $contract = join (CODE_SEP, @decH [0,1]);
676- $map -> {$contract } and @decH = ($contract , $decH [2]);
682+ exists $map -> {$contract } and @decH = ($contract , $decH [2]);
677683 }
678684 # even if V's ignorable, LT contraction is not supported.
679685 # If such a situation were required, NFD should be used.
680686 }
681- if (@decH == 3 && $max -> {$decH [1]}) {
687+ if (@decH == 3 && exists $max -> {$decH [1]}) {
682688 my $contract = join (CODE_SEP, @decH [1,2]);
683- $map -> {$contract } and @decH = ($decH [0], $contract );
689+ exists $map -> {$contract } and @decH = ($decH [0], $contract );
684690 }
685691 }
686692
687693 @ce = map ({
688- $map -> {$_ } ? @{ $map -> {$_ } } :
694+ exists $map -> {$_ } ? @{ $map -> {$_ } } :
689695 $uXS && _exists_simple($_ ) ? _fetch_simple($_ ) : # ## XS only
690696 $der -> ($_ );
691697 } @decH );
@@ -1098,7 +1104,7 @@ If the revision (previously "tracking version") number of UCA is given,
10981104behavior of that revision is emulated on collating.
10991105If omitted, the return value of C<UCA_Version() > is used.
11001106
1101- The following revisions are supported. The default is 32 .
1107+ The following revisions are supported. The default is 34 .
11021108
11031109 UCA Unicode Standard DUCET (@version)
11041110 -------------------------------------------------------
@@ -1115,6 +1121,8 @@ The following revisions are supported. The default is 32.
11151121 28 6.3.0 6.3.0 (6.3.0)
11161122 30 7.0.0 7.0.0 (7.0.0)
11171123 32 8.0.0 8.0.0 (8.0.0)
1124+ 34 9.0.0 9.0.0 (9.0.0)
1125+ 36 10.0.0 10.0.0(10.0.0)
11181126
11191127* See below for C<long_contraction > with C<UCA_Version > 22 and 24.
11201128
@@ -1454,13 +1462,15 @@ those in the CJK Unified Ideographs Extension A etc.
14541462 U+4E00..U+9FC3 if UCA_Version is 18.
14551463 U+4E00..U+9FCB if UCA_Version is 20 or 22.
14561464 U+4E00..U+9FCC if UCA_Version is 24 to 30.
1457- U+4E00..U+9FD5 if UCA_Version is 32.
1465+ U+4E00..U+9FD5 if UCA_Version is 32 or 34.
1466+ U+4E00..U+9FEA if UCA_Version is 36.
14581467
14591468 In the CJK Unified Ideographs Extension blocks:
14601469 Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version.
14611470 Ext.C (U+2A700..U+2B734) if UCA_Version is 20 or later.
14621471 Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or later.
1463- Ext.E (U+2B820..U+2CEA1) if UCA_Version is 32.
1472+ Ext.E (U+2B820..U+2CEA1) if UCA_Version is 32 or later.
1473+ Ext.F (U+2CEB0..U+2EBE0) if UCA_Version is 36.
14641474
14651475Through C<overrideCJK > , ordering of CJK unified ideographs (including
14661476extensions) can be overridden.
@@ -2037,7 +2047,8 @@ The most preferable one is "The Default Unicode Collation Element Table"
20372047
20382048 http://www.unicode.org/Public/UCA/
20392049
2040- http://www.unicode.org/Public/UCA/latest/allkeys.txt (latest version)
2050+ http://www.unicode.org/Public/UCA/latest/allkeys.txt
2051+ (latest version)
20412052
20422053If DUCET is not installed, it is recommended to copy the file
20432054from http://www.unicode.org/Public/UCA/latest/allkeys.txt
@@ -2081,15 +2092,15 @@ B<Unicode::Normalize is required to try The Conformance Test.>
20812092=head1 AUTHOR, COPYRIGHT AND LICENSE
20822093
20832094The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
2084- <SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2016 ,
2095+ <SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2017 ,
20852096SADAHIRO Tomoyuki. Japan. All rights reserved.
20862097
20872098This module is free software; you can redistribute it and/or
20882099modify it under the same terms as Perl itself.
20892100
20902101The file Unicode/Collate/allkeys.txt was copied verbatim
2091- from L<http://www.unicode.org/Public/UCA/8 .0.0/allkeys.txt> .
2092- For this file, Copyright (c) 2001-2015 Unicode, Inc.; distributed
2102+ from L<http://www.unicode.org/Public/UCA/9 .0.0/allkeys.txt> .
2103+ For this file, Copyright (c) 2016 Unicode, Inc.; distributed
20932104under the Terms of Use in L<http://www.unicode.org/terms_of_use.html>
20942105
20952106=head1 SEE ALSO
0 commit comments