Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit f73c6ab

Browse files
committed
Unicode-Collate-1.25
1 parent ed7ecb8 commit f73c6ab

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+35144
-33886
lines changed

MANIFEST

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3021,6 +3021,7 @@ cpan/Unicode-Collate/Collate/Locale/cy.pl Unicode::Collate
30213021
cpan/Unicode-Collate/Collate/Locale/da.pl Unicode::Collate
30223022
cpan/Unicode-Collate/Collate/Locale/de_at_ph.pl
30233023
cpan/Unicode-Collate/Collate/Locale/de_phone.pl Unicode::Collate
3024+
cpan/Unicode-Collate/Collate/Locale/dsb.pl
30243025
cpan/Unicode-Collate/Collate/Locale/ee.pl
30253026
cpan/Unicode-Collate/Collate/Locale/eo.pl Unicode::Collate
30263027
cpan/Unicode-Collate/Collate/Locale/es.pl Unicode::Collate
@@ -3048,6 +3049,7 @@ cpan/Unicode-Collate/Collate/Locale/kl.pl Unicode::Collate
30483049
cpan/Unicode-Collate/Collate/Locale/kn.pl Unicode::Collate
30493050
cpan/Unicode-Collate/Collate/Locale/kok.pl Unicode::Collate
30503051
cpan/Unicode-Collate/Collate/Locale/ko.pl Unicode::Collate
3052+
cpan/Unicode-Collate/Collate/Locale/lkt.pl
30513053
cpan/Unicode-Collate/Collate/Locale/ln.pl Unicode::Collate
30523054
cpan/Unicode-Collate/Collate/Locale/lt.pl Unicode::Collate
30533055
cpan/Unicode-Collate/Collate/Locale/lv.pl Unicode::Collate
@@ -3138,6 +3140,7 @@ cpan/Unicode-Collate/t/loc_da.t Unicode::Collate
31383140
cpan/Unicode-Collate/t/loc_deat.t
31393141
cpan/Unicode-Collate/t/loc_deph.t Unicode::Collate
31403142
cpan/Unicode-Collate/t/loc_de.t Unicode::Collate
3143+
cpan/Unicode-Collate/t/loc_dsb.t
31413144
cpan/Unicode-Collate/t/loc_ee.t
31423145
cpan/Unicode-Collate/t/loc_eo.t Unicode::Collate
31433146
cpan/Unicode-Collate/t/loc_es.t Unicode::Collate
@@ -3168,6 +3171,7 @@ cpan/Unicode-Collate/t/loc_kl.t Unicode::Collate
31683171
cpan/Unicode-Collate/t/loc_kn.t Unicode::Collate
31693172
cpan/Unicode-Collate/t/loc_kok.t Unicode::Collate
31703173
cpan/Unicode-Collate/t/loc_ko.t Unicode::Collate
3174+
cpan/Unicode-Collate/t/loc_lkt.t
31713175
cpan/Unicode-Collate/t/loc_ln.t Unicode::Collate
31723176
cpan/Unicode-Collate/t/loc_lt.t Unicode::Collate
31733177
cpan/Unicode-Collate/t/loc_lv.t Unicode::Collate
@@ -3221,11 +3225,13 @@ cpan/Unicode-Collate/t/loc_zhzy.t
32213225
cpan/Unicode-Collate/t/nonchar.t Unicode::Collate
32223226
cpan/Unicode-Collate/t/normal.t Unicode::Collate
32233227
cpan/Unicode-Collate/t/notable.t
3228+
cpan/Unicode-Collate/t/nushu.t
32243229
cpan/Unicode-Collate/t/overcjk0.t Unicode::Collate
32253230
cpan/Unicode-Collate/t/overcjk1.t Unicode::Collate
32263231
cpan/Unicode-Collate/t/override.t Unicode::Collate
32273232
cpan/Unicode-Collate/t/rearrang.t Unicode::Collate
32283233
cpan/Unicode-Collate/t/rewrite.t Unicode::Collate
3234+
cpan/Unicode-Collate/t/tangut.t
32293235
cpan/Unicode-Collate/t/test.t Unicode::Collate
32303236
cpan/Unicode-Collate/t/trailwt.t Unicode::Collate
32313237
cpan/Unicode-Collate/t/variable.t Unicode::Collate

Porting/Maintainers.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1486,7 +1486,7 @@ package Maintainers;
14861486
},
14871487

14881488
'Unicode::Collate' => {
1489-
'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-1.19.tar.gz',
1489+
'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-1.25.tar.gz',
14901490
'FILES' => q[cpan/Unicode-Collate],
14911491
'EXCLUDED' => [
14921492
qr{N$},

cpan/Unicode-Collate/Collate.pm

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,16 @@ use File::Spec;
1717

1818
no warnings 'utf8';
1919

20-
our $VERSION = '1.19';
20+
our $VERSION = '1.25';
2121
our $PACKAGE = __PACKAGE__;
2222

2323
### begin XS only ###
24-
require DynaLoader;
25-
our @ISA = qw(DynaLoader);
26-
bootstrap Unicode::Collate $VERSION;
24+
use XSLoader ();
25+
XSLoader::load('Unicode::Collate', $VERSION);
2726
### end XS only ###
2827

2928
my @Path = qw(Unicode Collate);
30-
my $KeyFile = "allkeys.txt";
29+
my $KeyFile = 'allkeys.txt';
3130

3231
# Perl's boolean
3332
use constant TRUE => 1;
@@ -89,9 +88,9 @@ my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
8988
my $HighestVCE = pack(VCE_TEMPLATE, 0, 0xFFFE, 0x20, 0x5, 0xFFFF);
9089
my $minimalVCE = pack(VCE_TEMPLATE, 0, 1, 0x20, 0x5, 0xFFFE);
9190

92-
sub UCA_Version { "32" }
91+
sub UCA_Version { '34' }
9392

94-
sub Base_Unicode_Version { "8.0.0" }
93+
sub Base_Unicode_Version { '9.0.0' }
9594

9695
######
9796

@@ -190,11 +189,13 @@ my %DerivCode = (
190189
28 => \&_derivCE_24, # 28 == 24
191190
30 => \&_derivCE_24, # 30 == 24
192191
32 => \&_derivCE_32,
192+
34 => \&_derivCE_34,
193+
36 => \&_derivCE_36,
193194
);
194195

195196
sub checkCollator {
196197
my $self = shift;
197-
_checkLevel($self->{level}, "level");
198+
_checkLevel($self->{level}, 'level');
198199

199200
$self->{derivCode} = $DerivCode{ $self->{UCA_Version} }
200201
or croak "Illegal UCA version (passed $self->{UCA_Version}).";
@@ -208,13 +209,13 @@ sub checkCollator {
208209
if (! defined $self->{backwards}) {
209210
$self->{backwardsFlag} = 0;
210211
} elsif (! ref $self->{backwards}) {
211-
_checkLevel($self->{backwards}, "backwards");
212+
_checkLevel($self->{backwards}, 'backwards');
212213
$self->{backwardsFlag} = 1 << $self->{backwards};
213214
} else {
214215
my %level;
215216
$self->{backwardsFlag} = 0;
216217
for my $b (@{ $self->{backwards} }) {
217-
_checkLevel($b, "backwards");
218+
_checkLevel($b, 'backwards');
218219
$level{$b} = 1;
219220
}
220221
for my $v (sort keys %level) {
@@ -439,13 +440,17 @@ sub parseEntry
439440
# if and only if "all" CEs are [.0000.0000.0000].
440441
}
441442

443+
# mapping: each value must be an array ref, or the key must not exist (no false values are stored)
442444
$self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key;
443445

446+
# maxlength: each value must be greater than 1, or the key must not exist (no false values are stored)
444447
if (@uv > 1) {
445448
if (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv) {
446449
$self->{maxlength}{$uv[0]} = @uv;
447450
}
448451
}
452+
453+
# contraction: each value must be 1, or the key must not exist (no false values are stored)
449454
while (@uv > 2) {
450455
pop @uv;
451456
my $fake_entry = join(CODE_SEP, @uv); # in JCPS
@@ -514,7 +519,7 @@ sub splitEnt
514519
if ($vers <= 20 && _isIllegal($src[$i])) {
515520
$src[$i] = undef;
516521
} elsif ($ver9) {
517-
$src[$i] = undef if $map->{ $src[$i] }
522+
$src[$i] = undef if exists $map->{ $src[$i] }
518523
? @{ $map->{ $src[$i] } } == 0
519524
: $uXS && _ignorable_simple($src[$i]); ### XS only
520525
}
@@ -534,7 +539,7 @@ sub splitEnt
534539
my $i_orig = $i;
535540

536541
# find contraction
537-
if ($max->{$jcps}) {
542+
if (exists $max->{$jcps}) {
538543
my $temp_jcps = $jcps;
539544
my $jcpsLen = 1;
540545
my $maxLen = $max->{$jcps};
@@ -543,7 +548,7 @@ sub splitEnt
543548
next if ! defined $src[$p];
544549
$temp_jcps .= CODE_SEP . $src[$p];
545550
$jcpsLen++;
546-
if ($map->{$temp_jcps}) {
551+
if (exists $map->{$temp_jcps}) {
547552
$jcps = $temp_jcps;
548553
$i = $p;
549554
}
@@ -570,7 +575,7 @@ sub splitEnt
570575
last unless $curCC;
571576
my $tail = CODE_SEP . $src[$p];
572577

573-
if ($preCC != $curCC && $map->{$jcps.$tail}) {
578+
if ($preCC != $curCC && exists $map->{$jcps.$tail}) {
574579
$jcps .= $tail;
575580
push @out, $p;
576581
} else {
@@ -579,16 +584,17 @@ sub splitEnt
579584

580585
next if !$long;
581586

582-
if ($preCC_uc != $curCC && ($map->{$jcps_uc.$tail} ||
583-
$cont->{$jcps_uc.$tail})) {
587+
if ($preCC_uc != $curCC &&
588+
(exists $map->{$jcps_uc.$tail} ||
589+
exists $cont->{$jcps_uc.$tail})) {
584590
$jcps_uc .= $tail;
585591
push @out_uc, $p;
586592
} else {
587593
$preCC_uc = $curCC;
588594
}
589595
}
590596

591-
if (@out_uc && $map->{$jcps_uc}) {
597+
if (@out_uc && exists $map->{$jcps_uc}) {
592598
$jcps = $jcps_uc;
593599
$src[$_] = undef for @out_uc;
594600
} else {
@@ -598,7 +604,7 @@ sub splitEnt
598604
}
599605

600606
# skip completely ignorable
601-
if ($map->{$jcps} ? @{ $map->{$jcps} } == 0 :
607+
if (exists $map->{$jcps} ? @{ $map->{$jcps} } == 0 :
602608
$uXS && $jcps !~ /;/ && _ignorable_simple($jcps)) { ### XS only
603609
if ($wLen && @buf) {
604610
$buf[-1][2] = $i + 1;
@@ -647,7 +653,7 @@ sub getWt
647653
$u = 0xFFFD if $u !~ /;/ && 0x10FFFF < $u && !$out;
648654

649655
my @ce;
650-
if ($map->{$u}) {
656+
if (exists $map->{$u}) {
651657
@ce = @{ $map->{$u} }; # $u may be a contraction
652658
### begin XS only ###
653659
} elsif ($uXS && _exists_simple($u)) {
@@ -665,27 +671,27 @@ sub getWt
665671

666672
if (@decH == 2) {
667673
my $contract = join(CODE_SEP, @decH);
668-
@decH = ($contract) if $map->{$contract};
674+
@decH = ($contract) if exists $map->{$contract};
669675
} else { # must be <@decH == 3>
670-
if ($max->{$decH[0]}) {
676+
if (exists $max->{$decH[0]}) {
671677
my $contract = join(CODE_SEP, @decH);
672-
if ($map->{$contract}) {
678+
if (exists $map->{$contract}) {
673679
@decH = ($contract);
674680
} else {
675681
$contract = join(CODE_SEP, @decH[0,1]);
676-
$map->{$contract} and @decH = ($contract, $decH[2]);
682+
exists $map->{$contract} and @decH = ($contract, $decH[2]);
677683
}
678684
# even if V's ignorable, LT contraction is not supported.
679685
# If such a situation were required, NFD should be used.
680686
}
681-
if (@decH == 3 && $max->{$decH[1]}) {
687+
if (@decH == 3 && exists $max->{$decH[1]}) {
682688
my $contract = join(CODE_SEP, @decH[1,2]);
683-
$map->{$contract} and @decH = ($decH[0], $contract);
689+
exists $map->{$contract} and @decH = ($decH[0], $contract);
684690
}
685691
}
686692

687693
@ce = map({
688-
$map->{$_} ? @{ $map->{$_} } :
694+
exists $map->{$_} ? @{ $map->{$_} } :
689695
$uXS && _exists_simple($_) ? _fetch_simple($_) : ### XS only
690696
$der->($_);
691697
} @decH);
@@ -1098,7 +1104,7 @@ If the revision (previously "tracking version") number of UCA is given,
10981104
behavior of that revision is emulated on collating.
10991105
If omitted, the return value of C<UCA_Version()> is used.
11001106
1101-
The following revisions are supported. The default is 32.
1107+
The following revisions are supported. The default is 34.
11021108
11031109
UCA Unicode Standard DUCET (@version)
11041110
-------------------------------------------------------
@@ -1115,6 +1121,8 @@ The following revisions are supported. The default is 32.
11151121
28 6.3.0 6.3.0 (6.3.0)
11161122
30 7.0.0 7.0.0 (7.0.0)
11171123
32 8.0.0 8.0.0 (8.0.0)
1124+
34 9.0.0 9.0.0 (9.0.0)
1125+
36 10.0.0 10.0.0 (10.0.0)
11181126
11191127
* See below for C<long_contraction> with C<UCA_Version> 22 and 24.
11201128
@@ -1454,13 +1462,15 @@ those in the CJK Unified Ideographs Extension A etc.
14541462
U+4E00..U+9FC3 if UCA_Version is 18.
14551463
U+4E00..U+9FCB if UCA_Version is 20 or 22.
14561464
U+4E00..U+9FCC if UCA_Version is 24 to 30.
1457-
U+4E00..U+9FD5 if UCA_Version is 32.
1465+
U+4E00..U+9FD5 if UCA_Version is 32 or 34.
1466+
U+4E00..U+9FEA if UCA_Version is 36.
14581467
14591468
In the CJK Unified Ideographs Extension blocks:
14601469
Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version.
14611470
Ext.C (U+2A700..U+2B734) if UCA_Version is 20 or later.
14621471
Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or later.
1463-
Ext.E (U+2B820..U+2CEA1) if UCA_Version is 32.
1472+
Ext.E (U+2B820..U+2CEA1) if UCA_Version is 32 or later.
1473+
Ext.F (U+2CEB0..U+2EBE0) if UCA_Version is 36.
14641474
14651475
Through C<overrideCJK>, ordering of CJK unified ideographs (including
14661476
extensions) can be overridden.
@@ -2037,7 +2047,8 @@ The most preferable one is "The Default Unicode Collation Element Table"
20372047
20382048
http://www.unicode.org/Public/UCA/
20392049
2040-
http://www.unicode.org/Public/UCA/latest/allkeys.txt (latest version)
2050+
http://www.unicode.org/Public/UCA/latest/allkeys.txt
2051+
(latest version)
20412052
20422053
If DUCET is not installed, it is recommended to copy the file
20432054
from http://www.unicode.org/Public/UCA/latest/allkeys.txt
@@ -2081,15 +2092,15 @@ B<Unicode::Normalize is required to try The Conformance Test.>
20812092
=head1 AUTHOR, COPYRIGHT AND LICENSE
20822093
20832094
The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
2084-
<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2016,
2095+
<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2017,
20852096
SADAHIRO Tomoyuki. Japan. All rights reserved.
20862097
20872098
This module is free software; you can redistribute it and/or
20882099
modify it under the same terms as Perl itself.
20892100
20902101
The file Unicode/Collate/allkeys.txt was copied verbatim
2091-
from L<http://www.unicode.org/Public/UCA/8.0.0/allkeys.txt>.
2092-
For this file, Copyright (c) 2001-2015 Unicode, Inc.; distributed
2102+
from L<http://www.unicode.org/Public/UCA/9.0.0/allkeys.txt>.
2103+
For this file, Copyright (c) 2016 Unicode, Inc.; distributed
20932104
under the Terms of Use in L<http://www.unicode.org/terms_of_use.html>
20942105
20952106
=head1 SEE ALSO

0 commit comments

Comments
 (0)