Skip to content

Commit 245aba3

Browse files
committed
idna: implement support for reporting errors on invalid IDNA2008 characters
1 parent ed86319 commit 245aba3

File tree

4 files changed: 2077 additions and 1302 deletions.

idna/src/make_uts46_mapping_table.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ def rust_slice(s):
7878
unicode_str = u''.join(char(c) for c in fields[2].strip().split(' '))
7979
elif mapping == "Deviation":
8080
unicode_str = u''
81+
82+
if len(fields) > 3:
83+
assert fields[3].strip() in ('NV8', 'XV8'), fields[3]
84+
assert mapping == 'Valid', mapping
85+
mapping = 'DisallowedIdna2008'
86+
8187
ranges.append((first, last, mapping, unicode_str))
8288

8389
def mergeable_key(r):
@@ -86,7 +92,7 @@ def mergeable_key(r):
8692
# These types have associated data, so we should not merge them.
8793
if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'):
8894
return r
89-
assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid')
95+
assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid', 'DisallowedIdna2008')
9096
return mapping
9197

9298
grouped_ranges = itertools.groupby(ranges, key=mergeable_key)

idna/src/uts46.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum Mapping {
4848
Disallowed,
4949
DisallowedStd3Valid,
5050
DisallowedStd3Mapped(StringTableSlice),
51+
DisallowedIdna2008,
5152
}
5253

5354
struct Range {
@@ -140,6 +141,12 @@ impl<'a> Iterator for Mapper<'a> {
140141
self.slice = Some(decode_slice(slice).chars());
141142
continue;
142143
}
144+
Mapping::DisallowedIdna2008 => {
145+
if self.config.use_idna_2008_rules {
146+
self.errors.disallowed_in_idna_2008 = true;
147+
}
148+
codepoint
149+
}
143150
});
144151
}
145152
}
@@ -310,7 +317,7 @@ fn check_validity(label: &str, config: Config, errors: &mut Errors) {
310317

311318
// V6: Check against Mapping Table
312319
if label.chars().any(|c| match *find_char(c) {
313-
Mapping::Valid => false,
320+
Mapping::Valid | Mapping::DisallowedIdna2008 => false,
314321
Mapping::Deviation(_) => config.transitional_processing,
315322
Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
316323
_ => true,
@@ -510,6 +517,7 @@ pub struct Config {
510517
transitional_processing: bool,
511518
verify_dns_length: bool,
512519
check_hyphens: bool,
520+
use_idna_2008_rules: bool,
513521
}
514522

515523
/// The defaults are that of https://url.spec.whatwg.org/#idna
@@ -524,6 +532,7 @@ impl Default for Config {
524532

525533
// Only use for to_ascii, not to_unicode
526534
verify_dns_length: false,
535+
use_idna_2008_rules: false,
527536
}
528537
}
529538
}
@@ -553,6 +562,12 @@ impl Config {
553562
self
554563
}
555564

565+
#[inline]
566+
pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
567+
self.use_idna_2008_rules = value;
568+
self
569+
}
570+
556571
/// http://www.unicode.org/reports/tr46/#ToASCII
557572
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
558573
let mut result = String::new();
@@ -599,6 +614,7 @@ pub struct Errors {
599614
disallowed_character: bool,
600615
too_long_for_dns: bool,
601616
too_short_for_dns: bool,
617+
disallowed_in_idna_2008: bool,
602618
}
603619

604620
impl Errors {
@@ -615,6 +631,7 @@ impl Errors {
615631
disallowed_character,
616632
too_long_for_dns,
617633
too_short_for_dns,
634+
disallowed_in_idna_2008,
618635
} = *self;
619636
punycode
620637
|| check_hyphens
@@ -627,6 +644,7 @@ impl Errors {
627644
|| disallowed_character
628645
|| too_long_for_dns
629646
|| too_short_for_dns
647+
|| disallowed_in_idna_2008
630648
}
631649
}
632650

@@ -644,6 +662,7 @@ impl fmt::Debug for Errors {
644662
disallowed_character,
645663
too_long_for_dns,
646664
too_short_for_dns,
665+
disallowed_in_idna_2008,
647666
} = *self;
648667

649668
let fields = [
@@ -661,6 +680,7 @@ impl fmt::Debug for Errors {
661680
("disallowed_character", disallowed_character),
662681
("too_long_for_dns", too_long_for_dns),
663682
("too_short_for_dns", too_short_for_dns),
683+
("disallowed_in_idna_2008", disallowed_in_idna_2008),
664684
];
665685

666686
let mut empty = true;

0 commit comments

Comments
 (0)