From adf3d7fa8b50faf11805dfd7758c2450f250fcbc Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 3 Oct 2024 13:00:15 +0200 Subject: [PATCH 01/21] [base] Add module 'new_name' The goal of the 'new_name' module is to provide a simpler and more efficient implementation of the basic domain name types. Rather than being generic over the underlying byte sequence, 'Name' and 'RelName' are just unsized byte slices, leaving their allocation up to the user. Their methods are entirely based in slice manipulations, rather than through generic label iteration. They should be much more performant, but we are lacking benchmarks. --- src/base/mod.rs | 1 + src/base/new_name/mod.rs | 611 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 612 insertions(+) create mode 100644 src/base/new_name/mod.rs diff --git a/src/base/mod.rs b/src/base/mod.rs index f242ad058..b29cd58d5 100644 --- a/src/base/mod.rs +++ b/src/base/mod.rs @@ -115,6 +115,7 @@ pub mod message; pub mod message_builder; pub mod name; pub mod net; +pub mod new_name; pub mod opt; pub mod question; pub mod rdata; diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs new file mode 100644 index 000000000..da4236898 --- /dev/null +++ b/src/base/new_name/mod.rs @@ -0,0 +1,611 @@ +//! Domain names. +//! +//! A _domain name_ is a sequence of _labels_ that names an entity within a +//! hierarchy. In the domain name `www.example.org.`, the hierarchy is: `.` +//! (the root) -> `org.` -> `example.org.` -> `www.example.org.`. Labels are +//! stored in reverse order, from innermost to outermost. + +use core::{cmp, iter}; + +/// An absolute domain name. +#[repr(transparent)] +pub struct Name([u8]); + +impl Name { + /// The maximum size of an absolute domain name in the wire format. + pub const MAX_SIZE: usize = 255; + + /// The root name. + pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; +} + +impl Name { + /// Assume a byte string is a valid [`Name`]. 
+ /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and within + /// the size restriction (255 bytes or fewer). It must be absolute. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Name' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Name`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire format. + /// If it is not properly encoded, an error is returned. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { + // Without the last byte, this should be a relative name. + let (root, rel_name) = bytes.split_last().ok_or(NameError)?; + + if RelName::from_bytes(rel_name).is_err() { + return Err(NameError); + } else if *root != 0u8 { + // The last byte must be a root label. + return Err(NameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl Name { + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.len() == 1 + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If this is a + /// root name, [`None`] is returned. + pub fn parent(&self) -> Option<&Self> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// The whole name without the root label. 
+ /// + /// If this is a root name, an empty relative name is returned. + /// + /// This is equivalent to `self.strip_suffix(Name::ROOT).unwrap()`. + pub fn without_root(&self) -> &RelName { + let bytes = &self.as_bytes()[..self.len() - 1]; + // SAFETY: A slice of labels (as from 'self') is a relative name. + unsafe { RelName::from_bytes_unchecked(bytes) } + } + + /// Whether this name starts with a particular relative name. + pub fn starts_with(&self, that: &RelName) -> bool { + if self.len() < that.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + } + + /// Whether this name ends with a particular absolute name. + pub fn ends_with(&self, that: &Self) -> bool { + if self.len() < that.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. + + let mut index = 0usize; + let offset = self.len() - that.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + } +} + +impl Name { + /// Split this name into a label and the rest. + /// + /// If this is the root name, [`None`] is returned. The returned label will + /// always be non-empty. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. 
+ let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 253 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. Otherwise, [`None`] is + /// returned. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the rest + /// of the name without the suffix is returned. Otherwise, [`None`] is + /// returned. + pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Name { + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. 
So we can treat the entire byte string as ASCII. + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Name {} + +impl PartialOrd for Name { + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Name { + fn cmp(&self, that: &Self) -> cmp::Ordering { + // We want to find a shared suffix between the two names, and the labels + // immediately before that shared suffix. However, we can't determine + // label boundaries when working backward. So, we find a shared suffix + // (even if it crosses partially between labels), then iterate through + // both names until we find their label boundaries up to the suffix. + + let this_iter = self.as_bytes().iter().rev(); + let that_iter = that.as_bytes().iter().rev(); + let suffix = iter::zip(this_iter, that_iter) + .position(|(l, r)| l.eq_ignore_ascii_case(r)); + + if let Some(suffix) = suffix { + // Iterate through the labels in both names until both have a tail + // of equal size within the shared suffix we found. + + // SAFETY: At least one unequal byte exists in both names, and it + // cannot be the root label, so there must be at least one non-root + // label in both names. + let (mut this_head, mut this_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + let (mut that_head, mut that_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + + loop { + let (this_len, that_len) = (this_tail.len(), that_tail.len()); + + if this_len == that_len && this_len < suffix { + // We have found the shared suffix of labels. Now, we must + // have two unequal head labels; we compare them (ASCII case + // insensitively). + break Ord::cmp(this_head, that_head); + } + + // If one tail is longer than the other, it will be shortened. + // Any tail longer than the suffix will also be shortened. + + if this_len > that_len || this_len > suffix { + // SAFETY: 'this_tail' has strictly more than one byte. 
+ (this_head, this_tail) = + unsafe { this_tail.split_first().unwrap_unchecked() }; + } + + if that_len > this_len || that_len > suffix { + // SAFETY: 'that_tail' has strictly more than one byte. + (that_head, that_tail) = + unsafe { that_tail.split_first().unwrap_unchecked() }; + } + } + } else { + // The shorter name is a suffix of the longer one. If the names are + // of equal length, they are equal; otherwise, the longer one has + // more labels, and is greater than the shorter one. + Ord::cmp(&self.len(), &that.len()) + } + } +} + +impl AsRef<[u8]> for Name { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// A relative domain name. +#[repr(transparent)] +pub struct RelName([u8]); + +impl RelName { + /// Assume a byte string is a valid [`RelName`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and within + /// the size restriction (255 bytes or fewer). It must be relative. + pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'RelName' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'RelName' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`RelName`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire format. + /// If it is not properly encoded, an error is returned. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, RelNameError> { + if bytes.len() + 1 > Name::MAX_SIZE { + // This can never become an absolute domain name. + return Err(RelNameError); + } + + // Iterate through labels in the name. + let mut index = 0usize; + while index < bytes.len() { + let length = bytes[index]; + if length == 0 { + // Empty labels are not allowed. + return Err(RelNameError); + } else if length >= 64 { + // An invalid label length (or a compression pointer). + return Err(RelNameError); + } else { + // This was the length of the label, excluding the length octet. 
+ index += 1 + length as usize; + } + } + + // We must land exactly at the end of the name, otherwise the previous + // label reported a length that was too long. + if index != bytes.len() { + return Err(RelNameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl RelName { + /// The size of this name in the wire format. + pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this name contains no labels at all. + pub const fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If there are + /// no remaining labels, [`None`] is returned. + pub fn parent(&self) -> Option<&Self> { + if self.is_empty() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// Whether this name starts with a particular relative name. + pub fn starts_with(&self, that: &RelName) -> bool { + if self.len() < that.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + } + + /// Whether this name ends with a particular relative name. + pub fn ends_with(&self, that: &Self) -> bool { + if self.len() < that.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. 
+ + let mut index = 0usize; + let offset = self.len() - that.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + } +} + +impl RelName { + /// Split this name into a label and the rest. + /// + /// If the name is empty, [`None`] is returned. The returned label will + /// always be non-empty. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_empty() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. + let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 252 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. Otherwise, [`None`] is + /// returned. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the rest + /// of the name without the suffix is returned. Otherwise, [`None`] is + /// returned. 
+ pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for RelName { + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for RelName {} + +impl AsRef<[u8]> for RelName { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// A label in a domain name. +#[repr(transparent)] +pub struct Label([u8]); + +impl Label { + /// The maximum size of a label in the wire format. + pub const MAX_SIZE: usize = 63; + + /// The root label. + pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[]) }; +} + +impl Label { + /// Assume a byte string is a valid [`Label`]. + /// + /// # Safety + /// + /// The byte string must be within the size restriction (63 bytes or fewer). + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Label' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Label`]. + /// + /// If the byte string is too long, an error is returned. 
+ pub fn from_bytes(bytes: &[u8]) -> Result<&Self, LabelError> { + if bytes.len() > Self::MAX_SIZE { + // The label was too long to be used. + return Err(LabelError); + } + + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// Extract a label from the start of a byte string. + /// + /// A label encoded in the wire format will be extracted from the beginning + /// of the given byte string. If a valid label cannot be extracted, or the + /// byte string is simply empty, an error is returned. The extracted label + /// and the remainder of the byte string are returned. + pub fn split_off(bytes: &[u8]) -> Result<(&Self, &[u8]), LabelError> { + let (&length, bytes) = bytes.split_first().ok_or(LabelError)?; + if length < 64 && bytes.len() >= length as usize { + let (label, bytes) = bytes.split_at(length as usize); + // SAFETY: 'label' is known be to less than 64 bytes in size. + Ok((unsafe { Self::from_bytes_unchecked(label) }, bytes)) + } else { + // Overlong label (or compression pointer). + Err(LabelError) + } + } +} + +impl Label { + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.is_empty() + } + + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl Label { + /// Canonicalize this label. + /// + /// All uppercase ASCII characters in the label will be lowercased. 
+ pub fn canonicalize(&mut self) { + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Label { + fn eq(&self, that: &Self) -> bool { + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Label {} + +impl PartialOrd for Label { + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Label { + fn cmp(&self, that: &Self) -> cmp::Ordering { + let this_bytes = self.as_bytes().iter().copied(); + let that_bytes = that.as_bytes().iter().copied(); + iter::zip(this_bytes, that_bytes) + .find(|(l, r)| !l.eq_ignore_ascii_case(r)) + .map_or(Ord::cmp(&self.len(), &that.len()), |(l, r)| { + Ord::cmp(&l.to_ascii_lowercase(), &r.to_ascii_lowercase()) + }) + } +} + +impl AsRef<[u8]> for Label { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// An error in costructing a [`Name`]. +pub struct NameError; + +/// An error in constructing a [`RelName`]. +pub struct RelNameError; + +/// An error in constructing a [`Label`]. +pub struct LabelError; From 0a37d6dcf53aad0c03cb604f29fbc1ce4b351bb2 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 3 Oct 2024 15:52:21 +0200 Subject: [PATCH 02/21] [base/new_name] impl 'Hash' I was worried I'd have to iterate over the labels, but actually, it's pretty straightforward. I hope the compiler can vectorize it nicely. --- src/base/new_name/mod.rs | 52 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index da4236898..52a9cc7e9 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -5,7 +5,11 @@ //! (the root) -> `org.` -> `example.org.` -> `www.example.org.`. Labels are //! stored in reverse order, from innermost to outermost. -use core::{cmp, iter}; +use core::{ + cmp, + hash::{Hash, Hasher}, + iter, +}; /// An absolute domain name. 
#[repr(transparent)] @@ -275,6 +279,21 @@ impl Ord for Name { } } +impl Hash for Name { + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since + // they are always less than 64. As such, we don't need to iterate over + // the labels manually; we can just give them to the hasher as-is. + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + impl AsRef<[u8]> for Name { fn as_ref(&self) -> &[u8] { &self.0 @@ -292,7 +311,7 @@ impl RelName { /// /// The byte string must be correctly encoded in the wire format, and within /// the size restriction (255 bytes or fewer). It must be relative. - pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // SAFETY: 'RelName' is a 'repr(transparent)' wrapper around '[u8]', so // casting a '[u8]' into a 'RelName' is sound. core::mem::transmute(bytes) @@ -481,6 +500,21 @@ impl PartialEq for RelName { impl Eq for RelName {} +impl Hash for RelName { + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since + // they are always less than 64. As such, we don't need to iterate over + // the labels manually; we can just give them to the hasher as-is. + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + impl AsRef<[u8]> for RelName { fn as_ref(&self) -> &[u8] { &self.0 @@ -595,6 +629,20 @@ impl Ord for Label { } } +impl Hash for Label { + fn hash(&self, state: &mut H) { + // Individual labels and names should hash in the same way. 
+ state.write_u8(self.len() as u8); + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + impl AsRef<[u8]> for Label { fn as_ref(&self) -> &[u8] { &self.0 From 04b315bcd3ca16d153051941f23659ef2114c889 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 3 Oct 2024 17:43:35 +0200 Subject: [PATCH 03/21] [base/new_name] Split into submodules --- src/base/new_name/absolute.rs | 299 ++++++++++++++++ src/base/new_name/label.rs | 136 +++++++ src/base/new_name/mod.rs | 656 +--------------------------------- src/base/new_name/relative.rs | 227 ++++++++++++ 4 files changed, 668 insertions(+), 650 deletions(-) create mode 100644 src/base/new_name/absolute.rs create mode 100644 src/base/new_name/label.rs create mode 100644 src/base/new_name/relative.rs diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs new file mode 100644 index 000000000..6a5e8ac36 --- /dev/null +++ b/src/base/new_name/absolute.rs @@ -0,0 +1,299 @@ +use core::{ + cmp, + hash::{Hash, Hasher}, + iter, +}; + +use super::{Label, RelName}; + +/// An absolute domain name. +#[repr(transparent)] +pub struct Name([u8]); + +impl Name { + /// The maximum size of an absolute domain name in the wire format. + pub const MAX_SIZE: usize = 255; + + /// The root name. + pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; +} + +impl Name { + /// Assume a byte string is a valid [`Name`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and within + /// the size restriction (255 bytes or fewer). It must be absolute. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Name' is sound. 
+ core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Name`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire format. + /// If it is not properly encoded, an error is returned. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { + // Without the last byte, this should be a relative name. + let (root, rel_name) = bytes.split_last().ok_or(NameError)?; + + if RelName::from_bytes(rel_name).is_err() { + return Err(NameError); + } else if *root != 0u8 { + // The last byte must be a root label. + return Err(NameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl Name { + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.len() == 1 + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If this is a + /// root name, [`None`] is returned. + pub fn parent(&self) -> Option<&Self> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// The whole name without the root label. + /// + /// If this is a root name, an empty relative name is returned. + /// + /// This is equivalent to `self.strip_suffix(Name::ROOT).unwrap()`. + pub fn without_root(&self) -> &RelName { + let bytes = &self.as_bytes()[..self.len() - 1]; + // SAFETY: A slice of labels (as from 'self') is a relative name. 
+ unsafe { RelName::from_bytes_unchecked(bytes) } + } + + /// Whether this name starts with a particular relative name. + pub fn starts_with(&self, that: &RelName) -> bool { + if self.len() < that.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + } + + /// Whether this name ends with a particular absolute name. + pub fn ends_with(&self, that: &Self) -> bool { + if self.len() < that.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. + + let mut index = 0usize; + let offset = self.len() - that.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + } +} + +impl Name { + /// Split this name into a label and the rest. + /// + /// If this is the root name, [`None`] is returned. The returned label will + /// always be non-empty. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. + let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 253 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. 
Otherwise, [`None`] is + /// returned. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the rest + /// of the name without the suffix is returned. Otherwise, [`None`] is + /// returned. + pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Name { + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. 
+ self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Name {} + +impl PartialOrd for Name { + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Name { + fn cmp(&self, that: &Self) -> cmp::Ordering { + // We want to find a shared suffix between the two names, and the labels + // immediately before that shared suffix. However, we can't determine + // label boundaries when working backward. So, we find a shared suffix + // (even if it crosses partially between labels), then iterate through + // both names until we find their label boundaries up to the suffix. + + let this_iter = self.as_bytes().iter().rev(); + let that_iter = that.as_bytes().iter().rev(); + let suffix = iter::zip(this_iter, that_iter) + .position(|(l, r)| l.eq_ignore_ascii_case(r)); + + if let Some(suffix) = suffix { + // Iterate through the labels in both names until both have a tail + // of equal size within the shared suffix we found. + + // SAFETY: At least one unequal byte exists in both names, and it + // cannot be the root label, so there must be at least one non-root + // label in both names. + let (mut this_head, mut this_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + let (mut that_head, mut that_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + + loop { + let (this_len, that_len) = (this_tail.len(), that_tail.len()); + + if this_len == that_len && this_len < suffix { + // We have found the shared suffix of labels. Now, we must + // have two unequal head labels; we compare them (ASCII case + // insensitively). + break Ord::cmp(this_head, that_head); + } + + // If one tail is longer than the other, it will be shortened. + // Any tail longer than the suffix will also be shortened. + + if this_len > that_len || this_len > suffix { + // SAFETY: 'this_tail' has strictly more than one byte. 
+ (this_head, this_tail) = + unsafe { this_tail.split_first().unwrap_unchecked() }; + } + + if that_len > this_len || that_len > suffix { + // SAFETY: 'that_tail' has strictly more than one byte. + (that_head, that_tail) = + unsafe { that_tail.split_first().unwrap_unchecked() }; + } + } + } else { + // The shorter name is a suffix of the longer one. If the names are + // of equal length, they are equal; otherwise, the longer one has + // more labels, and is greater than the shorter one. + Ord::cmp(&self.len(), &that.len()) + } + } +} + +impl Hash for Name { + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since + // they are always less than 64. As such, we don't need to iterate over + // the labels manually; we can just give them to the hasher as-is. + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + +impl AsRef<[u8]> for Name { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// An error in costructing a [`Name`]. +pub struct NameError; diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs new file mode 100644 index 000000000..0a2d80c0b --- /dev/null +++ b/src/base/new_name/label.rs @@ -0,0 +1,136 @@ +use core::{ + cmp, + hash::{Hash, Hasher}, + iter, +}; + +/// A label in a domain name. +#[repr(transparent)] +pub struct Label([u8]); + +impl Label { + /// The maximum size of a label in the wire format. + pub const MAX_SIZE: usize = 63; + + /// The root label. + pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[]) }; +} + +impl Label { + /// Assume a byte string is a valid [`Label`]. + /// + /// # Safety + /// + /// The byte string must be within the size restriction (63 bytes or fewer). 
+ pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Label' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Label`]. + /// + /// If the byte string is too long, an error is returned. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, LabelError> { + if bytes.len() > Self::MAX_SIZE { + // The label was too long to be used. + return Err(LabelError); + } + + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// Extract a label from the start of a byte string. + /// + /// A label encoded in the wire format will be extracted from the beginning + /// of the given byte string. If a valid label cannot be extracted, or the + /// byte string is simply empty, an error is returned. The extracted label + /// and the remainder of the byte string are returned. + pub fn split_off(bytes: &[u8]) -> Result<(&Self, &[u8]), LabelError> { + let (&length, bytes) = bytes.split_first().ok_or(LabelError)?; + if length < 64 && bytes.len() >= length as usize { + let (label, bytes) = bytes.split_at(length as usize); + // SAFETY: 'label' is known be to less than 64 bytes in size. + Ok((unsafe { Self::from_bytes_unchecked(label) }, bytes)) + } else { + // Overlong label (or compression pointer). + Err(LabelError) + } + } +} + +impl Label { + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.is_empty() + } + + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl Label { + /// Canonicalize this label. + /// + /// All uppercase ASCII characters in the label will be lowercased. 
+ pub fn canonicalize(&mut self) { + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Label { + fn eq(&self, that: &Self) -> bool { + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Label {} + +impl PartialOrd for Label { + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Label { + fn cmp(&self, that: &Self) -> cmp::Ordering { + let this_bytes = self.as_bytes().iter().copied(); + let that_bytes = that.as_bytes().iter().copied(); + iter::zip(this_bytes, that_bytes) + .find(|(l, r)| !l.eq_ignore_ascii_case(r)) + .map_or(Ord::cmp(&self.len(), &that.len()), |(l, r)| { + Ord::cmp(&l.to_ascii_lowercase(), &r.to_ascii_lowercase()) + }) + } +} + +impl Hash for Label { + fn hash(&self, state: &mut H) { + // Individual labels and names should hash in the same way. + state.write_u8(self.len() as u8); + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + +impl AsRef<[u8]> for Label { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// An error in constructing a [`Label`]. +pub struct LabelError; diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index 52a9cc7e9..2a1a4dd1e 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -5,655 +5,11 @@ //! (the root) -> `org.` -> `example.org.` -> `www.example.org.`. Labels are //! stored in reverse order, from innermost to outermost. -use core::{ - cmp, - hash::{Hash, Hasher}, - iter, -}; +mod absolute; +pub use absolute::{Name, NameError}; -/// An absolute domain name. -#[repr(transparent)] -pub struct Name([u8]); +mod relative; +pub use relative::{RelName, RelNameError}; -impl Name { - /// The maximum size of an absolute domain name in the wire format. - pub const MAX_SIZE: usize = 255; - - /// The root name. 
- pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; -} - -impl Name { - /// Assume a byte string is a valid [`Name`]. - /// - /// # Safety - /// - /// The byte string must be correctly encoded in the wire format, and within - /// the size restriction (255 bytes or fewer). It must be absolute. - pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so - // casting a '[u8]' into a 'Name' is sound. - core::mem::transmute(bytes) - } - - /// Try converting a byte string into a [`Name`]. - /// - /// The byte string is confirmed to be correctly encoded in the wire format. - /// If it is not properly encoded, an error is returned. - pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { - // Without the last byte, this should be a relative name. - let (root, rel_name) = bytes.split_last().ok_or(NameError)?; - - if RelName::from_bytes(rel_name).is_err() { - return Err(NameError); - } else if *root != 0u8 { - // The last byte must be a root label. - return Err(NameError); - } - - // SAFETY: 'bytes' has been confirmed to be correctly encoded. - Ok(unsafe { Self::from_bytes_unchecked(bytes) }) - } -} - -impl Name { - /// The size of this name in the wire format. - #[allow(clippy::len_without_is_empty)] - pub const fn len(&self) -> usize { - self.0.len() - } - - /// Whether this is the root label. - pub const fn is_root(&self) -> bool { - self.0.len() == 1 - } - - /// The wire format representation of the name. - pub const fn as_bytes(&self) -> &[u8] { - &self.0 - } - - /// The parent of this name, if any. - /// - /// The name containing all but the first label is returned. If this is a - /// root name, [`None`] is returned. - pub fn parent(&self) -> Option<&Self> { - if self.is_root() { - return None; - } - - let bytes = self.as_bytes(); - let bytes = &bytes[1 + bytes[0] as usize..]; - - // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. 
- Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } - - /// The whole name without the root label. - /// - /// If this is a root name, an empty relative name is returned. - /// - /// This is equivalent to `self.strip_suffix(Name::ROOT).unwrap()`. - pub fn without_root(&self) -> &RelName { - let bytes = &self.as_bytes()[..self.len() - 1]; - // SAFETY: A slice of labels (as from 'self') is a relative name. - unsafe { RelName::from_bytes_unchecked(bytes) } - } - - /// Whether this name starts with a particular relative name. - pub fn starts_with(&self, that: &RelName) -> bool { - if self.len() < that.len() { - return false; - } - - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) - } - - /// Whether this name ends with a particular absolute name. - pub fn ends_with(&self, that: &Self) -> bool { - if self.len() < that.len() { - return false; - } - - // We want to compare the last bytes of the current name to the given - // candidate. To do so, we need to ensure that those last bytes start - // at a valid label boundary. - - let mut index = 0usize; - let offset = self.len() - that.len(); - while index < offset { - index += 1 + self.0[index] as usize; - } - - if index != offset { - return false; - } - - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) - } -} - -impl Name { - /// Split this name into a label and the rest. - /// - /// If this is the root name, [`None`] is returned. The returned label will - /// always be non-empty. 
- pub fn split_first(&self) -> Option<(&Label, &Self)> { - if self.is_root() { - return None; - } - - let bytes = self.as_bytes(); - let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); - - // SAFETY: 'self' only contains valid labels. - let label = unsafe { Label::from_bytes_unchecked(label) }; - // SAFETY: 'rest' is 253 bytes or smaller and has valid labels. - let rest = unsafe { Self::from_bytes_unchecked(rest) }; - - Some((label, rest)) - } - - /// Strip a prefix from this name. - /// - /// If this name has the given prefix (see [`Self::starts_with()`]), the - /// rest of the name without the prefix is returned. Otherwise, [`None`] is - /// returned. - pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { - if self.starts_with(prefix) { - let bytes = &self.as_bytes()[prefix.len()..]; - - // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' - // start with the same labels as 'prefix'; removing those labels - // still leaves 'self' with whole labels. - Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } else { - None - } - } - - /// Strip a suffix from this name. - /// - /// If this name has the given suffix (see [`Self::ends_with()`]), the rest - /// of the name without the suffix is returned. Otherwise, [`None`] is - /// returned. - pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { - if self.ends_with(suffix) { - let bytes = &self.as_bytes()[..self.len() - suffix.len()]; - - // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' - // ended with the same labels as 'suffix'; removing those labels - // still leaves 'self' with whole labels. - Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } else { - None - } - } - - /// Canonicalize this domain name. - /// - /// All uppercase ASCII characters in the name will be lowercased. - pub fn canonicalize(&mut self) { - // Label lengths are never ASCII characters, because they start from - // byte value 65. 
So we can treat the entire byte string as ASCII. - self.0.make_ascii_lowercase() - } -} - -impl PartialEq for Name { - fn eq(&self, that: &Self) -> bool { - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the entire byte string as ASCII. - self.0.eq_ignore_ascii_case(&that.0) - } -} - -impl Eq for Name {} - -impl PartialOrd for Name { - fn partial_cmp(&self, that: &Self) -> Option { - Some(Ord::cmp(self, that)) - } -} - -impl Ord for Name { - fn cmp(&self, that: &Self) -> cmp::Ordering { - // We want to find a shared suffix between the two names, and the labels - // immediately before that shared suffix. However, we can't determine - // label boundaries when working backward. So, we find a shared suffix - // (even if it crosses partially between labels), then iterate through - // both names until we find their label boundaries up to the suffix. - - let this_iter = self.as_bytes().iter().rev(); - let that_iter = that.as_bytes().iter().rev(); - let suffix = iter::zip(this_iter, that_iter) - .position(|(l, r)| l.eq_ignore_ascii_case(r)); - - if let Some(suffix) = suffix { - // Iterate through the labels in both names until both have a tail - // of equal size within the shared suffix we found. - - // SAFETY: At least one unequal byte exists in both names, and it - // cannot be the root label, so there must be at least one non-root - // label in both names. - let (mut this_head, mut this_tail) = - unsafe { self.split_first().unwrap_unchecked() }; - let (mut that_head, mut that_tail) = - unsafe { self.split_first().unwrap_unchecked() }; - - loop { - let (this_len, that_len) = (this_tail.len(), that_tail.len()); - - if this_len == that_len && this_len < suffix { - // We have found the shared suffix of labels. Now, we must - // have two unequal head labels; we compare them (ASCII case - // insensitively). - break Ord::cmp(this_head, that_head); - } - - // If one tail is longer than the other, it will be shortened. 
- // Any tail longer than the suffix will also be shortened. - - if this_len > that_len || this_len > suffix { - // SAFETY: 'this_tail' has strictly more than one byte. - (this_head, this_tail) = - unsafe { this_tail.split_first().unwrap_unchecked() }; - } - - if that_len > this_len || that_len > suffix { - // SAFETY: 'that_tail' has strictly more than one byte. - (that_head, that_tail) = - unsafe { that_tail.split_first().unwrap_unchecked() }; - } - } - } else { - // The shorter name is a suffix of the longer one. If the names are - // of equal length, they are equal; otherwise, the longer one has - // more labels, and is greater than the shorter one. - Ord::cmp(&self.len(), &that.len()) - } - } -} - -impl Hash for Name { - fn hash(&self, state: &mut H) { - // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since - // they are always less than 64. As such, we don't need to iterate over - // the labels manually; we can just give them to the hasher as-is. - - // The default 'std' hasher actually buffers 8 bytes of input before - // processing them. There's no point trying to chunk the input here. - self.as_bytes() - .iter() - .map(|&b| b.to_ascii_lowercase()) - .for_each(|b| state.write_u8(b)); - } -} - -impl AsRef<[u8]> for Name { - fn as_ref(&self) -> &[u8] { - &self.0 - } -} - -/// A relative domain name. -#[repr(transparent)] -pub struct RelName([u8]); - -impl RelName { - /// Assume a byte string is a valid [`RelName`]. - /// - /// # Safety - /// - /// The byte string must be correctly encoded in the wire format, and within - /// the size restriction (255 bytes or fewer). It must be relative. - pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - // SAFETY: 'RelName' is a 'repr(transparent)' wrapper around '[u8]', so - // casting a '[u8]' into a 'RelName' is sound. - core::mem::transmute(bytes) - } - - /// Try converting a byte string into a [`RelName`]. 
- /// - /// The byte string is confirmed to be correctly encoded in the wire format. - /// If it is not properly encoded, an error is returned. - pub fn from_bytes(bytes: &[u8]) -> Result<&Self, RelNameError> { - if bytes.len() + 1 > Name::MAX_SIZE { - // This can never become an absolute domain name. - return Err(RelNameError); - } - - // Iterate through labels in the name. - let mut index = 0usize; - while index < bytes.len() { - let length = bytes[index]; - if length == 0 { - // Empty labels are not allowed. - return Err(RelNameError); - } else if length >= 64 { - // An invalid label length (or a compression pointer). - return Err(RelNameError); - } else { - // This was the length of the label, excluding the length octet. - index += 1 + length as usize; - } - } - - // We must land exactly at the end of the name, otherwise the previous - // label reported a length that was too long. - if index != bytes.len() { - return Err(RelNameError); - } - - // SAFETY: 'bytes' has been confirmed to be correctly encoded. - Ok(unsafe { Self::from_bytes_unchecked(bytes) }) - } -} - -impl RelName { - /// The size of this name in the wire format. - pub const fn len(&self) -> usize { - self.0.len() - } - - /// Whether this name contains no labels at all. - pub const fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// The wire format representation of the name. - pub const fn as_bytes(&self) -> &[u8] { - &self.0 - } - - /// The parent of this name, if any. - /// - /// The name containing all but the first label is returned. If there are - /// no remaining labels, [`None`] is returned. - pub fn parent(&self) -> Option<&Self> { - if self.is_empty() { - return None; - } - - let bytes = self.as_bytes(); - let bytes = &bytes[1 + bytes[0] as usize..]; - - // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. - Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } - - /// Whether this name starts with a particular relative name. 
- pub fn starts_with(&self, that: &RelName) -> bool { - if self.len() < that.len() { - return false; - } - - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) - } - - /// Whether this name ends with a particular relative name. - pub fn ends_with(&self, that: &Self) -> bool { - if self.len() < that.len() { - return false; - } - - // We want to compare the last bytes of the current name to the given - // candidate. To do so, we need to ensure that those last bytes start - // at a valid label boundary. - - let mut index = 0usize; - let offset = self.len() - that.len(); - while index < offset { - index += 1 + self.0[index] as usize; - } - - if index != offset { - return false; - } - - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) - } -} - -impl RelName { - /// Split this name into a label and the rest. - /// - /// If the name is empty, [`None`] is returned. The returned label will - /// always be non-empty. - pub fn split_first(&self) -> Option<(&Label, &Self)> { - if self.is_empty() { - return None; - } - - let bytes = self.as_bytes(); - let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); - - // SAFETY: 'self' only contains valid labels. - let label = unsafe { Label::from_bytes_unchecked(label) }; - // SAFETY: 'rest' is 252 bytes or smaller and has valid labels. - let rest = unsafe { Self::from_bytes_unchecked(rest) }; - - Some((label, rest)) - } - - /// Strip a prefix from this name. - /// - /// If this name has the given prefix (see [`Self::starts_with()`]), the - /// rest of the name without the prefix is returned. Otherwise, [`None`] is - /// returned. 
- pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { - if self.starts_with(prefix) { - let bytes = &self.as_bytes()[prefix.len()..]; - - // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' - // start with the same labels as 'prefix'; removing those labels - // still leaves 'self' with whole labels. - Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } else { - None - } - } - - /// Strip a suffix from this name. - /// - /// If this name has the given suffix (see [`Self::ends_with()`]), the rest - /// of the name without the suffix is returned. Otherwise, [`None`] is - /// returned. - pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { - if self.ends_with(suffix) { - let bytes = &self.as_bytes()[..self.len() - suffix.len()]; - - // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' - // ended with the same labels as 'suffix'; removing those labels - // still leaves 'self' with whole labels. - Some(unsafe { Self::from_bytes_unchecked(bytes) }) - } else { - None - } - } - - /// Canonicalize this domain name. - /// - /// All uppercase ASCII characters in the name will be lowercased. - pub fn canonicalize(&mut self) { - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the entire byte string as ASCII. - self.0.make_ascii_lowercase() - } -} - -impl PartialEq for RelName { - fn eq(&self, that: &Self) -> bool { - // Label lengths are never ASCII characters, because they start from - // byte value 65. So we can treat the entire byte string as ASCII. - self.0.eq_ignore_ascii_case(&that.0) - } -} - -impl Eq for RelName {} - -impl Hash for RelName { - fn hash(&self, state: &mut H) { - // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since - // they are always less than 64. As such, we don't need to iterate over - // the labels manually; we can just give them to the hasher as-is. 
- - // The default 'std' hasher actually buffers 8 bytes of input before - // processing them. There's no point trying to chunk the input here. - self.as_bytes() - .iter() - .map(|&b| b.to_ascii_lowercase()) - .for_each(|b| state.write_u8(b)); - } -} - -impl AsRef<[u8]> for RelName { - fn as_ref(&self) -> &[u8] { - &self.0 - } -} - -/// A label in a domain name. -#[repr(transparent)] -pub struct Label([u8]); - -impl Label { - /// The maximum size of a label in the wire format. - pub const MAX_SIZE: usize = 63; - - /// The root label. - pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[]) }; -} - -impl Label { - /// Assume a byte string is a valid [`Label`]. - /// - /// # Safety - /// - /// The byte string must be within the size restriction (63 bytes or fewer). - pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so - // casting a '[u8]' into a 'Label' is sound. - core::mem::transmute(bytes) - } - - /// Try converting a byte string into a [`Label`]. - /// - /// If the byte string is too long, an error is returned. - pub fn from_bytes(bytes: &[u8]) -> Result<&Self, LabelError> { - if bytes.len() > Self::MAX_SIZE { - // The label was too long to be used. - return Err(LabelError); - } - - Ok(unsafe { Self::from_bytes_unchecked(bytes) }) - } - - /// Extract a label from the start of a byte string. - /// - /// A label encoded in the wire format will be extracted from the beginning - /// of the given byte string. If a valid label cannot be extracted, or the - /// byte string is simply empty, an error is returned. The extracted label - /// and the remainder of the byte string are returned. 
- pub fn split_off(bytes: &[u8]) -> Result<(&Self, &[u8]), LabelError> { - let (&length, bytes) = bytes.split_first().ok_or(LabelError)?; - if length < 64 && bytes.len() >= length as usize { - let (label, bytes) = bytes.split_at(length as usize); - // SAFETY: 'label' is known be to less than 64 bytes in size. - Ok((unsafe { Self::from_bytes_unchecked(label) }, bytes)) - } else { - // Overlong label (or compression pointer). - Err(LabelError) - } - } -} - -impl Label { - /// Whether this is the root label. - pub const fn is_root(&self) -> bool { - self.0.is_empty() - } - - /// The size of this name in the wire format. - #[allow(clippy::len_without_is_empty)] - pub const fn len(&self) -> usize { - self.0.len() - } - - /// The wire format representation of the name. - pub const fn as_bytes(&self) -> &[u8] { - &self.0 - } -} - -impl Label { - /// Canonicalize this label. - /// - /// All uppercase ASCII characters in the label will be lowercased. - pub fn canonicalize(&mut self) { - self.0.make_ascii_lowercase() - } -} - -impl PartialEq for Label { - fn eq(&self, that: &Self) -> bool { - self.0.eq_ignore_ascii_case(&that.0) - } -} - -impl Eq for Label {} - -impl PartialOrd for Label { - fn partial_cmp(&self, that: &Self) -> Option { - Some(Ord::cmp(self, that)) - } -} - -impl Ord for Label { - fn cmp(&self, that: &Self) -> cmp::Ordering { - let this_bytes = self.as_bytes().iter().copied(); - let that_bytes = that.as_bytes().iter().copied(); - iter::zip(this_bytes, that_bytes) - .find(|(l, r)| !l.eq_ignore_ascii_case(r)) - .map_or(Ord::cmp(&self.len(), &that.len()), |(l, r)| { - Ord::cmp(&l.to_ascii_lowercase(), &r.to_ascii_lowercase()) - }) - } -} - -impl Hash for Label { - fn hash(&self, state: &mut H) { - // Individual labels and names should hash in the same way. - state.write_u8(self.len() as u8); - - // The default 'std' hasher actually buffers 8 bytes of input before - // processing them. There's no point trying to chunk the input here. 
- self.as_bytes() - .iter() - .map(|&b| b.to_ascii_lowercase()) - .for_each(|b| state.write_u8(b)); - } -} - -impl AsRef<[u8]> for Label { - fn as_ref(&self) -> &[u8] { - &self.0 - } -} - -/// An error in costructing a [`Name`]. -pub struct NameError; - -/// An error in constructing a [`RelName`]. -pub struct RelNameError; - -/// An error in constructing a [`Label`]. -pub struct LabelError; +mod label; +pub use label::{Label, LabelError}; diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs new file mode 100644 index 000000000..c2d20fdcb --- /dev/null +++ b/src/base/new_name/relative.rs @@ -0,0 +1,227 @@ +use core::hash::{Hash, Hasher}; + +use super::{Label, Name}; + +/// A relative domain name. +#[repr(transparent)] +pub struct RelName([u8]); + +impl RelName { + /// Assume a byte string is a valid [`RelName`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and within + /// the size restriction (255 bytes or fewer). It must be relative. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'RelName' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'RelName' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`RelName`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire format. + /// If it is not properly encoded, an error is returned. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, RelNameError> { + if bytes.len() + 1 > Name::MAX_SIZE { + // This can never become an absolute domain name. + return Err(RelNameError); + } + + // Iterate through labels in the name. + let mut index = 0usize; + while index < bytes.len() { + let length = bytes[index]; + if length == 0 { + // Empty labels are not allowed. + return Err(RelNameError); + } else if length >= 64 { + // An invalid label length (or a compression pointer). 
+ return Err(RelNameError); + } else { + // This was the length of the label, excluding the length octet. + index += 1 + length as usize; + } + } + + // We must land exactly at the end of the name, otherwise the previous + // label reported a length that was too long. + if index != bytes.len() { + return Err(RelNameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl RelName { + /// The size of this name in the wire format. + pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this name contains no labels at all. + pub const fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If there are + /// no remaining labels, [`None`] is returned. + pub fn parent(&self) -> Option<&Self> { + if self.is_empty() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// Whether this name starts with a particular relative name. + pub fn starts_with(&self, that: &RelName) -> bool { + if self.len() < that.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + } + + /// Whether this name ends with a particular relative name. + pub fn ends_with(&self, that: &Self) -> bool { + if self.len() < that.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. 
+ + let mut index = 0usize; + let offset = self.len() - that.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + } +} + +impl RelName { + /// Split this name into a label and the rest. + /// + /// If the name is empty, [`None`] is returned. The returned label will + /// always be non-empty. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_empty() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. + let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 252 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. Otherwise, [`None`] is + /// returned. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the rest + /// of the name without the suffix is returned. Otherwise, [`None`] is + /// returned. 
+ pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for RelName { + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for RelName {} + +impl Hash for RelName { + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since + // they are always less than 64. As such, we don't need to iterate over + // the labels manually; we can just give them to the hasher as-is. + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + +impl AsRef<[u8]> for RelName { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// An error in constructing a [`RelName`]. 
+pub struct RelNameError; From f7445827d67e81f2c5fcfc7d82b100269a82d515 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 3 Oct 2024 18:45:26 +0200 Subject: [PATCH 04/21] [base/new_name] Note runtime complexity in docs --- src/base/new_name/absolute.rs | 64 +++++++++++++++++++++++++++++++---- src/base/new_name/label.rs | 35 +++++++++++++++++++ src/base/new_name/relative.rs | 47 +++++++++++++++++++++---- 3 files changed, 132 insertions(+), 14 deletions(-) diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index 6a5e8ac36..33a75f4c8 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -35,6 +35,8 @@ impl Name { /// /// The byte string is confirmed to be correctly encoded in the wire format. /// If it is not properly encoded, an error is returned. + /// + /// Runtime: `O(bytes.len())`. pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { // Without the last byte, this should be a relative name. let (root, rel_name) = bytes.split_last().ok_or(NameError)?; @@ -72,6 +74,8 @@ impl Name { /// /// The name containing all but the first label is returned. If this is a /// root name, [`None`] is returned. + /// + /// Runtime: `O(1)`. pub fn parent(&self) -> Option<&Self> { if self.is_root() { return None; @@ -89,6 +93,8 @@ impl Name { /// If this is a root name, an empty relative name is returned. /// /// This is equivalent to `self.strip_suffix(Name::ROOT).unwrap()`. + /// + /// Runtime: `O(1)`. pub fn without_root(&self) -> &RelName { let bytes = &self.as_bytes()[..self.len() - 1]; // SAFETY: A slice of labels (as from 'self') is a relative name. @@ -96,19 +102,24 @@ impl Name { } /// Whether this name starts with a particular relative name. - pub fn starts_with(&self, that: &RelName) -> bool { - if self.len() < that.len() { + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. 
+ pub fn starts_with(&self, prefix: &RelName) -> bool { + if self.len() < prefix.len() { return false; } // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + self.as_bytes()[..prefix.len()] + .eq_ignore_ascii_case(prefix.as_bytes()) } /// Whether this name ends with a particular absolute name. - pub fn ends_with(&self, that: &Self) -> bool { - if self.len() < that.len() { + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. + pub fn ends_with(&self, suffix: &Self) -> bool { + if self.len() < suffix.len() { return false; } @@ -117,7 +128,7 @@ impl Name { // at a valid label boundary. let mut index = 0usize; - let offset = self.len() - that.len(); + let offset = self.len() - suffix.len(); while index < offset { index += 1 + self.0[index] as usize; } @@ -128,7 +139,7 @@ impl Name { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + self.as_bytes()[offset..].eq_ignore_ascii_case(suffix.as_bytes()) } } @@ -137,6 +148,8 @@ impl Name { /// /// If this is the root name, [`None`] is returned. The returned label will /// always be non-empty. + /// + /// Runtime: `O(1)`. pub fn split_first(&self) -> Option<(&Label, &Self)> { if self.is_root() { return None; @@ -158,6 +171,8 @@ impl Name { /// If this name has the given prefix (see [`Self::starts_with()`]), the /// rest of the name without the prefix is returned. Otherwise, [`None`] is /// returned. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. 
pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { if self.starts_with(prefix) { let bytes = &self.as_bytes()[prefix.len()..]; @@ -176,6 +191,8 @@ impl Name { /// If this name has the given suffix (see [`Self::ends_with()`]), the rest /// of the name without the suffix is returned. Otherwise, [`None`] is /// returned. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { if self.ends_with(suffix) { let bytes = &self.as_bytes()[..self.len() - suffix.len()]; @@ -192,6 +209,8 @@ impl Name { /// Canonicalize this domain name. /// /// All uppercase ASCII characters in the name will be lowercased. + /// + /// Runtime: `O(self.len())`. pub fn canonicalize(&mut self) { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the entire byte string as ASCII. @@ -200,6 +219,12 @@ impl Name { } impl PartialEq for Name { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn eq(&self, that: &Self) -> bool { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the entire byte string as ASCII. @@ -210,12 +235,26 @@ impl PartialEq for Name { impl Eq for Name {} impl PartialOrd for Name { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. 
fn partial_cmp(&self, that: &Self) -> Option { Some(Ord::cmp(self, that)) } } impl Ord for Name { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. fn cmp(&self, that: &Self) -> cmp::Ordering { // We want to find a shared suffix between the two names, and the labels // immediately before that shared suffix. However, we can't determine @@ -275,6 +314,16 @@ impl Ord for Name { } impl Hash for Name { + /// Hash this label by its canonical value. + /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`RelName`] and [`Label`], so a tuple of any + /// of these types will have the same hash as the concatenation of the + /// labels. + /// + /// Runtime: `O(self.len())`. fn hash(&self, state: &mut H) { // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since // they are always less than 64. As such, we don't need to iterate over @@ -290,6 +339,7 @@ impl Hash for Name { } impl AsRef<[u8]> for Name { + /// The bytes in the name in the wire format. fn as_ref(&self) -> &[u8] { &self.0 } diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 0a2d80c0b..cea6c0fc9 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -31,6 +31,8 @@ impl Label { /// Try converting a byte string into a [`Label`]. /// /// If the byte string is too long, an error is returned. + /// + /// Runtime: `O(bytes.len())`. pub fn from_bytes(bytes: &[u8]) -> Result<&Self, LabelError> { if bytes.len() > Self::MAX_SIZE { // The label was too long to be used. @@ -46,6 +48,8 @@ impl Label { /// of the given byte string. 
If a valid label cannot be extracted, or the /// byte string is simply empty, an error is returned. The extracted label /// and the remainder of the byte string are returned. + /// + /// Runtime: `O(1)`. pub fn split_off(bytes: &[u8]) -> Result<(&Self, &[u8]), LabelError> { let (&length, bytes) = bytes.split_first().ok_or(LabelError)?; if length < 64 && bytes.len() >= length as usize { @@ -81,12 +85,20 @@ impl Label { /// Canonicalize this label. /// /// All uppercase ASCII characters in the label will be lowercased. + /// + /// Runtime: `O(self.len())`. pub fn canonicalize(&mut self) { self.0.make_ascii_lowercase() } } impl PartialEq for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn eq(&self, that: &Self) -> bool { self.0.eq_ignore_ascii_case(&that.0) } @@ -95,12 +107,24 @@ impl PartialEq for Label { impl Eq for Label {} impl PartialOrd for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn partial_cmp(&self, that: &Self) -> Option { Some(Ord::cmp(self, that)) } } impl Ord for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn cmp(&self, that: &Self) -> cmp::Ordering { let this_bytes = self.as_bytes().iter().copied(); let that_bytes = that.as_bytes().iter().copied(); @@ -113,6 +137,16 @@ impl Ord for Label { } impl Hash for Label { + /// Hash this label by its canonical value. 
+ /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`Name`] and [`RelName`], so a tuple of any + /// of these types will have the same hash as the concatenation of the + /// labels. + /// + /// Runtime: `O(self.len())`. fn hash(&self, state: &mut H) { // Individual labels and names should hash in the same way. state.write_u8(self.len() as u8); @@ -127,6 +161,7 @@ impl Hash for Label { } impl AsRef<[u8]> for Label { + /// The raw bytes in this name, with no length octet. fn as_ref(&self) -> &[u8] { &self.0 } diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index c2d20fdcb..5764c8e97 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -23,6 +23,8 @@ impl RelName { /// /// The byte string is confirmed to be correctly encoded in the wire format. /// If it is not properly encoded, an error is returned. + /// + /// Runtime: `O(bytes.len())`. pub fn from_bytes(bytes: &[u8]) -> Result<&Self, RelNameError> { if bytes.len() + 1 > Name::MAX_SIZE { // This can never become an absolute domain name. @@ -76,6 +78,8 @@ impl RelName { /// /// The name containing all but the first label is returned. If there are /// no remaining labels, [`None`] is returned. + /// + /// Runtime: `O(1)`. pub fn parent(&self) -> Option<&Self> { if self.is_empty() { return None; @@ -89,19 +93,24 @@ impl RelName { } /// Whether this name starts with a particular relative name. - pub fn starts_with(&self, that: &RelName) -> bool { - if self.len() < that.len() { + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. + pub fn starts_with(&self, prefix: &RelName) -> bool { + if self.len() < prefix.len() { return false; } // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the byte strings as ASCII. 
- self.as_bytes()[..that.len()].eq_ignore_ascii_case(that.as_bytes()) + self.as_bytes()[..prefix.len()] + .eq_ignore_ascii_case(prefix.as_bytes()) } /// Whether this name ends with a particular relative name. - pub fn ends_with(&self, that: &Self) -> bool { - if self.len() < that.len() { + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. + pub fn ends_with(&self, suffix: &Self) -> bool { + if self.len() < suffix.len() { return false; } @@ -110,7 +119,7 @@ impl RelName { // at a valid label boundary. let mut index = 0usize; - let offset = self.len() - that.len(); + let offset = self.len() - suffix.len(); while index < offset { index += 1 + self.0[index] as usize; } @@ -121,7 +130,7 @@ impl RelName { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the byte strings as ASCII. - self.as_bytes()[offset..].eq_ignore_ascii_case(that.as_bytes()) + self.as_bytes()[offset..].eq_ignore_ascii_case(suffix.as_bytes()) } } @@ -130,6 +139,8 @@ impl RelName { /// /// If the name is empty, [`None`] is returned. The returned label will /// always be non-empty. + /// + /// Runtime: `O(1)`. pub fn split_first(&self) -> Option<(&Label, &Self)> { if self.is_empty() { return None; @@ -151,6 +162,8 @@ impl RelName { /// If this name has the given prefix (see [`Self::starts_with()`]), the /// rest of the name without the prefix is returned. Otherwise, [`None`] is /// returned. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { if self.starts_with(prefix) { let bytes = &self.as_bytes()[prefix.len()..]; @@ -169,6 +182,8 @@ impl RelName { /// If this name has the given suffix (see [`Self::ends_with()`]), the rest /// of the name without the suffix is returned. Otherwise, [`None`] is /// returned. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. 
pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { if self.ends_with(suffix) { let bytes = &self.as_bytes()[..self.len() - suffix.len()]; @@ -185,6 +200,8 @@ impl RelName { /// Canonicalize this domain name. /// /// All uppercase ASCII characters in the name will be lowercased. + /// + /// Runtime: `O(self.len())`. pub fn canonicalize(&mut self) { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the entire byte string as ASCII. @@ -193,6 +210,12 @@ impl RelName { } impl PartialEq for RelName { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn eq(&self, that: &Self) -> bool { // Label lengths are never ASCII characters, because they start from // byte value 65. So we can treat the entire byte string as ASCII. @@ -203,6 +226,15 @@ impl PartialEq for RelName { impl Eq for RelName {} impl Hash for RelName { + /// Hash this label by its canonical value. + /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`Name`] and [`Label`], so a tuple of any of + /// these types will have the same hash as the concatenation of the labels. + /// + /// Runtime: `O(self.len())`. fn hash(&self, state: &mut H) { // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since // they are always less than 64. As such, we don't need to iterate over @@ -218,6 +250,7 @@ impl Hash for RelName { } impl AsRef<[u8]> for RelName { + /// The bytes in the name in the wire format. 
fn as_ref(&self) -> &[u8] { &self.0 } From c4b1b9ff1b4bbbd087447756f73363586a120ce8 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 3 Oct 2024 18:48:09 +0200 Subject: [PATCH 05/21] [base/new_name] Augment error types --- src/base/new_name/absolute.rs | 12 +++++++++++- src/base/new_name/label.rs | 12 +++++++++++- src/base/new_name/relative.rs | 15 ++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index 33a75f4c8..e9ab633e8 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -1,5 +1,5 @@ use core::{ - cmp, + cmp, fmt, hash::{Hash, Hasher}, iter, }; @@ -346,4 +346,14 @@ impl AsRef<[u8]> for Name { } /// An error in costructing a [`Name`]. +#[derive(Clone, Debug)] pub struct NameError; + +impl fmt::Display for NameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not parse an absolute domain name") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for NameError {} diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index cea6c0fc9..337cbd0c2 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -1,5 +1,5 @@ use core::{ - cmp, + cmp, fmt, hash::{Hash, Hasher}, iter, }; @@ -168,4 +168,14 @@ impl AsRef<[u8]> for Label { } /// An error in constructing a [`Label`]. 
+#[derive(Clone, Debug)] pub struct LabelError; + +impl fmt::Display for LabelError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not parse a domain name label") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for LabelError {} diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index 5764c8e97..f1267578a 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -1,4 +1,7 @@ -use core::hash::{Hash, Hasher}; +use core::{ + fmt, + hash::{Hash, Hasher}, +}; use super::{Label, Name}; @@ -257,4 +260,14 @@ impl AsRef<[u8]> for RelName { } /// An error in constructing a [`RelName`]. +#[derive(Clone, Debug)] pub struct RelNameError; + +impl fmt::Display for RelNameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not parse a relative domain name") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for RelNameError {} From 06ac4d239fcd45b9dab370fe3d3e0e2106b1e391 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Tue, 8 Oct 2024 13:02:38 +0200 Subject: [PATCH 06/21] [new_name/label] Add 'is_internationalized()' --- src/base/new_name/label.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 337cbd0c2..36de8a5f8 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -79,6 +79,15 @@ impl Label { pub const fn as_bytes(&self) -> &[u8] { &self.0 } + + /// Whether this is an internationalized label. + /// + /// If the label begins with the ACE (ASCII Compatible Encoding) prefix + /// `xn--`, it is assumed to be a Unicode string encoded into ASCII using + /// the Nameprep and Punycode algorithms. 
+ pub fn is_internationalized(&self) -> bool { + self.as_bytes().starts_with(b"xn--") + } } impl Label { From 68d2a5cc25aa64beb8658f4e46462b979da4eb17 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Tue, 8 Oct 2024 13:02:57 +0200 Subject: [PATCH 07/21] [new_name/absolute] fix rustdoc typo --- src/base/new_name/absolute.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index e9ab633e8..db75df40c 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -345,7 +345,7 @@ impl AsRef<[u8]> for Name { } } -/// An error in costructing a [`Name`]. +/// An error in constructing a [`Name`]. #[derive(Clone, Debug)] pub struct NameError; From 93aee1fcf1e953e4fe343cf430b3f1a45b9065d2 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Tue, 8 Oct 2024 13:03:09 +0200 Subject: [PATCH 08/21] [new_name] Implement 'NameBuilder' Unlike the existing 'NameBuilder', this type uses a fixed-size buffer to write the name into. This results in simpler code and it should be more efficient. It provides simple methods to extract domain names by borrowing from the buffer instead of allocating. This is a rewrite of . --- src/base/new_name/builder.rs | 383 +++++++++++++++++++++++++++++++++++ src/base/new_name/mod.rs | 3 + 2 files changed, 386 insertions(+) create mode 100644 src/base/new_name/builder.rs diff --git a/src/base/new_name/builder.rs b/src/base/new_name/builder.rs new file mode 100644 index 000000000..106b2efb6 --- /dev/null +++ b/src/base/new_name/builder.rs @@ -0,0 +1,383 @@ +use super::{Label, Name, NameError, RelName}; + +/// An incremental builder for domain names. +/// +/// This type can be used to build up a domain name from a sequence of labels or +/// even simple bytes. It can be used to concatenate or modify domain names. +/// +/// The name is written into a 256-byte buffer. This buffer can be placed on +/// the stack as it will not grow dynamically. 
By requiring the buffer to have +/// a fixed size, the builder is simpler and faster. +/// +/// # Building Labels +/// +/// The builder may be in the middle of constructing a label. It reaches this +/// state using [`begin_label()`], it must be in it to call [`write_slice()`], +/// and it exits this state using [`end_label()`]. At initialization, no label +/// is being built, so [`begin_label()`] must be called. +/// +/// [`begin_label()`]: Self::begin_label() +/// [`write_slice()`]: Self::write_slice() +/// [`end_label()`]: Self::end_label() +/// +/// Importantly, the root label (which is empty) cannot be ended explicitly. To +/// write a root label, begin a new label by calling [`begin_label()`] and then +/// do not call [`end_label()`], directly going to [`get_name()`]. +/// +/// [`get_name()`]: Self::get_name() +/// +/// When the builder is not in the middle of a label, convenience methods like +/// [`write_label()`], [`write_rel_name()`], and [`write_name()`] are available. +/// +/// [`write_label()`]: Self::write_label() +/// [`write_rel_name()`]: Self::write_rel_name() +/// [`write_name()`]: Self::write_name() +#[derive(Clone, Default)] +pub struct NameBuilder { + /// The offset the next byte will be written to. + /// + /// Invariants: + /// + /// - `write_offset <= 255` + write_offset: u8, + + /// The offset of the current label. + /// + /// This is the position of the length byte of the current label. + /// + /// Invariants: + /// + /// - `label_offset < 255` + /// - `label_offset <= write_offset` + /// - if `label_offset < write_offset`: + /// - `buffer[label_offset] == 0` + /// - `write_offset - label_offset <= 64` + label_offset: u8, + + /// The name being constructed. + buffer: Buffer, +} + +/// # Initialization +impl NameBuilder { + /// Construct a new [`NameBuilder`] over the given buffer. + /// + /// Any existing contents of the buffer will be overwritten by the builder + /// (either upon this function call or later). 
They should not be relied + /// upon. + #[must_use] + pub const fn new(buffer: Buffer) -> Self { + // TODO: Do we want to zero the buffer already? Check benchmarks. + + Self { + write_offset: 0, + label_offset: 0, + buffer, + } + } +} + +/// # Inspection +impl> NameBuilder { + /// Whether a label is currently being built. + /// + /// This is true in the time between [`begin_label()`] and [`end_label()`]. + /// + /// [`begin_label()`]: Self::begin_label() + /// [`end_label()`]: Self::end_label() + pub fn mid_label(&self) -> bool { + self.label_offset < self.write_offset + } + + /// The label currently being built, if any. + pub fn cur_label(&self) -> Option<&Label> { + if self.label_offset < self.write_offset { + let start = self.label_offset as usize + 1; + let end = self.write_offset as usize; + let label = &self.buffer.as_ref()[start..end]; + // SAFETY: The label is built correctly. + Some(unsafe { Label::from_bytes_unchecked(label) }) + } else { + None + } + } + + /// The length of the name built this far. + /// + /// This does not include the length of any partially-built label. + pub fn len(&self) -> usize { + self.write_offset as usize + } + + /// The length of the name built this far. + /// + /// This includes the length of any partially-built label. + pub fn total_len(&self) -> usize { + self.write_offset as usize + } + + /// The name built thus far. + /// + /// This does not include any partially-built label. + pub fn cur_name(&self) -> &RelName { + let name = &self.buffer.as_ref()[..self.label_offset as usize]; + // SAFETY: The name is built correctly. + unsafe { RelName::from_bytes_unchecked(name) } + } +} + +/// # Construction +impl> NameBuilder { + /// Begin a new label. + /// + /// A length byte for the label is allocated in the buffer. It is left + /// uninitialized -- it will be written to once the entire label is ready. + /// + /// # Panics + /// + /// Panics if a label is already being built. 
+ pub fn begin_label(&mut self) { + assert!( + self.label_offset == self.write_offset, + "begin_label() was called before a previous label was ended" + ); + + // In case of the root label, 'end_label()' will not be called; we need + // to set the length byte to 0 right now as we won't have a chance to do + // it later. + self.buffer.as_mut()[self.write_offset as usize] = 0; + + // Since 'label_offset < 255', this will not overflow. + self.write_offset += 1; + } + + /// Write a slice to a label. + /// + /// # Errors + /// + /// Fails if the bytes being written would make the label too long (64 bytes + /// or more) or the entire name too long (256 bytes or more, including the + /// 1-byte root label, which will follow the current bytes). + /// + /// # Panics + /// + /// Panics if no label is being built. + pub fn write_slice(&mut self, data: &[u8]) -> Result<(), OverlongError> { + assert!( + self.label_offset < self.write_offset, + "begin_label() must be called before using write_slice()" + ); + + if self.write_offset as usize + data.len() + >= self.label_offset as usize + 1 + 64 + { + // The label would become 64 bytes or larger. + return Err(OverlongError); + } else if self.write_offset as usize + data.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + let buffer = &mut self.buffer.as_mut()[self.write_offset as usize..]; + buffer[..data.len()].copy_from_slice(data); + self.write_offset += data.len() as u8; + + Ok(()) + } + + /// End a label being built. + /// + /// The length byte for the label will be updated. + /// + /// This must not be called on the root label. + /// + /// # Errors + /// + /// Fails if the label being built is empty. + /// + /// # Panics + /// + /// Panics if no label is being built. 
+ pub fn end_label(&mut self) -> Result<(), EmptyLabelError> { + assert!( + self.label_offset < self.write_offset, + "begin_label() must be called before using end_label()" + ); + + if self.write_offset <= self.label_offset + 1 { + return Err(EmptyLabelError); + } + + let len = self.write_offset - (self.label_offset + 1); + self.buffer.as_mut()[self.label_offset as usize] = len; + self.label_offset = self.write_offset; + + Ok(()) + } +} + +/// # Convenience Methods +impl> NameBuilder { + /// Append a whole label to the name. + /// + /// This will start a new label, write the provided label into the buffer, + /// and complete it immediately. It is convenient to use if a whole label + /// is to be written, rather than being built incrementally. + /// + /// # Errors + /// + /// Fails if the label is empty or if adding the label would make the domain + /// name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. + pub fn write_label(&mut self, label: &Label) -> Result<(), BuildError> { + assert!( + self.label_offset == self.write_offset, + "write_label() was called before a previous label was ended" + ); + + if self.write_offset as usize + 1 + label.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError.into()); + } + + let buffer = &mut self.buffer.as_mut()[self.write_offset as usize..]; + buffer[0] = label.len() as u8; + buffer[1..1 + label.len()].copy_from_slice(label.as_bytes()); + self.write_offset += 1 + label.len() as u8; + self.label_offset += 1 + label.len() as u8; + + Ok(()) + } + + /// Append a relative name to the name. + /// + /// # Errors + /// + /// Fails if appending the given name would make the domain name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. 
+ pub fn write_rel_name( + &mut self, + name: &RelName, + ) -> Result<(), OverlongError> { + assert!( + self.label_offset == self.write_offset, + "write_rel_name() was called before a previous label was ended" + ); + + if self.write_offset as usize + name.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + self.buffer.as_mut()[self.write_offset as usize..][..name.len()] + .copy_from_slice(name.as_bytes()); + self.write_offset += name.len() as u8; + self.label_offset += name.len() as u8; + + Ok(()) + } + + /// Append an absolute name to the name. + /// + /// # Errors + /// + /// Fails if appending the given name would make the domain name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. + pub fn write_name(&mut self, name: &Name) -> Result<(), OverlongError> { + assert!( + self.label_offset == self.write_offset, + "write_name() was called before a previous label was ended" + ); + + if self.write_offset as usize + name.len() > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + self.buffer.as_mut()[self.write_offset as usize..][..name.len()] + .copy_from_slice(name.as_bytes()); + self.write_offset += name.len() as u8; + self.label_offset += name.len() as u8 - 1; + + Ok(()) + } +} + +/// # Extraction +impl> NameBuilder { + /// Extract an absolute domain name. + /// + /// # Errors + /// + /// If a root label is not present, a [`NameError`] is returned. + /// + /// # Panics + /// + /// Panics if a label (except the root label) is still being built. + pub fn get_name(&self) -> Result<&Name, NameError> { + assert!( + self.write_offset <= self.label_offset + 1, + "get_name() was called before a previous label was ended" + ); + + if self.write_offset != self.label_offset + 1 { + return Err(NameError); + } + + let name = &self.buffer.as_ref()[..self.write_offset as usize]; + // SAFETY: The name is built correctly. 
+ Ok(unsafe { Name::from_bytes_unchecked(name) }) + } + + /// Extract a relative domain name. + /// + /// # Panics + /// + /// Panics if a label (including the root label) is still being built. + pub fn get_rel_name(&self) -> &RelName { + assert!( + self.write_offset <= self.label_offset + 1, + "get_rel_name() was called before a previous label was ended" + ); + + let name = &self.buffer.as_ref()[..self.write_offset as usize]; + // SAFETY: The name is built correctly. + unsafe { RelName::from_bytes_unchecked(name) } + } +} + +/// An error in building a domain name. +pub enum BuildError { + /// A domain name or label was too long. + Overlong(OverlongError), + + /// A (non-root) label was empty. + EmptyLabel(EmptyLabelError), +} + +impl From for BuildError { + fn from(value: OverlongError) -> Self { + Self::Overlong(value) + } +} + +impl From for BuildError { + fn from(value: EmptyLabelError) -> Self { + Self::EmptyLabel(value) + } +} + +/// A domain name or label was too long. +pub struct OverlongError; + +/// A (non-root) label was empty. 
+pub struct EmptyLabelError; diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index 2a1a4dd1e..00ca2ac34 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -13,3 +13,6 @@ pub use relative::{RelName, RelNameError}; mod label; pub use label::{Label, LabelError}; + +mod builder; +pub use builder::NameBuilder; From 5954d7a26d36064be705892e960336a8401695b5 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 10 Oct 2024 11:09:29 +0200 Subject: [PATCH 09/21] [base/new_name] impl conversion traits --- src/base/new_name/absolute.rs | 14 ++++++++++++++ src/base/new_name/label.rs | 14 ++++++++++++++ src/base/new_name/relative.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index db75df40c..e3efdcdcc 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -345,6 +345,20 @@ impl AsRef<[u8]> for Name { } } +impl<'a> TryFrom<&'a [u8]> for &'a Name { + type Error = NameError; + + fn try_from(bytes: &'a [u8]) -> Result { + Name::from_bytes(bytes) + } +} + +impl<'a> From<&'a Name> for &'a [u8] { + fn from(name: &'a Name) -> Self { + name.as_bytes() + } +} + /// An error in constructing a [`Name`]. #[derive(Clone, Debug)] pub struct NameError; diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 36de8a5f8..3e7950e57 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -176,6 +176,20 @@ impl AsRef<[u8]> for Label { } } +impl<'a> TryFrom<&'a [u8]> for &'a Label { + type Error = LabelError; + + fn try_from(bytes: &'a [u8]) -> Result { + Label::from_bytes(bytes) + } +} + +impl<'a> From<&'a Label> for &'a [u8] { + fn from(label: &'a Label) -> Self { + label.as_bytes() + } +} + /// An error in constructing a [`Label`]. 
#[derive(Clone, Debug)] pub struct LabelError; diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index f1267578a..81c3ee8f6 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -59,6 +59,14 @@ impl RelName { // SAFETY: 'bytes' has been confirmed to be correctly encoded. Ok(unsafe { Self::from_bytes_unchecked(bytes) }) } + + /// Treat a single label as a [`RelName`]. + /// + /// Runtime: `O(1)`. + pub fn from_label(label: &Label) -> &Self { + // SAFETY: 'bytes' is a valid label and always fits within a name. + unsafe { Self::from_bytes_unchecked(label.as_bytes()) } + } } impl RelName { @@ -259,6 +267,26 @@ impl AsRef<[u8]> for RelName { } } +impl<'a> From<&'a Label> for &'a RelName { + fn from(label: &'a Label) -> Self { + RelName::from_label(label) + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a RelName { + type Error = RelNameError; + + fn try_from(bytes: &'a [u8]) -> Result { + RelName::from_bytes(bytes) + } +} + +impl<'a> From<&'a RelName> for &'a [u8] { + fn from(name: &'a RelName) -> Self { + name.as_bytes() + } +} + /// An error in constructing a [`RelName`]. #[derive(Clone, Debug)] pub struct RelNameError; From 97030db7d89c466b16366aeb5eecd42afda25f67 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 10 Oct 2024 11:10:49 +0200 Subject: [PATCH 10/21] [base/new_name/label] fix missing doc links --- src/base/new_name/label.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 3e7950e57..634bcff19 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -155,6 +155,9 @@ impl Hash for Label { /// of these types will have the same hash as the concatenation of the /// labels. /// + /// [`Name`]: super::Name + /// [`RelName`]: super::RelName + /// /// Runtime: `O(self.len())`. fn hash(&self, state: &mut H) { // Individual labels and names should hash in the same way. 
From 148dcfe3dea6b4e1b2f33b3e5431efe17fbd0f9c Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 10 Oct 2024 12:14:12 +0200 Subject: [PATCH 11/21] [base/new_name] Add 'Labels' for iteration --- src/base/new_name/absolute.rs | 21 ++++++++++++++++++- src/base/new_name/labels.rs | 38 +++++++++++++++++++++++++++++++++++ src/base/new_name/mod.rs | 3 +++ src/base/new_name/relative.rs | 19 +++++++++++++++++- 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 src/base/new_name/labels.rs diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index e3efdcdcc..a49d1287c 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -4,7 +4,7 @@ use core::{ iter, }; -use super::{Label, RelName}; +use super::{Label, Labels, RelName}; /// An absolute domain name. #[repr(transparent)] @@ -101,6 +101,16 @@ impl Name { unsafe { RelName::from_bytes_unchecked(bytes) } } + /// The labels in this name. + /// + /// The root label is included in the iterator. + /// + /// Runtime: `O(1)`. Each step of the iterator has runtime `O(1)` too. + pub const fn labels(&self) -> Labels<'_> { + // SAFETY: This is a valid absolute name. + unsafe { Labels::from_bytes_unchecked(self.as_bytes()) } + } + /// Whether this name starts with a particular relative name. /// /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. @@ -359,6 +369,15 @@ impl<'a> From<&'a Name> for &'a [u8] { } } +impl<'a> IntoIterator for &'a Name { + type Item = &'a Label; + type IntoIter = Labels<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.labels() + } +} + /// An error in constructing a [`Name`]. #[derive(Clone, Debug)] pub struct NameError; diff --git a/src/base/new_name/labels.rs b/src/base/new_name/labels.rs new file mode 100644 index 000000000..cd1a7d209 --- /dev/null +++ b/src/base/new_name/labels.rs @@ -0,0 +1,38 @@ +use super::Label; + +/// An iterator over the labels in a name. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Labels<'a> { + /// The absolute or relative name. + name: &'a [u8], +} + +impl<'a> Labels<'a> { + /// Assume a byte string contains valid labels. + /// + /// # Safety + /// + /// The byte string must be a valid absolute or relative domain name. + pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { + Self { name: bytes } + } + + /// Retrieve the remaining bytes to be iterated over. + pub const fn remaining(&self) -> &'a [u8] { + self.name + } +} + +impl<'a> Iterator for Labels<'a> { + type Item = &'a Label; + + fn next(&mut self) -> Option { + // Based on 'Label::split_off()'. + let (&size, name) = self.name.split_first()?; + let (label, name) = name.split_at(size as usize); + self.name = name; + + // SAFETY: 'label' is from a valid name. + Some(unsafe { Label::from_bytes_unchecked(label) }) + } +} diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index 00ca2ac34..6d53757c4 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -14,5 +14,8 @@ pub use relative::{RelName, RelNameError}; mod label; pub use label::{Label, LabelError}; +mod labels; +pub use labels::Labels; + mod builder; pub use builder::NameBuilder; diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index 81c3ee8f6..d2a3ec723 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -3,7 +3,7 @@ use core::{ hash::{Hash, Hasher}, }; -use super::{Label, Name}; +use super::{Label, Labels, Name}; /// A relative domain name. #[repr(transparent)] @@ -103,6 +103,14 @@ impl RelName { Some(unsafe { Self::from_bytes_unchecked(bytes) }) } + /// The labels in this name. + /// + /// Runtime: `O(1)`. Each step of the iterator has runtime `O(1)` too. + pub const fn labels(&self) -> Labels<'_> { + // SAFETY: This is a valid relative name. + unsafe { Labels::from_bytes_unchecked(self.as_bytes()) } + } + /// Whether this name starts with a particular relative name. 
/// /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. @@ -287,6 +295,15 @@ impl<'a> From<&'a RelName> for &'a [u8] { } } +impl<'a> IntoIterator for &'a RelName { + type Item = &'a Label; + type IntoIter = Labels<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.labels() + } +} + /// An error in constructing a [`RelName`]. #[derive(Clone, Debug)] pub struct RelNameError; From 4e2da526ec3371f169fb234f7321237f5f570a77 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 10 Oct 2024 12:15:14 +0200 Subject: [PATCH 12/21] [base/new_name/builder] Fix bug in 'len()' It was returning the value of 'total_len()'. Also fixed a clippy warning. --- src/base/new_name/builder.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base/new_name/builder.rs b/src/base/new_name/builder.rs index 106b2efb6..61bf6e614 100644 --- a/src/base/new_name/builder.rs +++ b/src/base/new_name/builder.rs @@ -105,8 +105,9 @@ impl> NameBuilder { /// The length of the name built this far. /// /// This does not include the length of any partially-built label. + #[allow(clippy::len_without_is_empty)] pub fn len(&self) -> usize { - self.write_offset as usize + self.label_offset as usize } /// The length of the name built this far. From aa70192bebbe74e9faa7c96f7450316ccf154e0b Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 10 Oct 2024 18:03:48 +0200 Subject: [PATCH 13/21] [base/new_name] Implement 'UncertainName' I also fixed a few bugs in the existing name methods. 
--- src/base/new_name/absolute.rs | 4 +- src/base/new_name/mod.rs | 3 + src/base/new_name/relative.rs | 84 ++++++- src/base/new_name/uncertain.rs | 405 +++++++++++++++++++++++++++++++++ 4 files changed, 492 insertions(+), 4 deletions(-) create mode 100644 src/base/new_name/uncertain.rs diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index a49d1287c..9754265cc 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -166,7 +166,7 @@ impl Name { } let bytes = self.as_bytes(); - let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + let (label, rest) = bytes[1..].split_at(bytes[0] as usize); // SAFETY: 'self' only contains valid labels. let label = unsafe { Label::from_bytes_unchecked(label) }; @@ -275,7 +275,7 @@ impl Ord for Name { let this_iter = self.as_bytes().iter().rev(); let that_iter = that.as_bytes().iter().rev(); let suffix = iter::zip(this_iter, that_iter) - .position(|(l, r)| l.eq_ignore_ascii_case(r)); + .position(|(l, r)| !l.eq_ignore_ascii_case(r)); if let Some(suffix) = suffix { // Iterate through the labels in both names until both have a tail diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index 6d53757c4..c4b3efa3f 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -11,6 +11,9 @@ pub use absolute::{Name, NameError}; mod relative; pub use relative::{RelName, RelNameError}; +mod uncertain; +pub use uncertain::{UncertainName, UncertainNameError}; + mod label; pub use label::{Label, LabelError}; diff --git a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index d2a3ec723..b984e4aeb 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -1,6 +1,7 @@ use core::{ - fmt, + cmp, fmt, hash::{Hash, Hasher}, + iter, }; use super::{Label, Labels, Name}; @@ -166,7 +167,7 @@ impl RelName { } let bytes = self.as_bytes(); - let (label, rest) = bytes[1..].split_at(1 + bytes[0] as usize); + let (label, rest) = 
bytes[1..].split_at(bytes[0] as usize); // SAFETY: 'self' only contains valid labels. let label = unsafe { Label::from_bytes_unchecked(label) }; @@ -244,6 +245,85 @@ impl PartialEq for RelName { impl Eq for RelName {} +impl PartialOrd for RelName { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for RelName { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. + fn cmp(&self, that: &Self) -> cmp::Ordering { + // We want to find a shared suffix between the two names, and the labels + // immediately before that shared suffix. However, we can't determine + // label boundaries when working backward. So, we find a shared suffix + // (even if it crosses partially between labels), then iterate through + // both names until we find their label boundaries up to the suffix. + + let this_iter = self.as_bytes().iter().rev(); + let that_iter = that.as_bytes().iter().rev(); + let suffix = iter::zip(this_iter, that_iter) + .position(|(l, r)| !l.eq_ignore_ascii_case(r)); + + if let Some(suffix) = suffix { + // Iterate through the labels in both names until both have a tail + // of equal size within the shared suffix we found. + + // SAFETY: At least one unequal byte exists in both names, and it + // cannot be the root label, so there must be at least one non-root + // label in both names. 
+ let (mut this_head, mut this_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + let (mut that_head, mut that_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + + loop { + let (this_len, that_len) = (this_tail.len(), that_tail.len()); + + if this_len == that_len && this_len < suffix { + // We have found the shared suffix of labels. Now, we must + // have two unequal head labels; we compare them (ASCII case + // insensitively). + break Ord::cmp(this_head, that_head); + } + + // If one tail is longer than the other, it will be shortened. + // Any tail longer than the suffix will also be shortened. + + if this_len > that_len || this_len > suffix { + // SAFETY: 'this_tail' has strictly more than one byte. + (this_head, this_tail) = + unsafe { this_tail.split_first().unwrap_unchecked() }; + } + + if that_len > this_len || that_len > suffix { + // SAFETY: 'that_tail' has strictly more than one byte. + (that_head, that_tail) = + unsafe { that_tail.split_first().unwrap_unchecked() }; + } + } + } else { + // The shorter name is a suffix of the longer one. If the names are + // of equal length, they are equal; otherwise, the longer one has + // more labels, and is greater than the shorter one. + Ord::cmp(&self.len(), &that.len()) + } + } +} + impl Hash for RelName { /// Hash this label by its canonical value. /// diff --git a/src/base/new_name/uncertain.rs b/src/base/new_name/uncertain.rs new file mode 100644 index 000000000..586d192d1 --- /dev/null +++ b/src/base/new_name/uncertain.rs @@ -0,0 +1,405 @@ +use core::{ + cmp, fmt, + hash::{Hash, Hasher}, + iter, +}; + +use super::{Label, Labels, Name, RelName}; + +/// An absolute domain name. +#[repr(transparent)] +pub struct UncertainName([u8]); + +impl UncertainName { + /// The maximum size of an uncertain domain name in the wire format. + pub const MAX_SIZE: usize = 255; + + /// The root name. 
+ pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; +} + +impl UncertainName { + /// Assume a byte string is a valid [`UncertainName`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and within + /// the size restriction (255 bytes or fewer). If it is 255 bytes long, it + /// must end with a root label. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'UncertainName' is a 'repr(transparent)' wrapper around + // '[u8]', so casting a '[u8]' into an 'UncertainName' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`UncertainName`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire format. + /// If it is not properly encoded, an error is returned. + /// + /// Runtime: `O(bytes.len())`. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, UncertainNameError> { + if bytes.len() > Name::MAX_SIZE { + // Absolute or relative, this domain name is too long. + return Err(UncertainNameError); + } + + // Iterate through labels in the name. + let mut index = 0usize; + while index < bytes.len() { + let length = bytes[index]; + if length == 0 { + // Assume this was the end of the name. + index += 1; + break; + } else if length >= 64 { + // An invalid label length (or a compression pointer). + return Err(UncertainNameError); + } else { + // This was the length of the label, excluding the length octet. + index += 1 + length as usize; + } + } + + // We must land exactly at the end of the name, otherwise there was an + // empty label in the middle of the name, or the previous label reported + // a length that was too long. + if index != bytes.len() { + return Err(UncertainNameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl UncertainName { + /// The size of this name in the wire format. 
+ pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this name contains no labels at all. + pub const fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.len() == 1 + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If this is a + /// root name, [`None`] is returned. + /// + /// Runtime: `O(1)`. + pub fn parent(&self) -> Option<&Self> { + if self.is_empty() || self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// The labels in this name. + /// + /// The root label is included in the iterator. + /// + /// Runtime: `O(1)`. Each step of the iterator has runtime `O(1)` too. + pub const fn labels(&self) -> Labels<'_> { + // SAFETY: This is a valid absolute or relative name. + unsafe { Labels::from_bytes_unchecked(self.as_bytes()) } + } + + /// Whether this name starts with a particular relative name. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. + pub fn starts_with(&self, prefix: &RelName) -> bool { + if self.len() < prefix.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..prefix.len()] + .eq_ignore_ascii_case(prefix.as_bytes()) + } + + /// Whether this name ends with a particular absolute name. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. 
+ pub fn ends_with(&self, suffix: &Self) -> bool { + if self.len() < suffix.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. + + let mut index = 0usize; + let offset = self.len() - suffix.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(suffix.as_bytes()) + } +} + +impl UncertainName { + /// Split this name into a label and the rest. + /// + /// If this is the root name, [`None`] is returned. The returned label will + /// always be non-empty. + /// + /// Runtime: `O(1)`. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_empty() || self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. + let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 253 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. Otherwise, [`None`] is + /// returned. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. 
+ Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the rest + /// of the name without the suffix is returned. Otherwise, [`None`] is + /// returned. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. + pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + /// + /// Runtime: `O(self.len())`. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for UncertainName { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for UncertainName {} + +impl PartialOrd for UncertainName { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. 
+ /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// Absolute domain names come before relative ones. + /// + /// Runtime: `O(self.len() + that.len())`. + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for UncertainName { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// Absolute domain names come before relative ones. + /// + /// Runtime: `O(self.len() + that.len())`. + fn cmp(&self, that: &Self) -> cmp::Ordering { + // We want to find a shared suffix between the two names, and the labels + // immediately before that shared suffix. However, we can't determine + // label boundaries when working backward. So, we find a shared suffix + // (even if it crosses partially between labels), then iterate through + // both names until we find their label boundaries up to the suffix. + + let this_iter = self.as_bytes().iter().rev(); + let that_iter = that.as_bytes().iter().rev(); + let suffix = iter::zip(this_iter, that_iter) + .position(|(l, r)| !l.eq_ignore_ascii_case(r)); + + if let Some(suffix) = suffix { + // Iterate through the labels in both names until both have a tail + // of equal size within the shared suffix we found. + + // SAFETY: At least one unequal byte exists in both names, and it + // cannot be the root label, so there must be at least one non-root + // label in both names. 
+ let (mut this_head, mut this_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + let (mut that_head, mut that_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + + loop { + let (this_len, that_len) = (this_tail.len(), that_tail.len()); + + if this_len == that_len && this_len < suffix { + // We have found the shared suffix of labels. Now, we must + // have two unequal head labels; we compare them (ASCII case + // insensitively). + break Ord::cmp(this_head, that_head); + } + + // If one tail is longer than the other, it will be shortened. + // Any tail longer than the suffix will also be shortened. + + if this_len > that_len || this_len > suffix { + // SAFETY: 'this_tail' has strictly more than one byte. + (this_head, this_tail) = + unsafe { this_tail.split_first().unwrap_unchecked() }; + } + + if that_len > this_len || that_len > suffix { + // SAFETY: 'that_tail' has strictly more than one byte. + (that_head, that_tail) = + unsafe { that_tail.split_first().unwrap_unchecked() }; + } + } + } else { + // The shorter name is a suffix of the longer one. If the names are + // of equal length, they are equal; otherwise, the longer one has + // more labels, and is greater than the shorter one. + Ord::cmp(&self.len(), &that.len()) + } + } +} + +impl Hash for UncertainName { + /// Hash this label by its canonical value. + /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`RelName`] and [`Label`], so a tuple of any + /// of these types will have the same hash as the concatenation of the + /// labels. + /// + /// Runtime: `O(self.len())`. + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since + // they are always less than 64. As such, we don't need to iterate over + // the labels manually; we can just give them to the hasher as-is. 
+ + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + +impl AsRef<[u8]> for UncertainName { + /// The bytes in the name in the wire format. + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a UncertainName { + type Error = UncertainNameError; + + fn try_from(bytes: &'a [u8]) -> Result { + UncertainName::from_bytes(bytes) + } +} + +impl<'a> From<&'a UncertainName> for &'a [u8] { + fn from(name: &'a UncertainName) -> Self { + name.as_bytes() + } +} + +impl<'a> IntoIterator for &'a UncertainName { + type Item = &'a Label; + type IntoIter = Labels<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.labels() + } +} + +/// An error in constructing an [`UncertainName`]. +#[derive(Clone, Debug)] +pub struct UncertainNameError; + +impl fmt::Display for UncertainNameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not parse an absolute/relative domain name") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for UncertainNameError {} From 2ca8a335b539183138755b099a395af41902f8d9 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Fri, 11 Oct 2024 15:13:14 +0200 Subject: [PATCH 14/21] [base/new_name] Try out a custom 'Octets' and 'Owned' --- src/base/new_name/absolute.rs | 20 ++- src/base/new_name/label.rs | 20 +++ src/base/new_name/mod.rs | 11 +- src/base/new_name/octets.rs | 216 +++++++++++++++++++++++++++++++++ src/base/new_name/relative.rs | 20 ++- src/base/new_name/uncertain.rs | 20 ++- 6 files changed, 300 insertions(+), 7 deletions(-) create mode 100644 src/base/new_name/octets.rs diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index 9754265cc..0aa8c689c 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -4,7 +4,7 @@ use core::{ iter, 
}; -use super::{Label, Labels, RelName}; +use super::{Label, Labels, Octets, Owned, RelName, SmallOctets}; /// An absolute domain name. #[repr(transparent)] @@ -228,6 +228,21 @@ impl Name { } } +unsafe impl Octets for Name { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + Self::from_bytes_unchecked(bytes) + } + + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} + +unsafe impl SmallOctets for Name where + Buffer: AsRef<[u8; 256]> + AsRef<[u8]> +{ +} + impl PartialEq for Name { /// Compare labels by their canonical value. /// @@ -378,6 +393,9 @@ impl<'a> IntoIterator for &'a Name { } } +/// An owned [`Name`]. +pub type OwnedName = Owned<[u8; 256], Name>; + /// An error in constructing a [`Name`]. #[derive(Clone, Debug)] pub struct NameError; diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 634bcff19..4cd7f7c3a 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -4,6 +4,8 @@ use core::{ iter, }; +use super::{Octets, Owned, SmallOctets}; + /// A label in a domain name. #[repr(transparent)] pub struct Label([u8]); @@ -101,6 +103,21 @@ impl Label { } } +unsafe impl Octets for Label { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + Label::from_bytes_unchecked(bytes) + } + + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} + +unsafe impl SmallOctets for Label where + Buffer: AsRef<[u8; 64]> + AsRef<[u8]> +{ +} + impl PartialEq for Label { /// Compare labels by their canonical value. /// @@ -193,6 +210,9 @@ impl<'a> From<&'a Label> for &'a [u8] { } } +/// An owned label. +pub type OwnedLabel = Owned<[u8; 64], Label>; + /// An error in constructing a [`Label`]. #[derive(Clone, Debug)] pub struct LabelError; diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index c4b3efa3f..c67e440f8 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -6,19 +6,22 @@ //! stored in reverse order, from innermost to outermost. 
mod absolute; -pub use absolute::{Name, NameError}; +pub use absolute::{Name, NameError, OwnedName}; mod relative; -pub use relative::{RelName, RelNameError}; +pub use relative::{OwnedRelName, RelName, RelNameError}; mod uncertain; -pub use uncertain::{UncertainName, UncertainNameError}; +pub use uncertain::{OwnedUncertainName, UncertainName, UncertainNameError}; mod label; -pub use label::{Label, LabelError}; +pub use label::{Label, LabelError, OwnedLabel}; mod labels; pub use labels::Labels; mod builder; pub use builder::NameBuilder; + +mod octets; +pub use octets::{Octets, Owned, SmallOctets}; diff --git a/src/base/new_name/octets.rs b/src/base/new_name/octets.rs new file mode 100644 index 000000000..158de0034 --- /dev/null +++ b/src/base/new_name/octets.rs @@ -0,0 +1,216 @@ +use core::{ + borrow::Borrow, + cmp::Ordering, + hash::{Hash, Hasher}, + marker::PhantomData, + ops::Deref, +}; + +/// A type backed by a byte string. +/// +/// # Safety +/// +/// A type `T` can implement `Octets` if and only if: +/// +/// - It has the same layout as `[u8]` or `[u8; N]`. +/// - It can be safely transmuted into a byte slice. +/// - It can be fallibly transmuted from a byte slice. +pub unsafe trait Octets { + /// Assume a byte string is a valid instance of `Self`. + /// + /// # Safety + /// + /// This function can only be called if `bytes` is known to be a valid + /// instance of `Self` -- for example, if it was the result of `as_bytes()`. + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self; + + /// Access the byte string underlying `Self`. 
+ fn as_bytes(&self) -> &[u8]; +} + +unsafe impl Octets for [u8] { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + bytes + } + + fn as_bytes(&self) -> &[u8] { + self + } +} + +unsafe impl Octets for [u8; N] { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + bytes.try_into().unwrap_unchecked() + } + + fn as_bytes(&self) -> &[u8] { + self + } +} + +unsafe impl Octets for str { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + core::str::from_utf8_unchecked(bytes) + } + + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} + +/// A bytes-backed type which can be stored in a buffer. +/// +/// This trait should be implemented by types which can be stored in buffers of +/// some fixed size. Due to limitations in const generics, the size cannot be +/// expressed as an associated constant; implementing types should support any +/// buffer type that implements `AsRef<[u8; MAX_SIZE]>`. +/// +/// # Safety +/// +/// A type `T` can soundly implement `SmallOctets` if: +/// +/// - `Buffer` implements `AsRef<[u8; MAX_SIZE]>` for some `MAX_SIZE`. +/// - The size of `T` is always less than or equal to `MAX_SIZE`. +/// - `as_ref()` and `as_mut()` on `Buffer` always return the same slice. +pub unsafe trait SmallOctets>: Octets {} + +unsafe impl SmallOctets for [u8; N] where + Buffer: AsRef<[u8; N]> + AsRef<[u8]> +{ +} + +/// A byte string in a fixed-sized buffer. +pub struct Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + ?Sized, +{ + /// The underlying buffer. + buffer: Buffer, + + /// The size of the value, in bytes. + length: usize, + + /// The phantom representation of the value. + _value: PhantomData, +} + +impl Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + ?Sized, +{ + /// Copy a value into an owned buffer. 
+ pub fn copy_from(value: &T) -> Self + where + Buffer: Default + AsMut<[u8]>, + { + let bytes = value.as_bytes(); + let length = bytes.len(); + let mut buffer = Buffer::default(); + buffer.as_mut()[..length].copy_from_slice(bytes); + + Self { + buffer, + length, + _value: PhantomData, + } + } +} + +impl Clone for Owned +where + Buffer: Clone + AsRef<[u8]>, + T: SmallOctets + ?Sized, +{ + fn clone(&self) -> Self { + Self { + buffer: self.buffer.clone(), + length: self.length, + _value: PhantomData, + } + } +} + +impl AsRef for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + AsRef + ?Sized, + U: ?Sized, +{ + fn as_ref(&self) -> &U { + let bytes = &self.buffer.as_ref()[..self.length]; + unsafe { T::from_bytes_unchecked(bytes) }.as_ref() + } +} + +impl Borrow for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + ?Sized, +{ + fn borrow(&self) -> &T { + let bytes = &self.buffer.as_ref()[..self.length]; + unsafe { T::from_bytes_unchecked(bytes) } + } +} + +impl PartialEq for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + PartialEq + ?Sized, +{ + fn eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl Eq for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + Eq + ?Sized, +{ +} + +impl PartialOrd for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + PartialOrd + ?Sized, +{ + fn partial_cmp(&self, other: &Self) -> Option { + (**self).partial_cmp(&**other) + } +} + +impl Ord for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + Ord + ?Sized, +{ + fn cmp(&self, other: &Self) -> Ordering { + (**self).cmp(&**other) + } +} + +impl Hash for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + Hash + ?Sized, +{ + fn hash(&self, state: &mut H) { + (**self).hash(state) + } +} + +impl Deref for Owned +where + Buffer: AsRef<[u8]>, + T: SmallOctets + ?Sized, +{ + type Target = T; + + fn deref(&self) -> &Self::Target { + let bytes = &self.buffer.as_ref()[..self.length]; + unsafe { T::from_bytes_unchecked(bytes) } + } +} diff --git 
a/src/base/new_name/relative.rs b/src/base/new_name/relative.rs index b984e4aeb..82a8edc9c 100644 --- a/src/base/new_name/relative.rs +++ b/src/base/new_name/relative.rs @@ -4,7 +4,7 @@ use core::{ iter, }; -use super::{Label, Labels, Name}; +use super::{Label, Labels, Name, Octets, Owned, SmallOctets}; /// A relative domain name. #[repr(transparent)] @@ -229,6 +229,21 @@ impl RelName { } } +unsafe impl Octets for RelName { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + Self::from_bytes_unchecked(bytes) + } + + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} + +unsafe impl SmallOctets for RelName where + Buffer: AsRef<[u8; 256]> + AsRef<[u8]> +{ +} + impl PartialEq for RelName { /// Compare labels by their canonical value. /// @@ -384,6 +399,9 @@ impl<'a> IntoIterator for &'a RelName { } } +/// An owned [`RelName`]. +pub type OwnedRelName = Owned<[u8; 256], RelName>; + /// An error in constructing a [`RelName`]. #[derive(Clone, Debug)] pub struct RelNameError; diff --git a/src/base/new_name/uncertain.rs b/src/base/new_name/uncertain.rs index 586d192d1..d7e8ba5d5 100644 --- a/src/base/new_name/uncertain.rs +++ b/src/base/new_name/uncertain.rs @@ -4,7 +4,7 @@ use core::{ iter, }; -use super::{Label, Labels, Name, RelName}; +use super::{Label, Labels, Name, Octets, Owned, RelName, SmallOctets}; /// An absolute domain name. #[repr(transparent)] @@ -239,6 +239,21 @@ impl UncertainName { } } +unsafe impl Octets for UncertainName { + unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + Self::from_bytes_unchecked(bytes) + } + + fn as_bytes(&self) -> &[u8] { + self.as_bytes() + } +} + +unsafe impl SmallOctets for UncertainName where + Buffer: AsRef<[u8; 256]> + AsRef<[u8]> +{ +} + impl PartialEq for UncertainName { /// Compare labels by their canonical value. /// @@ -391,6 +406,9 @@ impl<'a> IntoIterator for &'a UncertainName { } } +/// An owned [`UncertainName`]. 
+pub type OwnedUncertainName = Owned<[u8; 256], UncertainName>; + /// An error in constructing an [`UncertainName`]. #[derive(Clone, Debug)] pub struct UncertainNameError; From 6614676e94472b64a31ee9991480f9af0068c57f Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Mon, 21 Oct 2024 17:31:05 +0200 Subject: [PATCH 15/21] [new_name/label] Add 'WILDCARD' and 'is_wildcard()' --- src/base/new_name/label.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 4cd7f7c3a..9b09a9a8b 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -15,7 +15,10 @@ impl Label { pub const MAX_SIZE: usize = 63; /// The root label. - pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[]) }; + pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(b"") }; + + /// The wildcard label. + pub const WILDCARD: &Self = unsafe { Self::from_bytes_unchecked(b"*") }; } impl Label { @@ -71,6 +74,12 @@ impl Label { self.0.is_empty() } + /// Whether this is the wildcard label. + pub const fn is_wildcard(&self) -> bool { + // NOTE: 'self.0 == *b"*"' is not const. + self.0.len() == 1 && self.0[0] == b'*' + } + /// The size of this name in the wire format. #[allow(clippy::len_without_is_empty)] pub const fn len(&self) -> usize { From 7e995b47ffab84f6868ecdd22c9776974e71e833 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Mon, 28 Oct 2024 11:57:48 +0100 Subject: [PATCH 16/21] [base/new_name] Use 'static for const references --- src/base/new_name/absolute.rs | 3 ++- src/base/new_name/label.rs | 6 ++++-- src/base/new_name/uncertain.rs | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index 0aa8c689c..2bad779f7 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -15,7 +15,8 @@ impl Name { pub const MAX_SIZE: usize = 255; /// The root name. 
- pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; + pub const ROOT: &'static Self = + unsafe { Self::from_bytes_unchecked(&[0u8]) }; } impl Name { diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 9b09a9a8b..9997154d1 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -15,10 +15,12 @@ impl Label { pub const MAX_SIZE: usize = 63; /// The root label. - pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(b"") }; + pub const ROOT: &'static Self = + unsafe { Self::from_bytes_unchecked(b"") }; /// The wildcard label. - pub const WILDCARD: &Self = unsafe { Self::from_bytes_unchecked(b"*") }; + pub const WILDCARD: &'static Self = + unsafe { Self::from_bytes_unchecked(b"*") }; } impl Label { diff --git a/src/base/new_name/uncertain.rs b/src/base/new_name/uncertain.rs index d7e8ba5d5..cfabe98ef 100644 --- a/src/base/new_name/uncertain.rs +++ b/src/base/new_name/uncertain.rs @@ -15,7 +15,8 @@ impl UncertainName { pub const MAX_SIZE: usize = 255; /// The root name. - pub const ROOT: &Self = unsafe { Self::from_bytes_unchecked(&[0u8]) }; + pub const ROOT: &'static Self = + unsafe { Self::from_bytes_unchecked(&[0u8]) }; } impl UncertainName { From 615c2ebadfa98ed8f7d0716f3193e596b2472228 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Mon, 28 Oct 2024 22:27:27 +0100 Subject: [PATCH 17/21] [new_name] Implement basic IDNA awareness An incomplete and untested Punycode decoder has also been added. --- src/base/new_name/idna.rs | 194 +++++++++++++++++++++++++++++++++++++ src/base/new_name/label.rs | 38 ++++++-- src/base/new_name/mod.rs | 2 + 3 files changed, 228 insertions(+), 6 deletions(-) create mode 100644 src/base/new_name/idna.rs diff --git a/src/base/new_name/idna.rs b/src/base/new_name/idna.rs new file mode 100644 index 000000000..0f11c7c20 --- /dev/null +++ b/src/base/new_name/idna.rs @@ -0,0 +1,194 @@ +//! International Domain Names in Applications. 
+ +use core::{fmt, str}; + +use super::Label; + +impl Label { + /// Whether this could be an A-label. + /// + /// An A-label is defined by [RFC 5890, section 2.3.2.1] to be the ASCII + /// encoding of an IDNA-valid label. This method tests that the current + /// label begins with the ACE (ASCII-Compatible Encoding) prefix `xn--`, + /// like all A-labels. It does not perform the more expensive validation + /// that the label can be decoded into a Unicode string. + /// + /// [RFC 5890, section 2.3.2.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.2.1 + pub fn has_ace_prefix(&self) -> bool { + self.len() >= 4 && self.as_bytes()[..4].eq_ignore_ascii_case(b"xn--") + } + + /// Decode this label into a Unicode string. + /// + /// If this is an A-label, the Punycode algorithm is applied to decode the + /// ASCII label contents into Unicode characters. If this is an NR-LDH + /// label, it is copied into the output verbatim. + pub fn to_unicode( + &self, + mut w: impl fmt::Write, + ) -> Result<(), DecodeError> { + // If this is an NR-LDH label, write it out and stop. + if self.is_nr_ldh() { + // SAFETY: The label consists of ASCII letters, digits, and + // hyphens, which never compose invalid UTF-8 strings. + w.write_str(unsafe { + str::from_utf8_unchecked(self.as_bytes()) + })?; + + return Ok(()); + } else if !self.has_ace_prefix() { + // This is an R-LDH or non-LDH label. + return Err(DecodeError::BadLabel); + } + + // This is an implementation of the Punycode algorithm as specified in + // RFC 3492 (https://datatracker.ietf.org/doc/html/rfc3492). A number + // of careful implementation decisions have been made in the interests + // of performance. + + // An A-label consists of at most 63 characters. The first 4 are the + // ACE prefix, 'xn--'. Assuming there are no ASCII characters to copy + // to the output, there are 59 encoded characters. 
If each character + // results in the output of a Unicode character, there are 59 Unicode + // characters (each at most 4 bytes) produced. Thus, we only have to + // contend with 59 characters at any time. + + let mut input = &self.as_bytes()[4..]; + + // TODO: I believe there is a linear algorithm for sorting output + // characters based on their positions. For now, however, a simple + // quadratic-time solution is used. + + // The decoder specifies where characters must be inserted in the + // output string. Inserting them immediately would yield quadratic + // runtime as characters following the insertion point would have to + // be copied every time. Instead, we record each character together + // with its insertion position; after decoding, these pairs are used + // to build the output (ideally in linear time; see the TODO above). + + let mut output_chars = ['\0'; 59]; + let mut output_indices = [0u8; 59]; + let mut output_len = 0; + + // Copy over any ASCII characters directly into the output. + + if let Some(num_ascii) = input.iter().rposition(|&b| b == b'-') { + for i in 0..num_ascii { + output_chars[i] = input[i] as char; + output_indices[i] = i as u8; + } + output_len += num_ascii; + input = &input[num_ascii + 1..]; + } + + if input.is_empty() { + // The ACE prefix shouldn't be used if there are no non-ASCII + // characters in the label. + return Err(DecodeError::BadLabel); + } + + // Determine the "digit-value" for every remaining character. + + let mut input_values = [0u8; 59]; + + for (i, &b) in input.iter().enumerate() { + // 'A'..'Z' => 0..25 + // 'a'..'z' => 0..25 + // '0'..'9' => 26..35 + if b.is_ascii_uppercase() { + input_values[i] = b - b'A'; + } else if b.is_ascii_lowercase() { + input_values[i] = b - b'a'; + } else if b.is_ascii_digit() { + input_values[i] = b - b'0' + 26; + } else { + return Err(DecodeError::BadLabel); + } + } + + let mut input = &input_values[..input.len()]; + + // Begin decoding Unicode characters.
+ + let mut n = 128u32; + let mut i = 0u32; + let mut bias = 72; + let mut first = true; + while !input.is_empty() { + // Find the end of the current variable-width integer. + let end = input + .iter() + .enumerate() + .position(|(k, &b)| { + let t = ((k + 1) * 36).saturating_sub(bias).clamp(1, 26); + b < t as u8 + }) + .ok_or(DecodeError::BadLabel)?; + input = &input[end + 1..]; + + // Compute the variable-width integer. + let int = input[..end] + .iter() + .enumerate() + .map(|(k, &v)| { + let t = ((k + 1) * 36).saturating_sub(bias).clamp(1, 26); + (t as u8, v) + }) + .try_rfold(input[end] as u32, |int, (t, v)| { + int.checked_mul(36 - t as u32)?.checked_add(v as u32) + }) + .ok_or(DecodeError::BadLabel)?; + + // Update the bias value. + bias = punycode_adapt(int, output_len as u8 + 1, first) as usize; + i = i.checked_add(int).ok_or(DecodeError::BadLabel)?; + + // Save the decoded position-character pair. + n += i / (output_len as u32 + 1); + i %= output_len as u32 + 1; + output_chars[output_len] = + char::try_from(n).map_err(|_| DecodeError::BadLabel)?; + output_indices[output_len] = i as u8; + output_len += 1; + + // Prepare for the next iteration. + first = false; + i += 1; + } + + // Build up the output string. + todo!() + } +} + +/// Adjust the Punycode transcoding bias. +fn punycode_adapt(mut delta: u32, length: u8, first: bool) -> u32 { + delta /= if first { 700 } else { 2 }; + delta += delta / length as u32; + let mut k = 0; + while delta > 455 { + delta /= 35; + k += 1; + } + k + (36 * delta) / (38 + delta) +} + +/// A decoding error. +pub enum DecodeError { + /// The label was not an NR-LDH label or an A-label. + /// + /// The label may have been: + /// - An R-LDH label (the decoding process is unknown). + /// - A non-LDH label (all ASCII, but not in the preferred name syntax). + /// - A non-ASCII label. + BadLabel, + + /// The output stream could not be written to. 
+ Fmt(fmt::Error), +} + +impl From for DecodeError { + fn from(value: fmt::Error) -> Self { + Self::Fmt(value) + } +} diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 9997154d1..76343cd3c 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -92,14 +92,40 @@ impl Label { pub const fn as_bytes(&self) -> &[u8] { &self.0 } +} - /// Whether this is an internationalized label. +impl Label { + /// Whether this is an LDH label. + /// + /// LDH ("letter-digit-hyphen") labels consist exclusively of ASCII letter + /// (A-Z, a-z), digit (0-9), and hyphen (-) characters, where labels begin + /// and end with non-hyphen characters. + /// + /// See [RFC 5890, section 2.3.1]. This is also known as the "preferred + /// name syntax" of [RFC 1034, section 3.5]. + /// + /// [RFC 5890, section 2.3.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.1 + /// [RFC 1034, section 3.5]: https://datatracker.ietf.org/doc/html/rfc1034#section-3.5 + pub fn is_ldh(&self) -> bool { + self.as_bytes() + .iter() + .all(|&b| b.is_ascii_alphanumeric() || b == b'-') + && !self.as_bytes().starts_with(b"-") + && !self.as_bytes().ends_with(b"-") + } + + /// Whether this is an NR-LDH label. + /// + /// A "non-reserved" LDH label is slightly stricter than an LDH label (see + /// [`is_ldh()`]); it further does not allow the third and fourth + /// characters to both be hyphens. A-labels (Unicode labels encoded into + /// ASCII) are not NR-LDH labels as they begin with `xn--`. + /// + /// See [RFC 5890, section 2.3.1]. /// - /// If the label begins with the ACE (ASCII Compatible Encoding) prefix - /// `xn--`, it is assumed to be a Unicode string encoded into ASCII using - /// the Nameprep and Punycode algorithms. 
- pub fn is_internationalized(&self) -> bool { - self.as_bytes().starts_with(b"xn--") + /// [RFC 5890, section 2.3.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.1 + pub fn is_nr_ldh(&self) -> bool { + self.is_ldh() && self.as_bytes().get(2..4) != Some(b"--") } } diff --git a/src/base/new_name/mod.rs b/src/base/new_name/mod.rs index c67e440f8..b91527f9c 100644 --- a/src/base/new_name/mod.rs +++ b/src/base/new_name/mod.rs @@ -17,6 +17,8 @@ pub use uncertain::{OwnedUncertainName, UncertainName, UncertainNameError}; mod label; pub use label::{Label, LabelError, OwnedLabel}; +mod idna; + mod labels; pub use labels::Labels; From f2761fb8222ce19d0306db08b178440242834162 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Tue, 29 Oct 2024 10:18:15 +0100 Subject: [PATCH 18/21] [idna] add tests for A-label decoding A basic quadratic-time output builder has been implemented. At the moment, no further validation of U-labels is performed; we need a way to represent U-labels (even owned ones) and to validate and encode them from there. --- src/base/new_name/idna.rs | 78 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/src/base/new_name/idna.rs b/src/base/new_name/idna.rs index 0f11c7c20..830e13d2d 100644 --- a/src/base/new_name/idna.rs +++ b/src/base/new_name/idna.rs @@ -124,7 +124,6 @@ impl Label { b < t as u8 }) .ok_or(DecodeError::BadLabel)?; - input = &input[end + 1..]; // Compute the variable-width integer. let int = input[..end] @@ -152,12 +151,25 @@ impl Label { output_len += 1; // Prepare for the next iteration. + input = &input[end + 1..]; first = false; i += 1; } - // Build up the output string. - todo!() + let mut output = ['\0'; 59]; + for i in 0..output_len { + let o = output_indices[i] as usize; + output.copy_within(o..i, o + 1); + output[o] = output_chars[i]; + } + + // TODO: Verify the properties of this U-label. 
+ + for i in 0..output_len { + w.write_char(output[i])?; + } + + Ok(()) } } @@ -168,12 +180,13 @@ fn punycode_adapt(mut delta: u32, length: u8, first: bool) -> u32 { let mut k = 0; while delta > 455 { delta /= 35; - k += 1; + k += 36; } k + (36 * delta) / (38 + delta) } /// A decoding error. +#[derive(Clone, Debug)] pub enum DecodeError { /// The label was not an NR-LDH label or an A-label. /// @@ -192,3 +205,60 @@ impl From for DecodeError { Self::Fmt(value) } } + +#[cfg(test)] +mod tests { + use std::string::String; + + use crate::base::new_name::Label; + + #[test] + fn rfc3492_samples() { + const A_LABELS: &[&[u8]] = &[ + b"xn--egbpdaj6bu4bxfgehfvwxn", + b"xn--ihqwcrb4cv8a8dqg056pqjye", + b"xn--ihqwctvzc91f659drss3x8bo0yb", + b"xn--Proprostnemluvesky-uyb24dma41a", + b"xn--4dbcagdahymbxekheh6e0a7fei0b", + b"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", + b"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", + b"xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l", + b"xn--PorqunopuedensimplementehablarenEspaol-fmd56a", + b"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", + b"xn--3B-ww4c5e180e575a65lsy2b", + b"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", + b"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b", + b"xn--2-u9tlzr9756bt3uc0v", + b"xn--MajiKoi5-783gue6qz075azm5e", + b"xn--de-jg4avhby1noc0d", + b"xn--d9juau41awczczp", + ]; + + const U_LABELS: &[&str] = &[ + "\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}", + "\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}", + "\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}", + "\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\u{0065}\u{0073}\u{006B}\u{0079}", + 
"\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\u{05D1}\u{05E8}\u{05D9}\u{05EA}", + "\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\u{0939}\u{0948}\u{0902}", + "\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}", + "\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\u{0438}", + "\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\u{0061}\u{00F1}\u{006F}\u{006C}", + "\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\u{0056}\u{0069}\u{1EC7}\u{0074}", + "\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}", + "\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}", + "\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}", + "\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}", + 
"\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\u{308B}\u{0035}\u{79D2}\u{524D}", + "\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}", + "\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}", + ]; + + for (&a, &u) in core::iter::zip(A_LABELS, U_LABELS) { + let a_label = Label::from_bytes(a).unwrap(); + let mut u_label = String::new(); + a_label.to_unicode(&mut u_label).unwrap(); + assert_eq!(&u_label, u); + } + } +} From ab65fd0efd5d949d4bf8b14ddc76a71197e2a2c7 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Tue, 29 Oct 2024 11:04:04 +0100 Subject: [PATCH 19/21] fix clippy warnings --- src/base/new_name/idna.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/base/new_name/idna.rs b/src/base/new_name/idna.rs index 830e13d2d..469c7744a 100644 --- a/src/base/new_name/idna.rs +++ b/src/base/new_name/idna.rs @@ -165,11 +165,10 @@ impl Label { // TODO: Verify the properties of this U-label. - for i in 0..output_len { - w.write_char(output[i])?; - } - - Ok(()) + output[..output_len] + .iter() + .try_for_each(|&c| w.write_char(c)) + .map_err(DecodeError::Fmt) } } From 94371d1dbcc7ffe09f114a85aba671c4601ab8d9 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 31 Oct 2024 10:35:11 +0100 Subject: [PATCH 20/21] [new_name/idna] Note 'std' requirement for tests --- src/base/new_name/idna.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/new_name/idna.rs b/src/base/new_name/idna.rs index 469c7744a..cc6c2abb1 100644 --- a/src/base/new_name/idna.rs +++ b/src/base/new_name/idna.rs @@ -205,7 +205,7 @@ impl From for DecodeError { } } -#[cfg(test)] +#[cfg(all(feature = "std", test))] mod tests { use std::string::String; From d25e482359d419724dd24d7c331553709fb64a68 Mon Sep 17 00:00:00 2001 From: arya dradjica Date: Thu, 31 Oct 2024 16:02:04 +0100 Subject: [PATCH 21/21] [new_name] Replace 'Owned' with manual '*Buf' types I wasn't comfortable with the 'Owned' 
paradigm, particularly due to the generic buffer parameter. It's easier to work with an explicit buffer type, and in some cases it may even have useful additional methods. --- src/base/new_name/absolute.rs | 193 +++++++++++++++++++++-------- src/base/new_name/label.rs | 141 +++++++++++++++++---- src/base/new_name/mod.rs | 11 +- src/base/new_name/octets.rs | 216 -------------------------------- src/base/new_name/relative.rs | 198 ++++++++++++++++++++++-------- src/base/new_name/uncertain.rs | 218 ++++++++++++++++++++++++--------- 6 files changed, 571 insertions(+), 406 deletions(-) delete mode 100644 src/base/new_name/octets.rs diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs index 2bad779f7..f861f2eb5 100644 --- a/src/base/new_name/absolute.rs +++ b/src/base/new_name/absolute.rs @@ -1,10 +1,12 @@ use core::{ + borrow::{Borrow, BorrowMut}, cmp, fmt, hash::{Hash, Hasher}, iter, + ops::{Deref, DerefMut}, }; -use super::{Label, Labels, Octets, Owned, RelName, SmallOctets}; +use super::{Label, Labels, RelName}; /// An absolute domain name. #[repr(transparent)] @@ -24,18 +26,32 @@ impl Name { /// /// # Safety /// - /// The byte string must be correctly encoded in the wire format, and within - /// the size restriction (255 bytes or fewer). It must be absolute. + /// The byte string must be correctly encoded in the wire format, and + /// within the size restriction (255 bytes or fewer). It must be + /// absolute. pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so // casting a '[u8]' into a 'Name' is sound. core::mem::transmute(bytes) } + /// Assume a mutable byte string is a valid [`Name`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and + /// within the size restriction (255 bytes or fewer). It must be + /// absolute. 
+ pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Name' is sound. + core::mem::transmute(bytes) + } + /// Try converting a byte string into a [`Name`]. /// - /// The byte string is confirmed to be correctly encoded in the wire format. - /// If it is not properly encoded, an error is returned. + /// The byte string is confirmed to be correctly encoded in the wire + /// format. If it is not properly encoded, an error is returned. /// /// Runtime: `O(bytes.len())`. pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { @@ -157,8 +173,8 @@ impl Name { impl Name { /// Split this name into a label and the rest. /// - /// If this is the root name, [`None`] is returned. The returned label will - /// always be non-empty. + /// If this is the root name, [`None`] is returned. The returned label + /// will always be non-empty. /// /// Runtime: `O(1)`. pub fn split_first(&self) -> Option<(&Label, &Self)> { @@ -180,8 +196,8 @@ impl Name { /// Strip a prefix from this name. /// /// If this name has the given prefix (see [`Self::starts_with()`]), the - /// rest of the name without the prefix is returned. Otherwise, [`None`] is - /// returned. + /// rest of the name without the prefix is returned. Otherwise, [`None`] + /// is returned. /// /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { @@ -199,9 +215,9 @@ impl Name { /// Strip a suffix from this name. /// - /// If this name has the given suffix (see [`Self::ends_with()`]), the rest - /// of the name without the suffix is returned. Otherwise, [`None`] is - /// returned. + /// If this name has the given suffix (see [`Self::ends_with()`]), the + /// rest of the name without the suffix is returned. Otherwise, [`None`] + /// is returned. /// /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. 
pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { @@ -229,26 +245,11 @@ impl Name { } } -unsafe impl Octets for Name { - unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - Self::from_bytes_unchecked(bytes) - } - - fn as_bytes(&self) -> &[u8] { - self.as_bytes() - } -} - -unsafe impl SmallOctets for Name where - Buffer: AsRef<[u8; 256]> + AsRef<[u8]> -{ -} - impl PartialEq for Name { /// Compare labels by their canonical value. /// - /// Canonicalized labels have uppercase ASCII characters lowercased, so this - /// function compares the two names ASCII-case-insensitively. + /// Canonicalized labels have uppercase ASCII characters lowercased, so + /// this function compares the two names case-insensitively. /// // Runtime: `O(self.len())`, which is equal to `O(that.len())`. fn eq(&self, that: &Self) -> bool { @@ -265,7 +266,7 @@ impl PartialOrd for Name { /// /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. /// Essentially, any shared suffix of labels is stripped away, and the - /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// remaining unequal label at the end is compared case-insensitively. /// /// Runtime: `O(self.len() + that.len())`. fn partial_cmp(&self, that: &Self) -> Option { @@ -278,15 +279,16 @@ impl Ord for Name { /// /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. /// Essentially, any shared suffix of labels is stripped away, and the - /// remaining unequal label at the end is compared ASCII-case-insensitively. + /// remaining unequal label at the end is compared case-insensitively. /// /// Runtime: `O(self.len() + that.len())`. fn cmp(&self, that: &Self) -> cmp::Ordering { - // We want to find a shared suffix between the two names, and the labels - // immediately before that shared suffix. However, we can't determine - // label boundaries when working backward. 
So, we find a shared suffix - // (even if it crosses partially between labels), then iterate through - // both names until we find their label boundaries up to the suffix. + // We want to find a shared suffix between the two names, and the + // labels immediately before that shared suffix. However, we can't + // determine label boundaries when working backward. So, we find a + // shared suffix (even if it crosses partially between labels), then + // iterate through both names until we find their label boundaries up + // to the suffix. let this_iter = self.as_bytes().iter().rev(); let that_iter = that.as_bytes().iter().rev(); @@ -298,8 +300,8 @@ impl Ord for Name { // of equal size within the shared suffix we found. // SAFETY: At least one unequal byte exists in both names, and it - // cannot be the root label, so there must be at least one non-root - // label in both names. + // cannot be the root label, so there must be at least one + // non-root label in both names. let (mut this_head, mut this_tail) = unsafe { self.split_first().unwrap_unchecked() }; let (mut that_head, mut that_tail) = @@ -309,9 +311,9 @@ impl Ord for Name { let (this_len, that_len) = (this_tail.len(), that_tail.len()); if this_len == that_len && this_len < suffix { - // We have found the shared suffix of labels. Now, we must - // have two unequal head labels; we compare them (ASCII case - // insensitively). + // We have found the shared suffix of labels. Now, we + // must have two unequal head labels; we compare them + // (ASCII case insensitively). break Ord::cmp(this_head, that_head); } @@ -331,9 +333,9 @@ impl Ord for Name { } } } else { - // The shorter name is a suffix of the longer one. If the names are - // of equal length, they are equal; otherwise, the longer one has - // more labels, and is greater than the shorter one. + // The shorter name is a suffix of the longer one. 
If the names + // are of equal length, they are equal; otherwise, the longer one + // has more labels, and is greater than the shorter one. Ord::cmp(&self.len(), &that.len()) } } @@ -345,15 +347,16 @@ impl Hash for Name { /// The hasher is provided with the labels in this name with ASCII /// characters lowercased. Each label is preceded by its length as `u8`. /// - /// The same scheme is used by [`RelName`] and [`Label`], so a tuple of any - /// of these types will have the same hash as the concatenation of the + /// The same scheme is used by [`RelName`] and [`Label`], so a tuple of + /// any of these types will have the same hash as the concatenation of the /// labels. /// /// Runtime: `O(self.len())`. fn hash(&self, state: &mut H) { - // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' since - // they are always less than 64. As such, we don't need to iterate over - // the labels manually; we can just give them to the hasher as-is. + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' + // since they are always less than 64. As such, we don't need to + // iterate over the labels manually; we can just give them to the + // hasher as-is. // The default 'std' hasher actually buffers 8 bytes of input before // processing them. There's no point trying to chunk the input here. @@ -394,8 +397,96 @@ impl<'a> IntoIterator for &'a Name { } } -/// An owned [`Name`]. -pub type OwnedName = Owned<[u8; 256], Name>; +/// A [`Name`] in a 256-byte buffer. +/// +/// This is a simple wrapper around a 256-byte buffer that stores a [`Name`] +/// within it. It can be used in situations where a [`Name`] must be placed +/// on the stack or within a `struct`, although it is also possible to store +/// [`Name`]s on the heap as `Box<Name>` or `Rc<Name>`. +#[derive(Clone)] +#[repr(transparent)] +pub struct NameBuf([u8; 256]); + +impl NameBuf { + /// Copy the given name.
+ pub fn copy(name: &Name) -> Self { + let mut buf = [0u8; 256]; + buf[1..1 + name.len()].copy_from_slice(name.as_bytes()); + buf[0] = name.len() as u8; + Self(buf) + } + + /// Overwrite this by copying in a different name. + /// + /// Any name contained in this buffer previously will be overwritten. + pub fn replace_with(&mut self, name: &Name) { + self.0[1..1 + name.len()].copy_from_slice(name.as_bytes()); + self.0[0] = name.len() as u8; + } +} + +impl NameBuf { + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0[0] as usize + } + + /// The wire format representation of the name. + pub fn as_bytes(&self) -> &[u8] { + &self.0[1..1 + self.len()] + } +} + +impl Deref for NameBuf { + type Target = Name; + + fn deref(&self) -> &Self::Target { + // SAFETY: 'NameBuf' always contains a valid name. + let len = self.len(); + let bytes = &self.0[1..1 + len]; + unsafe { Name::from_bytes_unchecked(bytes) } + } +} + +impl DerefMut for NameBuf { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: 'NameBuf' always contains a valid name. + let len = self.len(); + let bytes = &mut self.0[1..1 + len]; + unsafe { Name::from_bytes_unchecked_mut(bytes) } + } +} + +impl Borrow for NameBuf { + fn borrow(&self) -> &Name { + self + } +} + +impl BorrowMut for NameBuf { + fn borrow_mut(&mut self) -> &mut Name { + self + } +} + +impl AsRef for NameBuf { + fn as_ref(&self) -> &Name { + self + } +} + +impl AsMut for NameBuf { + fn as_mut(&mut self) -> &mut Name { + self + } +} + +impl From<&Name> for NameBuf { + fn from(value: &Name) -> Self { + Self::copy(value) + } +} /// An error in constructing a [`Name`]. 
#[derive(Clone, Debug)] diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs index 76343cd3c..e4dc17374 100644 --- a/src/base/new_name/label.rs +++ b/src/base/new_name/label.rs @@ -1,10 +1,12 @@ use core::{ + borrow::{Borrow, BorrowMut}, cmp, fmt, hash::{Hash, Hasher}, iter, + ops::{Deref, DerefMut}, }; -use super::{Octets, Owned, SmallOctets}; +use super::UncertainName; /// A label in a domain name. #[repr(transparent)] @@ -28,13 +30,26 @@ impl Label { /// /// # Safety /// - /// The byte string must be within the size restriction (63 bytes or fewer). + /// The byte string must be within the size restriction (63 bytes or + /// fewer). pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so // casting a '[u8]' into a 'Label' is sound. core::mem::transmute(bytes) } + /// Assume a mutable byte string is a valid [`Label`]. + /// + /// # Safety + /// + /// The byte string must be within the size restriction (63 bytes or + /// fewer). + pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Label' is sound. + core::mem::transmute(bytes) + } + /// Try converting a byte string into a [`Label`]. /// /// If the byte string is too long, an error is returned. @@ -117,7 +132,7 @@ impl Label { /// Whether this is an NR-LDH label. /// /// A "non-reserved" LDH label is slightly stricter than an LDH label (see - /// [`is_ldh()`]); it further does not allow the third and fourth + /// [`Self::is_ldh()`]); it further does not allow the third and fourth /// characters to both be hyphens. A-labels (Unicode labels encoded into /// ASCII) are not NR-LDH labels as they begin with `xn--`. 
/// @@ -140,21 +155,6 @@ impl Label { } } -unsafe impl Octets for Label { - unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - Label::from_bytes_unchecked(bytes) - } - - fn as_bytes(&self) -> &[u8] { - self.as_bytes() - } -} - -unsafe impl SmallOctets for Label where - Buffer: AsRef<[u8; 64]> + AsRef<[u8]> -{ -} - impl PartialEq for Label { /// Compare labels by their canonical value. /// @@ -219,10 +219,9 @@ impl Hash for Label { // The default 'std' hasher actually buffers 8 bytes of input before // processing them. There's no point trying to chunk the input here. - self.as_bytes() - .iter() - .map(|&b| b.to_ascii_lowercase()) - .for_each(|b| state.write_u8(b)); + for &b in self.as_bytes() { + state.write_u8(b.to_ascii_lowercase()); + } } } @@ -247,8 +246,102 @@ impl<'a> From<&'a Label> for &'a [u8] { } } -/// An owned label. -pub type OwnedLabel = Owned<[u8; 64], Label>; +/// A [`Label`] in a 64-byte buffer. +/// +/// This is a simple wrapper around a 64-byte buffer that stores a [`Label`] +/// within it. It can be used in situations where a [`Label`] must be placed +/// on the stack or within a `struct`, although it is also possible to store +/// [`Label`]s on the heap as `Box