diff --git a/src/base/mod.rs b/src/base/mod.rs index f242ad058..b29cd58d5 100644 --- a/src/base/mod.rs +++ b/src/base/mod.rs @@ -115,6 +115,7 @@ pub mod message; pub mod message_builder; pub mod name; pub mod net; +pub mod new_name; pub mod opt; pub mod question; pub mod rdata; diff --git a/src/base/new_name/absolute.rs b/src/base/new_name/absolute.rs new file mode 100644 index 000000000..f861f2eb5 --- /dev/null +++ b/src/base/new_name/absolute.rs @@ -0,0 +1,502 @@ +use core::{ + borrow::{Borrow, BorrowMut}, + cmp, fmt, + hash::{Hash, Hasher}, + iter, + ops::{Deref, DerefMut}, +}; + +use super::{Label, Labels, RelName}; + +/// An absolute domain name. +#[repr(transparent)] +pub struct Name([u8]); + +impl Name { + /// The maximum size of an absolute domain name in the wire format. + pub const MAX_SIZE: usize = 255; + + /// The root name. + pub const ROOT: &'static Self = + unsafe { Self::from_bytes_unchecked(&[0u8]) }; +} + +impl Name { + /// Assume a byte string is a valid [`Name`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and + /// within the size restriction (255 bytes or fewer). It must be + /// absolute. + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Name' is sound. + core::mem::transmute(bytes) + } + + /// Assume a mutable byte string is a valid [`Name`]. + /// + /// # Safety + /// + /// The byte string must be correctly encoded in the wire format, and + /// within the size restriction (255 bytes or fewer). It must be + /// absolute. + pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // SAFETY: 'Name' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Name' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Name`]. + /// + /// The byte string is confirmed to be correctly encoded in the wire + /// format. If it is not properly encoded, an error is returned. + /// + /// Runtime: `O(bytes.len())`. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, NameError> { + // Without the last byte, this should be a relative name. + let (root, rel_name) = bytes.split_last().ok_or(NameError)?; + + if RelName::from_bytes(rel_name).is_err() { + return Err(NameError); + } else if *root != 0u8 { + // The last byte must be a root label. + return Err(NameError); + } + + // SAFETY: 'bytes' has been confirmed to be correctly encoded. + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } +} + +impl Name { + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.len() == 1 + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// The parent of this name, if any. + /// + /// The name containing all but the first label is returned. If this is a + /// root name, [`None`] is returned. + /// + /// Runtime: `O(1)`. + pub fn parent(&self) -> Option<&Self> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let bytes = &bytes[1 + bytes[0] as usize..]; + + // SAFETY: 'bytes' is 253 bytes or smaller and has valid labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// The whole name without the root label. + /// + /// If this is a root name, an empty relative name is returned. + /// + /// This is equivalent to `self.strip_suffix(Name::ROOT).unwrap()`. + /// + /// Runtime: `O(1)`. + pub fn without_root(&self) -> &RelName { + let bytes = &self.as_bytes()[..self.len() - 1]; + // SAFETY: A slice of labels (as from 'self') is a relative name. + unsafe { RelName::from_bytes_unchecked(bytes) } + } + + /// The labels in this name. + /// + /// The root label is included in the iterator. + /// + /// Runtime: `O(1)`. Each step of the iterator has runtime `O(1)` too. + pub const fn labels(&self) -> Labels<'_> { + // SAFETY: This is a valid absolute name. + unsafe { Labels::from_bytes_unchecked(self.as_bytes()) } + } + + /// Whether this name starts with a particular relative name. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. + pub fn starts_with(&self, prefix: &RelName) -> bool { + if self.len() < prefix.len() { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[..prefix.len()] + .eq_ignore_ascii_case(prefix.as_bytes()) + } + + /// Whether this name ends with a particular absolute name. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. + pub fn ends_with(&self, suffix: &Self) -> bool { + if self.len() < suffix.len() { + return false; + } + + // We want to compare the last bytes of the current name to the given + // candidate. To do so, we need to ensure that those last bytes start + // at a valid label boundary. + + let mut index = 0usize; + let offset = self.len() - suffix.len(); + while index < offset { + index += 1 + self.0[index] as usize; + } + + if index != offset { + return false; + } + + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the byte strings as ASCII. + self.as_bytes()[offset..].eq_ignore_ascii_case(suffix.as_bytes()) + } +} + +impl Name { + /// Split this name into a label and the rest. + /// + /// If this is the root name, [`None`] is returned. The returned label + /// will always be non-empty. + /// + /// Runtime: `O(1)`. + pub fn split_first(&self) -> Option<(&Label, &Self)> { + if self.is_root() { + return None; + } + + let bytes = self.as_bytes(); + let (label, rest) = bytes[1..].split_at(bytes[0] as usize); + + // SAFETY: 'self' only contains valid labels. + let label = unsafe { Label::from_bytes_unchecked(label) }; + // SAFETY: 'rest' is 253 bytes or smaller and has valid labels. + let rest = unsafe { Self::from_bytes_unchecked(rest) }; + + Some((label, rest)) + } + + /// Strip a prefix from this name. + /// + /// If this name has the given prefix (see [`Self::starts_with()`]), the + /// rest of the name without the prefix is returned. Otherwise, [`None`] + /// is returned. + /// + /// Runtime: `O(prefix.len())`, which is less than `O(self.len())`. + pub fn strip_prefix<'a>(&'a self, prefix: &RelName) -> Option<&'a Self> { + if self.starts_with(prefix) { + let bytes = &self.as_bytes()[prefix.len()..]; + + // SAFETY: 'self' and 'prefix' consist of whole labels, and 'self' + // start with the same labels as 'prefix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Strip a suffix from this name. + /// + /// If this name has the given suffix (see [`Self::ends_with()`]), the + /// rest of the name without the suffix is returned. Otherwise, [`None`] + /// is returned. + /// + /// Runtime: `O(self.len())`, which is more than `O(suffix.len())`. + pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { + if self.ends_with(suffix) { + let bytes = &self.as_bytes()[..self.len() - suffix.len()]; + + // SAFETY: 'self' and 'suffix' consist of whole labels, and 'self' + // ended with the same labels as 'suffix'; removing those labels + // still leaves 'self' with whole labels. + Some(unsafe { Self::from_bytes_unchecked(bytes) }) + } else { + None + } + } + + /// Canonicalize this domain name. + /// + /// All uppercase ASCII characters in the name will be lowercased. + /// + /// Runtime: `O(self.len())`. + pub fn canonicalize(&mut self) { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Name { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so + /// this function compares the two names case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. + fn eq(&self, that: &Self) -> bool { + // Label lengths are never ASCII characters, because they start from + // byte value 65. So we can treat the entire byte string as ASCII. + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Name {} + +impl PartialOrd for Name { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Name { + /// Compare names according to the canonical ordering. + /// + /// The 'canonical DNS name order' is defined in RFC 4034, section 6.1. + /// Essentially, any shared suffix of labels is stripped away, and the + /// remaining unequal label at the end is compared case-insensitively. + /// + /// Runtime: `O(self.len() + that.len())`. + fn cmp(&self, that: &Self) -> cmp::Ordering { + // We want to find a shared suffix between the two names, and the + // labels immediately before that shared suffix. However, we can't + // determine label boundaries when working backward. So, we find a + // shared suffix (even if it crosses partially between labels), then + // iterate through both names until we find their label boundaries up + // to the suffix. + + let this_iter = self.as_bytes().iter().rev(); + let that_iter = that.as_bytes().iter().rev(); + let suffix = iter::zip(this_iter, that_iter) + .position(|(l, r)| !l.eq_ignore_ascii_case(r)); + + if let Some(suffix) = suffix { + // Iterate through the labels in both names until both have a tail + // of equal size within the shared suffix we found. + + // SAFETY: At least one unequal byte exists in both names, and it + // cannot be the root label, so there must be at least one + // non-root label in both names. + let (mut this_head, mut this_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + let (mut that_head, mut that_tail) = + unsafe { self.split_first().unwrap_unchecked() }; + + loop { + let (this_len, that_len) = (this_tail.len(), that_tail.len()); + + if this_len == that_len && this_len < suffix { + // We have found the shared suffix of labels. Now, we + // must have two unequal head labels; we compare them + // (ASCII case insensitively). + break Ord::cmp(this_head, that_head); + } + + // If one tail is longer than the other, it will be shortened. + // Any tail longer than the suffix will also be shortened. + + if this_len > that_len || this_len > suffix { + // SAFETY: 'this_tail' has strictly more than one byte. + (this_head, this_tail) = + unsafe { this_tail.split_first().unwrap_unchecked() }; + } + + if that_len > this_len || that_len > suffix { + // SAFETY: 'that_tail' has strictly more than one byte. + (that_head, that_tail) = + unsafe { that_tail.split_first().unwrap_unchecked() }; + } + } + } else { + // The shorter name is a suffix of the longer one. If the names + // are of equal length, they are equal; otherwise, the longer one + // has more labels, and is greater than the shorter one. + Ord::cmp(&self.len(), &that.len()) + } + } +} + +impl Hash for Name { + /// Hash this label by its canonical value. + /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`RelName`] and [`Label`], so a tuple of + /// any of these types will have the same hash as the concatenation of the + /// labels. + /// + /// Runtime: `O(self.len())`. + fn hash(&self, state: &mut H) { + // NOTE: Label lengths are not affected by 'to_ascii_lowercase()' + // since they are always less than 64. As such, we don't need to + // iterate over the labels manually; we can just give them to the + // hasher as-is. + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + self.as_bytes() + .iter() + .map(|&b| b.to_ascii_lowercase()) + .for_each(|b| state.write_u8(b)); + } +} + +impl AsRef<[u8]> for Name { + /// The bytes in the name in the wire format. + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a Name { + type Error = NameError; + + fn try_from(bytes: &'a [u8]) -> Result { + Name::from_bytes(bytes) + } +} + +impl<'a> From<&'a Name> for &'a [u8] { + fn from(name: &'a Name) -> Self { + name.as_bytes() + } +} + +impl<'a> IntoIterator for &'a Name { + type Item = &'a Label; + type IntoIter = Labels<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.labels() + } +} + +/// A [`Name`] in a 256-byte buffer. +/// +/// This is a simple wrapper around a 256-byte buffer that stores a [`Name`] +/// within it. It can be used in situations where a [`Name`] must be placed +/// on the stack or within a `struct`, although it is also possible to store +/// [`Name`]s on the heap as `Box` or `Rc`. +#[derive(Clone)] +#[repr(transparent)] +pub struct NameBuf([u8; 256]); + +impl NameBuf { + /// Copy the given name. + pub fn copy(name: &Name) -> Self { + let mut buf = [0u8; 256]; + buf[1..1 + name.len()].copy_from_slice(name.as_bytes()); + buf[0] = name.len() as u8; + Self(buf) + } + + /// Overwrite this by copying in a different name. + /// + /// Any name contained in this buffer previously will be overwritten. + pub fn replace_with(&mut self, name: &Name) { + self.0[1..1 + name.len()].copy_from_slice(name.as_bytes()); + self.0[0] = name.len() as u8; + } +} + +impl NameBuf { + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0[0] as usize + } + + /// The wire format representation of the name. + pub fn as_bytes(&self) -> &[u8] { + &self.0[1..1 + self.len()] + } +} + +impl Deref for NameBuf { + type Target = Name; + + fn deref(&self) -> &Self::Target { + // SAFETY: 'NameBuf' always contains a valid name. + let len = self.len(); + let bytes = &self.0[1..1 + len]; + unsafe { Name::from_bytes_unchecked(bytes) } + } +} + +impl DerefMut for NameBuf { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: 'NameBuf' always contains a valid name. + let len = self.len(); + let bytes = &mut self.0[1..1 + len]; + unsafe { Name::from_bytes_unchecked_mut(bytes) } + } +} + +impl Borrow for NameBuf { + fn borrow(&self) -> &Name { + self + } +} + +impl BorrowMut for NameBuf { + fn borrow_mut(&mut self) -> &mut Name { + self + } +} + +impl AsRef for NameBuf { + fn as_ref(&self) -> &Name { + self + } +} + +impl AsMut for NameBuf { + fn as_mut(&mut self) -> &mut Name { + self + } +} + +impl From<&Name> for NameBuf { + fn from(value: &Name) -> Self { + Self::copy(value) + } +} + +/// An error in constructing a [`Name`]. +#[derive(Clone, Debug)] +pub struct NameError; + +impl fmt::Display for NameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not parse an absolute domain name") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for NameError {} diff --git a/src/base/new_name/builder.rs b/src/base/new_name/builder.rs new file mode 100644 index 000000000..61bf6e614 --- /dev/null +++ b/src/base/new_name/builder.rs @@ -0,0 +1,384 @@ +use super::{Label, Name, NameError, RelName}; + +/// An incremental builder for domain names. +/// +/// This type can be used to build up a domain name from a sequence of labels or +/// even simple bytes. It can be used to concatenate or modify domain names. +/// +/// The name is written into a 256-byte buffer. This buffer can be placed on +/// the stack as it will not grow dynamically. By requiring the buffer to have +/// a fixed size, the builder is simpler and faster. +/// +/// # Building Labels +/// +/// The builder may be in the middle of constructing a label. It reaches this +/// state using [`begin_label()`], it must be in it to call [`write_slice()`], +/// and it exits this state using [`end_label()`]. At initialization, no label +/// is being built, so [`begin_label()`] must be called. +/// +/// [`begin_label()`]: Self::begin_label() +/// [`write_slice()`]: Self::write_slice() +/// [`end_label()`]: Self::end_label() +/// +/// Importantly, the root label (which is empty) cannot be ended explicitly. To +/// write a root label, begin a new label by calling [`begin_label()`] and then +/// do not call [`end_label()`], directly going to [`get_name()`]. +/// +/// [`get_name()`]: Self::get_name() +/// +/// When the builder is not in the middle of a label, convenience methods like +/// [`write_label()`], [`write_rel_name()`], and [`write_name()`] are available. +/// +/// [`write_label()`]: Self::write_label() +/// [`write_rel_name()`]: Self::write_rel_name() +/// [`write_name()`]: Self::write_name() +#[derive(Clone, Default)] +pub struct NameBuilder { + /// The offset the next byte will be written to. + /// + /// Invariants: + /// + /// - `write_offset <= 255` + write_offset: u8, + + /// The offset of the current label. + /// + /// This is the position of the length byte of the current label. + /// + /// Invariants: + /// + /// - `label_offset < 255` + /// - `label_offset <= write_offset` + /// - if `label_offset < write_offset`: + /// - `buffer[label_offset] == 0` + /// - `write_offset - label_offset <= 64` + label_offset: u8, + + /// The name being constructed. + buffer: Buffer, +} + +/// # Initialization +impl NameBuilder { + /// Construct a new [`NameBuilder`] over the given buffer. + /// + /// Any existing contents of the buffer will be overwritten by the builder + /// (either upon this function call or later). They should not be relied + /// upon. + #[must_use] + pub const fn new(buffer: Buffer) -> Self { + // TODO: Do we want to zero the buffer already? Check benchmarks. + + Self { + write_offset: 0, + label_offset: 0, + buffer, + } + } +} + +/// # Inspection +impl> NameBuilder { + /// Whether a label is currently being built. + /// + /// This is true in the time between [`begin_label()`] and [`end_label()`]. + /// + /// [`begin_label()`]: Self::begin_label() + /// [`end_label()`]: Self::end_label() + pub fn mid_label(&self) -> bool { + self.label_offset < self.write_offset + } + + /// The label currently being built, if any. + pub fn cur_label(&self) -> Option<&Label> { + if self.label_offset < self.write_offset { + let start = self.label_offset as usize + 1; + let end = self.write_offset as usize; + let label = &self.buffer.as_ref()[start..end]; + // SAFETY: The label is built correctly. + Some(unsafe { Label::from_bytes_unchecked(label) }) + } else { + None + } + } + + /// The length of the name built this far. + /// + /// This does not include the length of any partially-built label. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.label_offset as usize + } + + /// The length of the name built this far. + /// + /// This includes the length of any partially-built label. + pub fn total_len(&self) -> usize { + self.write_offset as usize + } + + /// The name built thus far. + /// + /// This does not include any partially-built label. + pub fn cur_name(&self) -> &RelName { + let name = &self.buffer.as_ref()[..self.label_offset as usize]; + // SAFETY: The name is built correctly. + unsafe { RelName::from_bytes_unchecked(name) } + } +} + +/// # Construction +impl> NameBuilder { + /// Begin a new label. + /// + /// A length byte for the label is allocated in the buffer. It is left + /// uninitialized -- it will be written to once the entire label is ready. + /// + /// # Panics + /// + /// Panics if a label is already being built. + pub fn begin_label(&mut self) { + assert!( + self.label_offset == self.write_offset, + "begin_label() was called before a previous label was ended" + ); + + // In case of the root label, 'end_label()' will not be called; we need + // to set the length byte to 0 right now as we won't have a chance to do + // it later. + self.buffer.as_mut()[self.write_offset as usize] = 0; + + // Since 'label_offset < 255', this will not overflow. + self.write_offset += 1; + } + + /// Write a slice to a label. + /// + /// # Errors + /// + /// Fails if the bytes being written would make the label too long (64 bytes + /// or more) or the entire name too long (256 bytes or more, including the + /// 1-byte root label, which will follow the current bytes). + /// + /// # Panics + /// + /// Panics if no label is being built. + pub fn write_slice(&mut self, data: &[u8]) -> Result<(), OverlongError> { + assert!( + self.label_offset < self.write_offset, + "begin_label() must be called before using write_slice()" + ); + + if self.write_offset as usize + data.len() + >= self.label_offset as usize + 1 + 64 + { + // The label would become 64 bytes or larger. + return Err(OverlongError); + } else if self.write_offset as usize + data.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + let buffer = &mut self.buffer.as_mut()[self.write_offset as usize..]; + buffer[..data.len()].copy_from_slice(data); + self.write_offset += data.len() as u8; + + Ok(()) + } + + /// End a label being built. + /// + /// The length byte for the label will be updated. + /// + /// This must not be called on the root label. + /// + /// # Errors + /// + /// Fails if the label being built is empty. + /// + /// # Panics + /// + /// Panics if no label is being built. + pub fn end_label(&mut self) -> Result<(), EmptyLabelError> { + assert!( + self.label_offset < self.write_offset, + "begin_label() must be called before using end_label()" + ); + + if self.write_offset <= self.label_offset + 1 { + return Err(EmptyLabelError); + } + + let len = self.write_offset - (self.label_offset + 1); + self.buffer.as_mut()[self.label_offset as usize] = len; + self.label_offset = self.write_offset; + + Ok(()) + } +} + +/// # Convenience Methods +impl> NameBuilder { + /// Append a whole label to the name. + /// + /// This will start a new label, write the provided label into the buffer, + /// and complete it immediately. It is convenient to use if a whole label + /// is to be written, rather than being built incrementally. + /// + /// # Errors + /// + /// Fails if the label is empty or if adding the label would make the domain + /// name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. + pub fn write_label(&mut self, label: &Label) -> Result<(), BuildError> { + assert!( + self.label_offset == self.write_offset, + "write_label() was called before a previous label was ended" + ); + + if self.write_offset as usize + 1 + label.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError.into()); + } + + let buffer = &mut self.buffer.as_mut()[self.write_offset as usize..]; + buffer[0] = label.len() as u8; + buffer[1..1 + label.len()].copy_from_slice(label.as_bytes()); + self.write_offset += 1 + label.len() as u8; + self.label_offset += 1 + label.len() as u8; + + Ok(()) + } + + /// Append a relative name to the name. + /// + /// # Errors + /// + /// Fails if appending the given name would make the domain name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. + pub fn write_rel_name( + &mut self, + name: &RelName, + ) -> Result<(), OverlongError> { + assert!( + self.label_offset == self.write_offset, + "write_rel_name() was called before a previous label was ended" + ); + + if self.write_offset as usize + name.len() + 1 > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + self.buffer.as_mut()[self.write_offset as usize..][..name.len()] + .copy_from_slice(name.as_bytes()); + self.write_offset += name.len() as u8; + self.label_offset += name.len() as u8; + + Ok(()) + } + + /// Append an absolute name to the name. + /// + /// # Errors + /// + /// Fails if appending the given name would make the domain name too large. + /// + /// # Panics + /// + /// Panics if a label was already being built. + pub fn write_name(&mut self, name: &Name) -> Result<(), OverlongError> { + assert!( + self.label_offset == self.write_offset, + "write_name() was called before a previous label was ended" + ); + + if self.write_offset as usize + name.len() > 255 { + // The domain name would become 256 bytes or larger. + return Err(OverlongError); + } + + self.buffer.as_mut()[self.write_offset as usize..][..name.len()] + .copy_from_slice(name.as_bytes()); + self.write_offset += name.len() as u8; + self.label_offset += name.len() as u8 - 1; + + Ok(()) + } +} + +/// # Extraction +impl> NameBuilder { + /// Extract an absolute domain name. + /// + /// # Errors + /// + /// If a root label is not present, a [`NameError`] is returned. + /// + /// # Panics + /// + /// Panics if a label (except the root label) is still being built. + pub fn get_name(&self) -> Result<&Name, NameError> { + assert!( + self.write_offset <= self.label_offset + 1, + "get_name() was called before a previous label was ended" + ); + + if self.write_offset != self.label_offset + 1 { + return Err(NameError); + } + + let name = &self.buffer.as_ref()[..self.write_offset as usize]; + // SAFETY: The name is built correctly. + Ok(unsafe { Name::from_bytes_unchecked(name) }) + } + + /// Extract a relative domain name. + /// + /// # Panics + /// + /// Panics if a label (including the root label) is still being built. + pub fn get_rel_name(&self) -> &RelName { + assert!( + self.write_offset <= self.label_offset + 1, + "get_rel_name() was called before a previous label was ended" + ); + + let name = &self.buffer.as_ref()[..self.write_offset as usize]; + // SAFETY: The name is built correctly. + unsafe { RelName::from_bytes_unchecked(name) } + } +} + +/// An error in building a domain name. +pub enum BuildError { + /// A domain name or label was too long. + Overlong(OverlongError), + + /// A (non-root) label was empty. + EmptyLabel(EmptyLabelError), +} + +impl From for BuildError { + fn from(value: OverlongError) -> Self { + Self::Overlong(value) + } +} + +impl From for BuildError { + fn from(value: EmptyLabelError) -> Self { + Self::EmptyLabel(value) + } +} + +/// A domain name or label was too long. +pub struct OverlongError; + +/// A (non-root) label was empty. +pub struct EmptyLabelError; diff --git a/src/base/new_name/idna.rs b/src/base/new_name/idna.rs new file mode 100644 index 000000000..cc6c2abb1 --- /dev/null +++ b/src/base/new_name/idna.rs @@ -0,0 +1,263 @@ +//! International Domain Names in Applications. + +use core::{fmt, str}; + +use super::Label; + +impl Label { + /// Whether this could be an A-label. + /// + /// An A-label is defined by [RFC 5890, section 2.3.2.1] to be the ASCII + /// encoding of an IDNA-valid label. This method tests that the current + /// label begins with the ACE (ASCII-Compatible Encoding) prefix `xn--`, + /// like all A-labels. It does not perform the more expensive validation + /// that the label can be decoded into a Unicode string. + /// + /// [RFC 5890, section 2.3.2.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.2.1 + pub fn has_ace_prefix(&self) -> bool { + self.len() >= 4 && self.as_bytes()[..4].eq_ignore_ascii_case(b"xn--") + } + + /// Decode this label into a Unicode string. + /// + /// If this is an A-label, the Punycode algorithm is applied to decode the + /// ASCII label contents into Unicode characters. If this is an NR-LDH + /// label, it is copied into the output verbatim. + pub fn to_unicode( + &self, + mut w: impl fmt::Write, + ) -> Result<(), DecodeError> { + // If this is an NR-LDH label, write it out and stop. + if self.is_nr_ldh() { + // SAFETY: The label consists of ASCII letters, digits, and + // hyphens, which never compose invalid UTF-8 strings. + w.write_str(unsafe { + str::from_utf8_unchecked(self.as_bytes()) + })?; + + return Ok(()); + } else if !self.has_ace_prefix() { + // This is an R-LDH or non-LDH label. + return Err(DecodeError::BadLabel); + } + + // This is an implementation of the Punycode algorithm as specified in + // RFC 3492 (https://datatracker.ietf.org/doc/html/rfc3492). A number + // of careful implementation decisions have been made in the interests + // of performance. + + // An A-label consists of at most 63 characters. The first 4 are the + // ACE prefix, 'xn--'. Assuming there are no ASCII characters to copy + // to the output, there are 59 encoded characters. If each character + // results in the output of a Unicode character, there are 59 Unicode + // characters (each at most 4 bytes) produced. Thus, we only have to + // contend with 59 characters at any time. + + let mut input = &self.as_bytes()[4..]; + + // TODO: I believe there is a linear algorithm for sorting output + // characters based on their positions. For now, however, a simple + // quadratic-time solution is used. + + // The decoder specifies where characters must be inserted in the + // output string. Inserting them immediately would yield quadratic + // runtime as characters following the insertion point would have to + // be copied every time. Instead, we maintain an array of character + // and insertion position; after decoding, this array is evaluated in + // reverse to determine the correct insertion points in linear time. + + let mut output_chars = ['\0'; 59]; + let mut output_indices = [0u8; 59]; + let mut output_len = 0; + + // Copy over any ASCII characters directly into the output. + + if let Some(num_ascii) = input.iter().rposition(|&b| b == b'-') { + for i in 0..num_ascii { + output_chars[i] = input[i] as char; + output_indices[i] = i as u8; + } + output_len += num_ascii; + input = &input[num_ascii + 1..]; + } + + if input.is_empty() { + // The ACE prefix shouldn't be used if there are no non-ASCII + // characters in the label. + return Err(DecodeError::BadLabel); + } + + // Determine the "digit-value" for every remaining character. + + let mut input_values = [0u8; 59]; + + for (i, &b) in input.iter().enumerate() { + // 'A'..'Z' => 0..25 + // 'a'..'z' => 0..25 + // '0'..'9' => 26..35 + if b.is_ascii_uppercase() { + input_values[i] = b - b'A'; + } else if b.is_ascii_lowercase() { + input_values[i] = b - b'a'; + } else if b.is_ascii_digit() { + input_values[i] = b - b'0' + 26; + } else { + return Err(DecodeError::BadLabel); + } + } + + let mut input = &input_values[..input.len()]; + + // Begin decoding Unicode characters. + + let mut n = 128u32; + let mut i = 0u32; + let mut bias = 72; + let mut first = true; + while !input.is_empty() { + // Find the end of the current variable-width integer. + let end = input + .iter() + .enumerate() + .position(|(k, &b)| { + let t = ((k + 1) * 36).saturating_sub(bias).clamp(1, 26); + b < t as u8 + }) + .ok_or(DecodeError::BadLabel)?; + + // Compute the variable-width integer. + let int = input[..end] + .iter() + .enumerate() + .map(|(k, &v)| { + let t = ((k + 1) * 36).saturating_sub(bias).clamp(1, 26); + (t as u8, v) + }) + .try_rfold(input[end] as u32, |int, (t, v)| { + int.checked_mul(36 - t as u32)?.checked_add(v as u32) + }) + .ok_or(DecodeError::BadLabel)?; + + // Update the bias value. + bias = punycode_adapt(int, output_len as u8 + 1, first) as usize; + i = i.checked_add(int).ok_or(DecodeError::BadLabel)?; + + // Save the decoded position-character pair. + n += i / (output_len as u32 + 1); + i %= output_len as u32 + 1; + output_chars[output_len] = + char::try_from(n).map_err(|_| DecodeError::BadLabel)?; + output_indices[output_len] = i as u8; + output_len += 1; + + // Prepare for the next iteration. + input = &input[end + 1..]; + first = false; + i += 1; + } + + let mut output = ['\0'; 59]; + for i in 0..output_len { + let o = output_indices[i] as usize; + output.copy_within(o..i, o + 1); + output[o] = output_chars[i]; + } + + // TODO: Verify the properties of this U-label. + + output[..output_len] + .iter() + .try_for_each(|&c| w.write_char(c)) + .map_err(DecodeError::Fmt) + } +} + +/// Adjust the Punycode transcoding bias. +fn punycode_adapt(mut delta: u32, length: u8, first: bool) -> u32 { + delta /= if first { 700 } else { 2 }; + delta += delta / length as u32; + let mut k = 0; + while delta > 455 { + delta /= 35; + k += 36; + } + k + (36 * delta) / (38 + delta) +} + +/// A decoding error. +#[derive(Clone, Debug)] +pub enum DecodeError { + /// The label was not an NR-LDH label or an A-label. + /// + /// The label may have been: + /// - An R-LDH label (the decoding process is unknown). + /// - A non-LDH label (all ASCII, but not in the preferred name syntax). + /// - A non-ASCII label. + BadLabel, + + /// The output stream could not be written to. + Fmt(fmt::Error), +} + +impl From for DecodeError { + fn from(value: fmt::Error) -> Self { + Self::Fmt(value) + } +} + +#[cfg(all(feature = "std", test))] +mod tests { + use std::string::String; + + use crate::base::new_name::Label; + + #[test] + fn rfc3492_samples() { + const A_LABELS: &[&[u8]] = &[ + b"xn--egbpdaj6bu4bxfgehfvwxn", + b"xn--ihqwcrb4cv8a8dqg056pqjye", + b"xn--ihqwctvzc91f659drss3x8bo0yb", + b"xn--Proprostnemluvesky-uyb24dma41a", + b"xn--4dbcagdahymbxekheh6e0a7fei0b", + b"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", + b"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", + b"xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l", + b"xn--PorqunopuedensimplementehablarenEspaol-fmd56a", + b"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", + b"xn--3B-ww4c5e180e575a65lsy2b", + b"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", + b"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b", + b"xn--2-u9tlzr9756bt3uc0v", + b"xn--MajiKoi5-783gue6qz075azm5e", + b"xn--de-jg4avhby1noc0d", + b"xn--d9juau41awczczp", + ]; + + const U_LABELS: &[&str] = &[ + "\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}", + "\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}", + "\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}", + "\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\u{0065}\u{0073}\u{006B}\u{0079}", + "\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\u{05D1}\u{05E8}\u{05D9}\u{05EA}", + "\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\u{0939}\u{0948}\u{0902}", + "\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}", + "\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\u{0438}", + "\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\u{0061}\u{00F1}\u{006F}\u{006C}", + "\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\u{0056}\u{0069}\u{1EC7}\u{0074}", + "\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}", + "\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}", + "\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}", + "\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}", + "\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\u{308B}\u{0035}\u{79D2}\u{524D}", + "\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}", + "\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}", + ]; + + for (&a, &u) in core::iter::zip(A_LABELS, U_LABELS) { + let a_label = Label::from_bytes(a).unwrap(); + let mut u_label = String::new(); + a_label.to_unicode(&mut u_label).unwrap(); + assert_eq!(&u_label, u); + } + } +} diff --git a/src/base/new_name/label.rs b/src/base/new_name/label.rs new file mode 100644 index 000000000..e4dc17374 --- /dev/null +++ b/src/base/new_name/label.rs @@ -0,0 +1,357 @@ +use core::{ + borrow::{Borrow, BorrowMut}, + cmp, fmt, + hash::{Hash, Hasher}, + iter, + ops::{Deref, DerefMut}, +}; + +use super::UncertainName; + +/// A label in a domain name. +#[repr(transparent)] +pub struct Label([u8]); + +impl Label { + /// The maximum size of a label in the wire format. + pub const MAX_SIZE: usize = 63; + + /// The root label. + pub const ROOT: &'static Self = + unsafe { Self::from_bytes_unchecked(b"") }; + + /// The wildcard label. + pub const WILDCARD: &'static Self = + unsafe { Self::from_bytes_unchecked(b"*") }; +} + +impl Label { + /// Assume a byte string is a valid [`Label`]. + /// + /// # Safety + /// + /// The byte string must be within the size restriction (63 bytes or + /// fewer). + pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Label' is sound. + core::mem::transmute(bytes) + } + + /// Assume a mutable byte string is a valid [`Label`]. + /// + /// # Safety + /// + /// The byte string must be within the size restriction (63 bytes or + /// fewer). + pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Self { + // SAFETY: 'Label' is a 'repr(transparent)' wrapper around '[u8]', so + // casting a '[u8]' into a 'Label' is sound. + core::mem::transmute(bytes) + } + + /// Try converting a byte string into a [`Label`]. + /// + /// If the byte string is too long, an error is returned. + /// + /// Runtime: `O(bytes.len())`. + pub fn from_bytes(bytes: &[u8]) -> Result<&Self, LabelError> { + if bytes.len() > Self::MAX_SIZE { + // The label was too long to be used. + return Err(LabelError); + } + + Ok(unsafe { Self::from_bytes_unchecked(bytes) }) + } + + /// Extract a label from the start of a byte string. + /// + /// A label encoded in the wire format will be extracted from the beginning + /// of the given byte string. If a valid label cannot be extracted, or the + /// byte string is simply empty, an error is returned. The extracted label + /// and the remainder of the byte string are returned. + /// + /// Runtime: `O(1)`. + pub fn split_off(bytes: &[u8]) -> Result<(&Self, &[u8]), LabelError> { + let (&length, bytes) = bytes.split_first().ok_or(LabelError)?; + if length < 64 && bytes.len() >= length as usize { + let (label, bytes) = bytes.split_at(length as usize); + // SAFETY: 'label' is known be to less than 64 bytes in size. + Ok((unsafe { Self::from_bytes_unchecked(label) }, bytes)) + } else { + // Overlong label (or compression pointer). + Err(LabelError) + } + } +} + +impl Label { + /// Whether this is the root label. + pub const fn is_root(&self) -> bool { + self.0.is_empty() + } + + /// Whether this is the wildcard label. + pub const fn is_wildcard(&self) -> bool { + // NOTE: 'self.0 == *b"*"' is not const. + self.0.len() == 1 && self.0[0] == b'*' + } + + /// The size of this name in the wire format. + #[allow(clippy::len_without_is_empty)] + pub const fn len(&self) -> usize { + self.0.len() + } + + /// The wire format representation of the name. + pub const fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl Label { + /// Whether this is an LDH label. + /// + /// LDH ("letter-digit-hyphen") labels consist exclusively of ASCII letter + /// (A-Z, a-z), digit (0-9), and hyphen (-) characters, where labels begin + /// and end with non-hyphen characters. + /// + /// See [RFC 5890, section 2.3.1]. This is also known as the "preferred + /// name syntax" of [RFC 1034, section 3.5]. + /// + /// [RFC 5890, section 2.3.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.1 + /// [RFC 1034, section 3.5]: https://datatracker.ietf.org/doc/html/rfc1034#section-3.5 + pub fn is_ldh(&self) -> bool { + self.as_bytes() + .iter() + .all(|&b| b.is_ascii_alphanumeric() || b == b'-') + && !self.as_bytes().starts_with(b"-") + && !self.as_bytes().ends_with(b"-") + } + + /// Whether this is an NR-LDH label. + /// + /// A "non-reserved" LDH label is slightly stricter than an LDH label (see + /// [`Self::is_ldh()`]); it further does not allow the third and fourth + /// characters to both be hyphens. A-labels (Unicode labels encoded into + /// ASCII) are not NR-LDH labels as they begin with `xn--`. + /// + /// See [RFC 5890, section 2.3.1]. + /// + /// [RFC 5890, section 2.3.1]: https://datatracker.ietf.org/doc/html/rfc5890#section-2.3.1 + pub fn is_nr_ldh(&self) -> bool { + self.is_ldh() && self.as_bytes().get(2..4) != Some(b"--") + } +} + +impl Label { + /// Canonicalize this label. + /// + /// All uppercase ASCII characters in the label will be lowercased. + /// + /// Runtime: `O(self.len())`. + pub fn canonicalize(&mut self) { + self.0.make_ascii_lowercase() + } +} + +impl PartialEq for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. + fn eq(&self, that: &Self) -> bool { + self.0.eq_ignore_ascii_case(&that.0) + } +} + +impl Eq for Label {} + +impl PartialOrd for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. + fn partial_cmp(&self, that: &Self) -> Option { + Some(Ord::cmp(self, that)) + } +} + +impl Ord for Label { + /// Compare labels by their canonical value. + /// + /// Canonicalized labels have uppercase ASCII characters lowercased, so this + /// function compares the two names ASCII-case-insensitively. + /// + // Runtime: `O(self.len())`, which is equal to `O(that.len())`. + fn cmp(&self, that: &Self) -> cmp::Ordering { + let this_bytes = self.as_bytes().iter().copied(); + let that_bytes = that.as_bytes().iter().copied(); + iter::zip(this_bytes, that_bytes) + .find(|(l, r)| !l.eq_ignore_ascii_case(r)) + .map_or(Ord::cmp(&self.len(), &that.len()), |(l, r)| { + Ord::cmp(&l.to_ascii_lowercase(), &r.to_ascii_lowercase()) + }) + } +} + +impl Hash for Label { + /// Hash this label by its canonical value. + /// + /// The hasher is provided with the labels in this name with ASCII + /// characters lowercased. Each label is preceded by its length as `u8`. + /// + /// The same scheme is used by [`Name`] and [`RelName`], so a tuple of any + /// of these types will have the same hash as the concatenation of the + /// labels. + /// + /// [`Name`]: super::Name + /// [`RelName`]: super::RelName + /// + /// Runtime: `O(self.len())`. + fn hash(&self, state: &mut H) { + // Individual labels and names should hash in the same way. + state.write_u8(self.len() as u8); + + // The default 'std' hasher actually buffers 8 bytes of input before + // processing them. There's no point trying to chunk the input here. + for &b in self.as_bytes() { + state.write_u8(b.to_ascii_lowercase()); + } + } +} + +impl AsRef<[u8]> for Label { + /// The raw bytes in this name, with no length octet. + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a Label { + type Error = LabelError; + + fn try_from(bytes: &'a [u8]) -> Result { + Label::from_bytes(bytes) + } +} + +impl<'a> From<&'a Label> for &'a [u8] { + fn from(label: &'a Label) -> Self { + label.as_bytes() + } +} + +/// A [`Label`] in a 64-byte buffer. +/// +/// This is a simple wrapper around a 64-byte buffer that stores a [`Label`] +/// within it. It can be used in situations where a [`Label`] must be placed +/// on the stack or within a `struct`, although it is also possible to store +/// [`Label`]s on the heap as `Box