@@ -77,10 +77,50 @@ pub struct HuffmanDictionary<T> {
7777}
7878
7979impl < T : Clone + Copy + Ord > HuffmanDictionary < T > {
80- /// The list of alphabet symbols and their respective frequency should
81- /// be given as input
82- pub fn new ( alphabet : & [ ( T , u64 ) ] ) -> Self {
80+ /// Creates a new Huffman dictionary from alphabet symbols and their frequencies.
81+ ///
82+ /// Returns `None` if the alphabet is empty.
83+ ///
84+ /// # Arguments
85+ /// * `alphabet` - A slice of tuples containing symbols and their frequencies
86+ ///
87+ /// # Example
88+ /// ```
89+ /// # use the_algorithms_rust::general::HuffmanDictionary;
90+ /// let freq = vec![('a', 5), ('b', 2), ('c', 1)];
91+ /// let dict = HuffmanDictionary::new(&freq).unwrap();
92+ /// ```
93+ pub fn new ( alphabet : & [ ( T , u64 ) ] ) -> Option < Self > {
94+ if alphabet. is_empty ( ) {
95+ return None ;
96+ }
97+
8398 let mut alph: BTreeMap < T , HuffmanValue > = BTreeMap :: new ( ) ;
99+
100+ // Special case: single symbol
101+ if alphabet. len ( ) == 1 {
102+ let ( symbol, _freq) = alphabet[ 0 ] ;
103+ alph. insert (
104+ symbol,
105+ HuffmanValue {
106+ value : 0 ,
107+ bits : 1 , // Must use at least 1 bit per symbol
108+ } ,
109+ ) ;
110+
111+ let root = HuffmanNode {
112+ left : None ,
113+ right : None ,
114+ symbol : Some ( symbol) ,
115+ frequency : alphabet[ 0 ] . 1 ,
116+ } ;
117+
118+ return Some ( HuffmanDictionary {
119+ alphabet : alph,
120+ root,
121+ } ) ;
122+ }
123+
84124 let mut queue: BinaryHeap < HuffmanNode < T > > = BinaryHeap :: new ( ) ;
85125 for ( symbol, freq) in alphabet. iter ( ) {
86126 queue. push ( HuffmanNode {
@@ -101,11 +141,14 @@ impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
101141 frequency : sm_freq,
102142 } ) ;
103143 }
104- let root = queue. pop ( ) . unwrap ( ) ;
105- HuffmanNode :: get_alphabet ( 0 , 0 , & root, & mut alph) ;
106- HuffmanDictionary {
107- alphabet : alph,
108- root,
144+ if let Some ( root) = queue. pop ( ) {
145+ HuffmanNode :: get_alphabet ( 0 , 0 , & root, & mut alph) ;
146+ Some ( HuffmanDictionary {
147+ alphabet : alph,
148+ root,
149+ } )
150+ } else {
151+ None
109152 }
110153 }
111154 pub fn encode ( & self , data : & [ T ] ) -> HuffmanEncoding {
@@ -143,27 +186,48 @@ impl HuffmanEncoding {
143186 }
144187 self . num_bits += data. bits as u64 ;
145188 }
189+
190+ #[ inline]
146191 fn get_bit ( & self , pos : u64 ) -> bool {
147192 ( self . data [ ( pos >> 6 ) as usize ] & ( 1 << ( pos & 63 ) ) ) != 0
148193 }
194+
149195 /// In case the encoding is invalid, `None` is returned
150196 pub fn decode < T : Clone + Copy + Ord > ( & self , dict : & HuffmanDictionary < T > ) -> Option < Vec < T > > {
197+ // Handle empty encoding
198+ if self . num_bits == 0 {
199+ return Some ( vec ! [ ] ) ;
200+ }
201+
202+ // Special case: single symbol in dictionary
203+ if dict. alphabet . len ( ) == 1 {
204+ // The only symbol is encoded as exactly one bit, so every bit decodes to that symbol
205+ let symbol = dict. alphabet . keys ( ) . next ( ) ?;
206+ let result = vec ! [ * symbol; self . num_bits as usize ] ;
207+ return Some ( result) ;
208+ }
209+
210+ // Normal case: multiple symbols
151211 let mut state = & dict. root ;
152212 let mut result: Vec < T > = vec ! [ ] ;
213+
153214 for i in 0 ..self . num_bits {
154- if state. symbol . is_some ( ) {
155- result. push ( state . symbol . unwrap ( ) ) ;
215+ if let Some ( symbol ) = state. symbol {
216+ result. push ( symbol) ;
156217 state = & dict. root ;
157218 }
158219 state = if self . get_bit ( i) {
159- state. right . as_ref ( ) . unwrap ( )
220+ state. right . as_ref ( ) ?
160221 } else {
161- state. left . as_ref ( ) . unwrap ( )
222+ state. left . as_ref ( ) ?
162223 }
163224 }
225+
226+ // Check if we ended on a symbol
164227 if self . num_bits > 0 {
165228 result. push ( state. symbol ?) ;
166229 }
230+
167231 Some ( result)
168232 }
169233}
@@ -181,12 +245,97 @@ mod tests {
181245 . for_each ( |( b, & cnt) | result. push ( ( b as u8 , cnt) ) ) ;
182246 result
183247 }
248+
249+ #[ test]
250+ fn empty_text ( ) {
251+ let text = "" ;
252+ let bytes = text. as_bytes ( ) ;
253+ let freq = get_frequency ( bytes) ;
254+ let dict = HuffmanDictionary :: new ( & freq) ;
255+ assert ! ( dict. is_none( ) ) ;
256+ }
257+
258+ #[ test]
259+ fn one_symbol_text ( ) {
260+ let text = "aaaa" ;
261+ let bytes = text. as_bytes ( ) ;
262+ let freq = get_frequency ( bytes) ;
263+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
264+ let encoded = dict. encode ( bytes) ;
265+ assert_eq ! ( encoded. num_bits, 4 ) ;
266+ let decoded = encoded. decode ( & dict) . unwrap ( ) ;
267+ assert_eq ! ( decoded, bytes) ;
268+ }
269+
270+ #[ test]
271+ fn test_decode_empty_encoding_struct ( ) {
272+ // Create a minimal but VALID HuffmanDictionary.
273+ // This is required because decode() expects a dictionary, even though
274+ // the content of the dictionary doesn't matter when num_bits == 0.
275+ let freq = vec ! [ ( b'a' , 1 ) ] ;
276+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
277+
278+ // Manually create the target state: an encoding with 0 bits.
279+ let empty_encoding = HuffmanEncoding {
280+ data : vec ! [ ] ,
281+ num_bits : 0 ,
282+ } ;
283+
284+ let result = empty_encoding. decode ( & dict) ;
285+
286+ assert_eq ! ( result, Some ( vec![ ] ) ) ;
287+ }
288+
289+ #[ test]
290+ fn minimal_decode_end_check ( ) {
291+ let freq = vec ! [ ( b'a' , 1 ) , ( b'b' , 1 ) ] ;
292+ let bytes = b"ab" ;
293+
294+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
295+ let encoded = dict. encode ( bytes) ;
296+
297+ // This decode will go through the main loop and hit the final 'if self.num_bits > 0' check.
298+ let decoded = encoded. decode ( & dict) . unwrap ( ) ;
299+
300+ assert_eq ! ( decoded, bytes) ;
301+ }
302+
303+ #[ test]
304+ fn test_decode_corrupted_stream_dead_end ( ) {
305+ // Create a dictionary with three symbols to ensure a deeper tree.
306+ // This makes hitting a dead-end (None pointer) easier.
307+ let freq = vec ! [ ( b'a' , 1 ) , ( b'b' , 1 ) , ( b'c' , 1 ) ] ;
308+ let bytes = b"ab" ;
309+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
310+
311+ let encoded = dict. encode ( bytes) ;
312+
313+ // Manually corrupt the stream to stop mid-symbol.
314+ // We will truncate num_bits by a small amount (e.g., 1 bit).
315+ // This forces the loop to stop on an *intermediate* node.
316+ let corrupted_encoding = HuffmanEncoding {
317+ data : encoded. data ,
318+ // Shorten the bit count by one. The 'ab' stream should be 3 or 4 bits
319+ // (a three-symbol Huffman tree has code lengths 1, 2, 2). Assuming 'b' got a 2-bit
320+ // code (TODO confirm), the loop ends one bit early, leaving the state on an internal node.
321+ num_bits : encoded
322+ . num_bits
323+ . checked_sub ( 1 )
324+ . expect ( "Encoding should be > 0 bits" ) ,
325+ } ;
326+
327+ // Assert that the decode fails gracefully.
328+ // The loop finishes, the final 'if self.num_bits > 0' executes,
329+ // and result.push(state.symbol?) fails because state.symbol is None.
330+ assert_eq ! ( corrupted_encoding. decode( & dict) , None ) ;
331+ }
332+
184333 #[ test]
185334 fn small_text ( ) {
186335 let text = "Hello world" ;
187336 let bytes = text. as_bytes ( ) ;
188337 let freq = get_frequency ( bytes) ;
189- let dict = HuffmanDictionary :: new ( & freq) ;
338+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
190339 let encoded = dict. encode ( bytes) ;
191340 assert_eq ! ( encoded. num_bits, 32 ) ;
192341 let decoded = encoded. decode ( & dict) . unwrap ( ) ;
@@ -208,7 +357,7 @@ mod tests {
208357 ) ;
209358 let bytes = text. as_bytes ( ) ;
210359 let freq = get_frequency ( bytes) ;
211- let dict = HuffmanDictionary :: new ( & freq) ;
360+ let dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ;
212361 let encoded = dict. encode ( bytes) ;
213362 assert_eq ! ( encoded. num_bits, 2372 ) ;
214363 let decoded = encoded. decode ( & dict) . unwrap ( ) ;
0 commit comments