From a4e09a89bbc2b429fb31b84064e8b200f47192b7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 27 Dec 2025 14:58:01 +0000 Subject: [PATCH] test: add comprehensive unit tests for improved coverage - Add tests for config.rs (all parser configs and builders) - Add tests for error.rs (error variants, From conversions) - Add tests for parser.rs (Platform enum, create_parser, ParseIterator) - Add tests for parsers (Telegram, Instagram, WhatsApp, Discord) - Add tests for streaming parsers (progress, iterators, error handling) - Add tests for parsing/discord.rs (stream messages, stickers) - Add edge_cases.rs integration tests (Unicode, timestamps, filters) - Expand proptest.rs property-based tests Coverage improved from 59.88% to 84.51% --- src/async_parser/telegram.rs | 137 ++++++++++ src/config.rs | 323 +++++++++++++++++++++++ src/error.rs | 314 ++++++++++++++++++++++ src/format.rs | 137 +++++++++- src/parser.rs | 318 +++++++++++++++++++++- src/parsers/discord.rs | 313 ++++++++++++++++++++++ src/parsers/instagram.rs | 186 +++++++++++++ src/parsers/telegram.rs | 181 +++++++++++++ src/parsers/whatsapp.rs | 210 +++++++++++++++ src/parsing/discord.rs | 313 ++++++++++++++++++++++ src/streaming/discord.rs | 332 ++++++++++++++++++++++- src/streaming/error.rs | 97 ++++++- src/streaming/instagram.rs | 251 ++++++++++++++++++ src/streaming/telegram.rs | 250 ++++++++++++++++++ src/streaming/traits.rs | 50 ++++ src/streaming/whatsapp.rs | 229 ++++++++++++++++ tests/edge_cases.rs | 496 +++++++++++++++++++++++++++++++++++ tests/proptest.rs | 52 +++- 18 files changed, 4176 insertions(+), 13 deletions(-) create mode 100644 tests/edge_cases.rs diff --git a/src/async_parser/telegram.rs b/src/async_parser/telegram.rs index 60d38c7d..fafd3fc7 100644 --- a/src/async_parser/telegram.rs +++ b/src/async_parser/telegram.rs @@ -81,12 +81,80 @@ impl AsyncParser for AsyncTelegramParser { mod tests { use super::*; + // ========================================================================= + // 
AsyncTelegramParser construction tests + // ========================================================================= + + #[test] + fn test_parser_new() { + let parser = AsyncTelegramParser::new(); + assert_eq!(parser.name(), "Telegram (Async)"); + } + + #[test] + fn test_parser_default() { + let parser = AsyncTelegramParser::default(); + assert_eq!(parser.name(), "Telegram (Async)"); + } + + #[test] + fn test_parser_with_config() { + let config = TelegramConfig::new().with_streaming(true); + let parser = AsyncTelegramParser::with_config(config); + assert_eq!(parser.name(), "Telegram (Async)"); + } + + // ========================================================================= + // Async parse tests + // ========================================================================= + #[tokio::test] async fn test_async_parser_name() { let parser = AsyncTelegramParser::new(); assert_eq!(parser.name(), "Telegram (Async)"); } + #[tokio::test] + async fn test_async_parse_file() { + use tokio::io::AsyncWriteExt; + let dir = tempfile::tempdir().expect("create temp dir"); + let file_path = dir.path().join("test.json"); + + let json = r#"{ + "messages": [ + { + "id": 1, + "type": "message", + "date_unixtime": "1705314600", + "from": "Alice", + "text": "Hello async!" 
+ } + ] + }"#; + + let mut file = tokio::fs::File::create(&file_path).await.expect("create file"); + file.write_all(json.as_bytes()).await.expect("write"); + file.flush().await.expect("flush"); + + let parser = AsyncTelegramParser::new(); + let messages = parser.parse(&file_path).await.expect("parse failed"); + + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[0].content, "Hello async!"); + } + + #[tokio::test] + async fn test_async_parse_file_not_found() { + let parser = AsyncTelegramParser::new(); + let result = parser.parse("/nonexistent/file.json").await; + assert!(result.is_err()); + } + + // ========================================================================= + // parse_str tests + // ========================================================================= + #[test] fn test_parse_str() { let json = r#"{ @@ -108,4 +176,73 @@ mod tests { assert_eq!(messages[0].sender, "Alice"); assert_eq!(messages[0].content, "Hello!"); } + + #[test] + fn test_parse_str_multiple_messages() { + let json = r#"{ + "messages": [ + {"id": 1, "type": "message", "date_unixtime": "1705314600", "from": "Alice", "text": "Hello!"}, + {"id": 2, "type": "message", "date_unixtime": "1705314601", "from": "Bob", "text": "Hi!"} + ] + }"#; + + let parser = AsyncTelegramParser::new(); + let messages = parser.parse_str(json).unwrap(); + + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + #[test] + fn test_parse_str_invalid_json() { + let parser = AsyncTelegramParser::new(); + let result = parser.parse_str("invalid json"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_str_empty_messages() { + let json = r#"{"messages": []}"#; + let parser = AsyncTelegramParser::new(); + let messages = parser.parse_str(json).unwrap(); + assert!(messages.is_empty()); + } + + #[test] + fn test_parse_str_with_formatted_text() { + let json = r#"{ + "messages": [ + { + "id": 1, + 
"type": "message", + "date_unixtime": "1705314600", + "from": "Alice", + "text": ["Hello ", {"type": "bold", "text": "world"}] + } + ] + }"#; + + let parser = AsyncTelegramParser::new(); + let messages = parser.parse_str(json).unwrap(); + + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Hello world"); + } + + #[test] + fn test_parse_str_filters_service_messages() { + let json = r#"{ + "messages": [ + {"id": 1, "type": "message", "date_unixtime": "1705314600", "from": "Alice", "text": "Hello!"}, + {"id": 2, "type": "service", "date_unixtime": "1705314601", "from": "System", "text": "joined"}, + {"id": 3, "type": "message", "date_unixtime": "1705314602", "from": "Bob", "text": "Hi!"} + ] + }"#; + + let parser = AsyncTelegramParser::new(); + let messages = parser.parse_str(json).unwrap(); + + assert_eq!(messages.len(), 2); + } } diff --git a/src/config.rs b/src/config.rs index 7c04a2d0..2d03e5eb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -397,11 +397,25 @@ impl DiscordConfig { mod tests { use super::*; + // ========================================================================= + // TelegramConfig tests + // ========================================================================= + #[test] fn test_telegram_config_default() { let config = TelegramConfig::default(); assert!(!config.streaming); assert_eq!(config.buffer_size, 64 * 1024); + assert_eq!(config.max_message_size, 10 * 1024 * 1024); + assert!(config.skip_invalid); + } + + #[test] + fn test_telegram_config_new() { + let config = TelegramConfig::new(); + assert!(!config.streaming); + assert_eq!(config.buffer_size, 64 * 1024); + assert_eq!(config.max_message_size, 10 * 1024 * 1024); assert!(config.skip_invalid); } @@ -422,25 +436,334 @@ mod tests { assert_eq!(config.buffer_size, 256 * 1024); } + #[test] + fn test_telegram_config_with_max_message_size() { + let config = TelegramConfig::new().with_max_message_size(5 * 1024 * 1024); + assert_eq!(config.max_message_size, 5 * 1024 * 1024); 
+ } + + #[test] + fn test_telegram_config_with_skip_invalid() { + let config = TelegramConfig::new().with_skip_invalid(false); + assert!(!config.skip_invalid); + } + + #[test] + fn test_telegram_config_builder_chain() { + let config = TelegramConfig::new() + .with_streaming(true) + .with_buffer_size(512 * 1024) + .with_max_message_size(20 * 1024 * 1024) + .with_skip_invalid(false); + + assert!(config.streaming); + assert_eq!(config.buffer_size, 512 * 1024); + assert_eq!(config.max_message_size, 20 * 1024 * 1024); + assert!(!config.skip_invalid); + } + + #[test] + fn test_telegram_config_serde() { + let config = TelegramConfig::new().with_streaming(true); + let json = serde_json::to_string(&config).expect("serialize failed"); + let parsed: TelegramConfig = serde_json::from_str(&json).expect("deserialize failed"); + assert!(parsed.streaming); + assert_eq!(parsed.buffer_size, config.buffer_size); + } + + // ========================================================================= + // WhatsAppConfig tests + // ========================================================================= + #[test] fn test_whatsapp_config_default() { let config = WhatsAppConfig::default(); assert!(!config.streaming); + assert_eq!(config.buffer_size, 64 * 1024); assert!(config.skip_system_messages); + assert!(config.skip_invalid); + } + + #[test] + fn test_whatsapp_config_new() { + let config = WhatsAppConfig::new(); + assert!(!config.streaming); + assert!(config.skip_system_messages); + } + + #[test] + fn test_whatsapp_config_streaming() { + let config = WhatsAppConfig::streaming(); + assert!(config.streaming); + assert_eq!(config.buffer_size, 256 * 1024); + } + + #[test] + fn test_whatsapp_config_with_streaming() { + let config = WhatsAppConfig::new().with_streaming(true); + assert!(config.streaming); + } + + #[test] + fn test_whatsapp_config_with_buffer_size() { + let config = WhatsAppConfig::new().with_buffer_size(128 * 1024); + assert_eq!(config.buffer_size, 128 * 1024); + } + + #[test] 
+ fn test_whatsapp_config_with_skip_system_messages() { + let config = WhatsAppConfig::new().with_skip_system_messages(false); + assert!(!config.skip_system_messages); + } + + #[test] + fn test_whatsapp_config_with_skip_invalid() { + let config = WhatsAppConfig::new().with_skip_invalid(false); + assert!(!config.skip_invalid); + } + + #[test] + fn test_whatsapp_config_builder_chain() { + let config = WhatsAppConfig::new() + .with_streaming(true) + .with_buffer_size(512 * 1024) + .with_skip_system_messages(false) + .with_skip_invalid(false); + + assert!(config.streaming); + assert_eq!(config.buffer_size, 512 * 1024); + assert!(!config.skip_system_messages); + assert!(!config.skip_invalid); + } + + #[test] + fn test_whatsapp_config_serde() { + let config = WhatsAppConfig::new().with_skip_system_messages(false); + let json = serde_json::to_string(&config).expect("serialize failed"); + let parsed: WhatsAppConfig = serde_json::from_str(&json).expect("deserialize failed"); + assert!(!parsed.skip_system_messages); } + // ========================================================================= + // InstagramConfig tests + // ========================================================================= + #[test] fn test_instagram_config_default() { let config = InstagramConfig::default(); assert!(!config.streaming); + assert_eq!(config.buffer_size, 64 * 1024); + assert_eq!(config.max_message_size, 10 * 1024 * 1024); + assert!(config.fix_encoding); + assert!(config.skip_invalid); + } + + #[test] + fn test_instagram_config_new() { + let config = InstagramConfig::new(); + assert!(!config.streaming); assert!(config.fix_encoding); } + #[test] + fn test_instagram_config_streaming() { + let config = InstagramConfig::streaming(); + assert!(config.streaming); + assert_eq!(config.buffer_size, 256 * 1024); + } + + #[test] + fn test_instagram_config_with_streaming() { + let config = InstagramConfig::new().with_streaming(true); + assert!(config.streaming); + } + + #[test] + fn 
test_instagram_config_with_buffer_size() { + let config = InstagramConfig::new().with_buffer_size(128 * 1024); + assert_eq!(config.buffer_size, 128 * 1024); + } + + #[test] + fn test_instagram_config_with_max_message_size() { + let config = InstagramConfig::new().with_max_message_size(5 * 1024 * 1024); + assert_eq!(config.max_message_size, 5 * 1024 * 1024); + } + + #[test] + fn test_instagram_config_with_fix_encoding() { + let config = InstagramConfig::new().with_fix_encoding(false); + assert!(!config.fix_encoding); + } + + #[test] + fn test_instagram_config_with_skip_invalid() { + let config = InstagramConfig::new().with_skip_invalid(false); + assert!(!config.skip_invalid); + } + + #[test] + fn test_instagram_config_builder_chain() { + let config = InstagramConfig::new() + .with_streaming(true) + .with_buffer_size(512 * 1024) + .with_max_message_size(20 * 1024 * 1024) + .with_fix_encoding(false) + .with_skip_invalid(false); + + assert!(config.streaming); + assert_eq!(config.buffer_size, 512 * 1024); + assert_eq!(config.max_message_size, 20 * 1024 * 1024); + assert!(!config.fix_encoding); + assert!(!config.skip_invalid); + } + + #[test] + fn test_instagram_config_serde() { + let config = InstagramConfig::new().with_fix_encoding(false); + let json = serde_json::to_string(&config).expect("serialize failed"); + let parsed: InstagramConfig = serde_json::from_str(&json).expect("deserialize failed"); + assert!(!parsed.fix_encoding); + } + + // ========================================================================= + // DiscordConfig tests + // ========================================================================= + #[test] fn test_discord_config_default() { let config = DiscordConfig::default(); assert!(!config.streaming); + assert_eq!(config.buffer_size, 64 * 1024); + assert_eq!(config.max_message_size, 10 * 1024 * 1024); assert!(config.prefer_nickname); assert!(config.include_attachments); + assert!(config.skip_invalid); + } + + #[test] + fn 
test_discord_config_new() { + let config = DiscordConfig::new(); + assert!(!config.streaming); + assert!(config.prefer_nickname); + assert!(config.include_attachments); + } + + #[test] + fn test_discord_config_streaming() { + let config = DiscordConfig::streaming(); + assert!(config.streaming); + assert_eq!(config.buffer_size, 256 * 1024); + } + + #[test] + fn test_discord_config_with_streaming() { + let config = DiscordConfig::new().with_streaming(true); + assert!(config.streaming); + } + + #[test] + fn test_discord_config_with_buffer_size() { + let config = DiscordConfig::new().with_buffer_size(128 * 1024); + assert_eq!(config.buffer_size, 128 * 1024); + } + + #[test] + fn test_discord_config_with_max_message_size() { + let config = DiscordConfig::new().with_max_message_size(5 * 1024 * 1024); + assert_eq!(config.max_message_size, 5 * 1024 * 1024); + } + + #[test] + fn test_discord_config_with_prefer_nickname() { + let config = DiscordConfig::new().with_prefer_nickname(false); + assert!(!config.prefer_nickname); + } + + #[test] + fn test_discord_config_with_include_attachments() { + let config = DiscordConfig::new().with_include_attachments(false); + assert!(!config.include_attachments); + } + + #[test] + fn test_discord_config_with_skip_invalid() { + let config = DiscordConfig::new().with_skip_invalid(false); + assert!(!config.skip_invalid); + } + + #[test] + fn test_discord_config_builder_chain() { + let config = DiscordConfig::new() + .with_streaming(true) + .with_buffer_size(512 * 1024) + .with_max_message_size(20 * 1024 * 1024) + .with_prefer_nickname(false) + .with_include_attachments(false) + .with_skip_invalid(false); + + assert!(config.streaming); + assert_eq!(config.buffer_size, 512 * 1024); + assert_eq!(config.max_message_size, 20 * 1024 * 1024); + assert!(!config.prefer_nickname); + assert!(!config.include_attachments); + assert!(!config.skip_invalid); + } + + #[test] + fn test_discord_config_serde() { + let config = DiscordConfig::new() + 
.with_prefer_nickname(false) + .with_include_attachments(false); + let json = serde_json::to_string(&config).expect("serialize failed"); + let parsed: DiscordConfig = serde_json::from_str(&json).expect("deserialize failed"); + assert!(!parsed.prefer_nickname); + assert!(!parsed.include_attachments); + } + + // ========================================================================= + // Clone and Debug trait tests + // ========================================================================= + + #[test] + fn test_configs_clone() { + let telegram = TelegramConfig::new().with_streaming(true); + let telegram_clone = telegram.clone(); + assert_eq!(telegram.streaming, telegram_clone.streaming); + + let whatsapp = WhatsAppConfig::new().with_skip_system_messages(false); + let whatsapp_clone = whatsapp.clone(); + assert_eq!( + whatsapp.skip_system_messages, + whatsapp_clone.skip_system_messages + ); + + let instagram = InstagramConfig::new().with_fix_encoding(false); + let instagram_clone = instagram.clone(); + assert_eq!(instagram.fix_encoding, instagram_clone.fix_encoding); + + let discord = DiscordConfig::new().with_prefer_nickname(false); + let discord_clone = discord.clone(); + assert_eq!(discord.prefer_nickname, discord_clone.prefer_nickname); + } + + #[test] + fn test_configs_debug() { + let telegram = TelegramConfig::new(); + let debug_str = format!("{:?}", telegram); + assert!(debug_str.contains("TelegramConfig")); + assert!(debug_str.contains("streaming")); + + let whatsapp = WhatsAppConfig::new(); + let debug_str = format!("{:?}", whatsapp); + assert!(debug_str.contains("WhatsAppConfig")); + + let instagram = InstagramConfig::new(); + let debug_str = format!("{:?}", instagram); + assert!(debug_str.contains("InstagramConfig")); + + let discord = DiscordConfig::new(); + let debug_str = format!("{:?}", discord); + assert!(debug_str.contains("DiscordConfig")); } } diff --git a/src/error.rs b/src/error.rs index 1f213520..cdeb4146 100644 --- a/src/error.rs +++ 
b/src/error.rs @@ -357,6 +357,10 @@ impl From for ChatpackError { mod tests { use super::*; + // ========================================================================= + // Display tests for all error variants + // ========================================================================= + #[test] fn test_io_error_display() { let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); @@ -385,6 +389,40 @@ mod tests { assert!(display.contains("/path/to/file.json")); } + #[test] + fn test_parse_error_without_path() { + let err = ChatpackError::Parse { + format: "WhatsApp TXT", + source: ParseErrorKind::Pattern("invalid pattern".into()), + path: None, + }; + let display = err.to_string(); + assert!(display.contains("WhatsApp TXT")); + assert!(!display.contains("file:")); + } + + #[test] + fn test_parse_error_other_kind() { + let err = ChatpackError::Parse { + format: "Test", + source: ParseErrorKind::Other("custom error".into()), + path: None, + }; + let display = err.to_string(); + assert!(display.contains("custom error")); + } + + #[test] + fn test_invalid_format_display() { + let err = ChatpackError::InvalidFormat { + format: "Discord", + message: "unrecognized export format".into(), + }; + let display = err.to_string(); + assert!(display.contains("Discord")); + assert!(display.contains("unrecognized export format")); + } + #[test] fn test_invalid_date_display() { let err = ChatpackError::invalid_date("not-a-date"); @@ -401,6 +439,41 @@ mod tests { assert!(display.contains("1024")); } + #[test] + fn test_unexpected_eof_display() { + let err = ChatpackError::unexpected_eof("parsing JSON array"); + let display = err.to_string(); + assert!(display.contains("Unexpected end of file")); + assert!(display.contains("parsing JSON array")); + } + + #[test] + fn test_streaming_error_display() { + let err = ChatpackError::Streaming(StreamingErrorKind::InvalidFormat( + "missing header".into(), + )); + let display = err.to_string(); + 
assert!(display.contains("Streaming error")); + assert!(display.contains("missing header")); + } + + #[test] + fn test_utf8_error_display() { + let invalid_bytes = vec![0xff, 0xfe]; + let utf8_err = String::from_utf8(invalid_bytes).unwrap_err(); + let err = ChatpackError::Utf8 { + context: "reading file".into(), + source: utf8_err, + }; + let display = err.to_string(); + assert!(display.contains("UTF-8")); + assert!(display.contains("reading file")); + } + + // ========================================================================= + // Error source chain tests + // ========================================================================= + #[test] fn test_error_source_chain() { use std::error::Error; @@ -409,17 +482,56 @@ mod tests { assert!(err.source().is_some()); } + #[test] + fn test_streaming_error_source() { + use std::error::Error; + let io_err = io::Error::new(io::ErrorKind::NotFound, "not found"); + let streaming_err = StreamingErrorKind::Io(io_err); + let err = ChatpackError::Streaming(streaming_err); + assert!(err.source().is_some()); + } + + // ========================================================================= + // is_* methods tests + // ========================================================================= + #[test] fn test_is_methods() { let io_err = ChatpackError::Io(io::Error::new(io::ErrorKind::NotFound, "")); assert!(io_err.is_io()); assert!(!io_err.is_parse()); + assert!(!io_err.is_invalid_format()); + assert!(!io_err.is_invalid_date()); let date_err = ChatpackError::invalid_date("bad"); assert!(date_err.is_invalid_date()); assert!(!date_err.is_io()); + assert!(!date_err.is_parse()); + assert!(!date_err.is_invalid_format()); + } + + #[test] + fn test_is_parse() { + let err = ChatpackError::Parse { + format: "Test", + source: ParseErrorKind::Other("test".into()), + path: None, + }; + assert!(err.is_parse()); + assert!(!err.is_io()); + } + + #[test] + fn test_is_invalid_format() { + let err = ChatpackError::invalid_format("Test", "bad 
format"); + assert!(err.is_invalid_format()); + assert!(!err.is_parse()); } + // ========================================================================= + // Convenience constructors tests + // ========================================================================= + #[test] fn test_convenience_constructors() { let err = ChatpackError::invalid_format("WhatsApp", "could not detect date format"); @@ -431,6 +543,191 @@ mod tests { assert!(display.contains("reading message array")); } + #[test] + fn test_whatsapp_parse_constructor() { + let err = ChatpackError::whatsapp_parse("invalid format", None); + assert!(err.is_parse()); + assert!(err.to_string().contains("WhatsApp TXT")); + + let err_with_path = + ChatpackError::whatsapp_parse("invalid format", Some(PathBuf::from("/test.txt"))); + assert!(err_with_path.to_string().contains("/test.txt")); + } + + #[cfg(feature = "telegram")] + #[test] + fn test_telegram_parse_constructor() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let err = ChatpackError::telegram_parse(json_err, None); + assert!(err.is_parse()); + assert!(err.to_string().contains("Telegram JSON")); + } + + #[cfg(feature = "instagram")] + #[test] + fn test_instagram_parse_constructor() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let err = ChatpackError::instagram_parse(json_err, None); + assert!(err.is_parse()); + assert!(err.to_string().contains("Instagram JSON")); + } + + #[cfg(feature = "discord")] + #[test] + fn test_discord_parse_constructor() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let err = ChatpackError::discord_parse(json_err, None); + assert!(err.is_parse()); + assert!(err.to_string().contains("Discord")); + } + + #[test] + fn test_streaming_constructor() { + let kind = StreamingErrorKind::UnexpectedEof; + let err = ChatpackError::streaming(kind); + assert!(err.to_string().contains("Streaming error")); + } + + // 
========================================================================= + // From conversions tests + // ========================================================================= + + #[test] + fn test_from_io_error() { + let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); + let err: ChatpackError = io_err.into(); + assert!(err.is_io()); + } + + #[cfg(any(feature = "csv-output", feature = "discord"))] + #[test] + fn test_from_csv_error() { + // Create a CSV error by using a writer and forcing an error + use std::io::Cursor; + let mut wtr = csv::Writer::from_writer(Cursor::new(Vec::new())); + // Write some data first + wtr.write_record(["a", "b"]).expect("write"); + // Create error via deserialization of invalid data + let data = "field1,field2\n\"unclosed"; + let mut rdr = csv::ReaderBuilder::new().from_reader(data.as_bytes()); + for result in rdr.records() { + if let Err(csv_err) = result { + let err: ChatpackError = csv_err.into(); + assert!(err.to_string().contains("CSV error")); + return; + } + } + // If we reach here, force an error in a different way + // The Csv variant just needs to be tested for From conversion + let io_err = std::io::Error::new(std::io::ErrorKind::Other, "test"); + let csv_err = csv::Error::from(io_err); + let err: ChatpackError = csv_err.into(); + assert!(err.to_string().contains("CSV error")); + } + + #[cfg(any( + feature = "telegram", + feature = "instagram", + feature = "discord", + feature = "json-output" + ))] + #[test] + fn test_from_json_error() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let err: ChatpackError = json_err.into(); + assert!(err.to_string().contains("JSON error")); + } + + #[test] + fn test_from_utf8_error() { + let invalid_bytes = vec![0xff, 0xfe]; + let utf8_err = String::from_utf8(invalid_bytes).unwrap_err(); + let err: ChatpackError = utf8_err.into(); + assert!(err.to_string().contains("UTF-8")); + } + + // 
========================================================================= + // StreamingErrorKind tests + // ========================================================================= + + #[test] + fn test_streaming_error_kind_io() { + let io_err = io::Error::new(io::ErrorKind::NotFound, "not found"); + let kind = StreamingErrorKind::Io(io_err); + assert!(kind.to_string().contains("IO error")); + } + + #[cfg(any( + feature = "telegram", + feature = "instagram", + feature = "discord", + feature = "json-output" + ))] + #[test] + fn test_streaming_error_kind_json() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let kind = StreamingErrorKind::Json(json_err); + assert!(kind.to_string().contains("JSON error")); + } + + #[test] + fn test_streaming_error_kind_invalid_format() { + let kind = StreamingErrorKind::InvalidFormat("missing messages array".into()); + assert!(kind.to_string().contains("Invalid format")); + assert!(kind.to_string().contains("missing messages array")); + } + + #[test] + fn test_streaming_error_kind_buffer_overflow() { + let kind = StreamingErrorKind::BufferOverflow { + max_size: 1024, + actual_size: 2048, + }; + let display = kind.to_string(); + assert!(display.contains("Buffer overflow")); + assert!(display.contains("1024")); + assert!(display.contains("2048")); + } + + #[test] + fn test_streaming_error_kind_unexpected_eof() { + let kind = StreamingErrorKind::UnexpectedEof; + assert!(kind.to_string().contains("Unexpected end of file")); + } + + // ========================================================================= + // ParseErrorKind tests + // ========================================================================= + + #[test] + fn test_parse_error_kind_pattern() { + let kind = ParseErrorKind::Pattern("invalid regex".into()); + assert!(kind.to_string().contains("invalid regex")); + } + + #[test] + fn test_parse_error_kind_other() { + let kind = ParseErrorKind::Other("unknown error".into()); + 
assert!(kind.to_string().contains("unknown error")); + } + + #[cfg(any( + feature = "telegram", + feature = "instagram", + feature = "discord", + feature = "json-output" + ))] + #[test] + fn test_parse_error_kind_json() { + let json_err = serde_json::from_str::("invalid").unwrap_err(); + let kind = ParseErrorKind::Json(json_err); + assert!(!kind.to_string().is_empty()); + } + + // ========================================================================= + // Result type alias test + // ========================================================================= + #[test] fn test_result_type_alias() { fn returns_result() -> i32 { @@ -441,7 +738,24 @@ mod tests { Err(ChatpackError::invalid_date("bad")) } + fn returns_ok() -> Result { + Ok(42) + } + assert_eq!(returns_result(), 42); assert!(returns_error().is_err()); + assert!(returns_ok().is_ok()); + assert_eq!(returns_ok().unwrap(), 42); + } + + // ========================================================================= + // Debug trait test + // ========================================================================= + + #[test] + fn test_error_debug() { + let err = ChatpackError::invalid_date("bad"); + let debug = format!("{:?}", err); + assert!(debug.contains("InvalidDate")); } } diff --git a/src/format.rs b/src/format.rs index 0995352f..fdf441d9 100644 --- a/src/format.rs +++ b/src/format.rs @@ -286,6 +286,10 @@ mod tests { use super::*; use std::str::FromStr; + // ========================================================================= + // FromStr tests + // ========================================================================= + #[test] fn test_format_from_str() { assert_eq!(OutputFormat::from_str("csv").unwrap(), OutputFormat::Csv); @@ -299,9 +303,28 @@ mod tests { OutputFormat::Jsonl ); assert_eq!(OutputFormat::from_str("CSV").unwrap(), OutputFormat::Csv); - assert!(OutputFormat::from_str("unknown").is_err()); + assert_eq!(OutputFormat::from_str("JSON").unwrap(), OutputFormat::Json); + 
assert_eq!(OutputFormat::from_str("JSONL").unwrap(), OutputFormat::Jsonl); + assert_eq!(OutputFormat::from_str("NDJSON").unwrap(), OutputFormat::Jsonl); + } + + #[test] + fn test_format_from_str_errors() { + let err = OutputFormat::from_str("unknown").unwrap_err(); + assert!(err.contains("Unknown format")); + assert!(err.contains("unknown")); + + let err = OutputFormat::from_str("").unwrap_err(); + assert!(err.contains("Unknown format")); + + let err = OutputFormat::from_str("xml").unwrap_err(); + assert!(err.contains("Unknown format")); } + // ========================================================================= + // Display tests + // ========================================================================= + #[test] fn test_format_display() { assert_eq!(OutputFormat::Csv.to_string(), "CSV"); @@ -309,6 +332,10 @@ mod tests { assert_eq!(OutputFormat::Jsonl.to_string(), "JSONL"); } + // ========================================================================= + // Extension tests + // ========================================================================= + #[test] fn test_format_extension() { assert_eq!(OutputFormat::Csv.extension(), "csv"); @@ -316,6 +343,10 @@ mod tests { assert_eq!(OutputFormat::Jsonl.extension(), "jsonl"); } + // ========================================================================= + // MIME type tests + // ========================================================================= + #[test] fn test_format_mime_type() { assert_eq!(OutputFormat::Csv.mime_type(), "text/csv"); @@ -323,6 +354,10 @@ mod tests { assert_eq!(OutputFormat::Jsonl.mime_type(), "application/x-ndjson"); } + // ========================================================================= + // from_path tests + // ========================================================================= + #[test] fn test_format_from_path() { assert_eq!( @@ -345,9 +380,28 @@ mod tests { OutputFormat::from_path("/path/to/file.JSON").unwrap(), OutputFormat::Json ); - 
assert!(OutputFormat::from_path("output.txt").is_err()); + assert_eq!( + OutputFormat::from_path("file.CSV").unwrap(), + OutputFormat::Csv + ); } + #[test] + fn test_format_from_path_errors() { + let err = OutputFormat::from_path("output.txt"); + assert!(err.is_err()); + + let err = OutputFormat::from_path("noextension"); + assert!(err.is_err()); + + let err = OutputFormat::from_path("file.xml"); + assert!(err.is_err()); + } + + // ========================================================================= + // all() and all_names() tests + // ========================================================================= + #[test] fn test_format_all() { let all = OutputFormat::all(); @@ -357,11 +411,28 @@ mod tests { assert!(all.contains(&OutputFormat::Jsonl)); } + #[test] + fn test_format_all_names() { + let names = OutputFormat::all_names(); + assert!(names.contains(&"csv")); + assert!(names.contains(&"json")); + assert!(names.contains(&"jsonl")); + assert!(names.contains(&"ndjson")); + } + + // ========================================================================= + // Default trait tests + // ========================================================================= + #[test] fn test_format_default() { assert_eq!(OutputFormat::default(), OutputFormat::Csv); } + // ========================================================================= + // Serde tests + // ========================================================================= + #[test] fn test_format_serde() { let format = OutputFormat::Jsonl; @@ -371,4 +442,66 @@ mod tests { let parsed: OutputFormat = serde_json::from_str("\"csv\"").unwrap(); assert_eq!(parsed, OutputFormat::Csv); } + + #[test] + fn test_format_serde_all_variants() { + for format in OutputFormat::all() { + let json = serde_json::to_string(format).expect("serialize failed"); + let parsed: OutputFormat = serde_json::from_str(&json).expect("deserialize failed"); + assert_eq!(parsed, *format); + } + } + + // 
========================================================================= + // Traits tests (Clone, Copy, Eq, Hash) + // ========================================================================= + + #[test] + fn test_format_clone_copy() { + let f1 = OutputFormat::Csv; + let f2 = f1; // Copy + let f3 = f1.clone(); + assert_eq!(f1, f2); + assert_eq!(f1, f3); + } + + #[test] + fn test_format_eq_hash() { + use std::collections::HashSet; + let mut set = HashSet::new(); + set.insert(OutputFormat::Csv); + set.insert(OutputFormat::Json); + set.insert(OutputFormat::Csv); // Duplicate + assert_eq!(set.len(), 2); + } + + #[test] + fn test_format_debug() { + let debug = format!("{:?}", OutputFormat::Csv); + assert!(debug.contains("Csv")); + } + + // ========================================================================= + // to_format_string tests + // ========================================================================= + + #[cfg(all(feature = "csv-output", feature = "json-output"))] + #[test] + fn test_to_format_string() { + let messages = vec![Message::new("Alice", "Hello!")]; + let config = OutputConfig::new(); + + let csv = to_format_string(&messages, OutputFormat::Csv, &config).expect("csv failed"); + assert!(csv.contains("Alice")); + assert!(csv.contains("Hello")); + + let json = to_format_string(&messages, OutputFormat::Json, &config).expect("json failed"); + assert!(json.contains("Alice")); + assert!(json.contains("Hello")); + + let jsonl = + to_format_string(&messages, OutputFormat::Jsonl, &config).expect("jsonl failed"); + assert!(jsonl.contains("Alice")); + assert!(jsonl.contains("Hello")); + } } diff --git a/src/parser.rs b/src/parser.rs index c36b2d9f..eb704bd7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -451,6 +451,10 @@ mod tests { use super::*; use std::str::FromStr; + // ========================================================================= + // Platform::from_str tests + // 
========================================================================= + #[test] fn test_platform_from_str() { assert_eq!(Platform::from_str("telegram").unwrap(), Platform::Telegram); @@ -467,11 +471,39 @@ mod tests { assert_eq!(Platform::from_str("dc").unwrap(), Platform::Discord); } + #[test] + fn test_platform_from_str_case_insensitive() { + // Test various case combinations + assert_eq!(Platform::from_str("TeLegRaM").unwrap(), Platform::Telegram); + assert_eq!(Platform::from_str("TG").unwrap(), Platform::Telegram); + assert_eq!(Platform::from_str("WhAtSaPp").unwrap(), Platform::WhatsApp); + assert_eq!(Platform::from_str("WA").unwrap(), Platform::WhatsApp); + assert_eq!( + Platform::from_str("InStAgRaM").unwrap(), + Platform::Instagram + ); + assert_eq!(Platform::from_str("IG").unwrap(), Platform::Instagram); + assert_eq!(Platform::from_str("DiScOrD").unwrap(), Platform::Discord); + assert_eq!(Platform::from_str("DC").unwrap(), Platform::Discord); + } + #[test] fn test_platform_from_str_error() { - assert!(Platform::from_str("unknown").is_err()); + let err = Platform::from_str("unknown").unwrap_err(); + assert!(err.contains("Unknown platform")); + assert!(err.contains("unknown")); + + let err = Platform::from_str("").unwrap_err(); + assert!(err.contains("Unknown platform")); + + let err = Platform::from_str("telegramx").unwrap_err(); + assert!(err.contains("Unknown platform")); } + // ========================================================================= + // Platform display tests + // ========================================================================= + #[test] fn test_platform_display() { assert_eq!(Platform::Telegram.to_string(), "Telegram"); @@ -480,6 +512,10 @@ mod tests { assert_eq!(Platform::Discord.to_string(), "Discord"); } + // ========================================================================= + // Platform default_extension tests + // ========================================================================= + #[test] fn 
test_platform_default_extension() { assert_eq!(Platform::Telegram.default_extension(), "json"); @@ -488,6 +524,10 @@ mod tests { assert_eq!(Platform::Discord.default_extension(), "json"); } + // ========================================================================= + // Platform::all and Platform::all_names tests + // ========================================================================= + #[test] fn test_platform_all() { let all = Platform::all(); @@ -498,11 +538,285 @@ mod tests { assert!(all.contains(&Platform::Discord)); } + #[test] + fn test_platform_all_names() { + let names = Platform::all_names(); + assert!(names.contains(&"telegram")); + assert!(names.contains(&"tg")); + assert!(names.contains(&"whatsapp")); + assert!(names.contains(&"wa")); + assert!(names.contains(&"instagram")); + assert!(names.contains(&"ig")); + assert!(names.contains(&"discord")); + assert!(names.contains(&"dc")); + } + + // ========================================================================= + // Platform serde tests + // ========================================================================= + + #[test] + fn test_platform_serde() { + let platform = Platform::Telegram; + let json = serde_json::to_string(&platform).expect("serialize failed"); + assert_eq!(json, "\"telegram\""); + + let parsed: Platform = serde_json::from_str("\"telegram\"").expect("deserialize failed"); + assert_eq!(parsed, Platform::Telegram); + + // Test alias deserialization + let parsed: Platform = serde_json::from_str("\"tg\"").expect("deserialize failed"); + assert_eq!(parsed, Platform::Telegram); + + let parsed: Platform = serde_json::from_str("\"wa\"").expect("deserialize failed"); + assert_eq!(parsed, Platform::WhatsApp); + } + + #[test] + fn test_platform_serde_all_variants() { + for platform in Platform::all() { + let json = serde_json::to_string(platform).expect("serialize failed"); + let parsed: Platform = serde_json::from_str(&json).expect("deserialize failed"); + assert_eq!(parsed, 
*platform); + } + } + + // ========================================================================= + // Platform traits tests + // ========================================================================= + + #[test] + fn test_platform_clone_copy() { + let p1 = Platform::Telegram; + let p2 = p1; // Copy + let p3 = p1.clone(); + assert_eq!(p1, p2); + assert_eq!(p1, p3); + } + + #[test] + fn test_platform_debug() { + let debug = format!("{:?}", Platform::Telegram); + assert!(debug.contains("Telegram")); + } + + #[test] + fn test_platform_eq_hash() { + use std::collections::HashSet; + let mut set = HashSet::new(); + set.insert(Platform::Telegram); + set.insert(Platform::WhatsApp); + set.insert(Platform::Telegram); // Duplicate + assert_eq!(set.len(), 2); + assert!(set.contains(&Platform::Telegram)); + assert!(set.contains(&Platform::WhatsApp)); + } + + // ========================================================================= + // create_parser tests + // ========================================================================= + #[cfg(feature = "telegram")] #[test] - fn test_create_parser() { + fn test_create_parser_telegram() { let parser = create_parser(Platform::Telegram); assert_eq!(parser.name(), "Telegram"); assert_eq!(parser.platform(), Platform::Telegram); + assert!(!parser.supports_streaming()); + } + + #[cfg(feature = "whatsapp")] + #[test] + fn test_create_parser_whatsapp() { + let parser = create_parser(Platform::WhatsApp); + assert_eq!(parser.name(), "WhatsApp"); + assert_eq!(parser.platform(), Platform::WhatsApp); + } + + #[cfg(feature = "instagram")] + #[test] + fn test_create_parser_instagram() { + let parser = create_parser(Platform::Instagram); + assert_eq!(parser.name(), "Instagram"); + assert_eq!(parser.platform(), Platform::Instagram); + } + + #[cfg(feature = "discord")] + #[test] + fn test_create_parser_discord() { + let parser = create_parser(Platform::Discord); + assert_eq!(parser.name(), "Discord"); + assert_eq!(parser.platform(), 
Platform::Discord); + } + + // ========================================================================= + // create_streaming_parser tests + // ========================================================================= + + #[cfg(feature = "telegram")] + #[test] + fn test_create_streaming_parser_telegram() { + let parser = create_streaming_parser(Platform::Telegram); + assert_eq!(parser.name(), "Telegram"); + assert!(parser.supports_streaming()); + assert!(parser.recommended_buffer_size() >= 64 * 1024); + } + + #[cfg(feature = "whatsapp")] + #[test] + fn test_create_streaming_parser_whatsapp() { + let parser = create_streaming_parser(Platform::WhatsApp); + assert_eq!(parser.name(), "WhatsApp"); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "instagram")] + #[test] + fn test_create_streaming_parser_instagram() { + let parser = create_streaming_parser(Platform::Instagram); + assert_eq!(parser.name(), "Instagram"); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "discord")] + #[test] + fn test_create_streaming_parser_discord() { + let parser = create_streaming_parser(Platform::Discord); + assert_eq!(parser.name(), "Discord"); + assert!(parser.supports_streaming()); + } + + // ========================================================================= + // Parser trait method tests + // ========================================================================= + + #[cfg(feature = "telegram")] + #[test] + fn test_parser_parse_str() { + let parser = create_parser(Platform::Telegram); + let json = r#"{"messages": [{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[0].content, "Hello"); + } + + #[cfg(feature = "telegram")] + #[test] + fn test_parser_parse_file() { + use std::io::Write; + let dir = tempfile::tempdir().expect("create temp 
dir"); + let file_path = dir.path().join("test.json"); + let mut file = std::fs::File::create(&file_path).expect("create file"); + write!(file, r#"{{"messages": [{{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": "Hi"}}]}}"#).expect("write"); + + let parser = create_parser(Platform::Telegram); + let messages = parser + .parse_file(file_path.to_str().unwrap()) + .expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Bob"); + } + + #[cfg(all(feature = "telegram", feature = "streaming"))] + #[test] + fn test_parser_stream_file() { + use std::io::Write; + let dir = tempfile::tempdir().expect("create temp dir"); + let file_path = dir.path().join("test.json"); + let mut file = std::fs::File::create(&file_path).expect("create file"); + // Streaming parser needs newlines for line-by-line reading + writeln!(file, r#"{{"#).expect("write"); + writeln!(file, r#" "messages": ["#).expect("write"); + writeln!(file, r#" {{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Charlie", "text": "Hello"}}"#).expect("write"); + writeln!(file, r#" ]"#).expect("write"); + writeln!(file, r#"}}"#).expect("write"); + file.flush().expect("flush"); + drop(file); + + let parser = create_streaming_parser(Platform::Telegram); + let iter = parser + .stream_file(file_path.to_str().unwrap()) + .expect("stream failed"); + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Charlie"); + } + + // ========================================================================= + // Parser default implementations + // ========================================================================= + + #[cfg(feature = "telegram")] + #[test] + fn test_parser_default_supports_streaming() { + let parser = create_parser(Platform::Telegram); + // Default parser (non-streaming config) should return false + assert!(!parser.supports_streaming()); + } + + 
#[cfg(feature = "telegram")] + #[test] + fn test_parser_default_recommended_buffer_size() { + let parser = create_parser(Platform::Telegram); + // Should return at least 64KB + assert!(parser.recommended_buffer_size() >= 64 * 1024); + } + + // ========================================================================= + // ParseIterator tests + // ========================================================================= + + #[cfg(all(feature = "telegram", feature = "streaming"))] + #[test] + fn test_parse_iterator_wrapper() { + use std::io::Write; + use crate::streaming::TelegramStreamingParser; + use crate::streaming::StreamingParser; + + let dir = tempfile::tempdir().expect("create temp dir"); + let file_path = dir.path().join("test.json"); + let mut file = std::fs::File::create(&file_path).expect("create file"); + writeln!(file, r#"{{"#).expect("write"); + writeln!(file, r#" "messages": ["#).expect("write"); + writeln!(file, r#" {{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}}"#).expect("write"); + writeln!(file, r#" ]"#).expect("write"); + writeln!(file, r#"}}"#).expect("write"); + file.flush().expect("flush"); + drop(file); + + let streaming_parser = TelegramStreamingParser::new(); + let inner = streaming_parser.stream(file_path.to_str().unwrap()).expect("stream failed"); + + let mut parse_iter = ParseIterator::new(inner); + + // Progress accessors must be callable before iteration; only total_bytes() has a checkable value here + let _ = parse_iter.progress(); + let _ = parse_iter.bytes_processed(); + assert!(parse_iter.total_bytes().is_some()); + + // Test iterator + let msg = parse_iter.next().expect("should have message").expect("parse ok"); + assert_eq!(msg.sender, "Alice"); + } + + #[cfg(feature = "telegram")] + #[test] + fn test_parser_stream_default_impl() { + use std::io::Write; + + let dir = tempfile::tempdir().expect("create temp dir"); + let file_path = dir.path().join("test.json"); + let mut file = 
std::fs::File::create(&file_path).expect("create file"); + write!(file, r#"{{"messages": [{{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": "Hi"}}]}}"#).expect("write"); + file.flush().expect("flush"); + drop(file); + + // Use non-streaming parser to test default stream() implementation + let parser = create_parser(Platform::Telegram); + let iter = parser.stream(file_path.as_ref()).expect("stream failed"); + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Bob"); } } diff --git a/src/parsers/discord.rs b/src/parsers/discord.rs index 820464f9..4c360994 100644 --- a/src/parsers/discord.rs +++ b/src/parsers/discord.rs @@ -505,18 +505,50 @@ impl Parser for DiscordParser { mod tests { use super::*; + // ========================================================================= + // DiscordParser construction tests + // ========================================================================= + #[test] fn test_parser_name() { let parser = DiscordParser::new(); assert_eq!(Parser::name(&parser), "Discord"); } + #[test] + fn test_parser_platform() { + let parser = DiscordParser::new(); + assert_eq!(parser.platform(), Platform::Discord); + } + #[test] fn test_parser_default() { let parser = DiscordParser::default(); assert_eq!(Parser::name(&parser), "Discord"); + assert!(parser.config().prefer_nickname); + assert!(parser.config().include_attachments); + } + + #[test] + fn test_parser_with_config() { + let config = DiscordConfig::new() + .with_streaming(true) + .with_prefer_nickname(false); + let parser = DiscordParser::with_config(config); + assert!(parser.config().streaming); + assert!(!parser.config().prefer_nickname); + } + + #[test] + fn test_parser_with_streaming() { + let parser = DiscordParser::with_streaming(); + assert!(parser.config().streaming); } + // ========================================================================= + // Format detection tests 
+ // ========================================================================= + #[test] fn test_format_detection_from_ext() { assert!(matches!( @@ -541,9 +573,14 @@ mod tests { DiscordParser::detect_format_from_ext("test.CSV"), Some(DiscordFormat::Csv) )); + assert!(matches!( + DiscordParser::detect_format_from_ext("test.TXT"), + Some(DiscordFormat::Txt) + )); // No extension assert!(DiscordParser::detect_format_from_ext("test").is_none()); + assert!(DiscordParser::detect_format_from_ext("test.unknown").is_none()); } #[test] @@ -556,12 +593,24 @@ mod tests { DiscordParser::detect_format_from_content("AuthorID,Author,Date"), DiscordFormat::Csv )); + assert!(matches!( + DiscordParser::detect_format_from_content(r#""a","b","c""#), + DiscordFormat::Csv + )); assert!(matches!( DiscordParser::detect_format_from_content("[1/15/2024 10:30 AM] alice"), DiscordFormat::Txt )); + assert!(matches!( + DiscordParser::detect_format_from_content("plain text"), + DiscordFormat::Txt + )); } + // ========================================================================= + // JSON parsing tests + // ========================================================================= + #[test] fn test_parse_json_basic() { let parser = DiscordParser::new(); @@ -580,6 +629,8 @@ mod tests { assert_eq!(messages.len(), 1); assert_eq!(messages[0].sender, "alice"); assert_eq!(messages[0].content, "Hello world"); + assert!(messages[0].timestamp.is_some()); + assert_eq!(messages[0].id, Some(123)); } #[test] @@ -600,6 +651,122 @@ mod tests { assert_eq!(messages[0].sender, "Alice"); } + #[test] + fn test_parse_json_with_attachments() { + let parser = DiscordParser::new(); + let json = r#"{ + "messages": [ + { + "id": "123", + "timestamp": "2024-01-15T10:30:00+00:00", + "content": "Check this out", + "author": {"name": "alice"}, + "attachments": [{"fileName": "image.png"}, {"fileName": "doc.pdf"}] + } + ] + }"#; + + let messages = parser.parse_json(json).unwrap(); + assert_eq!(messages.len(), 1); + 
assert!(messages[0].content.contains("Check this out")); + assert!(messages[0].content.contains("[Attachment: image.png]")); + assert!(messages[0].content.contains("[Attachment: doc.pdf]")); + } + + #[test] + fn test_parse_json_with_stickers() { + let parser = DiscordParser::new(); + let json = r#"{ + "messages": [ + { + "id": "123", + "timestamp": "2024-01-15T10:30:00+00:00", + "content": "", + "author": {"name": "alice"}, + "stickers": [{"name": "cool_sticker"}] + } + ] + }"#; + + let messages = parser.parse_json(json).unwrap(); + assert_eq!(messages.len(), 1); + assert!(messages[0].content.contains("[Sticker: cool_sticker]")); + } + + #[test] + fn test_parse_json_with_reply() { + let parser = DiscordParser::new(); + let json = r#"{ + "messages": [ + { + "id": "124", + "timestamp": "2024-01-15T10:30:00+00:00", + "content": "Reply!", + "author": {"name": "bob"}, + "reference": {"messageId": "123"} + } + ] + }"#; + + let messages = parser.parse_json(json).unwrap(); + assert_eq!(messages[0].reply_to, Some(123)); + } + + #[test] + fn test_parse_json_with_edited() { + let parser = DiscordParser::new(); + let json = r#"{ + "messages": [ + { + "id": "123", + "timestamp": "2024-01-15T10:30:00+00:00", + "timestampEdited": "2024-01-15T10:35:00+00:00", + "content": "Edited message", + "author": {"name": "alice"} + } + ] + }"#; + + let messages = parser.parse_json(json).unwrap(); + assert!(messages[0].edited.is_some()); + } + + #[test] + fn test_parse_json_skips_empty() { + let parser = DiscordParser::new(); + let json = r#"{ + "messages": [ + { + "id": "123", + "timestamp": "2024-01-15T10:30:00+00:00", + "content": "Hello", + "author": {"name": "alice"} + }, + { + "id": "124", + "timestamp": "2024-01-15T10:31:00+00:00", + "content": "", + "author": {"name": "bob"} + } + ] + }"#; + + let messages = parser.parse_json(json).unwrap(); + assert_eq!(messages.len(), 1); + } + + #[test] + fn test_parse_json_empty_messages() { + let parser = DiscordParser::new(); + let json = 
r#"{"messages": []}"#; + let messages = parser.parse_json(json).unwrap(); + assert!(messages.is_empty()); + } + + // ========================================================================= + // TXT parsing tests + // ========================================================================= + #[test] fn test_txt_timestamp_parsing() { let ts = DiscordParser::parse_txt_timestamp("1/15/2024 10:30 AM"); @@ -607,8 +774,106 @@ mod tests { let ts = DiscordParser::parse_txt_timestamp("12/31/2024 11:59 PM"); assert!(ts.is_some()); + + let ts = DiscordParser::parse_txt_timestamp("1/1/2024 1:00 AM"); + assert!(ts.is_some()); + + // 24-hour format + let ts = DiscordParser::parse_txt_timestamp("1/15/2024 14:30"); + assert!(ts.is_some()); } + #[test] + fn test_parse_txt_basic() { + let parser = DiscordParser::new(); + let txt = "[1/15/2024 10:30 AM] alice\nHello world\n[1/15/2024 10:31 AM] bob\nHi there"; + + let messages = parser.parse_txt(txt).unwrap(); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "alice"); + assert_eq!(messages[0].content, "Hello world"); + assert_eq!(messages[1].sender, "bob"); + assert_eq!(messages[1].content, "Hi there"); + } + + #[test] + fn test_parse_txt_multiline() { + let parser = DiscordParser::new(); + let txt = "[1/15/2024 10:30 AM] alice\nLine 1\nLine 2\nLine 3"; + + let messages = parser.parse_txt(txt).unwrap(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Line 1\nLine 2\nLine 3"); + } + + #[test] + fn test_parse_txt_with_attachments() { + let parser = DiscordParser::new(); + let txt = "[1/15/2024 10:30 AM] alice\nMessage\n{Attachments}\nhttps://cdn.discord.com/image.png"; + + let messages = parser.parse_txt(txt).unwrap(); + assert_eq!(messages.len(), 1); + assert!(messages[0].content.contains("Message")); + assert!(messages[0].content.contains("[Attachment: image.png]")); + } + + #[test] + fn test_parse_txt_with_stickers() { + let parser = DiscordParser::new(); + let txt = "[1/15/2024 10:30 AM] 
alice\n{Stickers}\ncool_sticker"; + + let messages = parser.parse_txt(txt).unwrap(); + assert_eq!(messages.len(), 1); + assert!(messages[0].content.contains("[Sticker: cool_sticker]")); + } + + #[test] + fn test_parse_txt_empty() { + let parser = DiscordParser::new(); + let txt = ""; + let messages = parser.parse_txt(txt).unwrap(); + assert!(messages.is_empty()); + } + + // ========================================================================= + // CSV parsing tests + // ========================================================================= + + #[test] + fn test_parse_csv_basic() { + let parser = DiscordParser::new(); + let csv = "AuthorID,Author,Date,Content,Attachments,Reactions\n123,alice,2024-01-15T10:30:00+00:00,Hello world,,"; + + let messages = parser.parse_csv_str(csv).unwrap(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "alice"); + assert_eq!(messages[0].content, "Hello world"); + } + + #[test] + fn test_parse_csv_with_attachments() { + let parser = DiscordParser::new(); + let csv = "AuthorID,Author,Date,Content,Attachments,Reactions\n123,alice,2024-01-15T10:30:00+00:00,Check this,https://cdn.discord.com/image.png,"; + + let messages = parser.parse_csv_str(csv).unwrap(); + assert_eq!(messages.len(), 1); + assert!(messages[0].content.contains("Check this")); + assert!(messages[0].content.contains("[Attachment: image.png]")); + } + + #[test] + fn test_parse_csv_skips_empty() { + let parser = DiscordParser::new(); + let csv = "AuthorID,Author,Date,Content,Attachments,Reactions\n123,alice,2024-01-15T10:30:00+00:00,Hello,,\n124,bob,2024-01-15T10:31:00+00:00,,,"; + + let messages = parser.parse_csv_str(csv).unwrap(); + assert_eq!(messages.len(), 1); + } + + // ========================================================================= + // parse_str auto-detection tests + // ========================================================================= + #[test] fn test_parse_str_json() { let parser = DiscordParser::new(); @@ -618,4 +883,52 
@@ mod tests { assert_eq!(messages.len(), 1); assert_eq!(messages[0].sender, "bob"); } + + #[test] + fn test_parse_str_csv() { + let parser = DiscordParser::new(); + let csv = "AuthorID,Author,Date,Content,Attachments\n123,alice,2024-01-15T10:30:00+00:00,Hello,"; + + let messages = Parser::parse_str(&parser, csv).unwrap(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "alice"); + } + + #[test] + fn test_parse_str_txt() { + let parser = DiscordParser::new(); + let txt = "[1/15/2024 10:30 AM] alice\nHello world"; + + let messages = Parser::parse_str(&parser, txt).unwrap(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "alice"); + } + + // ========================================================================= + // Streaming support tests + // ========================================================================= + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_false_by_default() { + let parser = DiscordParser::new(); + assert!(!parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_true_when_enabled() { + let parser = DiscordParser::with_streaming(); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_recommended_buffer_size() { + let parser = DiscordParser::new(); + assert_eq!(parser.recommended_buffer_size(), 64 * 1024); + + let streaming_parser = DiscordParser::with_streaming(); + assert_eq!(streaming_parser.recommended_buffer_size(), 256 * 1024); + } } diff --git a/src/parsers/instagram.rs b/src/parsers/instagram.rs index 29f5e5a1..f41cca38 100644 --- a/src/parsers/instagram.rs +++ b/src/parsers/instagram.rs @@ -139,3 +139,189 @@ impl Parser for InstagramParser { self.config.buffer_size } } + +#[cfg(test)] +mod tests { + use super::*; + + // ========================================================================= + // InstagramParser construction tests + // 
========================================================================= + + #[test] + fn test_parser_new() { + let parser = InstagramParser::new(); + assert!(!parser.config().streaming); + assert!(parser.config().fix_encoding); + } + + #[test] + fn test_parser_default() { + let parser = InstagramParser::default(); + assert!(!parser.config().streaming); + assert!(parser.config().fix_encoding); + } + + #[test] + fn test_parser_with_config() { + let config = InstagramConfig::new() + .with_streaming(true) + .with_fix_encoding(false); + let parser = InstagramParser::with_config(config); + assert!(parser.config().streaming); + assert!(!parser.config().fix_encoding); + } + + #[test] + fn test_parser_with_streaming() { + let parser = InstagramParser::with_streaming(); + assert!(parser.config().streaming); + } + + #[test] + fn test_parser_name() { + let parser = InstagramParser::new(); + assert_eq!(Parser::name(&parser), "Instagram"); + } + + #[test] + fn test_parser_platform() { + let parser = InstagramParser::new(); + assert_eq!(parser.platform(), Platform::Instagram); + } + + // ========================================================================= + // parse_str tests + // ========================================================================= + + #[test] + fn test_parse_str_simple() { + let parser = InstagramParser::new(); + let json = r#"{"messages": [{"sender_name": "Alice", "content": "Hello", "timestamp_ms": 1234567890000}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[0].content, "Hello"); + } + + #[test] + fn test_parse_str_filters_empty_content() { + let parser = InstagramParser::new(); + let json = r#"{"messages": [ + {"sender_name": "Alice", "content": "Hello", "timestamp_ms": 1234567890000}, + {"sender_name": "Bob", "content": "", "timestamp_ms": 1234567891000}, + {"sender_name": "Charlie", "content": "Hi", "timestamp_ms": 
1234567892000} + ]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Charlie"); // Reversed order + assert_eq!(messages[1].sender, "Alice"); + } + + #[test] + fn test_parse_str_reverses_order() { + let parser = InstagramParser::new(); + let json = r#"{"messages": [ + {"sender_name": "First", "content": "1", "timestamp_ms": 1234567890000}, + {"sender_name": "Second", "content": "2", "timestamp_ms": 1234567891000}, + {"sender_name": "Third", "content": "3", "timestamp_ms": 1234567892000} + ]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 3); + // Instagram stores newest first, so should be reversed + assert_eq!(messages[0].sender, "Third"); + assert_eq!(messages[1].sender, "Second"); + assert_eq!(messages[2].sender, "First"); + } + + #[test] + fn test_parse_str_with_shared_link() { + let parser = InstagramParser::new(); + // When content is present, it's used (share link is separate metadata) + let json = r#"{"messages": [{"sender_name": "Alice", "content": "Check this", "share": {"link": "https://example.com"}, "timestamp_ms": 1234567890000}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Check this"); + } + + #[test] + fn test_parse_str_with_share_text_only() { + let parser = InstagramParser::new(); + // When no content but share_text exists, use share_text + let json = r#"{"messages": [{"sender_name": "Alice", "share": {"share_text": "Shared content", "link": "https://example.com"}, "timestamp_ms": 1234567890000}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Shared content"); + } + + #[test] + fn test_parse_str_empty_messages() { + let parser = InstagramParser::new(); + let json = r#"{"messages": []}"#; + let messages = 
parser.parse_str(json).expect("parse failed"); + assert!(messages.is_empty()); + } + + #[test] + fn test_parse_str_invalid_json() { + let parser = InstagramParser::new(); + let result = parser.parse_str("not json"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_str_missing_messages() { + let parser = InstagramParser::new(); + let result = parser.parse_str(r#"{"participants": []}"#); + assert!(result.is_err()); + } + + #[test] + fn test_parse_str_timestamp_parsing() { + let parser = InstagramParser::new(); + let json = r#"{"messages": [{"sender_name": "Alice", "content": "Hello", "timestamp_ms": 1609459200000}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert!(messages[0].timestamp.is_some()); + } + + // ========================================================================= + // Encoding fix tests + // ========================================================================= + + #[test] + fn test_parse_str_without_fix_encoding() { + let config = InstagramConfig::new().with_fix_encoding(false); + let parser = InstagramParser::with_config(config); + let json = r#"{"messages": [{"sender_name": "Test", "content": "Hello", "timestamp_ms": 1234567890000}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + } + + // ========================================================================= + // Streaming support tests + // ========================================================================= + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_false_by_default() { + let parser = InstagramParser::new(); + assert!(!parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_true_when_enabled() { + let parser = InstagramParser::with_streaming(); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_recommended_buffer_size() { + let parser = InstagramParser::new(); + 
assert_eq!(parser.recommended_buffer_size(), 64 * 1024); + + let streaming_parser = InstagramParser::with_streaming(); + assert_eq!(streaming_parser.recommended_buffer_size(), 256 * 1024); + } +} diff --git a/src/parsers/telegram.rs b/src/parsers/telegram.rs index e468daf0..c44958b5 100644 --- a/src/parsers/telegram.rs +++ b/src/parsers/telegram.rs @@ -156,6 +156,10 @@ mod tests { use crate::parsing::telegram::extract_telegram_text; use serde_json::json; + // ========================================================================= + // extract_telegram_text tests + // ========================================================================= + #[test] fn test_extract_text_string() { let value = json!("Hello world"); @@ -175,15 +179,192 @@ mod tests { ); } + #[test] + fn test_extract_text_array_with_bold_italic() { + let value = json!([ + "Normal ", + {"type": "bold", "text": "bold"}, + " and ", + {"type": "italic", "text": "italic"} + ]); + assert_eq!(extract_telegram_text(&value), "Normal bold and italic"); + } + #[test] fn test_extract_text_empty() { let value = json!(null); assert_eq!(extract_telegram_text(&value), ""); } + #[test] + fn test_extract_text_empty_string() { + let value = json!(""); + assert_eq!(extract_telegram_text(&value), ""); + } + + #[test] + fn test_extract_text_empty_array() { + let value = json!([]); + assert_eq!(extract_telegram_text(&value), ""); + } + + // ========================================================================= + // TelegramParser tests + // ========================================================================= + #[test] fn test_parser_name() { let parser = TelegramParser::new(); assert_eq!(Parser::name(&parser), "Telegram"); } + + #[test] + fn test_parser_platform() { + let parser = TelegramParser::new(); + assert_eq!(parser.platform(), Platform::Telegram); + } + + #[test] + fn test_parser_default() { + let parser = TelegramParser::default(); + assert!(!parser.config().streaming); + } + + #[test] + fn 
test_parser_with_config() { + let config = TelegramConfig::new().with_streaming(true); + let parser = TelegramParser::with_config(config); + assert!(parser.config().streaming); + } + + #[test] + fn test_parser_with_streaming() { + let parser = TelegramParser::with_streaming(); + assert!(parser.config().streaming); + } + + #[test] + fn test_parser_config_accessor() { + let parser = TelegramParser::new(); + let config = parser.config(); + assert_eq!(config.buffer_size, 64 * 1024); + } + + // ========================================================================= + // parse_str tests + // ========================================================================= + + #[test] + fn test_parse_str_simple() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[0].content, "Hello"); + } + + #[test] + fn test_parse_str_with_formatted_text() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": ["Hello ", {"type": "bold", "text": "world"}]}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Hello world"); + } + + #[test] + fn test_parse_str_with_reply() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [{"id": 2, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Reply", "reply_to_message_id": 1}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].reply_to, Some(1)); + } + + #[test] + fn test_parse_str_with_edited() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [{"id": 1, "type": 
"message", "date_unixtime": "1234567890", "from": "Alice", "text": "Edited", "edited_unixtime": "1234567899"}]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 1); + assert!(messages[0].edited.is_some()); + } + + #[test] + fn test_parse_str_filters_service_messages() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [ + {"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}, + {"id": 2, "type": "service", "date_unixtime": "1234567890", "from": "System", "text": "joined"}, + {"id": 3, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": "Hi"} + ]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + #[test] + fn test_parse_str_filters_empty_content() { + let parser = TelegramParser::new(); + let json = r#"{"messages": [ + {"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}, + {"id": 2, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": ""}, + {"id": 3, "type": "message", "date_unixtime": "1234567890", "from": "Charlie", "text": "Hi"} + ]}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_parse_str_empty_messages() { + let parser = TelegramParser::new(); + let json = r#"{"messages": []}"#; + let messages = parser.parse_str(json).expect("parse failed"); + assert!(messages.is_empty()); + } + + #[test] + fn test_parse_str_invalid_json() { + let parser = TelegramParser::new(); + let result = parser.parse_str("invalid json"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_str_missing_messages() { + let parser = TelegramParser::new(); + let result = parser.parse_str(r#"{"name": "Test"}"#); + // Should fail because messages is missing + assert!(result.is_err()); + 
} + + // ========================================================================= + // Streaming support tests + // ========================================================================= + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_false_by_default() { + let parser = TelegramParser::new(); + assert!(!parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_true_when_enabled() { + let parser = TelegramParser::with_streaming(); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_recommended_buffer_size() { + let parser = TelegramParser::new(); + assert_eq!(parser.recommended_buffer_size(), 64 * 1024); + + let streaming_parser = TelegramParser::with_streaming(); + assert_eq!(streaming_parser.recommended_buffer_size(), 256 * 1024); + } } diff --git a/src/parsers/whatsapp.rs b/src/parsers/whatsapp.rs index 3adc82e1..6c68547a 100644 --- a/src/parsers/whatsapp.rs +++ b/src/parsers/whatsapp.rs @@ -205,12 +205,182 @@ mod tests { use super::*; use crate::parsing::whatsapp::DateFormat; + // ========================================================================= + // WhatsAppParser construction tests + // ========================================================================= + #[test] fn test_parser_name() { let parser = WhatsAppParser::new(); assert_eq!(Parser::name(&parser), "WhatsApp"); } + #[test] + fn test_parser_platform() { + let parser = WhatsAppParser::new(); + assert_eq!(parser.platform(), Platform::WhatsApp); + } + + #[test] + fn test_parser_default() { + let parser = WhatsAppParser::default(); + assert!(!parser.config().streaming); + assert!(parser.config().skip_system_messages); + } + + #[test] + fn test_parser_with_config() { + let config = WhatsAppConfig::new() + .with_streaming(true) + .with_skip_system_messages(false); + let parser = WhatsAppParser::with_config(config); + assert!(parser.config().streaming); + 
assert!(!parser.config().skip_system_messages); + } + + #[test] + fn test_parser_with_streaming() { + let parser = WhatsAppParser::with_streaming(); + assert!(parser.config().streaming); + } + + // ========================================================================= + // parse_str tests - US format + // ========================================================================= + + #[test] + fn test_parse_str_us_format() { + let parser = WhatsAppParser::new(); + let content = "[1/15/24, 10:30:45 AM] Alice: Hello\n[1/15/24, 10:31:00 AM] Bob: Hi there"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[0].content, "Hello"); + assert_eq!(messages[1].sender, "Bob"); + assert_eq!(messages[1].content, "Hi there"); + } + + // ========================================================================= + // parse_str tests - EU dot bracketed format + // ========================================================================= + + #[test] + fn test_parse_str_eu_dot_bracketed() { + let parser = WhatsAppParser::new(); + let content = "[15.01.24, 10:30:45] Alice: Hello\n[15.01.24, 10:31:00] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + // ========================================================================= + // parse_str tests - EU dot no bracket format + // ========================================================================= + + #[test] + fn test_parse_str_eu_dot_no_bracket() { + let parser = WhatsAppParser::new(); + let content = "26.10.2025, 20:40 - Alice: Hello\n26.10.2025, 20:41 - Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + // 
========================================================================= + // parse_str tests - EU slash format + // ========================================================================= + + #[test] + fn test_parse_str_eu_slash() { + let parser = WhatsAppParser::new(); + let content = "15/01/2024, 10:30 - Alice: Hello\n15/01/2024, 10:31 - Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + // ========================================================================= + // parse_str tests - multiline messages + // ========================================================================= + + #[test] + fn test_parse_str_multiline() { + let parser = WhatsAppParser::new(); + let content = "[1/15/24, 10:30:45 AM] Alice: Hello\nThis is a second line\n[1/15/24, 10:31:00 AM] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].content, "Hello\nThis is a second line"); + assert_eq!(messages[1].content, "Hi"); + } + + // ========================================================================= + // parse_str tests - system messages + // ========================================================================= + + #[test] + fn test_parse_str_filters_system_messages() { + let parser = WhatsAppParser::new(); + let content = "[1/15/24, 10:30:45 AM] Alice: Hello\n[1/15/24, 10:30:50 AM] System: Messages and calls are end-to-end encrypted\n[1/15/24, 10:31:00 AM] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + #[test] + fn test_parse_str_keeps_system_messages_when_disabled() { + let config = WhatsAppConfig::new().with_skip_system_messages(false); + let parser = WhatsAppParser::with_config(config); + 
let content = "[1/15/24, 10:30:45 AM] Alice: Hello\n[1/15/24, 10:30:50 AM] System: Messages and calls are end-to-end encrypted\n[1/15/24, 10:31:00 AM] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + // Now system messages should be kept + assert!(messages.len() >= 2); + } + + // ========================================================================= + // parse_str tests - edge cases + // ========================================================================= + + #[test] + fn test_parse_str_empty_content() { + let parser = WhatsAppParser::new(); + let content = ""; + let messages = parser.parse_str(content).expect("parse failed"); + assert!(messages.is_empty()); + } + + #[test] + fn test_parse_str_unrecognized_format() { + let parser = WhatsAppParser::new(); + let content = "This is not a valid WhatsApp export"; + let result = parser.parse_str(content); + assert!(result.is_err()); + } + + #[test] + fn test_parse_str_media_omitted_not_filtered() { + let parser = WhatsAppParser::new(); + let content = "[1/15/24, 10:30:45 AM] Alice: <Media omitted>\n[1/15/24, 10:31:00 AM] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].content, "<Media omitted>"); + } + + #[test] + fn test_parse_str_skips_empty_lines() { + let parser = WhatsAppParser::new(); + let content = "[1/15/24, 10:30:45 AM] Alice: Hello\n\n\n[1/15/24, 10:31:00 AM] Bob: Hi"; + let messages = parser.parse_str(content).expect("parse failed"); + assert_eq!(messages.len(), 2); + } + + // ========================================================================= + // Format detection tests + // ========================================================================= + #[test] fn test_detect_format_us() { let lines = vec![ @@ -253,6 +423,10 @@ mod tests { assert_eq!(detect_whatsapp_format(&lines), Some(DateFormat::EuSlash)); } + // ========================================================================= + // System message 
detection tests + // ========================================================================= + #[test] fn test_is_system_message_english() { assert!(is_whatsapp_system_message( @@ -279,6 +453,10 @@ mod tests { assert!(!is_whatsapp_system_message("Bob", "<Без медиафайлов>")); } + // ========================================================================= + // Timestamp parsing tests + // ========================================================================= + #[test] fn test_parse_timestamp_us() { let ts = parse_whatsapp_timestamp("1/15/24", "10:30:45 AM", DateFormat::US); @@ -294,6 +472,10 @@ mod tests { assert!(ts2.is_some()); } + // ========================================================================= + // Edge cases for system messages + // ========================================================================= + #[test] fn test_media_not_filtered() { // <Media omitted> should NOT be treated as system message @@ -307,4 +489,32 @@ mod tests { assert!(is_whatsapp_system_message("", "Some message")); assert!(is_whatsapp_system_message(" ", "Some message")); } + + // ========================================================================= + // Streaming support tests + // ========================================================================= + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_false_by_default() { + let parser = WhatsAppParser::new(); + assert!(!parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_supports_streaming_true_when_enabled() { + let parser = WhatsAppParser::with_streaming(); + assert!(parser.supports_streaming()); + } + + #[cfg(feature = "streaming")] + #[test] + fn test_recommended_buffer_size() { + let parser = WhatsAppParser::new(); + assert_eq!(parser.recommended_buffer_size(), 64 * 1024); + + let streaming_parser = WhatsAppParser::with_streaming(); + assert_eq!(streaming_parser.recommended_buffer_size(), 256 * 1024); + } } diff --git a/src/parsing/discord.rs 
b/src/parsing/discord.rs index 3f43c990..d92c9c8b 100644 --- a/src/parsing/discord.rs +++ b/src/parsing/discord.rs @@ -187,6 +187,10 @@ pub fn parse_discord_stream_message(msg: &DiscordStreamMessage) -> Option = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Charlie"); + } + + #[test] + fn test_jsonl_iterator_skips_empty_lines() { + let jsonl = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}} + +{"id":"2","timestamp":"2024-01-01T00:01:00Z","content":"World","author":{"name":"Bob"}}"#; + + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let config = StreamingConfig::default(); + let iter = DiscordJsonlIterator::new(cursor, jsonl.len() as u64, config); + + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_jsonl_iterator_with_edited_timestamp() { + let jsonl = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","timestampEdited":"2024-01-01T00:05:00Z","content":"Edited","author":{"name":"Alice"}}"#; + + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let config = StreamingConfig::default(); + let mut iter = DiscordJsonlIterator::new(cursor, jsonl.len() as u64, config); + + let msg = iter.next().expect("should have message").expect("parse ok"); + assert!(msg.edited.is_some()); + } + + #[test] + fn test_jsonl_iterator_progress() { + let jsonl = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}}"#; + + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let file_size = jsonl.len() as u64; + let config = StreamingConfig::default(); + let iter = DiscordJsonlIterator::new(cursor, file_size, config); + + assert_eq!(iter.total_bytes(), Some(file_size)); + assert_eq!(iter.bytes_processed(), 0); + let progress = iter.progress(); + assert!(progress.is_some()); + } + + #[test] + fn 
test_jsonl_iterator_zero_file_size() { + let jsonl = ""; + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let config = StreamingConfig::default(); + let iter = DiscordJsonlIterator::new(cursor, 0, config); + + assert!(iter.progress().is_none()); + } + + #[test] + fn test_jsonl_iterator_skip_invalid() { + let jsonl = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}} +invalid json line +{"id":"2","timestamp":"2024-01-01T00:01:00Z","content":"World","author":{"name":"Bob"}}"#; + + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let config = StreamingConfig::new().with_skip_invalid(true); + let iter = DiscordJsonlIterator::new(cursor, jsonl.len() as u64, config); + + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_jsonl_iterator_error_on_invalid() { + let jsonl = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}} +invalid json line"#; + + let cursor = Cursor::new(jsonl.as_bytes().to_vec()); + let config = StreamingConfig::new().with_skip_invalid(false); + let mut iter = DiscordJsonlIterator::new(cursor, jsonl.len() as u64, config); + + let _ = iter.next(); // First message OK + let result = iter.next(); + assert!(result.is_some()); + assert!(result.unwrap().is_err()); + } + + // ========================================================================= + // DiscordJsonIterator tests + // ========================================================================= + + #[test] + fn test_json_iterator_basic() { + let json = r#"{"guild":{"id":"123"},"messages":[ +{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}}, +{"id":"2","timestamp":"2024-01-01T00:01:00Z","content":"Hi","author":{"name":"Bob"}} +]}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let file_size = json.len() as u64; + let config = StreamingConfig::default(); + let iter = 
DiscordJsonIterator::new(cursor, file_size, config) + .expect("create iterator"); + + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].sender, "Alice"); + assert_eq!(messages[1].sender, "Bob"); + } + + #[test] + fn test_json_iterator_with_reference() { + let json = r#"{"messages":[ +{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}}, +{"id":"2","timestamp":"2024-01-01T00:01:00Z","content":"Reply","author":{"name":"Bob"},"reference":{"messageId":"1"}} +]}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let file_size = json.len() as u64; + let config = StreamingConfig::default(); + let iter = DiscordJsonIterator::new(cursor, file_size, config) + .expect("create iterator"); + + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + assert_eq!(messages[1].reply_to, Some(1)); + } + + #[test] + fn test_json_iterator_skips_empty_content() { + let json = r#"{"messages":[ +{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}}, +{"id":"2","timestamp":"2024-01-01T00:01:00Z","content":"","author":{"name":"Bob"}}, +{"id":"3","timestamp":"2024-01-01T00:02:00Z","content":"World","author":{"name":"Charlie"}} +]}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let file_size = json.len() as u64; + let config = StreamingConfig::default(); + let iter = DiscordJsonIterator::new(cursor, file_size, config) + .expect("create iterator"); + + let messages: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_json_iterator_missing_messages_array() { + let json = r#"{"guild":{"id":"123"}}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let file_size = json.len() as u64; + let config = StreamingConfig::default(); + let result = DiscordJsonIterator::new(cursor, file_size, config); + assert!(result.is_err()); + } + + 
#[test] + fn test_json_iterator_progress() { + let json = r#"{"messages":[ +{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}} +]}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let file_size = json.len() as u64; + let config = StreamingConfig::default(); + let iter = DiscordJsonIterator::new(cursor, file_size, config) + .expect("create iterator"); + + assert_eq!(iter.total_bytes(), Some(file_size)); + assert!(iter.bytes_processed() > 0); // Header was read + } + + #[test] + fn test_json_iterator_zero_file_size() { + let json = r#"{"messages":[]}"#; + + let cursor = Cursor::new(json.as_bytes().to_vec()); + let config = StreamingConfig::default(); + let iter = DiscordJsonIterator::new(cursor, 0, config) + .expect("create iterator"); + + assert!(iter.progress().is_none()); + } + + // ========================================================================= + // parse_line tests for JSONL + // ========================================================================= + + #[test] + fn test_parse_line_valid() { + let line = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"Hello","author":{"name":"Alice"}}"#; + let result = DiscordJsonlIterator::<Cursor<Vec<u8>>>::parse_line(line); + assert!(result.is_ok()); + let msg = result.unwrap(); + assert!(msg.is_some()); + assert_eq!(msg.unwrap().sender, "Alice"); + } + + #[test] + fn test_parse_line_empty() { + let result = DiscordJsonlIterator::<Cursor<Vec<u8>>>::parse_line(""); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); + } + + #[test] + fn test_parse_line_whitespace_only() { + let result = DiscordJsonlIterator::<Cursor<Vec<u8>>>::parse_line(" "); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); + } + + #[test] + fn test_parse_line_empty_content() { + let line = r#"{"id":"1","timestamp":"2024-01-01T00:00:00Z","content":"","author":{"name":"Alice"}}"#; + let result = DiscordJsonlIterator::<Cursor<Vec<u8>>>::parse_line(line); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); 
+ } + + #[test] + fn test_parse_line_invalid_json() { + let result = DiscordJsonlIterator::<Cursor<Vec<u8>>>::parse_line("not json"); + assert!(result.is_err()); } } diff --git a/src/streaming/error.rs b/src/streaming/error.rs index 931d74e5..c4c88303 100644 --- a/src/streaming/error.rs +++ b/src/streaming/error.rs @@ -76,17 +76,30 @@ impl From for StreamingError { mod tests { use super::*; + // ========================================================================= + // Display tests + // ========================================================================= + + #[test] + fn test_error_display_io() { + let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); + let err = StreamingError::Io(io_err); + let display = err.to_string(); + assert!(display.contains("IO error")); + assert!(display.contains("file not found")); + } + #[test] - fn test_error_display() { + fn test_error_display_invalid_format() { let err = StreamingError::InvalidFormat("missing messages array".into()); assert!(err.to_string().contains("Invalid format")); + assert!(err.to_string().contains("missing messages array")); } #[test] - fn test_error_from_io() { - let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); - let streaming_err: StreamingError = io_err.into(); - assert!(matches!(streaming_err, StreamingError::Io(_))); + fn test_error_display_unexpected_eof() { + let err = StreamingError::UnexpectedEof; + assert!(err.to_string().contains("Unexpected end of file")); } #[test] @@ -98,5 +111,79 @@ mod tests { let msg = err.to_string(); assert!(msg.contains("2048")); assert!(msg.contains("1024")); + assert!(msg.contains("too large")); + } + + #[cfg(any(feature = "telegram", feature = "instagram", feature = "discord"))] + #[test] + fn test_error_display_json() { + let json_err = serde_json::from_str::<serde_json::Value>("invalid").unwrap_err(); + let err = StreamingError::Json(json_err); + assert!(err.to_string().contains("JSON error")); + } + + // 
========================================================================= + // From conversions tests + // ========================================================================= + + #[test] + fn test_error_from_io() { + let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); + let streaming_err: StreamingError = io_err.into(); + assert!(matches!(streaming_err, StreamingError::Io(_))); + } + + #[cfg(any(feature = "telegram", feature = "instagram", feature = "discord"))] + #[test] + fn test_error_from_json() { + let json_err = serde_json::from_str::<serde_json::Value>("invalid").unwrap_err(); + let streaming_err: StreamingError = json_err.into(); + assert!(matches!(streaming_err, StreamingError::Json(_))); + } + + // ========================================================================= + // Error source tests + // ========================================================================= + + #[test] + fn test_error_source_io() { + let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); + let err = StreamingError::Io(io_err); + assert!(err.source().is_some()); + } + + #[cfg(any(feature = "telegram", feature = "instagram", feature = "discord"))] + #[test] + fn test_error_source_json() { + let json_err = serde_json::from_str::<serde_json::Value>("invalid").unwrap_err(); + let err = StreamingError::Json(json_err); + assert!(err.source().is_some()); + } + + #[test] + fn test_error_source_none() { + let err = StreamingError::InvalidFormat("test".into()); + assert!(err.source().is_none()); + + let err = StreamingError::UnexpectedEof; + assert!(err.source().is_none()); + + let err = StreamingError::BufferOverflow { + max_size: 1024, + actual_size: 2048, + }; + assert!(err.source().is_none()); + } + + // ========================================================================= + // Debug tests + // ========================================================================= + + #[test] + fn test_error_debug() { + let err = StreamingError::InvalidFormat("test".into()); + let 
debug = format!("{:?}", err); + assert!(debug.contains("InvalidFormat")); + assert!(debug.contains("test")); } } diff --git a/src/streaming/instagram.rs b/src/streaming/instagram.rs index 41d1c786..b3f5d8e6 100644 --- a/src/streaming/instagram.rs +++ b/src/streaming/instagram.rs @@ -277,6 +277,42 @@ mod tests { .to_string() } + // ========================================================================= + // Constructor tests + // ========================================================================= + + #[test] + fn test_parser_new() { + let parser = InstagramStreamingParser::new(); + assert_eq!(parser.name(), "Instagram (Streaming)"); + } + + #[test] + fn test_parser_default() { + let parser = InstagramStreamingParser::default(); + assert_eq!(parser.name(), "Instagram (Streaming)"); + } + + #[test] + fn test_parser_with_config() { + let config = StreamingConfig::default() + .with_buffer_size(512 * 1024) + .with_max_message_size(2 * 1024 * 1024) + .with_skip_invalid(true); + let parser = InstagramStreamingParser::with_config(config); + assert_eq!(parser.name(), "Instagram (Streaming)"); + } + + #[test] + fn test_recommended_buffer_size() { + let parser = InstagramStreamingParser::new(); + assert!(parser.recommended_buffer_size() > 0); + } + + // ========================================================================= + // Basic parsing tests + // ========================================================================= + #[test] fn test_streaming_parser_basic() { let json = create_test_json(); @@ -296,6 +332,24 @@ mod tests { assert_eq!(messages[1].sender, "user_two"); } + #[test] + fn test_empty_messages_array() { + let json = r#"{"participants": [], "messages": []}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); 
+ assert!(messages.is_empty()); + } + + // ========================================================================= + // Progress and iterator trait tests + // ========================================================================= + #[test] fn test_progress_reporting() { let json = create_test_json(); @@ -313,6 +367,120 @@ mod tests { assert!(progress > 90.0); } + #[test] + fn test_progress_with_zero_file_size() { + let json = create_test_json(); + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + InstagramMessageIterator::new(reader, 0, StreamingConfig::default()).unwrap(); + + assert!(iterator.progress().is_none()); + } + + #[test] + fn test_bytes_processed() { + let json = create_test_json(); + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let initial_bytes = iterator.bytes_processed(); + assert!(initial_bytes > 0); // Read past header + + // Consume one message + let _ = iterator.next(); + let bytes_after = iterator.bytes_processed(); + assert!(bytes_after > initial_bytes); + } + + #[test] + fn test_total_bytes() { + let json = create_test_json(); + let file_size = json.len() as u64; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + InstagramMessageIterator::new(reader, file_size, StreamingConfig::default()).unwrap(); + + assert_eq!(iterator.total_bytes(), Some(file_size)); + } + + // ========================================================================= + // Error handling tests + // ========================================================================= + + #[test] + fn test_no_messages_array() { + let json = r#"{"participants": []}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let result = + 
InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()); + + assert!(result.is_err()); + // Verify it's an InvalidFormat error + if let Err(StreamingError::InvalidFormat(msg)) = result { + assert!(msg.contains("messages")); + } else { + panic!("Expected InvalidFormat error"); + } + } + + #[test] + fn test_skip_invalid_messages() { + let json = r#"{ + "participants": [], + "messages": [ + {"invalid": "json message"}, + {"sender_name": "user", "timestamp_ms": 1705315800000, "content": "Valid!"} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let config = StreamingConfig::default().with_skip_invalid(true); + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, config).unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + + // Should skip invalid and return valid message + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Valid!"); + } + + #[test] + fn test_invalid_message_without_skip() { + let json = r#"{ + "participants": [], + "messages": [ + {"sender_name": 12345} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let config = StreamingConfig::default().with_skip_invalid(false); + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, config).unwrap(); + + // First message should be an error + let first = iterator.next(); + assert!(first.is_some()); + assert!(first.unwrap().is_err()); + } + + // ========================================================================= + // Content type tests + // ========================================================================= + #[test] fn test_fix_encoding() { // Test that normal ASCII passes through @@ -345,4 +513,87 @@ mod tests { assert_eq!(messages.len(), 1); assert_eq!(messages[0].content, "Check this out!"); } + + #[test] + fn test_multiline_message() { + let json = r#"{ + 
"participants": [], + "messages": [ + { + "sender_name": "user", + "timestamp_ms": 1705315800000, + "content": "Line1\nLine2\nLine3" + } + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert_eq!(messages.len(), 1); + assert!(messages[0].content.contains("Line1")); + } + + #[test] + fn test_iterator_finished_returns_none() { + let json = r#"{"participants": [], "messages": [{"sender_name": "user", "timestamp_ms": 1000, "content": "Hi"}]}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + // Consume all messages + let _: Vec<_> = iterator.by_ref().collect(); + + // Additional calls should return None + assert!(iterator.next().is_none()); + assert!(iterator.next().is_none()); + } + + #[test] + fn test_messages_with_commas_between() { + let json = r#"{ + "participants": [], + "messages": [ + {"sender_name": "user1", "timestamp_ms": 1000, "content": "First"}, + , + {"sender_name": "user2", "timestamp_ms": 2000, "content": "Second"} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let config = StreamingConfig::default().with_skip_invalid(true); + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, config).unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_message_without_content_skipped() { + let json = r#"{ + "participants": [], + "messages": [ + {"sender_name": "user", "timestamp_ms": 1000} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = 
BufReader::new(cursor); + + let mut iterator = + InstagramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + // Message without content should be skipped + assert!(messages.is_empty()); + } } diff --git a/src/streaming/telegram.rs b/src/streaming/telegram.rs index 94b87ad7..44e3348e 100644 --- a/src/streaming/telegram.rs +++ b/src/streaming/telegram.rs @@ -278,6 +278,42 @@ mod tests { .to_string() } + // ========================================================================= + // Constructor tests + // ========================================================================= + + #[test] + fn test_parser_new() { + let parser = TelegramStreamingParser::new(); + assert_eq!(parser.name(), "Telegram (Streaming)"); + } + + #[test] + fn test_parser_default() { + let parser = TelegramStreamingParser::default(); + assert_eq!(parser.name(), "Telegram (Streaming)"); + } + + #[test] + fn test_parser_with_config() { + let config = StreamingConfig::default() + .with_buffer_size(512 * 1024) + .with_max_message_size(2 * 1024 * 1024) + .with_skip_invalid(true); + let parser = TelegramStreamingParser::with_config(config); + assert_eq!(parser.name(), "Telegram (Streaming)"); + } + + #[test] + fn test_recommended_buffer_size() { + let parser = TelegramStreamingParser::new(); + assert!(parser.recommended_buffer_size() > 0); + } + + // ========================================================================= + // Basic parsing tests + // ========================================================================= + #[test] fn test_streaming_parser_basic() { let json = create_test_json(); @@ -297,6 +333,24 @@ mod tests { assert_eq!(messages[2].content, "Bye!"); } + #[test] + fn test_empty_messages_array() { + let json = r#"{"name": "Chat", "messages": []}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = 
+ TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert!(messages.is_empty()); + } + + // ========================================================================= + // Progress and iterator trait tests + // ========================================================================= + #[test] fn test_progress_reporting() { let json = create_test_json(); @@ -314,6 +368,119 @@ mod tests { assert!(progress > 90.0); // Should be close to 100% } + #[test] + fn test_progress_with_zero_file_size() { + let json = create_test_json(); + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + TelegramMessageIterator::new(reader, 0, StreamingConfig::default()).unwrap(); + + assert!(iterator.progress().is_none()); + } + + #[test] + fn test_bytes_processed() { + let json = create_test_json(); + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let initial_bytes = iterator.bytes_processed(); + assert!(initial_bytes > 0); + + let _ = iterator.next(); + let bytes_after = iterator.bytes_processed(); + assert!(bytes_after > initial_bytes); + } + + #[test] + fn test_total_bytes() { + let json = create_test_json(); + let file_size = json.len() as u64; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + TelegramMessageIterator::new(reader, file_size, StreamingConfig::default()).unwrap(); + + assert_eq!(iterator.total_bytes(), Some(file_size)); + } + + // ========================================================================= + // Error handling tests + // ========================================================================= + + #[test] + fn test_no_messages_array() { + let 
json = r#"{"name": "Chat"}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let result = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()); + + assert!(result.is_err()); + // Verify it's an InvalidFormat error + if let Err(StreamingError::InvalidFormat(msg)) = result { + assert!(msg.contains("messages")); + } else { + panic!("Expected InvalidFormat error"); + } + } + + #[test] + fn test_skip_invalid_messages() { + let json = r#"{ + "name": "Chat", + "messages": [ + {"invalid": "object"}, + {"id": 1, "type": "message", "date_unixtime": "1705314600", "from": "Alice", "text": "Valid!"} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let config = StreamingConfig::default().with_skip_invalid(true); + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, config).unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + + // Should skip invalid and return valid message + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Valid!"); + } + + #[test] + fn test_invalid_message_without_skip() { + let json = r#"{ + "name": "Chat", + "messages": [ + {"id": "invalid_id_type"} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let config = StreamingConfig::default().with_skip_invalid(false); + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, config).unwrap(); + + // First message should be an error + let first = iterator.next(); + assert!(first.is_some()); + assert!(first.unwrap().is_err()); + } + + // ========================================================================= + // Content type tests + // ========================================================================= + #[test] fn test_extract_text_with_formatting() { let value = serde_json::json!([ @@ -329,4 +496,87 @@ mod 
tests { let parser = TelegramStreamingParser::new(); assert_eq!(parser.name(), "Telegram (Streaming)"); } + + #[test] + fn test_multiline_message() { + let json = r#"{ + "name": "Chat", + "messages": [ + { + "id": 1, + "type": "message", + "date_unixtime": "1705314600", + "from": "Alice", + "text": "Line1\nLine2\nLine3" + } + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert_eq!(messages.len(), 1); + } + + #[test] + fn test_iterator_finished_returns_none() { + let json = r#"{"name": "Chat", "messages": [{"id": 1, "type": "message", "date_unixtime": "1000", "from": "A", "text": "Hi"}]}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + // Consume all messages + let _: Vec<_> = iterator.by_ref().collect(); + + // Additional calls should return None + assert!(iterator.next().is_none()); + assert!(iterator.next().is_none()); + } + + #[test] + fn test_service_messages_skipped() { + let json = r#"{ + "name": "Chat", + "messages": [ + {"id": 1, "type": "service", "action": "pin_message"}, + {"id": 2, "type": "service", "action": "create_group"} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert!(messages.is_empty()); + } + + #[test] + fn test_formatted_text_array() { + let json = r#"{ + "name": "Chat", + "messages": [ + {"id": 1, "type": "message", "date_unixtime": "1705314600", "from": 
"Alice", "text": ["Hello ", {"type": "bold", "text": "World"}, "!"]} + ] +}"#; + let cursor = Cursor::new(json.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + TelegramMessageIterator::new(reader, json.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.by_ref().filter_map(Result::ok).collect(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].content, "Hello World!"); + } } diff --git a/src/streaming/traits.rs b/src/streaming/traits.rs index 881bdf9b..cfee3953 100644 --- a/src/streaming/traits.rs +++ b/src/streaming/traits.rs @@ -127,12 +127,25 @@ impl StreamingConfig { mod tests { use super::*; + // ========================================================================= + // StreamingConfig tests + // ========================================================================= + #[test] fn test_streaming_config_default() { let config = StreamingConfig::default(); assert_eq!(config.buffer_size, 64 * 1024); assert_eq!(config.max_message_size, 10 * 1024 * 1024); assert!(config.skip_invalid); + assert_eq!(config.progress_interval, 10_000); + } + + #[test] + fn test_streaming_config_new() { + let config = StreamingConfig::new(); + assert_eq!(config.buffer_size, 64 * 1024); + assert_eq!(config.max_message_size, 10 * 1024 * 1024); + assert!(config.skip_invalid); } #[test] @@ -147,10 +160,47 @@ mod tests { assert!(!config.skip_invalid); } + #[test] + fn test_streaming_config_with_progress_interval() { + let config = StreamingConfig::new().with_progress_interval(5000); + assert_eq!(config.progress_interval, 5000); + } + + #[test] + fn test_streaming_config_builder_chain() { + let config = StreamingConfig::new() + .with_buffer_size(256 * 1024) + .with_max_message_size(20 * 1024 * 1024) + .with_skip_invalid(false) + .with_progress_interval(1000); + + assert_eq!(config.buffer_size, 256 * 1024); + assert_eq!(config.max_message_size, 20 * 1024 * 1024); + assert!(!config.skip_invalid); + 
assert_eq!(config.progress_interval, 1000); + } + #[test] fn test_streaming_config_copy() { let config = StreamingConfig::new(); let copied = config; // Copy assert_eq!(config.buffer_size, copied.buffer_size); + assert_eq!(config.max_message_size, copied.max_message_size); + assert_eq!(config.skip_invalid, copied.skip_invalid); + } + + #[test] + fn test_streaming_config_clone() { + let config = StreamingConfig::new().with_buffer_size(512 * 1024); + let cloned = config.clone(); + assert_eq!(config.buffer_size, cloned.buffer_size); + } + + #[test] + fn test_streaming_config_debug() { + let config = StreamingConfig::new(); + let debug = format!("{:?}", config); + assert!(debug.contains("StreamingConfig")); + assert!(debug.contains("buffer_size")); } } diff --git a/src/streaming/whatsapp.rs b/src/streaming/whatsapp.rs index 22fcfce1..88ce6c1b 100644 --- a/src/streaming/whatsapp.rs +++ b/src/streaming/whatsapp.rs @@ -295,6 +295,42 @@ This is a continuation line .to_string() } + // ========================================================================= + // Constructor tests + // ========================================================================= + + #[test] + fn test_parser_new() { + let parser = WhatsAppStreamingParser::new(); + assert_eq!(parser.name(), "WhatsApp (Streaming)"); + } + + #[test] + fn test_parser_default() { + let parser = WhatsAppStreamingParser::default(); + assert_eq!(parser.name(), "WhatsApp (Streaming)"); + } + + #[test] + fn test_parser_with_config() { + let config = StreamingConfig::default() + .with_buffer_size(512 * 1024) + .with_max_message_size(2 * 1024 * 1024) + .with_skip_invalid(true); + let parser = WhatsAppStreamingParser::with_config(config); + assert_eq!(parser.name(), "WhatsApp (Streaming)"); + } + + #[test] + fn test_recommended_buffer_size() { + let parser = WhatsAppStreamingParser::new(); + assert!(parser.recommended_buffer_size() > 0); + } + + // ========================================================================= + // 
Basic parsing tests + // ========================================================================= + #[test] fn test_streaming_parser_us_format() { let txt = create_test_us_format(); @@ -330,6 +366,24 @@ This is a continuation line assert!(messages[0].content.contains("Привет")); } + #[test] + fn test_empty_file() { + let txt = ""; + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.filter_map(Result::ok).collect(); + assert!(messages.is_empty()); + } + + // ========================================================================= + // Progress and iterator trait tests + // ========================================================================= + #[test] fn test_progress_reporting() { let txt = create_test_us_format(); @@ -346,6 +400,56 @@ This is a continuation line assert!(progress > 90.0); } + #[test] + fn test_progress_with_zero_file_size() { + let txt = create_test_us_format(); + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, 0, StreamingConfig::default()).unwrap(); + + assert!(iterator.progress().is_none()); + } + + #[test] + fn test_bytes_processed() { + let txt = create_test_us_format(); + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + // Sample lines already read + let initial_bytes = iterator.bytes_processed(); + assert!(initial_bytes > 0); + + // Consume all messages + let _: Vec<_> = iterator.by_ref().collect(); + + let final_bytes = iterator.bytes_processed(); + assert!(final_bytes >= initial_bytes); + } + + #[test] + fn test_total_bytes() { + let txt = create_test_us_format(); + let file_size = 
txt.len() as u64; + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, file_size, StreamingConfig::default()).unwrap(); + + assert_eq!(iterator.total_bytes(), Some(file_size)); + } + + // ========================================================================= + // Content tests + // ========================================================================= + #[test] fn test_parser_name() { let parser = WhatsAppStreamingParser::new(); @@ -407,4 +511,129 @@ Line 3 assert!(messages[0].content.contains("Line 2")); assert!(messages[0].content.contains("Line 3")); } + + #[test] + fn test_empty_content_skipped() { + let txt = "[1/15/24, 10:30:00 AM] Alice: +[1/15/24, 10:31:00 AM] Bob: Real message"; + + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.filter_map(Result::ok).collect(); + + // Empty content message should be skipped + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].sender, "Bob"); + } + + #[test] + fn test_unrecognized_format_returns_empty() { + let txt = "This is not a WhatsApp export format +Just random lines +With no pattern"; + + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.filter_map(Result::ok).collect(); + assert!(messages.is_empty()); + } + + #[test] + fn test_pending_message_helpers() { + let pending = PendingMessage::default(); + assert!(pending.is_empty()); + + let with_content = PendingMessage { + sender: "Alice".to_string(), + content: "Hello".to_string(), + timestamp: None, + }; + assert!(!with_content.is_empty()); + + // Test into_message + let msg = 
with_content.into_message(); + assert!(msg.is_some()); + let msg = msg.unwrap(); + assert_eq!(msg.sender, "Alice"); + assert_eq!(msg.content, "Hello"); + } + + #[test] + fn test_pending_message_take() { + let mut pending = PendingMessage { + sender: "Alice".to_string(), + content: "Hello".to_string(), + timestamp: None, + }; + + let taken = pending.take(); + assert!(!taken.is_empty()); + assert!(pending.is_empty()); + } + + #[test] + fn test_system_messages_filtered() { + let txt = "[1/15/24, 10:30:00 AM] Alice: Hello +[1/15/24, 10:31:00 AM] System: created this group +[1/15/24, 10:32:00 AM] Bob: Hi there"; + + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.filter_map(Result::ok).collect(); + + // System message should be filtered + assert!(messages.iter().all(|m| m.sender != "System")); + } + + #[test] + fn test_empty_lines_between_messages() { + let txt = "[1/15/24, 10:30:00 AM] Alice: Hello + +[1/15/24, 10:31:00 AM] Bob: Hi"; + + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + let messages: Vec<_> = iterator.filter_map(Result::ok).collect(); + + assert_eq!(messages.len(), 2); + } + + #[test] + fn test_iterator_returns_none_when_finished() { + let txt = "[1/15/24, 10:30:00 AM] Alice: Hello"; + + let cursor = Cursor::new(txt.as_bytes().to_vec()); + let reader = BufReader::new(cursor); + + let mut iterator = + WhatsAppMessageIterator::new(reader, txt.len() as u64, StreamingConfig::default()) + .unwrap(); + + // Consume all messages + let _: Vec<_> = iterator.by_ref().collect(); + + // Additional calls should return None + assert!(iterator.next().is_none()); + assert!(iterator.next().is_none()); + } } diff --git 
a/tests/edge_cases.rs b/tests/edge_cases.rs new file mode 100644 index 00000000..722050ce --- /dev/null +++ b/tests/edge_cases.rs @@ -0,0 +1,496 @@ +//! Edge case tests for chatpack +//! +//! These tests cover various edge cases and boundary conditions +//! that might not be covered by regular unit and integration tests. + +use chatpack::core::filter::FilterConfig; +use chatpack::core::models::OutputConfig; +use chatpack::core::processor::merge_consecutive; +use chatpack::Message; +use chrono::{TimeZone, Utc}; + +// ========================================================================= +// Unicode and special character tests +// ========================================================================= + +#[test] +fn test_unicode_normalization() { + // Test various Unicode scripts + let cyrillic = Message::new("Иван", "Привет мир!"); + assert_eq!(cyrillic.sender, "Иван"); + assert_eq!(cyrillic.content, "Привет мир!"); + + let japanese = Message::new("田中太郎", "こんにちは世界!"); + assert_eq!(japanese.sender, "田中太郎"); + assert_eq!(japanese.content, "こんにちは世界!"); + + let arabic = Message::new("محمد", "مرحبا بالعالم"); + assert_eq!(arabic.sender, "محمد"); + assert_eq!(arabic.content, "مرحبا بالعالم"); + + let emoji = Message::new("User 🎉", "Hello 👋 World 🌍"); + assert_eq!(emoji.sender, "User 🎉"); + assert_eq!(emoji.content, "Hello 👋 World 🌍"); + + // Mixed scripts + let mixed = Message::new("User123", "Hello 你好 Привет مرحبا 🌍"); + assert_eq!(mixed.content, "Hello 你好 Привет مرحبا 🌍"); +} + +#[test] +fn test_zero_width_characters() { + // Zero-width joiner in names (used in emoji sequences) + let zwj_emoji = Message::new("User👨‍👩‍👧", "Family emoji test"); + assert!(zwj_emoji.sender.contains("👨‍👩‍👧")); + + // Zero-width non-joiner + let zwnj = Message::new("User\u{200C}Name", "ZWNJ test"); + assert!(zwnj.sender.contains("\u{200C}")); + + // Zero-width space + let zws = Message::new("User\u{200B}Name", "ZWS test"); + assert!(zws.sender.contains("\u{200B}")); +} + +#[test] +fn 
test_combining_diacritics() { + // Test combining characters + let combining = Message::new("Café", "Naïve résumé"); + assert!(combining.sender.contains("é")); + + // NFD vs NFC normalization + let nfc = Message::new("é", "NFC form"); + let nfd_content = "e\u{0301}"; // e + combining acute accent + let nfd = Message::new(nfd_content, "NFD form"); + + // Both should work, even if different representations + assert!(!nfc.sender.is_empty()); + assert!(!nfd.sender.is_empty()); +} + +// ========================================================================= +// Very long message tests +// ========================================================================= + +#[test] +fn test_very_long_content() { + // 10KB message + let long_content = "x".repeat(10 * 1024); + let msg = Message::new("Sender", &long_content); + assert_eq!(msg.content.len(), 10 * 1024); + + // 100KB message + let very_long_content = "y".repeat(100 * 1024); + let msg2 = Message::new("Sender", &very_long_content); + assert_eq!(msg2.content.len(), 100 * 1024); + + // 1MB message + let huge_content = "z".repeat(1024 * 1024); + let msg3 = Message::new("Sender", &huge_content); + assert_eq!(msg3.content.len(), 1024 * 1024); +} + +#[test] +fn test_very_long_sender_name() { + let long_name = "A".repeat(10000); + let msg = Message::new(&long_name, "Content"); + assert_eq!(msg.sender.len(), 10000); +} + +// ========================================================================= +// Special sender name tests +// ========================================================================= + +#[test] +fn test_empty_sender_name() { + let msg = Message::new("", "Content"); + assert!(msg.sender.is_empty()); +} + +#[test] +fn test_whitespace_only_sender() { + let msg = Message::new(" ", "Content"); + assert_eq!(msg.sender, " "); +} + +#[test] +fn test_special_chars_in_sender() { + let special = Message::new("User<>&\"'", "Content"); + assert_eq!(special.sender, "User<>&\"'"); + + let newlines = 
Message::new("User\nName", "Content"); + assert!(newlines.sender.contains('\n')); + + let tabs = Message::new("User\tName", "Content"); + assert!(tabs.sender.contains('\t')); +} + +// ========================================================================= +// Timestamp edge cases +// ========================================================================= + +#[test] +fn test_timestamp_unix_epoch() { + let epoch = Utc.timestamp_opt(0, 0).unwrap(); + let msg = Message::with_metadata("Sender", "1970-01-01", Some(epoch), None, None, None); + assert_eq!(msg.timestamp, Some(epoch)); +} + +#[test] +fn test_timestamp_y2k() { + let y2k = Utc.with_ymd_and_hms(2000, 1, 1, 0, 0, 0).unwrap(); + let msg = Message::with_metadata("Sender", "Y2K", Some(y2k), None, None, None); + assert_eq!(msg.timestamp, Some(y2k)); +} + +#[test] +fn test_timestamp_near_2038() { + // Y2038 problem for 32-bit systems (2^31 - 1 seconds from epoch) + let near_2038 = Utc.with_ymd_and_hms(2038, 1, 18, 0, 0, 0).unwrap(); + let msg = Message::with_metadata("Sender", "Near 2038", Some(near_2038), None, None, None); + assert_eq!(msg.timestamp, Some(near_2038)); +} + +#[test] +fn test_timestamp_far_future() { + // Year 3000 + let far_future = Utc.with_ymd_and_hms(3000, 1, 1, 0, 0, 0).unwrap(); + let msg = Message::with_metadata("Sender", "Far future", Some(far_future), None, None, None); + assert_eq!(msg.timestamp, Some(far_future)); +} + +// ========================================================================= +// Filter edge cases +// ========================================================================= + +#[test] +fn test_filter_boundary_dates() { + let msg1 = Message::with_metadata( + "Alice", + "Start of day", + Some(Utc.with_ymd_and_hms(2024, 1, 15, 0, 0, 0).unwrap()), + None, + None, + None, + ); + let msg2 = Message::with_metadata( + "Bob", + "End of day", + Some(Utc.with_ymd_and_hms(2024, 1, 15, 23, 59, 59).unwrap()), + None, + None, + None, + ); + let msg3 = Message::with_metadata( +
"Charlie", + "Next day end", + Some(Utc.with_ymd_and_hms(2024, 1, 17, 0, 0, 0).unwrap()), + None, + None, + None, + ); + + let messages = vec![msg1.clone(), msg2.clone(), msg3.clone()]; + + // Filter for messages before 2024-01-16 (includes up to 23:59:59 on that day) + let filter = FilterConfig::new().before_date("2024-01-16").unwrap(); + let filtered = chatpack::core::filter::apply_filters(messages.clone(), &filter); + // Should include messages on 2024-01-15 and 2024-01-16 + assert!(filtered.iter().any(|m| m.sender == "Alice")); + assert!(filtered.iter().any(|m| m.sender == "Bob")); + // Message on 2024-01-17 should be excluded + assert!(!filtered.iter().any(|m| m.sender == "Charlie")); +} + +#[test] +fn test_filter_empty_result() { + let msg = Message::with_metadata( + "Alice", + "Hello", + Some(Utc.with_ymd_and_hms(2024, 1, 15, 0, 0, 0).unwrap()), + None, + None, + None, + ); + + let messages = vec![msg]; + let filter = FilterConfig::new().after_date("2025-01-01").unwrap(); + let filtered = chatpack::core::filter::apply_filters(messages, &filter); + assert!(filtered.is_empty()); +} + +#[test] +fn test_filter_user_case_insensitive() { + let messages = vec![ + Message::new("Alice", "Hello"), + Message::new("ALICE", "World"), + Message::new("alice", "Test"), + Message::new("Bob", "Hi"), + ]; + + let filter = FilterConfig::new().with_user("alice".to_string()); + let filtered = chatpack::core::filter::apply_filters(messages, &filter); + + // Should match all case variations of "alice" + assert_eq!(filtered.len(), 3); + assert!(filtered.iter().all(|m| m.sender.to_lowercase() == "alice")); +} + +// ========================================================================= +// Merge edge cases +// ========================================================================= + +#[test] +fn test_merge_empty_vector() { + let empty: Vec<Message> = vec![]; + let result = merge_consecutive(empty); + assert!(result.is_empty()); +} + +#[test] +fn test_merge_single_message() { + let single
= vec![Message::new("Alice", "Hello")]; + let result = merge_consecutive(single); + assert_eq!(result.len(), 1); + assert_eq!(result[0].sender, "Alice"); + assert_eq!(result[0].content, "Hello"); +} + +#[test] +fn test_merge_all_same_sender() { + let messages = vec![ + Message::new("Alice", "Hello"), + Message::new("Alice", "World"), + Message::new("Alice", "!"), + ]; + + let result = merge_consecutive(messages); + assert_eq!(result.len(), 1); + assert_eq!(result[0].sender, "Alice"); + assert!(result[0].content.contains("Hello")); + assert!(result[0].content.contains("World")); + assert!(result[0].content.contains("!")); +} + +#[test] +fn test_merge_all_different_senders() { + let messages = vec![ + Message::new("Alice", "Hello"), + Message::new("Bob", "World"), + Message::new("Charlie", "!"), + ]; + + let result = merge_consecutive(messages); + assert_eq!(result.len(), 3); +} + +#[test] +fn test_merge_preserves_first_message_metadata() { + let ts1 = Utc.with_ymd_and_hms(2024, 1, 15, 10, 0, 0).unwrap(); + let ts2 = Utc.with_ymd_and_hms(2024, 1, 15, 10, 1, 0).unwrap(); + + let messages = vec![ + Message::with_metadata("Alice", "Hello", Some(ts1), Some(1), None, None), + Message::with_metadata("Alice", "World", Some(ts2), Some(2), None, None), + ]; + + let result = merge_consecutive(messages); + assert_eq!(result.len(), 1); + assert_eq!(result[0].timestamp, Some(ts1)); + assert_eq!(result[0].id, Some(1)); +} + +#[test] +fn test_merge_with_empty_content() { + let messages = vec![ + Message::new("Alice", "Hello"), + Message::new("Alice", ""), + Message::new("Alice", "World"), + ]; + + let result = merge_consecutive(messages); + assert_eq!(result.len(), 1); +} + +// ========================================================================= +// Output config edge cases +// ========================================================================= + +#[test] +fn test_output_config_all_disabled() { + let config = OutputConfig::new(); + assert!(!config.include_timestamps); + 
assert!(!config.include_ids); + assert!(!config.include_replies); + assert!(!config.include_edited); +} + +#[test] +fn test_output_config_all_enabled() { + let config = OutputConfig::new() + .with_timestamps() + .with_ids() + .with_replies() + .with_edited(); + assert!(config.include_timestamps); + assert!(config.include_ids); + assert!(config.include_replies); + assert!(config.include_edited); +} + +#[test] +fn test_output_config_full() { + let config = OutputConfig::all(); + assert!(config.include_timestamps); + assert!(config.include_ids); + assert!(config.include_replies); + assert!(config.include_edited); +} + +// ========================================================================= +// CSV output edge cases +// ========================================================================= + +#[cfg(feature = "csv-output")] +#[test] +fn test_csv_escaping_special_chars() { + use chatpack::core::output::to_csv; + + let messages = vec![ + Message::new("Alice", "Hello, World"), // Comma + Message::new("Bob", "Say \"Hi\""), // Quotes + Message::new("Charlie", "Line1\nLine2"), // Newline + Message::new("David", "Semi;colon"), // Semicolon (delimiter) + ]; + + let config = OutputConfig::new(); + let csv = to_csv(&messages, &config).expect("CSV generation failed"); + + // Verify the CSV can be generated without errors + assert!(!csv.is_empty()); + assert!(csv.contains("Alice")); + assert!(csv.contains("Bob")); +} + +// ========================================================================= +// JSON output edge cases +// ========================================================================= + +#[cfg(feature = "json-output")] +#[test] +fn test_json_escaping_special_chars() { + use chatpack::core::output::to_json; + + let messages = vec![ + Message::new("User", "Quote: \"test\""), + Message::new("User", "Backslash: \\"), + Message::new("User", "Tab: \t"), + Message::new("User", "Newline: \n"), + Message::new("User", "Control: \x01\x02"), + ]; + + let config = 
OutputConfig::new(); + let json = to_json(&messages, &config).expect("JSON generation failed"); + + // Verify valid JSON + let parsed: serde_json::Value = serde_json::from_str(&json).expect("Invalid JSON"); + assert!(parsed.is_array()); +} + +#[cfg(feature = "json-output")] +#[test] +fn test_jsonl_each_line_valid() { + use chatpack::core::output::to_jsonl; + + let messages = vec![ + Message::new("Alice", "Hello"), + Message::new("Bob", "World"), + ]; + + let config = OutputConfig::new(); + let jsonl = to_jsonl(&messages, &config).expect("JSONL generation failed"); + + // Each line should be valid JSON + for line in jsonl.lines() { + if !line.is_empty() { + let _: serde_json::Value = + serde_json::from_str(line).expect("Invalid JSON line"); + } + } +} + +// ========================================================================= +// Message builder pattern tests +// ========================================================================= + +#[test] +fn test_message_builder_with_all_metadata() { + let ts = Utc.with_ymd_and_hms(2024, 1, 15, 10, 0, 0).unwrap(); + let edited = Utc.with_ymd_and_hms(2024, 1, 15, 10, 5, 0).unwrap(); + + let msg = Message::new("Alice", "Hello") + .with_timestamp(ts) + .with_id(123) + .with_reply_to(100) + .with_edited(edited); + + assert_eq!(msg.sender, "Alice"); + assert_eq!(msg.content, "Hello"); + assert_eq!(msg.timestamp, Some(ts)); + assert_eq!(msg.id, Some(123)); + assert_eq!(msg.reply_to, Some(100)); + assert_eq!(msg.edited, Some(edited)); +} + +// ========================================================================= +// Serde roundtrip tests +// ========================================================================= + +#[test] +fn test_message_serde_roundtrip_basic() { + let msg = Message::new("Alice", "Hello"); + let json = serde_json::to_string(&msg).expect("serialize"); + let parsed: Message = serde_json::from_str(&json).expect("deserialize"); + + assert_eq!(msg.sender, parsed.sender); + assert_eq!(msg.content, 
parsed.content); +} + +#[test] +fn test_message_serde_roundtrip_with_metadata() { + let ts = Utc.with_ymd_and_hms(2024, 1, 15, 10, 0, 0).unwrap(); + let msg = Message::with_metadata("Alice", "Hello", Some(ts), Some(123), Some(100), None); + + let json = serde_json::to_string(&msg).expect("serialize"); + let parsed: Message = serde_json::from_str(&json).expect("deserialize"); + + assert_eq!(msg.sender, parsed.sender); + assert_eq!(msg.content, parsed.content); + assert_eq!(msg.timestamp, parsed.timestamp); + assert_eq!(msg.id, parsed.id); + assert_eq!(msg.reply_to, parsed.reply_to); +} + +#[test] +fn test_message_serde_ignores_unknown_fields() { + let json = r#"{"sender": "Alice", "content": "Hello", "unknown_field": 123}"#; + let parsed: Message = serde_json::from_str(json).expect("deserialize"); + + assert_eq!(parsed.sender, "Alice"); + assert_eq!(parsed.content, "Hello"); +} + +// ========================================================================= +// Message default tests +// ========================================================================= + +#[test] +fn test_message_default() { + let msg = Message::default(); + assert!(msg.sender.is_empty()); + assert!(msg.content.is_empty()); + assert!(msg.timestamp.is_none()); + assert!(msg.id.is_none()); + assert!(msg.reply_to.is_none()); + assert!(msg.edited.is_none()); +} diff --git a/tests/proptest.rs b/tests/proptest.rs index 077875d2..f5833d65 100644 --- a/tests/proptest.rs +++ b/tests/proptest.rs @@ -48,7 +48,7 @@ fn arb_messages(max_len: usize) -> impl Strategy<Value = Vec<Message>> { } proptest! { - #![proptest_config(ProptestConfig::with_cases(50))] + #![proptest_config(ProptestConfig::with_cases(100))] // ============================================ // MERGE PROPERTIES @@ -76,6 +76,43 @@ proptest!
{ prop_assert_eq!(result.len(), 1); } + /// Merge with same sender produces exactly one message + #[test] + fn merge_same_sender_produces_one(n in 1usize..10) { + let messages: Vec<Message> = (0..n) + .map(|i| Message { + sender: "Alice".to_string(), + content: format!("Message {}", i), + timestamp: None, + id: None, + reply_to: None, + edited: None, + }) + .collect(); + let merged = merge_consecutive(messages); + prop_assert_eq!(merged.len(), 1); + } + + /// Merge preserves total content + #[test] + fn merge_preserves_content_parts(n in 1usize..5) { + let messages: Vec<Message> = (0..n) + .map(|i| Message { + sender: "Alice".to_string(), + content: format!("part{}", i), + timestamp: None, + id: None, + reply_to: None, + edited: None, + }) + .collect(); + let merged = merge_consecutive(messages); + for i in 0..n { + let expected = format!("part{}", i); + prop_assert!(merged[0].content.contains(&expected), "Missing part{}", i); + } + } + // ============================================ // FILTER PROPERTIES // ============================================ @@ -163,6 +200,19 @@ proptest! { let merged = merge_consecutive(vec![msg]); prop_assert_eq!(&merged[0].content, &content); } + + // ============================================ + // SERDE ROUNDTRIP + // ============================================ + + /// Message serialization roundtrip + #[test] + fn message_serde_roundtrip(msg in arb_message()) { + let json = serde_json::to_string(&msg).expect("serialize"); + let parsed: Message = serde_json::from_str(&json).expect("deserialize"); + prop_assert_eq!(msg.sender, parsed.sender); + prop_assert_eq!(msg.content, parsed.content); + } } // ============================================