From 9fa55fa3ea4cf07cdbafe5c8cd01e460965607f9 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 12:51:21 +0200 Subject: [PATCH 1/7] chore: use sequence alignment to match the month name --- src/NepDate/SmartDateParser.cs | 368 ++++++++++++++++++--------------- 1 file changed, 205 insertions(+), 163 deletions(-) diff --git a/src/NepDate/SmartDateParser.cs b/src/NepDate/SmartDateParser.cs index 1066f25..eeacd1c 100644 --- a/src/NepDate/SmartDateParser.cs +++ b/src/NepDate/SmartDateParser.cs @@ -11,121 +11,16 @@ namespace NepDate /// public static class SmartDateParser { - // Month name mappings (English, Nepali transliteration, and Unicode) - private static readonly Dictionary MonthNameMappings = new Dictionary(StringComparer.OrdinalIgnoreCase) - { - // Month 1 - Baisakh (वैशाख) - { "baisakh", 1 }, { "baishakh", 1 }, { "baisak", 1 }, { "vaisakh", 1 }, { "vaisakha", 1 }, - { "vaishak", 1 }, { "vaisakhi", 1 }, { "beshak", 1 }, { "baishak", 1 }, - { "baisaga", 1 }, { "baishaga", 1 }, { "vesak", 1 }, - - // Month 2 - Jestha (जेष्ठ) - { "jestha", 2 }, { "jeth", 2 }, { "jeshtha", 2 }, { "jyeshtha", 2 }, { "jyestha", 2 }, - { "jesth", 2 }, { "jeshth", 2 }, { "jetha", 2 }, { "jeshta", 2 }, { "jayshtha", 2 }, - { "jayestha", 2 }, { "jesta", 2 }, { "jyesth", 2 }, { "jyaistha", 2 }, { "jaistha", 2 }, - - // Month 3 - Asar (असार) - { "asar", 3 }, { "asadh", 3 }, { "ashar", 3 }, { "ashad", 3 }, { "asad", 3 }, - { "aasad", 3 }, { "asada", 3 }, { "ashadh", 3 }, { "asadha", 3 }, { "ashadha", 3 }, - { "ashara", 3 }, { "asara", 3 }, { "ashada", 3 }, { "asaad", 3 }, { "aashar", 3 }, - - // Month 4 - Shrawan (श्रावण) - { "shrawan", 4 }, { "sawan", 4 }, { "saun", 4 }, { "srawan", 4 }, { "shraawan", 4 }, - { "shravan", 4 }, { "shravana", 4 }, { "sawun", 4 }, { "savan", 4 }, { "shrawana", 4 }, - { "sravana", 4 }, { "sawon", 4 }, { "sravan", 4 }, { "saawan", 4 }, { "sharwan", 4 }, - { "sarwan", 4 }, { "sraawan", 4 }, { "shaun", 4 }, { "shawan", 4 }, - - // Month 5 - Bhadra (भाद्र) - { "bhadra", 5 }, { "bhadau", 5 }, { "bhado", 5 }, { "bhaadra", 5 }, - { "bhadow", 5 }, { "bhadava", 5 }, { "bhadaw", 5 }, { "bhada", 5 }, - { "bhadoo", 5 }, { "bhadon", 5 }, { "bhadrapad", 5 }, { "bhadrapada", 5 }, { "bhaado", 5 }, - - // Month 6 - Ashwin (आश्विन) - { "ashwin", 6 }, { "asoj", 6 }, { "ashoj", 6 }, { "aswin", 6 }, { "ashvin", 6 }, - { "aaswin", 6 }, { "ashwini", 6 }, { "aswini", 6 }, { "ashvini", 6 }, { "aasoj", 6 }, - { "aashoj", 6 }, { "asoja", 6 }, { "asojh", 6 }, { "ashoja", 6 }, - { "asvin", 6 }, { "aashwin", 6 }, { "ashvina", 6 }, { "ashwina", 6 }, { "asvaayuja", 6 }, - - // Month 7 - Kartik (कार्तिक) - { "kartik", 7 }, { "kattik", 7 }, { "kaartik", 7 }, { "kartika", 7 }, { "katik", 7 }, - { "kartike", 7 }, { "karttik", 7 }, { "kartiki", 7 }, { "karthik", 7 }, { "karthika", 7 }, - { "kathik", 7 }, { "kaatik", 7 }, { "katak", 7 }, { "karttic", 7 }, { "kartic", 7 }, - - // Month 8 - Mangsir (मंसिर) - { "mangsir", 8 }, { "mangshir", 8 }, { "manshir", 8 }, { "marg", 8 }, { "margashirsha", 8 }, - { "mangasir", 8 }, { "mangsheer", 8 }, { "mangseer", 8 }, { "margshirsha", 8 }, - { "mansheer", 8 }, { "margsir", 8 }, { "managsir", 8 }, { "mangaseer", 8 }, { "mangsheersh", 8 }, - { "mangsira", 8 }, { "mansir", 8 }, { "magshir", 8 }, { "mangir", 8 }, { "magsir", 8 }, - - // Month 9 - Poush (पौष) - { "poush", 9 }, { "push", 9 }, { "pus", 9 }, { "paush", 9 }, - { "pausha", 9 }, { "pousha", 9 }, { "pos", 9 }, { "pausa", 9 }, { "pousa", 9 }, - { "posh", 9 }, { "posma", 9 }, { "paus", 9 }, { "poos", 9 }, - - // Month 10 - Magh (माघ) - { "magh", 10 }, { "mag", 10 }, { "maagh", 10 }, { "magha", 10 }, { "maagha", 10 }, - { "maga", 10 }, { "magah", 10 }, { "maag", 10 }, { "maaha", 10 }, { "maghu", 10 }, - { "maghaa", 10 }, { "magg", 10 }, { "mahi", 10 }, { "mahag", 10 }, - - // Month 11 - Falgun (फाल्गुन) - { "falgun", 11 }, { "phagun", 11 }, { "phalgun", 11 }, { "fagan", 11 }, { "fagun", 11 }, - { "phalguna", 11 }, { "falguna", 11 }, { "phalgoon", 11 }, { "falgunn", 11 }, { "phalguni", 11 }, - { "phalagan", 11 }, { "phalagun", 11 }, { "phalag", 11 }, - { "fagoon", 11 }, { "phaguna", 11 }, { "falgoona", 11 }, { "phagoon", 11 }, - - // Month 12 - Chaitra (चैत्र) - { "chaitra", 12 }, { "chait", 12 }, { "chaita", 12 }, { "chet", 12 }, { "chetra", 12 }, - { "chaitr", 12 }, { "chaity", 12 }, { "cheta", 12 }, { "chaitya", 12 }, - { "chaitri", 12 }, { "chaito", 12 }, { "chythro", 12 }, { "chaithra", 12 }, - - // Nepali unicode month names - // Month 1 - Baisakh - { "बैशाख", 1 }, { "वैशाख", 1 }, { "बैसाख", 1 }, { "बैशाक", 1 }, { "वैसाख", 1 }, { "वैशाक", 1 }, - - // Month 2 - Jestha - { "जेष्ठ", 2 }, { "जेठ", 2 }, { "जेस्थ", 2 }, { "ज्येष्ठ", 2 }, { "जेस्ठ", 2 }, { "जेष्ट", 2 }, - - // Month 3 - Asar - { "आषाढ", 3 }, { "असार", 3 }, { "अषाढ", 3 }, { "आशाढ", 3 }, { "आषाढ़", 3 }, { "असाढ", 3 }, { "अषाड", 3 }, - - // Month 4 - Shrawan - { "श्रावण", 4 }, { "सावन", 4 }, { "साउन", 4 }, { "श्रावन", 4 }, { "सावण", 4 }, { "श्रवण", 4 }, - - // Month 5 - Bhadra - { "भाद्र", 5 }, { "भदौ", 5 }, { "भादौ", 5 }, { "भाद्रपद", 5 }, { "भदो", 5 }, { "भादोै", 5 }, { "भाद्रा", 5 }, - - // Month 6 - Ashwin - { "आश्विन", 6 }, { "असोज", 6 }, { "अश्विन", 6 }, { "आसोज", 6 }, { "अस्विन", 6 }, { "अश्वीन", 6 }, { "अश्वीना", 6 }, - - // Month 7 - Kartik - { "कार्तिक", 7 }, { "कात्तिक", 7 }, { "कार्तीक", 7 }, { "कार्तिका", 7 }, { "कातिक", 7 }, { "कर्तिक", 7 }, { "कार्तिक्", 7 }, - - // Month 8 - Mangsir - { "मंसिर", 8 }, { "मङ्सिर", 8 }, { "मार्ग", 8 }, { "मंग्सिर", 8 }, { "मंशिर", 8 }, { "मागशिर", 8 }, { "मार्गशीर्ष", 8 }, - - // Month 9 - Poush - { "पौष", 9 }, { "पुष", 9 }, { "पुस", 9 }, { "पौश", 9 }, { "पौष्य", 9 }, { "पौस", 9 }, - - // Month 10 - Magh - { "माघ", 10 }, { "माग", 10 }, { "माह", 10 }, { "माघा", 10 }, { "माग्ह", 10 }, { "मा्घ", 10 }, - - // Month 11 - Falgun - { "फाल्गुन", 11 }, { "फागुन", 11 }, { "फाल्गुण", 11 }, { "फल्गुन", 11 }, { "फाल्गुना", 11 }, - - // Month 12 - Chaitra - { "चैत्र", 12 }, { "चैत", 12 }, { "चैता", 12 }, { "चॆत्र", 12 }, { "चेत्र", 12 }, { "चैत्रा", 12 } - }; + // Common date separators (same as original) + private static readonly char[] DateSeparators = { '/', '-', '.', ' ', ',', '_', '|', '।' }; - // Nepali unicode digit mappings + // Nepali unicode digit mappings (same as original) private static readonly Dictionary NepaliToEnglishDigits = new Dictionary { { '०', '0' }, { '१', '1' }, { '२', '2' }, { '३', '3' }, { '४', '4' }, { '५', '5' }, { '६', '6' }, { '७', '7' }, { '८', '8' }, { '९', '9' } }; - // Common date separators - private static readonly char[] DateSeparators = { '/', '-', '.', ' ', ',', '_', '|', '।' }; - /// /// Parses a string representation of a Nepali date in various formats and returns a NepaliDate. /// @@ -214,7 +109,6 @@ private static bool TryParseStandardFormat(string input, out NepaliDate result) // Try different separator-based formats foreach (char separator in DateSeparators) { - string pattern = $"{Regex.Escape(separator.ToString())}"; string[] parts = input.Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries); if (parts.Length == 3) @@ -238,75 +132,76 @@ private static bool TryParseStandardFormat(string input, out NepaliDate result) /// /// Tries to parse a string containing month names like "15 Jestha 2080" or "Jestha 15, 2080". + /// Now uses sequence alignment for fuzzy month matching. /// private static bool TryParseMonthNameFormat(string input, out NepaliDate result) { result = default; - // Pattern for: [day] [month name] [year] or [month name] [day], [year] - var monthNameMatches = MonthNameMappings.Keys - .Where(monthName => input.IndexOf(monthName, StringComparison.OrdinalIgnoreCase) >= 0) - .OrderByDescending(m => m.Length) // Prefer longer matches to avoid partial matches + // Extract potential month names (words with at least 3 characters) + var words = input.Split(new[] { ' ', ',', '-', '.', '/', '\\' }, StringSplitOptions.RemoveEmptyEntries) + .Where(w => w.Length >= 3 && !IsNumeric(w)) .ToList(); - foreach (var monthName in monthNameMatches) + foreach (string word in words) { - int monthValue = MonthNameMappings[monthName]; + // Use fuzzy matching instead of dictionary lookup + int? monthNumber = NepaliMonthMatcher.FindBestMatch(word, 0.4); - // Find year and day in the input - string remaining = ReplaceStringIgnoreCase(input, monthName, " ").Trim(); - - // Extract year and day - var numbers = Regex.Matches(remaining, @"\d+") - .Cast() - .Select(m => int.Parse(m.Value)) - .ToList(); - - if (numbers.Count >= 2) + if (monthNumber.HasValue) { - // Determine which number is the year based on magnitude - int year, day; - if (numbers[0] > 1900) // Likely a year - { - year = numbers[0]; - day = numbers.Count > 1 ? numbers[1] : 1; - } - else if (numbers.Count > 1 && numbers[1] > 1900) // Second number is a year - { - year = numbers[1]; - day = numbers[0]; - } - else // No obvious year, try heuristic - { - // Sort numbers by size, largest is likely year - var sortedNumbers = numbers.OrderByDescending(n => n).ToList(); - year = sortedNumbers[0]; - day = sortedNumbers.Count > 1 ? sortedNumbers[1] : 1; + // Extract numbers from the input for year and day + var numbers = System.Text.RegularExpressions.Regex.Matches(input, @"\d+") + .Cast() + .Select(m => int.Parse(m.Value)) + .ToList(); - // If largest number is too small to be a BS year, fallback - if (year < 1900) + if (numbers.Count >= 2) + { + // Determine which number is the year based on magnitude + int year, day; + if (numbers[0] > 1900) // Likely a year { - // Add 2000 to years likely expressed in 2-digit short form (e.g., '80 for 2080) - if (year >= 0 && year < 100) - year += 2000; - else if (year >= 100 && year < 999) - year += 1000; // Convert 3-digit year like 080 to 1080 or 080 to 2080 + year = numbers[0]; + day = numbers.Count > 1 ? numbers[1] : 1; + } + else if (numbers.Count > 1 && numbers[1] > 1900) // Second number is a year + { + year = numbers[1]; + day = numbers[0]; + } + else // No obvious year, try heuristic + { + // Sort numbers by size, largest is likely year + var sortedNumbers = numbers.OrderByDescending(n => n).ToList(); + year = sortedNumbers[0]; + day = sortedNumbers.Count > 1 ? sortedNumbers[1] : 1; + + // If largest number is too small to be a BS year, fallback + if (year < 1900) + { + // Add 2000 to years likely expressed in 2-digit short form (e.g., '80 for 2080) + if (year >= 0 && year < 100) + year += 2000; + else if (year >= 100 && year < 999) + year += 1000; // Convert 3-digit year like 080 to 1080 or 080 to 2080 + } } - } - // Validate and sanitize day - if (day < 1 || day > 32) - continue; + // Validate and sanitize day + if (day < 1 || day > 32) + continue; - // Try to create valid date - try - { - result = new NepaliDate(year, monthValue, day); - return true; - } - catch - { - // Continue to next attempt + // Try to create valid date + try + { + result = new NepaliDate(year, monthNumber.Value, day); + return true; + } + catch + { + // Continue to next attempt + } } } } @@ -488,5 +383,152 @@ private static string ReplaceStringIgnoreCase(string input, string oldValue, str return result.ToString(); } + + /// + /// Checks if a string represents a numeric value + /// + private static bool IsNumeric(string input) + { + return int.TryParse(input, out _); + } + } + + + + /// + /// Provides fuzzy matching for Nepali month names using sequence alignment algorithm + /// (Needleman-Wunsch algorithm adapted for string matching) + /// + public static class NepaliMonthMatcher + { + // Standard month names (canonical forms) - much smaller dictionary + private static readonly Dictionary CanonicalMonthNames = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "baisakh", 1 }, + { "jestha", 2 }, + { "asar", 3 },{ "ashad", 3 }, + { "shrawan", 4 },{ "saun", 4 }, + { "bhadra", 5 }, + { "ashwin", 6 },{ "ashoj", 6 },{"aswayuja",6}, + { "kartik", 7 }, + { "mangsir", 8 }, + { "poush", 9 }, + { "magh", 10 }, + { "falgun", 11 }, + { "chaitra", 12 }, + { "बैशाख", 1 }, + { "जेष्ठ", 2 },{ "जेठ", 2 }, + { "असार", 3 },{ "आषाढ", 3 }, + { "श्रावण", 4 },{ "साउन", 4 }, + { "भाद्र", 5 },{ "भदौ", 5 }, + { "आश्विन", 6 },{ "असोज", 6 }, + { "कार्तिक", 7 }, + { "मंसिर", 8 },{ "मङ्गसिर", 8 }, + { "पौष", 9 }, + { "माघ", 10 }, + { "फाल्गुन", 11 },{"फाल्गुण",11}, + { "चैत्र", 12 },{ "चैत", 12 }, + }; + + /// + /// Finds the best matching Nepali month using sequence alignment algorithm + /// + /// The month name to match + /// Minimum similarity threshold (0.0 to 1.0) + /// The month number (1-12) if found, null otherwise + public static int? FindBestMatch(string input, double threshold = 0.6) + { + if (string.IsNullOrWhiteSpace(input)) + return null; + + string normalizedInput = input; + + // Try exact match first for performance + if (CanonicalMonthNames.TryGetValue(normalizedInput, out int exactMatch)) + return exactMatch; + + double bestScore = 0; + int? bestMatch = null; + + // Compare against all canonical forms using sequence alignment + foreach (var kvp in CanonicalMonthNames) + { + string candidate = kvp.Key; + double similarity = CalculateSequenceAlignment(normalizedInput, candidate); + + if (similarity > bestScore && similarity >= threshold) + { + bestScore = similarity; + bestMatch = kvp.Value; + } + } + + return bestMatch; + } + + /// + /// Calculates sequence alignment similarity between two strings using modified Needleman-Wunsch algorithm + /// + /// First string + /// Second string + /// Similarity score between 0.0 and 1.0 + private static double CalculateSequenceAlignment(string s1, string s2) + { + if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2)) + return 0; + + if (s1 == s2) + return 1.0; + + int m = s1.Length; + int n = s2.Length; + + int[,] dp = new int[m + 1, n + 1]; + + const int MATCH_SCORE = 3; + const int MISMATCH_PENALTY = -1; + const int GAP_PENALTY = -1; + + for (int i = 0; i <= m; i++) + dp[i, 0] = i * GAP_PENALTY; + + for (int j = 0; j <= n; j++) + dp[0, j] = j * GAP_PENALTY; + + for (int i = 1; i <= m; i++) + { + for (int j = 1; j <= n; j++) + { + char c1 = char.ToLower(s1[i - 1]); + char c2 = char.ToLower(s2[j - 1]); + + int matchScore; + if (c1 == c2) + { + matchScore = MATCH_SCORE; + } + else + { + matchScore = MISMATCH_PENALTY; + } + + int diagonal = dp[i - 1, j - 1] + matchScore; + int deletion = dp[i - 1, j] + GAP_PENALTY; + int insertion = dp[i, j - 1] + GAP_PENALTY; + + dp[i, j] = Math.Max(diagonal, Math.Max(deletion, insertion)); + } + } + + int alignmentScore = dp[m, n]; + int maxPossibleScore = Math.Max(m, n) * MATCH_SCORE; + int minPossibleScore = Math.Max(m, n) * GAP_PENALTY; + + double normalizedScore = + (double)(alignmentScore - minPossibleScore) / (maxPossibleScore - minPossibleScore); + return Math.Max(0, Math.Min(1, normalizedScore)); + } + } } \ No newline at end of file From 9b703ce8539e0fd8ecab504169aecf738bd4f171 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 12:51:43 +0200 Subject: [PATCH 2/7] chore: add tests to verify the month name combination --- .../Core/SmartDateParserTests.cs | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/tests/NepDate.Tests/Core/SmartDateParserTests.cs b/tests/NepDate.Tests/Core/SmartDateParserTests.cs index 3e69c49..ea9a8a5 100644 --- a/tests/NepDate.Tests/Core/SmartDateParserTests.cs +++ b/tests/NepDate.Tests/Core/SmartDateParserTests.cs @@ -174,4 +174,157 @@ public void ExtensionMethod_TryToNepaliDate_ParsesCorrectly() Assert.True(success); Assert.Equal(expectedDate, result); } + + [Theory] + // Baisakh (1) + [InlineData("baisakh", 1)] + [InlineData("baishakh", 1)] + [InlineData("baisak", 1)] + [InlineData("vaisakh", 1)] + [InlineData("vaisakha", 1)] + [InlineData("vaishak", 1)] + [InlineData("vaisakhi", 1)] + [InlineData("baishak", 1)] + [InlineData("baisaga", 1)] + [InlineData("baishaga", 1)] + + // Jestha (2) + [InlineData("jestha", 2)] + [InlineData("jeth", 2)] + [InlineData("jeshtha", 2)] + [InlineData("jyeshtha", 2)] + [InlineData("jyestha", 2)] + [InlineData("jesth", 2)] + [InlineData("jeshth", 2)] + [InlineData("jetha", 2)] + [InlineData("jeshta", 2)] + [InlineData("jayshtha", 2)] + [InlineData("jayestha", 2)] + [InlineData("jesta", 2)] + [InlineData("jyesth", 2)] + [InlineData("jyaistha", 2)] + [InlineData("jaistha", 2)] + + // Asar (3) + [InlineData("asar", 3)] + [InlineData("asadh", 3)] + [InlineData("ashar", 3)] + [InlineData("ashad", 3)] + [InlineData("asad", 3)] + [InlineData("aasad", 3)] + [InlineData("asada", 3)] + [InlineData("ashadh", 3)] + [InlineData("asadha", 3)] + [InlineData("ashadha", 3)] + [InlineData("ashara", 3)] + [InlineData("asara", 3)] + [InlineData("ashada", 3)] + [InlineData("asaad", 3)] + [InlineData("aashar", 3)] + + // Shrawan (4) + [InlineData("shrawan", 4)] + [InlineData("sawan", 4)] + [InlineData("saun", 4)] + [InlineData("srawan", 4)] + [InlineData("shraawan", 4)] + [InlineData("shravan", 4)] + [InlineData("shravana", 4)] + [InlineData("sawun", 4)] + [InlineData("savan", 4)] + [InlineData("shrawana", 4)] + [InlineData("sravana", 4)] + [InlineData("sawon", 4)] + [InlineData("sravan", 4)] + [InlineData("saawan", 4)] + [InlineData("sharwan", 4)] + [InlineData("sarwan", 4)] + [InlineData("sraawan", 4)] + [InlineData("shaun", 4)] + [InlineData("shawan", 4)] + + // Bhadra (5) + [InlineData("bhadra", 5)] + [InlineData("bhadrapad", 5)] + [InlineData("bhadrapada", 5)] + [InlineData("bhadra pad", 5)] + [InlineData("bhadraw", 5)] + [InlineData("bhadar", 5)] + [InlineData("bhadrapaksh", 5)] + [InlineData("bhdra", 5)] + [InlineData("bhadarwa", 5)] + [InlineData("bhadrawa", 5)] + + // Ashoj (6) + [InlineData("ashoj", 6)] + [InlineData("asoj", 6)] + [InlineData("ashwin", 6)] + [InlineData("ashvina", 6)] + [InlineData("ashwayuja", 6)] + [InlineData("asuj", 6)] + [InlineData("asoja", 6)] + [InlineData("ashvayuja", 6)] + [InlineData("ashwinak", 6)] + + // Kartik (7) + [InlineData("kartik", 7)] + [InlineData("kartika", 7)] + [InlineData("karthik", 7)] + [InlineData("karttika", 7)] + [InlineData("kaartik", 7)] + [InlineData("karthika", 7)] + [InlineData("kaarthik", 7)] + [InlineData("kartic", 7)] + + // Mangsir (8) + [InlineData("mangsir", 8)] + [InlineData("mangsirh", 8)] + [InlineData("mangsar", 8)] + [InlineData("mangsira", 8)] + [InlineData("mangsire", 8)] + [InlineData("mangsira", 8)] + [InlineData("margashirsha", 8)] + [InlineData("margashira", 8)] + [InlineData("margshirsha", 8)] + [InlineData("margsheersh", 8)] + [InlineData("mangsira", 8)] + + // Poush (9) + [InlineData("poush", 9)] + [InlineData("paush", 9)] + [InlineData("pousha", 9)] + [InlineData("pausha", 9)] + [InlineData("push", 9)] + [InlineData("pusha", 9)] + [InlineData("paus", 9)] + [InlineData("pous", 9)] + + // Magh (10) + [InlineData("magh", 10)] + [InlineData("magha", 10)] + [InlineData("mag", 10)] + [InlineData("maagh", 10)] + [InlineData("maagha", 10)] + + // Falgun (11) + [InlineData("falgun", 11)] + [InlineData("phagun", 11)] + [InlineData("phalgun", 11)] + [InlineData("phalguna", 11)] + [InlineData("faagun", 11)] + [InlineData("phalguni", 11)] + [InlineData("fagun", 11)] + + // Chaitra (12) + [InlineData("chaitra", 12)] + [InlineData("chait", 12)] + [InlineData("chaitri", 12)] + [InlineData("chaitram", 12)] + [InlineData("chaitanya", 12)] + [InlineData("chaitarah", 12)] + public void Parse_WithNepaliMonthName_ReturnsCorrectDate(string input, int expectedMonth) + { + var expectedDate = new NepaliDate(2080, expectedMonth, 15); + Assert.Equal(expectedDate, SmartDateParser.Parse($"15 {input} 2080")); + } } \ No newline at end of file From 69ff9046e2832bb1694c94ac1c59f5fdf59e4fb1 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 12:52:09 +0200 Subject: [PATCH 3/7] chore: update to net9.0 --- tests/NepDate.Tests/NepDate.Tests.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/NepDate.Tests/NepDate.Tests.csproj b/tests/NepDate.Tests/NepDate.Tests.csproj index 1af4666..bf3776b 100644 --- a/tests/NepDate.Tests/NepDate.Tests.csproj +++ b/tests/NepDate.Tests/NepDate.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net9.0 enable enable From 56f1e390b2411488dcf0f450d6619da24cc20198 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 13:29:53 +0200 Subject: [PATCH 4/7] chore: add more test for nepali unicode months --- .../Core/SmartDateParserTests.cs | 100 +++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/tests/NepDate.Tests/Core/SmartDateParserTests.cs b/tests/NepDate.Tests/Core/SmartDateParserTests.cs index ea9a8a5..6c840b3 100644 --- a/tests/NepDate.Tests/Core/SmartDateParserTests.cs +++ b/tests/NepDate.Tests/Core/SmartDateParserTests.cs @@ -174,7 +174,7 @@ public void ExtensionMethod_TryToNepaliDate_ParsesCorrectly() Assert.True(success); Assert.Equal(expectedDate, result); } - + [Theory] // Baisakh (1) [InlineData("baisakh", 1)] @@ -322,6 +322,104 @@ public void ExtensionMethod_TryToNepaliDate_ParsesCorrectly() [InlineData("chaitram", 12)] [InlineData("chaitanya", 12)] [InlineData("chaitarah", 12)] + + // Month 1 - Baisakh + [InlineData("बैशाख", 1)] + [InlineData("वैशाख", 1)] + [InlineData("बैसाख", 1)] + [InlineData("बैशाक", 1)] + [InlineData("वैसाख", 1)] + [InlineData("वैशाक", 1)] + +// Month 2 - Jestha + [InlineData("जेष्ठ", 2)] + [InlineData("जेठ", 2)] + [InlineData("जेस्थ", 2)] + [InlineData("ज्येष्ठ", 2)] + [InlineData("जेस्ठ", 2)] + [InlineData("जेष्ट", 2)] + +// Month 3 - Asar + [InlineData("आषाढ", 3)] + [InlineData("असार", 3)] + [InlineData("अषाढ", 3)] + [InlineData("आशाढ", 3)] + [InlineData("आषाढ़", 3)] + [InlineData("असाढ", 3)] + [InlineData("अषाड", 3)] + +// Month 4 - Shrawan + [InlineData("श्रावण", 4)] + [InlineData("सावन", 4)] + [InlineData("साउन", 4)] + [InlineData("श्रावन", 4)] + [InlineData("सावण", 4)] + [InlineData("श्रवण", 4)] + +// Month 5 - Bhadra + [InlineData("भाद्र", 5)] + [InlineData("भदौ", 5)] + [InlineData("भाद्रपद", 5)] + [InlineData("भदो", 5)] + [InlineData("भादोै", 5)] + [InlineData("भाद्रा", 5)] + +// Month 6 - Ashwin + [InlineData("आश्विन", 6)] + [InlineData("असोज", 6)] + [InlineData("अश्विन", 6)] + [InlineData("आसोज", 6)] + [InlineData("अस्विन", 6)] + [InlineData("अश्वीन", 6)] + [InlineData("अश्वीना", 6)] + +// Month 7 - Kartik + [InlineData("कार्तिक", 7)] + [InlineData("कात्तिक", 7)] + [InlineData("कार्तीक", 7)] + [InlineData("कार्तिका", 7)] + [InlineData("कातिक", 7)] + [InlineData("कर्तिक", 7)] + [InlineData("कार्तिक्", 7)] + +// Month 8 - Mangsir + [InlineData("मंसिर", 8)] + [InlineData("मङ्सिर", 8)] + [InlineData("मंग्सिर", 8)] + [InlineData("मंशिर", 8)] + [InlineData("मागशिर", 8)] + [InlineData("मार्गशीर्ष", 8)] + +// Month 9 - Poush + [InlineData("पौष", 9)] + [InlineData("पुष", 9)] + [InlineData("पुस", 9)] + [InlineData("पौश", 9)] + [InlineData("पौष्य", 9)] + [InlineData("पौस", 9)] + +// Month 10 - Magh + [InlineData("माघ", 10)] + [InlineData("माग", 10)] + [InlineData("माह", 10)] + [InlineData("माघा", 10)] + [InlineData("माग्ह", 10)] + [InlineData("मा्घ", 10)] + +// Month 11 - Falgun + [InlineData("फाल्गुन", 11)] + [InlineData("फागुन", 11)] + [InlineData("फाल्गुण", 11)] + [InlineData("फल्गुन", 11)] + [InlineData("फाल्गुना", 11)] + +// Month 12 - Chaitra + [InlineData("चैत्र", 12)] + [InlineData("चैत", 12)] + [InlineData("चैता", 12)] + [InlineData("चॆत्र", 12)] + [InlineData("चेत्र", 12)] + [InlineData("चैत्रा", 12)] public void Parse_WithNepaliMonthName_ReturnsCorrectDate(string input, int expectedMonth) { var expectedDate = new NepaliDate(2080, expectedMonth, 15); From 098f114abc31dbbb44794a49712b87e41fb55579 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 13:30:09 +0200 Subject: [PATCH 5/7] chore: move nepaliMonthMatcher to new file --- src/NepDate/NepaliMonthMatcher.cs | 142 ++++++++++++++++++++++++++++++ src/NepDate/SmartDateParser.cs | 139 ----------------------------- 2 files changed, 142 insertions(+), 139 deletions(-) create mode 100644 src/NepDate/NepaliMonthMatcher.cs diff --git a/src/NepDate/NepaliMonthMatcher.cs b/src/NepDate/NepaliMonthMatcher.cs new file mode 100644 index 0000000..06b8a84 --- /dev/null +++ b/src/NepDate/NepaliMonthMatcher.cs @@ -0,0 +1,142 @@ +using System; +using System.Collections.Generic; + +namespace NepDate +{ + /// + /// Provides fuzzy matching for Nepali month names using sequence alignment algorithm + /// (Needleman-Wunsch algorithm adapted for string matching) + /// + public static class NepaliMonthMatcher + { + // Standard month names (canonical forms) - much smaller dictionary + private static readonly Dictionary CanonicalMonthNames = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "baisakh", 1 }, + { "jestha", 2 }, + { "asar", 3 },{ "ashad", 3 }, + { "shrawan", 4 },{ "saun", 4 }, + { "bhadra", 5 }, + { "ashwin", 6 },{ "ashoj", 6 },{"aswayuja",6}, + { "kartik", 7 }, + { "mangsir", 8 }, + { "poush", 9 }, + { "magh", 10 }, + { "falgun", 11 }, + { "chaitra", 12 }, + { "बैशाख", 1 }, + { "जेष्ठ", 2 },{ "जेठ", 2 }, + { "असार", 3 },{ "आषाढ", 3 }, + { "श्रावण", 4 },{ "साउन", 4 }, + { "भाद्र", 5 },{ "भदौ", 5 }, + { "आश्विन", 6 },{ "असोज", 6 }, + { "कार्तिक", 7 }, + { "मंसिर", 8 },{ "मङ्गसिर", 8 }, + { "पौष", 9 },{"पुस",9}, + { "माघ", 10 },{"माग्ह",10}, + { "फाल्गुन", 11 },{"फाल्गुण",11}, + { "चैत्र", 12 },{ "चैत", 12 }, + }; + + /// + /// Finds the best matching Nepali month using sequence alignment algorithm + /// + /// The month name to match + /// Minimum similarity threshold (0.0 to 1.0) + /// The month number (1-12) if found, null otherwise + public static int? FindBestMatch(string input, double threshold = 0.6) + { + if (string.IsNullOrWhiteSpace(input)) + return null; + + string normalizedInput = input; + + // Try exact match first for performance + if (CanonicalMonthNames.TryGetValue(normalizedInput, out int exactMatch)) + return exactMatch; + + double bestScore = 0; + int? bestMatch = null; + + // Compare against all canonical forms using sequence alignment + foreach (var kvp in CanonicalMonthNames) + { + string candidate = kvp.Key; + double similarity = CalculateSequenceAlignment(normalizedInput, candidate); + + if (similarity > bestScore && similarity >= threshold) + { + bestScore = similarity; + bestMatch = kvp.Value; + } + } + + return bestMatch; + } + + /// + /// Calculates sequence alignment similarity between two strings using modified Needleman-Wunsch algorithm + /// + /// First string + /// Second string + /// Similarity score between 0.0 and 1.0 + private static double CalculateSequenceAlignment(string s1, string s2) + { + if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2)) + return 0; + + if (s1 == s2) + return 1.0; + + int m = s1.Length; + int n = s2.Length; + + int[,] dp = new int[m + 1, n + 1]; + + const int MATCH_SCORE = 3; + const int MISMATCH_PENALTY = -1; + const int GAP_PENALTY = -1; + + for (int i = 0; i <= m; i++) + dp[i, 0] = i * GAP_PENALTY; + + for (int j = 0; j <= n; j++) + dp[0, j] = j * GAP_PENALTY; + + for (int i = 1; i <= m; i++) + { + for (int j = 1; j <= n; j++) + { + char c1 = char.ToLower(s1[i - 1]); + char c2 = char.ToLower(s2[j - 1]); + + int matchScore; + if (c1 == c2) + { + matchScore = MATCH_SCORE; + } + else + { + matchScore = MISMATCH_PENALTY; + } + + int diagonal = dp[i - 1, j - 1] + matchScore; + int deletion = dp[i - 1, j] + GAP_PENALTY; + int insertion = dp[i, j - 1] + GAP_PENALTY; + + dp[i, j] = Math.Max(diagonal, Math.Max(deletion, insertion)); + } + } + + int alignmentScore = dp[m, n]; + int maxPossibleScore = Math.Max(m, n) * MATCH_SCORE; + int minPossibleScore = Math.Max(m, n) * GAP_PENALTY; + + double normalizedScore = + (double)(alignmentScore - minPossibleScore) / (maxPossibleScore - minPossibleScore); + return Math.Max(0, Math.Min(1, normalizedScore)); + } + + } +} \ No newline at end of file diff --git a/src/NepDate/SmartDateParser.cs b/src/NepDate/SmartDateParser.cs index eeacd1c..b13ffba 100644 --- a/src/NepDate/SmartDateParser.cs +++ b/src/NepDate/SmartDateParser.cs @@ -392,143 +392,4 @@ private static bool IsNumeric(string input) return int.TryParse(input, out _); } } - - - - /// - /// Provides fuzzy matching for Nepali month names using sequence alignment algorithm - /// (Needleman-Wunsch algorithm adapted for string matching) - /// - public static class NepaliMonthMatcher - { - // Standard month names (canonical forms) - much smaller dictionary - private static readonly Dictionary CanonicalMonthNames = - new Dictionary(StringComparer.OrdinalIgnoreCase) - { - { "baisakh", 1 }, - { "jestha", 2 }, - { "asar", 3 },{ "ashad", 3 }, - { "shrawan", 4 },{ "saun", 4 }, - { "bhadra", 5 }, - { "ashwin", 6 },{ "ashoj", 6 },{"aswayuja",6}, - { "kartik", 7 }, - { "mangsir", 8 }, - { "poush", 9 }, - { "magh", 10 }, - { "falgun", 11 }, - { "chaitra", 12 }, - { "बैशाख", 1 }, - { "जेष्ठ", 2 },{ "जेठ", 2 }, - { "असार", 3 },{ "आषाढ", 3 }, - { "श्रावण", 4 },{ "साउन", 4 }, - { "भाद्र", 5 },{ "भदौ", 5 }, - { "आश्विन", 6 },{ "असोज", 6 }, - { "कार्तिक", 7 }, - { "मंसिर", 8 },{ "मङ्गसिर", 8 }, - { "पौष", 9 }, - { "माघ", 10 }, - { "फाल्गुन", 11 },{"फाल्गुण",11}, - { "चैत्र", 12 },{ "चैत", 12 }, - }; - - /// - /// Finds the best matching Nepali month using sequence alignment algorithm - /// - /// The month name to match - /// Minimum similarity threshold (0.0 to 1.0) - /// The month number (1-12) if found, null otherwise - public static int? FindBestMatch(string input, double threshold = 0.6) - { - if (string.IsNullOrWhiteSpace(input)) - return null; - - string normalizedInput = input; - - // Try exact match first for performance - if (CanonicalMonthNames.TryGetValue(normalizedInput, out int exactMatch)) - return exactMatch; - - double bestScore = 0; - int? bestMatch = null; - - // Compare against all canonical forms using sequence alignment - foreach (var kvp in CanonicalMonthNames) - { - string candidate = kvp.Key; - double similarity = CalculateSequenceAlignment(normalizedInput, candidate); - - if (similarity > bestScore && similarity >= threshold) - { - bestScore = similarity; - bestMatch = kvp.Value; - } - } - - return bestMatch; - } - - /// - /// Calculates sequence alignment similarity between two strings using modified Needleman-Wunsch algorithm - /// - /// First string - /// Second string - /// Similarity score between 0.0 and 1.0 - private static double CalculateSequenceAlignment(string s1, string s2) - { - if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2)) - return 0; - - if (s1 == s2) - return 1.0; - - int m = s1.Length; - int n = s2.Length; - - int[,] dp = new int[m + 1, n + 1]; - - const int MATCH_SCORE = 3; - const int MISMATCH_PENALTY = -1; - const int GAP_PENALTY = -1; - - for (int i = 0; i <= m; i++) - dp[i, 0] = i * GAP_PENALTY; - - for (int j = 0; j <= n; j++) - dp[0, j] = j * GAP_PENALTY; - - for (int i = 1; i <= m; i++) - { - for (int j = 1; j <= n; j++) - { - char c1 = char.ToLower(s1[i - 1]); - char c2 = char.ToLower(s2[j - 1]); - - int matchScore; - if (c1 == c2) - { - matchScore = MATCH_SCORE; - } - else - { - matchScore = MISMATCH_PENALTY; - } - - int diagonal = dp[i - 1, j - 1] + matchScore; - int deletion = dp[i - 1, j] + GAP_PENALTY; - int insertion = dp[i, j - 1] + GAP_PENALTY; - - dp[i, j] = Math.Max(diagonal, Math.Max(deletion, insertion)); - } - } - - int alignmentScore = dp[m, n]; - int maxPossibleScore = Math.Max(m, n) * MATCH_SCORE; - int minPossibleScore = Math.Max(m, n) * GAP_PENALTY; - - double normalizedScore = - (double)(alignmentScore - minPossibleScore) / (maxPossibleScore - minPossibleScore); - return Math.Max(0, Math.Min(1, normalizedScore)); - } - - } } \ No newline at end of file From 4e934c1ddbe16de75053deec8e5cb73db1c5796a Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 13:33:10 +0200 Subject: [PATCH 6/7] chore: update project packages --- tests/NepDate.Tests/NepDate.Tests.csproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/NepDate.Tests/NepDate.Tests.csproj b/tests/NepDate.Tests/NepDate.Tests.csproj index bf3776b..63482ea 100644 --- a/tests/NepDate.Tests/NepDate.Tests.csproj +++ b/tests/NepDate.Tests/NepDate.Tests.csproj @@ -9,13 +9,13 @@ - - - + + + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all From e2fe72eb33deac24d90f098d97f01437250b3f03 Mon Sep 17 00:00:00 2001 From: Sandip Chaudhary Date: Sat, 19 Jul 2025 13:35:31 +0200 Subject: [PATCH 7/7] refac: remove redundant normalizedInput var --- src/NepDate/NepaliMonthMatcher.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/NepDate/NepaliMonthMatcher.cs b/src/NepDate/NepaliMonthMatcher.cs index 06b8a84..ca635e9 100644 --- a/src/NepDate/NepaliMonthMatcher.cs +++ b/src/NepDate/NepaliMonthMatcher.cs @@ -49,11 +49,9 @@ public static class NepaliMonthMatcher { if (string.IsNullOrWhiteSpace(input)) return null; - - string normalizedInput = input; - + // Try exact match first for performance - if (CanonicalMonthNames.TryGetValue(normalizedInput, out int exactMatch)) + if (CanonicalMonthNames.TryGetValue(input, out int exactMatch)) return exactMatch; double bestScore = 0; @@ -63,7 +61,7 @@ public static class NepaliMonthMatcher foreach (var kvp in CanonicalMonthNames) { string candidate = kvp.Key; - double similarity = CalculateSequenceAlignment(normalizedInput, candidate); + double similarity = CalculateSequenceAlignment(input, candidate); if (similarity > bestScore && similarity >= threshold) {