From 6502945f5cb30f2e3cd9d72424188eed8916d171 Mon Sep 17 00:00:00 2001 From: Andrey Kiselev Date: Fri, 12 Jun 2020 09:26:08 +0300 Subject: [PATCH 1/4] HW3 is completed --- hw03_frequency_analysis/go.mod | 2 +- hw03_frequency_analysis/top.go | 48 +++++++++++++++++++++++++++-- hw03_frequency_analysis/top_test.go | 19 +++++++++++- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/hw03_frequency_analysis/go.mod b/hw03_frequency_analysis/go.mod index 1ff43eb..2b6e981 100644 --- a/hw03_frequency_analysis/go.mod +++ b/hw03_frequency_analysis/go.mod @@ -1,4 +1,4 @@ -module github.com/fixme_my_friend/hw03_frequency_analysis +module github.com/ezhk/golang-learning/hw03_frequency_analysis go 1.14 diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go index 7cd719b..dfd58b3 100644 --- a/hw03_frequency_analysis/top.go +++ b/hw03_frequency_analysis/top.go @@ -1,6 +1,48 @@ package hw03_frequency_analysis //nolint:golint,stylecheck -func Top10(_ string) []string { - // Place your code here - return nil +import ( + "regexp" + "sort" + "strings" +) + +var SplitFilter = regexp.MustCompile(`[\s?!.;,]`) +var IgnoredSymbols = regexp.MustCompile(`[-]`) + +type WordFrequency struct { + word string + RepeatCounter int +} + +func Top10(inputLine string) []string { + const MaxLength = 10 + mostFrequentWords := make([]string, 0, MaxLength) + + // prepare map with counter + freqMap := make(map[string]int) + for _, word := range SplitFilter.Split(inputLine, -1) { + word = IgnoredSymbols.ReplaceAllString(word, "") + if len(word) > 0 { + freqMap[strings.ToLower(word)]++ + } + } + + // convert map to slice wordFrequency for next order + wordSlice := make([]WordFrequency, 0, len(freqMap)) + for word, counter := range freqMap { + wordSlice = append(wordSlice, WordFrequency{word, counter}) + } + sort.Slice(wordSlice, func(i, j int) bool { + return wordSlice[i].RepeatCounter > wordSlice[j].RepeatCounter + }) + + // store data to result slice + for _, value := range wordSlice { + if len(mostFrequentWords) >= MaxLength { + break + } + mostFrequentWords = append(mostFrequentWords, value.word) + } + + return mostFrequentWords } diff --git a/hw03_frequency_analysis/top_test.go b/hw03_frequency_analysis/top_test.go index 6ceef08..74c13df 100644 --- a/hw03_frequency_analysis/top_test.go +++ b/hw03_frequency_analysis/top_test.go @@ -7,7 +7,7 @@ import ( ) // Change to true if needed -var taskWithAsteriskIsCompleted = false +var taskWithAsteriskIsCompleted = true var text = `Как видите, он спускается по лестнице вслед за своим другом Кристофером Робином, головой вниз, пересчитывая @@ -58,3 +58,20 @@ func TestTop10(t *testing.T) { } }) } + +func TestExtendedTop10(t *testing.T) { + type test struct { + text string + expected []string + } + for _, testCase := range []test{ + {"какой-то", []string{"какойто"}}, + {"кот,собака собака", []string{"собака", "кот"}}, + // check ingoring "-" in next test + {"12 1-2 5", []string{"12", "5"}}, + {"1\n, ;1 -1 \t2,2 3.", []string{"1", "2", "3"}}, + } { + result := Top10(testCase.text) + assert.Equal(t, result, testCase.expected) + } +} From b78e537b004478e28807a3f4de57bd71039fefe4 Mon Sep 17 00:00:00 2001 From: Andrey Kiselev Date: Fri, 12 Jun 2020 09:28:27 +0300 Subject: [PATCH 2/4] update comments --- hw03_frequency_analysis/top.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go index dfd58b3..5a6b2d8 100644 --- a/hw03_frequency_analysis/top.go +++ b/hw03_frequency_analysis/top.go @@ -27,7 +27,7 @@ func Top10(inputLine string) []string { } } - // convert map to slice wordFrequency for next order + // convert map to slice wordFrequency for next sorting wordSlice := make([]WordFrequency, 0, len(freqMap)) for word, counter := range freqMap { wordSlice = append(wordSlice, WordFrequency{word, counter}) @@ -36,7 +36,7 @@ func Top10(inputLine string) []string { return wordSlice[i].RepeatCounter > wordSlice[j].RepeatCounter }) - // store data to result slice + // store data for _, value := range wordSlice { if len(mostFrequentWords) >= MaxLength { break From 95d1757b55648ce9176a7a255eea7370196bebec Mon Sep 17 00:00:00 2001 From: Andrey Kiselev Date: Sat, 13 Jun 2020 22:29:55 +0300 Subject: [PATCH 3/4] use FieldsFunc method and benchmark optimizations --- hw03_frequency_analysis/top.go | 12 ++++++++---- hw03_frequency_analysis/top_test.go | 6 ++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go index 5a6b2d8..647f193 100644 --- a/hw03_frequency_analysis/top.go +++ b/hw03_frequency_analysis/top.go @@ -4,10 +4,10 @@ import ( "regexp" "sort" "strings" + "unicode" ) -var SplitFilter = regexp.MustCompile(`[\s?!.;,]`) -var IgnoredSymbols = regexp.MustCompile(`[-]`) +var ignoredSymbols = regexp.MustCompile(`[-]`) type WordFrequency struct { word string @@ -20,8 +20,8 @@ func Top10(inputLine string) []string { // prepare map with counter freqMap := make(map[string]int) - for _, word := range SplitFilter.Split(inputLine, -1) { - word = IgnoredSymbols.ReplaceAllString(word, "") + inputLine = ignoredSymbols.ReplaceAllString(inputLine, "") + for _, word := range strings.FieldsFunc(inputLine, splitFunc) { if len(word) > 0 { freqMap[strings.ToLower(word)]++ } @@ -46,3 +46,7 @@ func Top10(inputLine string) []string { return mostFrequentWords } + +func splitFunc(char rune) bool { + return unicode.IsPunct(char) || unicode.IsSpace(char) +} diff --git a/hw03_frequency_analysis/top_test.go b/hw03_frequency_analysis/top_test.go index 74c13df..03830c4 100644 --- a/hw03_frequency_analysis/top_test.go +++ b/hw03_frequency_analysis/top_test.go @@ -75,3 +75,9 @@ func TestExtendedTop10(t *testing.T) { assert.Equal(t, result, testCase.expected) } } + +func BenchmarkTop(b *testing.B) { + for i := 0; i < b.N; i++ { + Top10(text) + } +} From 0c3527f75cf7ce974130249d481d40a4e978c958 Mon Sep 17 00:00:00 2001 From: Andrey Kiselev Date: Sun, 14 Jun 2020 20:31:50 +0300 Subject: [PATCH 4/4] remove zero len condition --- hw03_frequency_analysis/top.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go index 647f193..29cb5e8 100644 --- a/hw03_frequency_analysis/top.go +++ b/hw03_frequency_analysis/top.go @@ -22,9 +22,7 @@ func Top10(inputLine string) []string { freqMap := make(map[string]int) inputLine = ignoredSymbols.ReplaceAllString(inputLine, "") for _, word := range strings.FieldsFunc(inputLine, splitFunc) { - if len(word) > 0 { - freqMap[strings.ToLower(word)]++ - } + freqMap[strings.ToLower(word)]++ } // convert map to slice wordFrequency for next sorting