diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..a7d3ab7
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,33 @@
+name: Test
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    types: [opened, synchronize]
+
+jobs:
+  test:
+    # The hosted ubuntu-20.04 image has been retired by GitHub, so the job
+    # is pinned to the next maintained LTS image instead.
+    runs-on: ubuntu-22.04
+
+    steps:
+      # A fresh runner has no copy of the repository, so the Makefile and the
+      # tests must be checked out before `make test` can find them.
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      # NOTE(review): assumes the project is pip-installable from the repo
+      # root (setup.py / pyproject.toml) - confirm, or install the
+      # requirements explicitly instead.
+      - name: Install package and test dependencies
+        run: python -m pip install . pytest
+
+      - name: Run tests
+        run: make test
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e0d0b98
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+# `test` is a command, not a file to build.
+.PHONY: test
+test:
+	pytest tests
diff --git a/notebooks/__init__.py b/notebooks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index bc69451..bb83594 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -32,16 +32,17 @@ "outputs": [], "source": [ "char_aug = CharAug(\n", - " unit_prob=0.3, # Процент фразы к которой будут применены аугментации\n", - " min_aug=1, # Минимальное количество аугментаций\n", - " max_aug=5, # Максимальное количество аугментаций\n", - " mult_num=3, # Максимальное количество повторений символов (только для метода multiply)\n", + " unit_prob=0.3,\n", + " min_aug=1,\n", + " max_aug=5,\n", + " mult_num=3,\n", " random_seed=42,\n", - " lang=\"eng\",\n", + " lang=\"rus\",\n", " platform=\"pc\",\n", " )\n", "\n", - "text = \"Screw you guys, I am going home. (c)\"" + "# text = \"Screw you guys, I am going home. (c)\"\n", + "text = \"Привет, как дела?\"" ] }, { @@ -56,7 +57,7 @@ { "data": { "text/plain": [ - "'Scorew yotu guys,u hI kam going home. (c)'" + "'Пнривыеут, как ждела?с'" ] }, "execution_count": 3, @@ -102,7 +103,7 @@ { "data": { "text/plain": [ - "'Screw YoU guys, I am going Home. (C)'" + "'ПРивЕт, каК дела?'" ] }, "execution_count": 5, @@ -127,7 +128,7 @@ { "data": { "text/plain": [ - "'Sedew you guya, I am going home. (c)'" + "'Пёевет, как дида?'" ] }, "execution_count": 6, @@ -152,7 +153,7 @@ { "data": { "text/plain": [ - "'Sxrew you gugs, I am going home. 
(x)'" + "'Привет, евк дела?'" ] }, "execution_count": 7, @@ -173,7 +174,7 @@ { "data": { "text/plain": [ - "'crew you guys Iam goinghme. (c)'" + "'Приеткк дла?'" ] }, "execution_count": 8, @@ -194,7 +195,7 @@ { "data": { "text/plain": [ - "'Screw you ughuys, I vam gcoing hxome. (c)'" + "'Пцриувет, кбак дьелба?'" ] }, "execution_count": 9, @@ -215,7 +216,7 @@ { "data": { "text/plain": [ - "'Screw yyou guyss, I am ggoinng home. (c)'" + "'Приивеет, какк дела?'" ] }, "execution_count": 10, @@ -236,7 +237,7 @@ { "data": { "text/plain": [ - "'Srcewy ou guys,I am oging hmoe. (c)'" + "'рПвие,т кка длеа?'" ] }, "execution_count": 11, @@ -257,16 +258,16 @@ { "data": { "text/plain": [ - "['Screw you guyss, I am going home. (c)',\n", - " 'Screw eou guys, I em goifg home. (c)',\n", + "['Scre you guy, Iam gng home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw you yuys, I am goibg hone. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw youg uys, Iam igong hmoe. (c)',\n", + " 'ScRew You guys6 I Am going home. (c)',\n", + " 'Screwyu guys, I am goig hoe. ()',\n", + " 'Screw you buys, I am go9mg ho,e. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Srcwe you gyus, Ia m giong home. (c)',\n", - " 'Screw you guys, I am going home. (c)']" + " 'Screw you guys, I am going home. (c)',\n", + " 'ScreW you guYs, I Am goIng home. 
(c0']" ] }, "execution_count": 12, @@ -280,6 +281,40 @@ "char_aug.aug_batch(text_list, batch_prob=0.5)" ] }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Привет, как дела?',\n", + " 'Питвет, как днда?',\n", + " 'Пииват, ка5 дела?',\n", + " 'Привет, как дела?',\n", + " 'Привет, как дела?']" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = \"Привет, как дела?\"\n", + "char_aug.aug_batch(\n", + " batch=[text] * 5,\n", + " batch_prob=0.5, \n", + " action=\"typo\"\n", + " )\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": 13, @@ -289,15 +324,15 @@ "data": { "text/plain": [ "['Screw you guys, I am going home. (c)',\n", + " 'Screw youu guys, II am goingg home. (c)',\n", + " 'Scrreew you guys, I am going home. (c)',\n", + " 'Screww you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw you guyss, I am goingg home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw you guys, II am going home. (c)',\n", - " 'Screw you guys, II am going hhome. (c))',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Scrrew you guys, I am goingg home. (c)',\n", - " 'Screw you guys, I am going home. (c)']" + " 'Screw you guys, I am going home. (c)',\n", + " 'Screw you guys, I am goinng home. (c)']" ] }, "execution_count": 13, @@ -335,15 +370,16 @@ "outputs": [], "source": [ "word_aug = WordAug(\n", - " unit_prob=0.4, # Процент фразы к которой будут применены аугментации\n", - " min_aug=1, # Минимальное количество аугментаций\n", - " max_aug=5, # Максимальное количество аугментаций\n", + " unit_prob=0.4,\n", + " min_aug=1,\n", + " max_aug=5,\n", " random_seed=42,\n", - " lang=\"eng\",\n", + " lang=\"rus\",\n", " platform=\"pc\",\n", " )\n", "\n", - "text = \"Screw you guys, I am going home. 
(c)\"" + "# text = \"Screw you guys, I am going home. (c)\"\n", + "text = \"Привет, как дела?\"" ] }, { @@ -354,7 +390,7 @@ { "data": { "text/plain": [ - "'S c r e w y o u guys, I am g o i n g home. (c)'" + "'Привет, как д е л а ?'" ] }, "execution_count": 16, @@ -403,7 +439,7 @@ { "data": { "text/plain": [ - "'Screw to guys, I to going com. (c)'" + "'пркет, как дела?'" ] }, "execution_count": 18, @@ -424,7 +460,7 @@ { "data": { "text/plain": [ - "'you I am home. (c)'" + "'как дела?'" ] }, "execution_count": 19, @@ -445,7 +481,7 @@ { "data": { "text/plain": [ - "'Screw I guys, am home. going you (c)'" + "'дела? как Привет,'" ] }, "execution_count": 20, @@ -466,7 +502,7 @@ { "data": { "text/plain": [ - "'like Screw you guys, I am going completely home. by the way (c)'" + "'Привет, скажем как дела?'" ] }, "execution_count": 21, @@ -487,7 +523,7 @@ { "data": { "text/plain": [ - "'Screw You guys, i Am going home. (c)'" + "'привет, как дела?'" ] }, "execution_count": 22, @@ -508,7 +544,7 @@ { "data": { "text/plain": [ - "'Screw you guys, I am going home. (c)'" + "'👉, как дела?'" ] }, "execution_count": 23, @@ -529,7 +565,7 @@ { "data": { "text/plain": [ - "'Screw y o u guys, I am going h o m e . (c)'" + "'П р и в е т , как дела?'" ] }, "execution_count": 24, @@ -550,7 +586,7 @@ { "data": { "text/plain": [ - "'Scren you guys, I am going home. (c)'" + "'Привет, как дела?'" ] }, "execution_count": 25, @@ -572,15 +608,15 @@ "data": { "text/plain": [ "['Screw you guys, I am going home. (c)',\n", - " 'I am guys, Screw you going (c) home.',\n", - " 'Screw you am going (c)',\n", + " 'S c r e w you guys, I am going h o m e . ( c )',\n", + " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", + " 'Screw guys, am going (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw you gugs, I am going hsme. (c)',\n", - " 'Scerw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. 
(c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'completely Screw you guys, I literally am going you know home. (c)']" + " 'Screw am I you guys, going home. (c)',\n", + " 'Screw you guys, I am going home. (c)']" ] }, "execution_count": 26, @@ -602,15 +638,15 @@ "data": { "text/plain": [ "['Screw you guys, I am going home. (c)',\n", - " 'screwed your guys, I am long home. (c)',\n", + " 'Screw you guys, I am going home. c',\n", + " 'Screw you guys, I am going home. (c)',\n", + " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw you boys, im am going hoem. (c)',\n", - " 'Screw yo guy, to am going home. (c)',\n", - " 'Screw do guys, I i guig home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", " 'Screw you guys, I am going home. (c)',\n", - " 'Screw so guys, I am going homes. wo']" + " 'Screw you guys, I am going home. (c)',\n", + " 'Screw you guys, I am going home. (c)']" ] }, "execution_count": 27,
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_charaug.py b/tests/test_charaug.py
new file mode 100644
index 0000000..ef25623
--- /dev/null
+++ b/tests/test_charaug.py
@@ -0,0 +1,100 @@
+"""Snapshot tests for ``augmentex.CharAug``.
+
+Four augmenters (Russian/English x pc/mobile) are created once at import
+time with ``random_seed=42`` and shared by every test in this module.
+
+NOTE(review): if an augmenter keeps random state between calls, the expected
+strings depend on the order of the tests and of the calls inside them -
+confirm before reordering anything.
+"""
+
+from augmentex import CharAug
+
+
+def _build(lang: str, platform: str) -> CharAug:
+    """Create a ``CharAug`` with the settings shared by all tests here."""
+    return CharAug(
+        unit_prob=0.3,
+        min_aug=1,
+        max_aug=5,
+        mult_num=3,
+        random_seed=42,
+        lang=lang,
+        platform=platform,
+    )
+
+
+def _check(aug: CharAug, text: str, action: str, expected: str) -> None:
+    """Assert that ``aug`` turns ``text`` into ``expected`` for ``action``."""
+    assert aug.augment(text=text, action=action) == expected
+
+
+char_aug_rus_pc = _build("rus", "pc")
+char_aug_rus_mobile = _build("rus", "mobile")
+char_aug_eng_pc = _build("eng", "pc")
+char_aug_eng_mobile = _build("eng", "mobile")
+
+text_rus = "Привет, как дела?"
+text_eng = "I am going home."
+
+
+def test_methods() -> None:
+    """The augmenter lists its seven character-level actions in order."""
+    expected = ["shift", "orfo", "typo", "delete", "multiply", "swap", "insert"]
+    assert char_aug_rus_pc.actions_list == expected
+
+
+def test_shift() -> None:
+    """Pin the ``shift`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "shift", "приВЕт, как дела?")
+    _check(char_aug_rus_mobile, text_rus, "shift", "ПРивЕт, каК дела?")
+    _check(char_aug_eng_pc, text_eng, "shift", "i Am gOinG home.")
+    _check(char_aug_eng_mobile, text_eng, "shift", "i aM going hoMe.")
+
+
+def test_orfo() -> None:
+    """Pin the ``orfo`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "orfo", "Приыет, еак дела?")
+    _check(char_aug_rus_mobile, text_rus, "orfo", "Привит, кек доло?")
+    _check(char_aug_eng_pc, text_eng, "orfo", "I om going hamt.")
+    _check(char_aug_eng_mobile, text_eng, "orfo", "I wm aoing hope.")
+
+
+def test_typo() -> None:
+    """Pin the ``typo`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "typo", "Ппивет, каа деоа?")
+    _check(char_aug_rus_mobile, text_rus, "typo", "Приаео, квк днла?")
+    _check(char_aug_eng_pc, text_eng, "typo", "I am goijg bime.")
+    _check(char_aug_eng_mobile, text_eng, "typo", "I am gpijg homw.")
+
+
+def test_delete() -> None:
+    """Pin the ``delete`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "delete", "Првет, к дл?")
+    _check(char_aug_rus_mobile, text_rus, "delete", "ивет какдела")
+    _check(char_aug_eng_pc, text_eng, "delete", "Iamgong hme.")
+    _check(char_aug_eng_mobile, text_eng, "delete", "Iam gng hoe.")
+
+
+def test_insert() -> None:
+    """Pin the ``insert`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "insert", "Приветв, кщако денлъа?")
+    _check(char_aug_rus_mobile, text_rus, "insert", "Привретё, какг удела?з")
+    _check(char_aug_eng_pc, text_eng, "insert", "I am goinyg rhnomze.")
+    _check(char_aug_eng_mobile, text_eng, "insert", "I aim goingz uhome.b")
+
+
+def test_multiply() -> None:
+    """Pin the ``multiply`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "multiply", "Привеетт, как дела?")
+    _check(char_aug_rus_mobile, text_rus, "multiply", "Привет, как деелла?")
+    _check(char_aug_eng_pc, text_eng, "multiply", "I am going homme.")
+    _check(char_aug_eng_mobile, text_eng, "multiply", "I am going home.")
+
+
+def test_swap() -> None:
+    """Pin the ``swap`` output for every language/platform pair."""
+    _check(char_aug_rus_pc, text_rus, "swap", "Првие, ткка деал?")
+    _check(char_aug_rus_mobile, text_rus, "swap", "Пиревт, какд лае?")
+    _check(char_aug_eng_pc, text_eng, "swap", "I am ginogh moe.")
+    _check(char_aug_eng_mobile, text_eng, "swap", "I am ogngih ome.")
diff --git a/tests/test_wordaug.py b/tests/test_wordaug.py
new file mode 100644
index 0000000..aa40564
--- /dev/null
+++ b/tests/test_wordaug.py
@@ -0,0 +1,110 @@
+"""Snapshot tests for ``augmentex.WordAug``.
+
+Four augmenters (Russian/English x pc/mobile) are created once at import
+time with ``random_seed=42`` and shared by every test in this module.
+
+NOTE(review): if an augmenter keeps random state between calls, the expected
+strings depend on the order of the tests and of the calls inside them -
+confirm before reordering anything.
+"""
+
+from augmentex import WordAug
+
+
+def _build(lang: str, platform: str) -> WordAug:
+    """Create a ``WordAug`` with the settings shared by all tests here."""
+    return WordAug(
+        unit_prob=0.4,
+        min_aug=1,
+        max_aug=5,
+        random_seed=42,
+        lang=lang,
+        platform=platform,
+    )
+
+
+def _check(aug: WordAug, text: str, action: str, expected: str) -> None:
+    """Assert that ``aug`` turns ``text`` into ``expected`` for ``action``."""
+    assert aug.augment(text=text, action=action) == expected
+
+
+word_aug_rus_pc = _build("rus", "pc")
+word_aug_rus_mobile = _build("rus", "mobile")
+word_aug_eng_pc = _build("eng", "pc")
+word_aug_eng_mobile = _build("eng", "mobile")
+
+text_rus = "Привет, как дела?"
+text_eng = "I am going home."
+
+
+def test_methods() -> None:
+    """The augmenter lists its eight word-level actions in order."""
+    expected = [
+        "replace", "delete", "swap", "stopword",
+        "reverse", "text2emoji", "split", "ngram",
+    ]
+    assert word_aug_rus_pc.actions_list == expected
+
+
+def test_replace() -> None:
+    """Pin the ``replace`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "replace", "Привет, как делло?")
+    _check(word_aug_rus_mobile, text_rus, "replace", "Привет, ккак дела?")
+    _check(word_aug_eng_pc, text_eng, "replace", "I am gone home.")
+    _check(word_aug_eng_mobile, text_eng, "replace", "I m going home.")
+
+
+def test_delete() -> None:
+    """Pin the ``delete`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "delete", "как дела?")
+    _check(word_aug_rus_mobile, text_rus, "delete", "Привет, как")
+    _check(word_aug_eng_pc, text_eng, "delete", "I am going")
+    _check(word_aug_eng_mobile, text_eng, "delete", "am going home.")
+
+
+def test_swap() -> None:
+    """Pin the ``swap`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "swap", "как Привет, дела?")
+    _check(word_aug_rus_mobile, text_rus, "swap", "как Привет, дела?")
+    _check(word_aug_eng_pc, text_eng, "swap", "I am going home.")
+    _check(word_aug_eng_mobile, text_eng, "swap", "I am going home.")
+
+
+def test_stopword() -> None:
+    """Pin the ``stopword`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "stopword", "Привет, как в общем-то дела?")
+    _check(word_aug_rus_mobile, text_rus, "stopword", "Привет, хотя как дела?")
+    _check(word_aug_eng_pc, text_eng, "stopword", "totally I am going home.")
+    _check(word_aug_eng_mobile, text_eng, "stopword", "I am going okay home.")
+
+
+def test_reverse() -> None:
+    """Pin the ``reverse`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "reverse", "Привет, Как дела?")
+    _check(word_aug_rus_mobile, text_rus, "reverse", "Привет, как Дела?")
+    _check(word_aug_eng_pc, text_eng, "reverse", "I am going Home.")
+    _check(word_aug_eng_mobile, text_eng, "reverse", "I am Going home.")
+
+
+def test_text2emoji() -> None:
+    """Pin the ``text2emoji`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "text2emoji", "Привет, как дела?")
+    _check(word_aug_rus_mobile, text_rus, "text2emoji", "✋, как дела?")
+    _check(word_aug_eng_pc, text_eng, "text2emoji", "ℹ am going home.")
+    _check(word_aug_eng_mobile, text_eng, "text2emoji", "I am going home.")
+
+
+def test_split() -> None:
+    """Pin the ``split`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "split", "Привет, как д е л а ?")
+    _check(word_aug_rus_mobile, text_rus, "split", "Привет, к а к дела?")
+    _check(word_aug_eng_pc, text_eng, "split", "I am going home.")
+    _check(word_aug_eng_mobile, text_eng, "split", "I am g o i n g home.")
+
+
+def test_ngram() -> None:
+    """Pin the ``ngram`` output for every language/platform pair."""
+    _check(word_aug_rus_pc, text_rus, "ngram", "Привет, как дела?")
+    _check(word_aug_rus_mobile, text_rus, "ngram", "Пбриет, как дела?")
+    _check(word_aug_eng_pc, text_eng, "ngram", "I am going hoom.")
+    _check(word_aug_eng_mobile, text_eng, "ngram", "I am going home.")