Skip to content

Commit 329812e

Browse files
authored
Merge pull request #483 from PyThaiNLP/korakot-patch-1
Add method to remove a word from trie
2 parents c932d3f + aaced0b commit 329812e

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

pythainlp/util/trie.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,32 @@ def add(self, word: str) -> None:
4040
cur = child
4141
cur.end = True
4242

43+
def remove(self, word: str) -> None:
    """
    Remove a word from the trie.
    If the word is not found, do nothing.

    :param str word: a word
    """
    # Remove from the word set first; a word absent from the set is
    # treated as absent from the trie.
    if word not in self.words:
        return
    self.words.remove(word)

    # Walk down from the root, recording every (parent, child, char) edge
    # so the path can be pruned bottom-up afterwards.
    # NOTE(review): assumes every word in self.words has its full path in
    # the node tree (a missing edge raises KeyError here).
    node = self.root
    path = []  # edges from the root to the word's terminal node
    for ch in word:
        child = node.children[ch]
        path.append((node, child, ch))
        node = child

    # Unmark the terminal node. The traversal cursor is used instead of the
    # loop variable so this also works for the empty word, whose terminal
    # node is the root itself (the loop body never runs in that case).
    node.end = False

    # Prune up the tree: drop nodes that neither end a word nor lead to
    # one, stopping at the first node that is still needed.
    for parent, child, ch in reversed(path):
        if child.end or child.children:
            break
        del parent.children[ch]  # remove from parent dict
68+
4369
def prefixes(self, text: str) -> List[str]:
4470
"""
4571
List all possible words from first sequence of characters in a word.
@@ -71,11 +97,11 @@ def __len__(self) -> int:
7197

7298
def dict_trie(dict_source: Union[str, Iterable[str], Trie]) -> Trie:
7399
"""
74-
Create a dictionary trie from a string or an iterable.
100+
Create a dictionary trie from a file or an iterable.
75101
76102
:param str|Iterable[str]|pythainlp.util.Trie dict_source: a path to
77103
dictionary file or a list of words or a pythainlp.util.Trie object
78-
:return: a trie object created from a dictionary input
104+
:return: a trie object
79105
:rtype: pythainlp.util.Trie
80106
"""
81107
trie = None

tests/test_util.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,15 @@ def test_trie(self):
388388
self.assertEqual(len(trie), 4)
389389
self.assertEqual(len(trie.prefixes("ทดสอบ")), 2)
390390

391+
trie.remove("ทบ")
392+
trie.remove("ทด")
393+
self.assertEqual(len(trie), 2)
394+
395+
trie = Trie([])
396+
self.assertEqual(len(trie), 0)
397+
trie.remove("หมด")
398+
self.assertEqual(len(trie), 0)
399+
391400
self.assertIsNotNone(dict_trie(Trie(["ลอง", "ลาก"])))
392401
self.assertIsNotNone(dict_trie(("ลอง", "สร้าง", "Trie", "ลน")))
393402
self.assertIsNotNone(dict_trie(["ลอง", "สร้าง", "Trie", "ลน"]))

0 commit comments

Comments
 (0)