diff --git a/README.md b/README.md index 33240b65..5504daaa 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ This is an on-going development so many improvements are still being made. Comme ## Environments - Python: 3.10+ -- CUDA: 11.8, 12.1 (if training neural networks by GPU) -- Pytorch: 2.0.1+ +- CUDA: 11.8, 12.1, 12.6 (if training neural networks by GPU) +- Pytorch: 2.3.0+ If you have a different version of CUDA, follow the installation instructions for PyTorch LTS at their [website](https://pytorch.org/). diff --git a/docs/cli/nn.rst b/docs/cli/nn.rst index efe2f5f2..102231fa 100644 --- a/docs/cli/nn.rst +++ b/docs/cli/nn.rst @@ -77,7 +77,7 @@ If a model was trained before by this package, the training procedure can start To use your own word embeddings or vocabulary set, specify the following parameters: -- **embed_file**: choose one of the pretrained embeddings defined in `torchtext `_ or specify the path to your word embeddings with each line containing a word followed by its vectors. Example: +- **embed_file**: choose one of the pretrained embeddings: `glove.6B.50d`, `glove.6B.100d`, `glove.6B.200d`, `glove.6B.300d`, `glove.42B.300d`, `glove.840B.300d`, or specify the path to your word embeddings with each line containing a word followed by its vectors. Example: .. code-block:: diff --git a/docs/examples/plot_KimCNN_quickstart.py b/docs/examples/plot_KimCNN_quickstart.py index 39efd6ba..49ae1f0d 100644 --- a/docs/examples/plot_KimCNN_quickstart.py +++ b/docs/examples/plot_KimCNN_quickstart.py @@ -32,7 +32,7 @@ # To run KimCNN, LibMultiLabel tokenizes documents and uses an embedding vector for each word. # Thus, ``tokenize_text=True`` is set. # -# We choose ``glove.6B.300d`` from torchtext as embedding vectors. +# We choose ``glove.6B.300d`` as embedding vectors. datasets = load_datasets("data/rcv1/train.txt", "data/rcv1/test.txt", tokenize_text=True) classes = load_or_build_label(datasets) diff --git a/libmultilabel/nn/attentionxml.py b/libmultilabel/nn/attentionxml.py index b54776ac..747f1b05 100644 --- a/libmultilabel/nn/attentionxml.py +++ b/libmultilabel/nn/attentionxml.py @@ -489,7 +489,7 @@ def reformat_text(self, dataset): # Convert words to numbers according to their indices in word_dict. Then pad each instance to a certain length. 
         encoded_text = list(
             map(
-                lambda text: torch.tensor([self.word_dict[word] for word in text], dtype=torch.int64)
+                lambda text: torch.tensor([self.word_dict.get(word, self.word_dict[UNK]) for word in text], dtype=torch.int64)
                 if text
                 else torch.tensor([self.word_dict[UNK]], dtype=torch.int64),
                 [instance["text"][: self.max_seq_length] for instance in dataset],
diff --git a/libmultilabel/nn/data_utils.py b/libmultilabel/nn/data_utils.py
index 1d48bb06..950e9669 100644
--- a/libmultilabel/nn/data_utils.py
+++ b/libmultilabel/nn/data_utils.py
@@ -1,7 +1,12 @@
 import csv
 import gc
 import logging
+import os
+import re
 import warnings
+import zipfile
+from urllib.request import urlretrieve
+from collections import Counter, OrderedDict

 import pandas as pd
 import torch
@@ -11,7 +16,6 @@
 from sklearn.preprocessing import MultiLabelBinarizer
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import Dataset
-from torchtext.vocab import build_vocab_from_iterator, pretrained_aliases, Vocab
 from tqdm import tqdm

 transformers.logging.set_verbosity_error()
@@ -19,6 +23,14 @@

 UNK = "<unk>"
 PAD = "<pad>"
+GLOVE_WORD_EMBEDDING = {
+    "glove.42B.300d",
+    "glove.840B.300d",
+    "glove.6B.50d",
+    "glove.6B.100d",
+    "glove.6B.200d",
+    "glove.6B.300d",
+}


 class TextDataset(Dataset):
@@ -31,8 +43,7 @@ class TextDataset(Dataset):
         add_special_tokens (bool, optional): Whether to add the special tokens. Defaults to True.
         tokenizer (transformers.PreTrainedTokenizerBase, optional): HuggingFace's tokenizer of
             the transformer-based pretrained language model. Defaults to None.
-        word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to
-            map tokens to indices. Defaults to None.
+        word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None.
     """

     def __init__(
@@ -55,7 +66,7 @@ def __init__(
         self.num_classes = len(self.classes)
         self.label_binarizer = MultiLabelBinarizer().fit([classes])

-        if not isinstance(self.word_dict, Vocab) ^ isinstance(self.tokenizer, transformers.PreTrainedTokenizerBase):
+        if not isinstance(self.word_dict, dict) ^ isinstance(self.tokenizer, transformers.PreTrainedTokenizerBase):
             raise ValueError("Please specify exactly one of word_dict or tokenizer")

     def __len__(self):
@@ -71,7 +82,7 @@ def __getitem__(self, index):
             else:
                 input_ids = self.tokenizer.encode(data["text"], add_special_tokens=False)
         else:
-            input_ids = [self.word_dict[word] for word in data["text"]]
+            input_ids = [self.word_dict.get(word, self.word_dict[UNK]) for word in data["text"]]
         return {
             "text": torch.LongTensor(input_ids[: self.max_seq_length]),
             "label": torch.IntTensor(self.label_binarizer.transform([data["label"]])[0]),
@@ -128,8 +139,7 @@ def get_dataset_loader(
         add_special_tokens (bool, optional): Whether to add the special tokens. Defaults to True.
         tokenizer (transformers.PreTrainedTokenizerBase, optional): HuggingFace's tokenizer of
             the transformer-based pretrained language model. Defaults to None.
-        word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to
-            map tokens to indices. Defaults to None.
+        word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None.

     Returns:
         torch.utils.data.DataLoader: A pytorch DataLoader.
@@ -154,6 +164,7 @@ def _load_raw_data(data, is_test=False, tokenize_text=True, remove_no_label_data
     Args:
         data (Union[str, pandas,.Dataframe]): Training, test, or validation data in file or dataframe.
         is_test (bool, optional): Whether the data is for test or not. Defaults to False.
+ tokenize_text (bool, optional): Whether to tokenize text. Defaults to True. remove_no_label_data (bool, optional): Whether to remove training/validation instances that have no labels. This is effective only when is_test=False. Defaults to False. @@ -265,20 +276,19 @@ def load_or_build_text_dict( ): """Build or load the vocabulary from the training dataset or the predefined `vocab_file`. The pretrained embedding can be either from a self-defined `embed_file` or from one of - the vectors defined in torchtext.vocab.pretrained_aliases - (https://github.com/pytorch/text/blob/main/torchtext/vocab/vectors.py). + the vectors: `glove.6B.50d`, `glove.6B.100d`, `glove.6B.200d`, `glove.6B.300d`, `glove.42B.300d`, or `glove.840B.300d`. Args: dataset (list): List of training instances with index, label, and tokenized text. vocab_file (str, optional): Path to a file holding vocabuaries. Defaults to None. min_vocab_freq (int, optional): The minimum frequency needed to include a token in the vocabulary. Defaults to 1. - embed_file (str): Path to a file holding pre-trained embeddings. + embed_file (str): Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding. Defaults to None. embed_cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None. silent (bool, optional): Enable silent mode. Defaults to False. normalize_embed (bool, optional): Whether the embeddings of each word is normalized to a unit vector. Defaults to False. Returns: - tuple[torchtext.vocab.Vocab, torch.Tensor]: A vocab object which maps tokens to indices and the pre-trained word vectors of shape (vocab_size, embed_dim). + tuple[dict, torch.Tensor]: A dictionary which maps tokens to indices and the pre-trained word vectors of shape (vocab_size, embed_dim). """ if vocab_file: logging.info(f"Load vocab from {vocab_file}") @@ -286,14 +296,14 @@ def load_or_build_text_dict( vocab_list = [[vocab.strip() for vocab in fp.readlines()]] # Keep PAD index 0 to align `padding_idx` of # class Embedding in libmultilabel.nn.networks.modules. - vocabs = build_vocab_from_iterator(vocab_list, min_freq=1, specials=[PAD, UNK]) + word_dict = _build_word_dict(vocab_list, min_vocab_freq=1, specials=[PAD, UNK]) else: vocab_list = [set(data["text"]) for data in dataset] - vocabs = build_vocab_from_iterator(vocab_list, min_freq=min_vocab_freq, specials=[PAD, UNK]) - vocabs.set_default_index(vocabs[UNK]) - logging.info(f"Read {len(vocabs)} vocabularies.") + word_dict = _build_word_dict(vocab_list, min_vocab_freq=min_vocab_freq, specials=[PAD, UNK]) + + logging.info(f"Read {len(word_dict)} vocabularies.") - embedding_weights = get_embedding_weights_from_file(vocabs, embed_file, silent, embed_cache_dir) + embedding_weights = get_embedding_weights_from_file(word_dict, embed_file, silent, embed_cache_dir) if normalize_embed: # To have better precision for calculating the normalization, we convert the original @@ -306,7 +316,41 @@ def load_or_build_text_dict( embedding_weights[i] = vector / float(torch.linalg.norm(vector) + 1e-6) embedding_weights = embedding_weights.float() - return vocabs, embedding_weights + return word_dict, embedding_weights + + +def _build_word_dict(vocab_list, min_vocab_freq=1, specials=None): + r"""Build word dictionary, modified from `torchtext.vocab.build-vocab-from-iterator` + (https://docs.pytorch.org/text/stable/vocab.html#build-vocab-from-iterator) + + Args: + vocab_list: List of words. 
+        min_vocab_freq (int, optional): The minimum frequency needed to include a token in the vocabulary. Defaults to 1.
+        specials: Special tokens (e.g., <pad>, <unk>) to add. Defaults to None.
+
+    Returns:
+        dict: A dictionary which maps tokens to indices.
+    """
+
+    counter = Counter()
+    for tokens in vocab_list:
+        counter.update(tokens)
+
+    # sort by descending frequency, then lexicographically
+    sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
+    ordered_dict = OrderedDict(sorted_by_freq_tuples)
+
+    # add special tokens at the beginning
+    tokens = specials or []
+    for token, freq in ordered_dict.items():
+        if freq >= min_vocab_freq:
+            tokens.append(token)
+
+    # build token to indices dict
+    word_dict = dict()
+    for idx, token in enumerate(tokens):
+        word_dict[token] = idx
+    return word_dict


 def load_or_build_label(datasets, label_file=None, include_test_labels=False):
@@ -344,70 +388,84 @@ def load_or_build_label(datasets, label_file=None, include_test_labels=False):
     return classes


-def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache=None):
-    """If the word exists in the embedding file, load the pretrained word embedding.
-    Otherwise, assign a zero vector to that word.
+def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache_dir=None):
+    """Obtain the word embeddings from file. If the word exists in the embedding file,
+    load the pretrained word embedding. Otherwise, assign a zero vector to that word.
+    If the given `embed_file` is the name of a pretrained GloVe embedding, the function
+    will first download the corresponding file.

     Args:
-        word_dict (torchtext.vocab.Vocab): A vocab object which maps tokens to indices.
-        embed_file (str): Path to a file holding pre-trained embeddings.
+        word_dict (dict): A dictionary for mapping tokens to indices.
+        embed_file (str): Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding.
         silent (bool, optional): Enable silent mode. Defaults to False.
-        cache (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
+        cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None.

     Returns:
         torch.Tensor: Embedding weights (vocab_size, embed_size).
     """
-    # Load pretrained word embedding
-    load_embedding_from_file = embed_file not in pretrained_aliases
-    if load_embedding_from_file:
-        logging.info(f"Load pretrained embedding from file: {embed_file}.")
-        with open(embed_file) as f:
-            word_vectors = f.readlines()
-        embed_size = len(word_vectors[0].split()) - 1
-        vector_dict = {}
-        for word_vector in tqdm(word_vectors, disable=silent):
-            word, vector = word_vector.rstrip().split(" ", 1)
-            vector = torch.Tensor(list(map(float, vector.split())))
-            vector_dict[word] = vector
-    else:
-        logging.info(f"Load pretrained embedding from torchtext.")
-        # Adapted from https://pytorch.org/text/0.9.0/_modules/torchtext/vocab.html#Vocab.load_vectors.
- if embed_file not in pretrained_aliases: - raise ValueError( - "Got embed_file {}, but allowed pretrained " - "vectors are {}".format(embed_file, list(pretrained_aliases.keys())) - ) - - # Hotfix: Glove URLs are outdated in Torchtext - # (https://github.com/pytorch/text/blob/main/torchtext/vocab/vectors.py#L213-L217) - pretrained_cls = pretrained_aliases[embed_file] - if embed_file.startswith("glove"): - for name, url in pretrained_cls.func.url.items(): - file_name = url.split("/")[-1] - pretrained_cls.func.url[name] = f"https://huggingface.co/stanfordnlp/glove/resolve/main/{file_name}" - - vector_dict = pretrained_cls(cache=cache) - embed_size = vector_dict.dim - embedding_weights = torch.zeros(len(word_dict), embed_size) + if embed_file in GLOVE_WORD_EMBEDDING: + embed_file = _download_glove_embedding(embed_file, cache_dir=cache_dir) + elif not os.path.isfile(embed_file): + raise ValueError( + "Got embed_file {}, but allowed pretrained " "embeddings are {}".format(embed_file, GLOVE_WORD_EMBEDDING) + ) + + logging.info(f"Load pretrained embedding from {embed_file}.") + with open(embed_file) as f: + word_vectors = f.readlines() + embed_size = len(word_vectors[0].split()) - 1 - if load_embedding_from_file: - # Add UNK embedding - # AttentionXML: np.random.uniform(-1.0, 1.0, embed_size) - # CAML: np.random.randn(embed_size) - unk_vector = torch.randn(embed_size) - embedding_weights[word_dict[UNK]] = unk_vector + vector_dict = {} + for word_vector in tqdm(word_vectors, disable=silent): + word, vector = word_vector.rstrip().split(" ", 1) + vector = torch.Tensor(list(map(float, vector.split()))) + vector_dict[word] = vector + + embedding_weights = torch.zeros(len(word_dict), embed_size) + # Add UNK embedding + # AttentionXML: np.random.uniform(-1.0, 1.0, embed_size) + # CAML: np.random.randn(embed_size) + unk_vector = torch.randn(embed_size) + embedding_weights[word_dict[UNK]] = unk_vector # Store pretrained word embedding vec_counts = 0 - for word in word_dict.get_itos(): - # The condition can be used to process the word that does not in the embedding file. - # Note that torchtext vector object has already dealt with this, - # so we can directly make a query without addtional handling. - if (load_embedding_from_file and word in vector_dict) or not load_embedding_from_file: + for word in word_dict.keys(): + if word in vector_dict: embedding_weights[word_dict[word]] = vector_dict[word] vec_counts += 1 - logging.info(f"loaded {vec_counts}/{len(word_dict)} word embeddings") + logging.info(f"Loaded {vec_counts}/{len(word_dict)} word embeddings") return embedding_weights + + +def _download_glove_embedding(embed_name, cache_dir=None): + """Download pretrained glove embedding from https://huggingface.co/stanfordnlp/glove/tree/main. + + Args: + embed_name (str): The name of the pretrained GloVe embedding. Defaults to None. + cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None. + + Returns: + str: Path to the file that contains the cached embeddings. 
+ """ + cache_dir = ".vector_cache" if cache_dir is None else cache_dir + cached_embed_file = f"{cache_dir}/{embed_name}.txt" + if os.path.isfile(cached_embed_file): + return cached_embed_file + os.makedirs(cache_dir, exist_ok=True) + + remote_embed_file = re.sub(r"6B.*", "6B", embed_name) + ".zip" + url = f"https://huggingface.co/stanfordnlp/glove/resolve/main/{remote_embed_file}" + logging.info(f"Downloading pretrained embeddings from {url}.") + try: + zip_file, _ = urlretrieve(url, f"{cache_dir}/{remote_embed_file}") + with zipfile.ZipFile(zip_file, "r") as zf: + zf.extractall(cache_dir) + except Exception as e: + os.remove(zip_file) + raise e + logging.info(f"Downloaded pretrained embeddings {embed_name} to {cached_embed_file}.") + return cached_embed_file diff --git a/libmultilabel/nn/model.py b/libmultilabel/nn/model.py index a1dcf070..f7f76439 100644 --- a/libmultilabel/nn/model.py +++ b/libmultilabel/nn/model.py @@ -181,7 +181,7 @@ class Model(MultiLabelModel): Args: classes (list): List of class names. - word_dict (torchtext.vocab.Vocab): A vocab object which maps tokens to indices. + word_dict (dict): A dictionary for mapping tokens to indices. network (nn.Module): Network (i.e., CAML, KimCNN, or XMLCNN). loss_function (str, optional): Loss function name (i.e., binary_cross_entropy_with_logits, cross_entropy). Defaults to 'binary_cross_entropy_with_logits'. diff --git a/libmultilabel/nn/nn_utils.py b/libmultilabel/nn/nn_utils.py index a4ac82c2..f9107d01 100644 --- a/libmultilabel/nn/nn_utils.py +++ b/libmultilabel/nn/nn_utils.py @@ -61,8 +61,7 @@ def init_model( model_name (str): Model to be used such as KimCNN. network_config (dict): Configuration for defining the network. classes (list): List of class names. - word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to - map tokens to indices. Defaults to None. + word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None. embed_vecs (torch.Tensor, optional): The pre-trained word vectors of shape (vocab_size, embed_dim). Defaults to None. init_weight (str): Weight initialization method from `torch.nn.init`. diff --git a/main.py b/main.py index 12564f6b..3a1aa98c 100644 --- a/main.py +++ b/main.py @@ -141,7 +141,7 @@ def add_all_arguments(parser): # pretrained vocab / embeddings parser.add_argument("--vocab_file", type=str, help="Path to a file holding vocabuaries (default: %(default)s)") parser.add_argument( - "--embed_file", type=str, help="Path to a file holding pre-trained embeddings (default: %(default)s)" + "--embed_file", type=str, help="Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding (default: %(default)s)" ) parser.add_argument("--label_file", type=str, help="Path to a file holding all labels (default: %(default)s)") diff --git a/requirements_nn.txt b/requirements_nn.txt index 2c0da6a4..edd34545 100644 --- a/requirements_nn.txt +++ b/requirements_nn.txt @@ -1,8 +1,6 @@ nltk lightning # https://github.com/pytorch/text/releases -torch<=2.3 +torch torchmetrics==0.10.3 -torchtext -# https://github.com/huggingface/transformers/issues/38464 -transformers<=4.51.3 +transformers diff --git a/search_params.py b/search_params.py index aad38ece..c4d8b9db 100644 --- a/search_params.py +++ b/search_params.py @@ -25,8 +25,8 @@ def train_libmultilabel_tune(config, datasets, classes, word_dict): Args: config (dict): Config of the experiment. datasets (dict): A dictionary of datasets. - classes(list): List of class names. 
- word_dict(torchtext.vocab.Vocab): A vocab object which maps tokens to indices. + classes (list): List of class names. + word_dict (dict): A dictionary for mapping tokens to indices. """ # ray convert AttributeDict to dict diff --git a/setup.cfg b/setup.cfg index a676ce91..778e0958 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = libmultilabel -version = 0.8.0 +version = 0.8.1 author = LibMultiLabel Team license = MIT License license_file = LICENSE @@ -12,7 +12,7 @@ project_urls = Documentation = https://www.csie.ntu.edu.tw/~cjlin/libmultilabel Source Code = https://github.com/ASUS-AICS/LibMultiLabel/ classifiers = - Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.1 + Environment :: GPU :: NVIDIA CUDA :: 12 Environment :: GPU :: NVIDIA CUDA :: 11.8 Intended Audience :: Developers Intended Audience :: Education @@ -21,6 +21,9 @@ classifiers = Operating System :: OS Independent Programming Language :: Python :: 3 Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + Programming Language :: Python :: 3.13 [options] packages = find: @@ -40,10 +43,9 @@ python_requires = >=3.10 nn = lightning nltk - torch<=2.3 + torch torchmetrics==0.10.3 - torchtext - transformers<=4.51.3 + transformers [options.packages.find] diff --git a/tests/nn/components.py b/tests/nn/components.py index b74a1c15..bcfbcd68 100644 --- a/tests/nn/components.py +++ b/tests/nn/components.py @@ -20,7 +20,7 @@ def get_name(self): return "token_to_id" def get_from_trainer(self, trainer): - return trainer.model.word_dict.get_stoi() + return trainer.model.word_dict def compare(self, a, b): return a == b diff --git a/torch_trainer.py b/torch_trainer.py index 8dc259b5..a7f0641d 100644 --- a/torch_trainer.py +++ b/torch_trainer.py @@ -19,7 +19,7 @@ class TorchTrainer: config (AttributeDict): Config of the experiment. datasets (dict, optional): Datasets for training, validation, and test. Defaults to None. classes(list, optional): List of class names. - word_dict(torchtext.vocab.Vocab, optional): A vocab object which maps tokens to indices. + word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None. embed_vecs (torch.Tensor, optional): The pre-trained word vectors of shape (vocab_size, embed_dim). save_checkpoints (bool, optional): Whether to save the last and the best checkpoint or not. Defaults to True. @@ -136,7 +136,7 @@ def _setup_model( Args: classes(list): List of class names. - word_dict(torchtext.vocab.Vocab): A vocab object which maps tokens to indices. + word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None. embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim). log_path (str): Path to the log file. The log file contains the validation results for each epoch and the test results. If the `log_path` is None, no performance
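
For reference, the dict-based vocabulary that replaces `torchtext.vocab.Vocab` in this patch can be exercised on its own. The sketch below is illustrative rather than part of the patch: the `build_word_dict` helper and the toy corpus are invented for the example, but the logic mirrors the new `_build_word_dict` in `libmultilabel/nn/data_utils.py` and the `word_dict.get(word, word_dict[UNK])` lookup now used by `TextDataset` and `attentionxml.py`:

    from collections import Counter, OrderedDict

    PAD, UNK = "<pad>", "<unk>"

    def build_word_dict(vocab_list, min_vocab_freq=1, specials=(PAD, UNK)):
        # Count tokens, sort by descending frequency then lexicographically,
        # place the special tokens first, and assign consecutive indices.
        counter = Counter()
        for tokens in vocab_list:
            counter.update(tokens)
        ordered = OrderedDict(sorted(counter.items(), key=lambda x: (-x[1], x[0])))
        kept = list(specials) + [t for t, f in ordered.items() if f >= min_vocab_freq]
        return {token: idx for idx, token in enumerate(kept)}

    word_dict = build_word_dict([{"deep", "learning"}, {"deep", "text"}])
    assert word_dict[PAD] == 0  # PAD stays at index 0, aligning with padding_idx in the networks
    ids = [word_dict.get(w, word_dict[UNK]) for w in ["deep", "unseen"]]  # OOV words fall back to <unk>

    # With the patched API (hedged, following the docstrings in data_utils.py), the same kind of
    # dictionary plus GloVe weights would come from something like:
    #   word_dict, embed_vecs = load_or_build_text_dict(dataset=datasets["train"], embed_file="glove.6B.300d")

Passing one of the names in GLOVE_WORD_EMBEDDING as `embed_file` triggers the new `_download_glove_embedding` helper, which downloads the vectors from the Hugging Face `stanfordnlp/glove` repository and caches them under `.vector_cache/` by default.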