diff --git a/tensorflow_text/core/kernels/sentencepiece/BUILD b/tensorflow_text/core/kernels/sentencepiece/BUILD index 473dc1c6e..db2792090 100644 --- a/tensorflow_text/core/kernels/sentencepiece/BUILD +++ b/tensorflow_text/core/kernels/sentencepiece/BUILD @@ -10,6 +10,7 @@ licenses(["notice"]) # Visibility rules package(default_visibility = [ + "//java/com/google/android/apps/pixel/psi:__subpackages__", "//visibility:public", ]) diff --git a/tensorflow_text/core/kernels/sentencepiece/model_converter.cc b/tensorflow_text/core/kernels/sentencepiece/model_converter.cc index 6eb7b4b05..61814ce33 100644 --- a/tensorflow_text/core/kernels/sentencepiece/model_converter.cc +++ b/tensorflow_text/core/kernels/sentencepiece/model_converter.cc @@ -46,6 +46,9 @@ DecodePrecompiledCharsmap( const ::sentencepiece::NormalizerSpec& normalizer_spec) { // This function "undoes" encoding done by // sentencepiece::normalizer::Normalizer::EncodePrecompiledCharsMap. + if (normalizer_spec.precompiled_charsmap().empty()) { + return std::make_tuple(std::vector(), std::vector()); + } const char* precompiled_map = normalizer_spec.precompiled_charsmap().data(); const uint32_t trie_size = *reinterpret_cast(precompiled_map); @@ -89,6 +92,7 @@ absl::StatusOr ConvertSentencepieceModelToFlatBuffer( break; case ::sentencepiece::ModelProto::SentencePiece::UNKNOWN: case ::sentencepiece::ModelProto::SentencePiece::CONTROL: + case ::sentencepiece::ModelProto::SentencePiece::BYTE: // Ignore unknown and control codes. break; default: