From fe08cf460377c7cbff30788020f0ba9a446c2d2a Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Sun, 10 Jul 2022 10:52:03 +0200 Subject: [PATCH] tokenization: log spm usage only in debug mode to avoid spamming --- tokenization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tokenization.py b/tokenization.py index 65addec..5f3ae8a 100644 --- a/tokenization.py +++ b/tokenization.py @@ -233,7 +233,7 @@ def tokenize(self, text): def convert_tokens_to_ids(self, tokens): if self.sp_model: - tf.logging.info("using sentence piece tokenzier.") + tf.logging.debug("using sentence piece tokenizer.") return [self.sp_model.PieceToId( printable_text(token)) for token in tokens] else: @@ -241,7 +241,7 @@ def convert_tokens_to_ids(self, tokens): def convert_ids_to_tokens(self, ids): if self.sp_model: - tf.logging.info("using sentence piece tokenzier.") + tf.logging.debug("using sentence piece tokenizer.") return [self.sp_model.IdToPiece(id_) for id_ in ids] else: return convert_by_vocab(self.inv_vocab, ids)