diff --git a/utils/convert-hf-to-gguf-bitnet.py b/utils/convert-hf-to-gguf-bitnet.py
index 23e84384c..0a6b8c560 100644
--- a/utils/convert-hf-to-gguf-bitnet.py
+++ b/utils/convert-hf-to-gguf-bitnet.py
@@ -957,7 +957,7 @@ class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET
 
     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
diff --git a/utils/convert-ms-to-gguf-bitnet.py b/utils/convert-ms-to-gguf-bitnet.py
index edf702788..1b9b8d901 100644
--- a/utils/convert-ms-to-gguf-bitnet.py
+++ b/utils/convert-ms-to-gguf-bitnet.py
@@ -1208,6 +1208,9 @@ def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list
     def add_meta_vocab(self, vocab: Vocab) -> None:
         # Ensure that tokenizer_model is added to the GGUF model
         self.gguf.add_tokenizer_model(vocab.tokenizer_model)
+        # Add pre-tokenizer type for BPE models (required for correct tokenization)
+        if vocab.tokenizer_model == "gpt2":
+            self.gguf.add_token_pre_type("gpt-2")
         # Extract model vocabulary for model conversion
         tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
 