|
45 | 45 | _MISTRAL_COMMON_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) |
46 | 46 |
|
47 | 47 | _mistral_common_installed = False |
48 | | - TokenizerVersion = None |
49 | | - Tekkenizer = None |
50 | | - SentencePieceTokenizer = None |
| 48 | + TokenizerVersion: Any = None |
| 49 | + Tekkenizer: Any = None |
| 50 | + SentencePieceTokenizer: Any = None |
51 | 51 | _mistral_import_error_msg = ( |
52 | 52 | "Mistral format requires `mistral-common` to be installed. Please run " |
53 | 53 | "`pip install mistral-common[image,audio]` to install it." |
@@ -220,7 +220,7 @@ def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Call |
220 | 220 | if weight_map is None or not isinstance(weight_map, dict): |
221 | 221 | raise ValueError(f"Can't load 'weight_map' from {index_name!r}") |
222 | 222 | tensor_names_from_index.update(weight_map.keys()) |
223 | | - part_dict: dict[str, None] = dict.fromkeys(weight_map.values(), None) |
| 223 | + part_dict: dict[str, None] = dict.fromkeys(weight_map.values(), None) # ty: ignore[invalid-assignment] |
224 | 224 | part_names = sorted(part_dict.keys()) |
225 | 225 | else: |
226 | 226 | weight_map = {} |
@@ -5878,7 +5878,7 @@ def set_vocab(self): |
5878 | 5878 | logger.error(f'Error: Missing {tokenizer_path}') |
5879 | 5879 | sys.exit(1) |
5880 | 5880 |
|
5881 | | - sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] |
| 5881 | + sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute] |
5882 | 5882 | sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read()) |
5883 | 5883 | add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix |
5884 | 5884 |
|
@@ -6199,7 +6199,7 @@ def _xlmroberta_set_vocab(self) -> None: |
6199 | 6199 |
|
6200 | 6200 | vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size) |
6201 | 6201 | else: |
6202 | | - sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] |
| 6202 | + sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute] |
6203 | 6203 | sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read()) |
6204 | 6204 | assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM |
6205 | 6205 |
|
@@ -8876,7 +8876,7 @@ def set_vocab(self): |
8876 | 8876 | if not tokenizer_path.is_file(): |
8877 | 8877 | raise FileNotFoundError(f"File not found: {tokenizer_path}") |
8878 | 8878 |
|
8879 | | - sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] |
| 8879 | + sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute] |
8880 | 8880 | sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read()) |
8881 | 8881 |
|
8882 | 8882 | # some models like Pile-T5 family use BPE tokenizer instead of Unigram |
@@ -9013,7 +9013,7 @@ def set_vocab(self): |
9013 | 9013 | if not tokenizer_path.is_file(): |
9014 | 9014 | raise FileNotFoundError(f"File not found: {tokenizer_path}") |
9015 | 9015 |
|
9016 | | - sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] |
| 9016 | + sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute] |
9017 | 9017 | sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read()) |
9018 | 9018 |
|
9019 | 9019 | # some models like Pile-T5 family use BPE tokenizer instead of Unigram |
@@ -12275,6 +12275,7 @@ def __torch_function__(cls, func, types, args=(), kwargs=None): |
12275 | 12275 | kwargs = {} |
12276 | 12276 |
|
12277 | 12277 | if func is torch.Tensor.numpy: |
| 12278 | + assert len(args) |
12278 | 12279 | return args[0].numpy() |
12279 | 12280 |
|
12280 | 12281 | return cls._wrap_fn(func)(*args, **kwargs) |
|
0 commit comments