Fixed special tokens map to not ignore unk
BertForMorphTagging.py +1 -1
@@ -157,7 +157,7 @@ class BertForMorphTagging(BertPreTrainedModel):
         # for each sentence, return a dict object with the following fields { text, tokens }
         # Where tokens is a list of dicts, where each dict is:
         # { pos: str, feats: dict, prefixes: List[str], suffix: str | bool, suffix_feats: dict | None}
-        special_tokens = set(tokenizer.all_special_tokens)
+        special_tokens = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])
         ret = []
         for sent_idx, sentence in enumerate(sentences):
             input_id_strs = tokenizer.convert_ids_to_tokens(inputs['input_ids'][sent_idx])
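Why the change matters: tokenizer.all_special_tokens includes unk_token, so filtering against it silently drops any [UNK] token from the output along with padding and separators. Below is a minimal sketch of the difference, not code from this repo: it assumes the surrounding loop skips tokens found in special_tokens, and it uses bert-base-uncased purely for illustration (any standard BERT WordPiece tokenizer exposes the same attributes).

# Minimal sketch (not from this commit) of why the old filter lost [UNK] tokens.
# Assumes a standard BERT tokenizer; "bert-base-uncased" is only an example checkpoint.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Old: all_special_tokens includes unk_token, so [UNK] gets filtered out too.
old_special = set(tokenizer.all_special_tokens)   # {'[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'}
# New: only the structural tokens that never stand in for input words.
new_special = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])

tokens = ['[CLS]', 'hello', '[UNK]', '[SEP]', '[PAD]']
print([t for t in tokens if t not in old_special])   # ['hello'] -- the unknown word vanishes
print([t for t in tokens if t not in new_special])   # ['hello', '[UNK]'] -- it is kept

Keeping [UNK] in the token stream presumably means out-of-vocabulary words still get an entry in the returned tokens list instead of disappearing from the { text, tokens } output.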