Fixed special tokens map to not ignore unk
BertForMorphTagging.py +1 -1
@@ -157,7 +157,7 @@ class BertForMorphTagging(BertPreTrainedModel):
         # for each sentence, return a dict object with the following fields { text, tokens }
         # Where tokens is a list of dicts, where each dict is:
         # { pos: str, feats: dict, prefixes: List[str], suffix: str | bool, suffix_feats: dict | None}
-        special_tokens = set(tokenizer.all_special_tokens)
+        special_tokens = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])
         ret = []
         for sent_idx, sentence in enumerate(sentences):
             input_id_strs = tokenizer.convert_ids_to_tokens(inputs['input_ids'][sent_idx])
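Why the change matters: tokenizer.all_special_tokens includes unk_token, so filtering against it silently drops any [UNK] token from the output along with padding and separators. Below is a minimal sketch of the difference, not code from this repo: it assumes the surrounding loop skips tokens found in special_tokens, and it uses bert-base-uncased purely for illustration (any standard BERT WordPiece tokenizer exposes the same attributes).

# Minimal sketch (not from this commit) of why the old filter lost [UNK] tokens.
# Assumes a standard BERT tokenizer; "bert-base-uncased" is only an example checkpoint.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Old: all_special_tokens includes unk_token, so [UNK] gets filtered out too.
old_special = set(tokenizer.all_special_tokens)   # {'[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'}
# New: only the structural tokens that never stand in for input words.
new_special = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])

tokens = ['[CLS]', 'hello', '[UNK]', '[SEP]', '[PAD]']
print([t for t in tokens if t not in old_special])   # ['hello'] -- the unknown word vanishes
print([t for t in tokens if t not in new_special])   # ['hello', '[UNK]'] -- it is kept

Keeping [UNK] in the token stream presumably means out-of-vocabulary words still get an entry in the returned tokens list instead of disappearing from the { text, tokens } output.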