| from typing import Optional |
|
|
| from transformers import AutoTokenizer |
| from transformers.processing_utils import ProcessorMixin |
|
|
| from .image_processing_m2_encoder import M2EncoderImageProcessor |
|
|
|
|
class M2EncoderProcessor(ProcessorMixin):
    """Joint processor for M2Encoder: bundles an image processor and a tokenizer.

    Text is tokenized with a GLM Chinese tokenizer (loaded via remote code) and
    images are preprocessed with :class:`M2EncoderImageProcessor`; the two
    encodings are merged into a single dict by :meth:`__call__`.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "M2EncoderImageProcessor"
    tokenizer_class = ("GLMChineseTokenizer", None)

    def __init__(self, image_processor, tokenizer):
        # NOTE(review): deliberately does NOT call super().__init__(), which
        # would run ProcessorMixin's attribute-class validation; the tokenizer
        # class "GLMChineseTokenizer" comes from remote code and may not be
        # resolvable by that check — confirm this bypass is intentional.
        self.image_processor = image_processor
        self.tokenizer = tokenizer

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Build the processor from a pretrained checkpoint or local directory.

        Args:
            pretrained_model_name_or_path: Hub model id or local path holding
                both the image-processor and tokenizer configuration.
            **kwargs: Forwarded to both sub-loaders. ``trust_remote_code`` is
                popped here (default ``True``, since the tokenizer class ships
                as remote code) and passed only to the tokenizer loader.

        Returns:
            An :class:`M2EncoderProcessor` wrapping the loaded components.
        """
        trust_remote_code = kwargs.pop("trust_remote_code", True)
        image_processor = M2EncoderImageProcessor.from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
        return cls(image_processor=image_processor, tokenizer=tokenizer)

    def __call__(
        self,
        text=None,
        images=None,
        padding="max_length",
        truncation=True,
        max_length: Optional[int] = 52,
        return_tensors=None,
        **kwargs,
    ):
        """Tokenize ``text`` and/or preprocess ``images`` into one encoding.

        Args:
            text: Text (or batch of texts) to tokenize; skipped if ``None``.
            images: Image(s) to preprocess; skipped if ``None``.
            padding: Tokenizer padding strategy (defaults to fixed-length
                ``"max_length"`` padding).
            truncation: Whether to truncate text to ``max_length``.
            max_length: Maximum token length (model-specific default of 52).
            return_tensors: Tensor framework for both sub-processors
                (e.g. ``"pt"``), or ``None`` for plain lists.
            **kwargs: Forwarded to both the tokenizer and the image processor.

        Returns:
            A dict merging the tokenizer output (including a special-tokens
            mask) and the image-processor output.

        Raises:
            ValueError: If neither ``text`` nor ``images`` is provided
                (previously this returned a silently-empty dict).
        """
        if text is None and images is None:
            raise ValueError("You have to specify at least one of `text` or `images`.")
        encoding = {}
        if text is not None:
            encoding.update(
                self.tokenizer(
                    text,
                    padding=padding,
                    truncation=truncation,
                    max_length=max_length,
                    return_special_tokens_mask=True,
                    return_tensors=return_tensors,
                    **kwargs,
                )
            )
        if images is not None:
            encoding.update(
                self.image_processor(images, return_tensors=return_tensors, **kwargs)
            )
        return encoding
|
|