Zero-Shot Image Classification
Transformers
ONNX
Chinese
English
m2_encoder
feature-extraction
multimodal
image-text-retrieval
bilingual
vision-language
custom_code
How to use malusama/M2-Encoder-1B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline(
    "zero-shot-image-classification",
    model="malusama/M2-Encoder-1B",
    trust_remote_code=True,
)
pipe(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png",
    candidate_labels=["animals", "humans", "landscape"],
)
```

```python
# Load the model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("malusama/M2-Encoder-1B", trust_remote_code=True, dtype="auto")
```
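The pipeline also accepts local images (file paths or PIL images), and because the model is bilingual, candidate labels can be written in Chinese or English. A minimal sketch, where the image file and labels are placeholders:

```python
from PIL import Image
from transformers import pipeline

pipe = pipeline(
    "zero-shot-image-classification",
    model="malusama/M2-Encoder-1B",
    trust_remote_code=True,
)

image = Image.open("example.jpg")  # placeholder local image
result = pipe(image, candidate_labels=["一只猫", "一只狗", "a landscape"])
print(result)  # list of {"score": ..., "label": ...} dicts, highest score first
```

The repository relies on custom code (hence `trust_remote_code=True`); its processor implementation, which pairs the M2Encoder image processor with a GLM Chinese tokenizer, is reproduced below.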
```python
from typing import Optional

from transformers import AutoTokenizer
from transformers.processing_utils import ProcessorMixin

from .image_processing_m2_encoder import M2EncoderImageProcessor


class M2EncoderProcessor(ProcessorMixin):
    """Pairs the M2Encoder image processor with a GLM Chinese tokenizer."""

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "M2EncoderImageProcessor"
    tokenizer_class = ("GLMChineseTokenizer", None)

    def __init__(self, image_processor, tokenizer):
        # Stores the sub-processors directly rather than delegating to
        # ProcessorMixin.__init__.
        self.image_processor = image_processor
        self.tokenizer = tokenizer

    # Builds both sub-processors from the same checkpoint path.
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        trust_remote_code = kwargs.pop("trust_remote_code", True)
        image_processor = M2EncoderImageProcessor.from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
        return cls(image_processor=image_processor, tokenizer=tokenizer)

    def __call__(
        self,
        text=None,
        images=None,
        padding="max_length",
        truncation=True,
        max_length: Optional[int] = 52,
        return_tensors=None,
        **kwargs,
    ):
        # Tokenize text and/or preprocess images, merging both into one batch dict.
        encoding = {}
        if text is not None:
            encoding.update(
                self.tokenizer(
                    text,
                    padding=padding,
                    truncation=truncation,
                    max_length=max_length,
                    return_special_tokens_mask=True,
                    return_tensors=return_tensors,
                    **kwargs,
                )
            )
        if images is not None:
            encoding.update(
                self.image_processor(images, return_tensors=return_tensors, **kwargs)
            )
        return encoding
```
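A sketch of calling the processor and model directly for feature extraction or image-text retrieval. It assumes the repo's custom code exposes this processor through `AutoProcessor`; the fields returned by the model depend on the repository's modeling code, so the forward call is shown generically:

```python
from PIL import Image
from transformers import AutoModel, AutoProcessor

# Assumption: the repo maps AutoProcessor to the M2EncoderProcessor shown above
# via trust_remote_code; otherwise, import M2EncoderProcessor from the downloaded file.
processor = AutoProcessor.from_pretrained("malusama/M2-Encoder-1B", trust_remote_code=True)
model = AutoModel.from_pretrained("malusama/M2-Encoder-1B", trust_remote_code=True)

texts = ["一只鹦鹉", "a parrot", "a landscape"]  # bilingual candidate texts
images = [Image.open("example.jpg")]             # placeholder local image

# Text is padded/truncated to max_length=52 by default; images are preprocessed
# by M2EncoderImageProcessor. Both land in a single batch dict.
inputs = processor(text=texts, images=images, return_tensors="pt")

outputs = model(**inputs)  # inspect the returned object to see which fields (embeddings, logits) are available
```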