Zero-Shot Image Classification
Transformers
ONNX
Chinese
English
m2_encoder
feature-extraction
multimodal
image-text-retrieval
bilingual
chinese
english
vision-language
custom-code
custom_code
Eval Results (legacy)
Instructions to use malusama/M2-Encoder-0.4B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use malusama/M2-Encoder-0.4B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("zero-shot-image-classification", model="malusama/M2-Encoder-0.4B", trust_remote_code=True)
pipe(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png",
    candidate_labels=["animals", "humans", "landscape"],
)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("malusama/M2-Encoder-0.4B", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from typing import List, Optional, Union | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| from transformers.feature_extraction_utils import BatchFeature, FeatureExtractionMixin | |
| from transformers.image_utils import ImageFeatureExtractionMixin | |
class M2EncoderImageProcessor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
    """Image processor that emits square, 0-1 scaled, channel-first float arrays.

    Every input image is converted to RGB, resized to ``size`` x ``size`` with
    the configured resampling filter, divided by 255 and transposed from
    height-width-channel to channel-height-width order. No mean/std
    normalization is applied by this class.
    """

    model_input_names = ["pixel_values"]

    def __init__(self, size: int = 224, resample: int = Image.BICUBIC, **kwargs):
        """Store the target edge length and the resampling filter.

        Args:
            size: Edge length of the square output. A dict is also accepted, in
                which case its ``"height"`` entry is used, falling back to
                ``"width"``.
            resample: Resampling filter handed to ``PIL.Image.Image.resize``.
            **kwargs: Forwarded unchanged to the parent initializer.

        Raises:
            ValueError: If ``size`` is a dict that provides neither a usable
                ``"height"`` nor a usable ``"width"`` value.
        """
        super().__init__(**kwargs)
        if isinstance(size, dict):
            edge = size.get("height") or size.get("width")
            if not edge:
                # Fail here with an explicit message instead of letting
                # int(None) raise an opaque TypeError (or a zero edge fail
                # later inside PIL's resize).
                raise ValueError(
                    "`size` dict must contain a non-zero 'height' or 'width' "
                    f"entry, got {size!r}"
                )
            size = int(edge)
        self.size = size
        self.resample = resample

    def _to_chw_array(self, image) -> np.ndarray:
        """Turn one image into a float32 channel-first array scaled to 0-1."""
        if not isinstance(image, Image.Image):
            # Anything that is not already a PIL image goes through NumPy.
            image = Image.fromarray(np.asarray(image))
        image = image.convert("RGB")
        image = image.resize((self.size, self.size), resample=self.resample)
        scaled = np.asarray(image, dtype=np.float32) / 255.0
        # PIL yields height-width-channel; move the channel axis to the front.
        return np.transpose(scaled, (2, 0, 1))

    def __call__(
        self,
        images,
        return_tensors: Optional[Union[str, torch.Tensor]] = None,
        **kwargs,
    ) -> BatchFeature:
        """Preprocess one image or a list/tuple of images.

        Args:
            images: A single image, or a list/tuple of images. Items that are
                not PIL images are converted with ``np.asarray`` followed by
                ``Image.fromarray``.
            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry is built from the
            list of per-image arrays, one per input image.
        """
        if not isinstance(images, (list, tuple)):
            images = [images]
        pixel_values: List[np.ndarray] = [
            self._to_chw_array(image) for image in images
        ]
        return BatchFeature(
            data={"pixel_values": pixel_values},
            tensor_type=return_tensors,
        )