Zero-Shot Image Classification
Transformers
ONNX
Chinese
English
m2_encoder
feature-extraction
multimodal
image-text-retrieval
bilingual
chinese
english
vision-language
custom-code
custom_code
Eval Results (legacy)
Instructions to use malusama/M2-Encoder-0.4B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use malusama/M2-Encoder-0.4B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("zero-shot-image-classification", model="malusama/M2-Encoder-0.4B", trust_remote_code=True)
pipe(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png",
    candidate_labels=["animals", "humans", "landscape"],
)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("malusama/M2-Encoder-0.4B", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 1,476 Bytes
f471fb4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | from typing import List, Optional, Union
import numpy as np
import torch
from PIL import Image
from transformers.feature_extraction_utils import BatchFeature, FeatureExtractionMixin
from transformers.image_utils import ImageFeatureExtractionMixin
class M2EncoderImageProcessor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
    """Turn images into square, channels-first float arrays for the model.

    Each input image is converted to RGB, resized to ``size x size`` pixels
    (the aspect ratio is not preserved), scaled from ``[0, 255]`` to
    ``[0, 1]`` as ``float32`` and transposed from ``(H, W, C)`` to
    ``(C, H, W)``. No mean/std normalization is applied here.
    """

    model_input_names = ["pixel_values"]

    def __init__(self, size: int = 224, resample: int = Image.BICUBIC, **kwargs):
        """Store the resize configuration.

        Args:
            size: Target edge length in pixels. A dict is also accepted; the
                first truthy value among ``"height"``, ``"width"`` and
                ``"shortest_edge"`` is used as the edge length.
            resample: PIL resampling filter passed to ``Image.resize``.
            **kwargs: Forwarded unchanged to the parent mixin constructors.

        Raises:
            ValueError: If ``size`` is a dict with none of the supported keys
                set to a truthy value.
        """
        super().__init__(**kwargs)
        if isinstance(size, dict):
            # Configs may describe the size as a dict; the output is always
            # square, so a single edge length is all that is needed.
            edge = (
                size.get("height")
                or size.get("width")
                or size.get("shortest_edge")
            )
            if not edge:
                raise ValueError(
                    "size dict must define a non-zero 'height', 'width' or "
                    f"'shortest_edge', got {size!r}"
                )
            size = int(edge)
        self.size = size
        self.resample = resample

    def _preprocess_one(self, image) -> np.ndarray:
        """Convert a single image into a ``(3, size, size)`` float32 array."""
        if not isinstance(image, Image.Image):
            # Anything that is not already a PIL image is routed through
            # numpy so that ``Image.fromarray`` can interpret it.
            image = Image.fromarray(np.asarray(image))
        image = image.convert("RGB")
        # PIL expects (width, height); both are ``self.size`` here.
        image = image.resize((self.size, self.size), resample=self.resample)
        array = np.asarray(image, dtype=np.float32) / 255.0
        # (H, W, C) -> (C, H, W)
        return np.transpose(array, (2, 0, 1))

    def __call__(
        self,
        images,
        return_tensors: Optional[Union[str, torch.Tensor]] = None,
        **kwargs,
    ) -> BatchFeature:
        """Preprocess one image or a list/tuple of images.

        Args:
            images: A single image or a list/tuple of images. Items that are
                not ``PIL.Image.Image`` instances are converted with
                ``np.asarray`` followed by ``Image.fromarray``.
            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``;
                when ``None`` the arrays are returned as a plain list.
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry holds one
            ``(3, size, size)`` array per input image.
        """
        if not isinstance(images, (list, tuple)):
            images = [images]

        pixel_values: List[np.ndarray] = [
            self._preprocess_one(image) for image in images
        ]
        return BatchFeature(
            data={"pixel_values": pixel_values},
            tensor_type=return_tensors,
        )
|