import math

import mteb
import numpy as np
import torch
from mteb.encoder_interface import PromptType
from sentence_transformers import SentenceTransformer

# Load the model to evaluate from the local directory.
model_save_path = "./"
model = SentenceTransformer(model_save_path)


# Wrap the model in the minimal encoder interface MTEB expects: an object
# exposing an `encode` method that returns a numpy array of embeddings.
class CustomModel:
    def __init__(self, model):
        self.model = model

    def encode(
        self,
        sentences,
        task_name: str,
        prompt_type: PromptType | None = None,
        max_batch_size: int = 32,
        **kwargs,
    ) -> np.ndarray:
        """Encode the given sentences with the model, capping the batch size.

        Args:
            sentences (List[str]): The sentences to encode.
            task_name (str): The name of the task.
            prompt_type (Optional[PromptType]): The prompt type to use.
            max_batch_size (int): The maximum number of sentences to process in a single batch.
            **kwargs: Additional arguments passed through to the encoder.

        Returns:
            np.ndarray: Encoded sentences as a numpy array.
        """
        sentences = [str(sentence) for sentence in sentences]
        total_sentences = len(sentences)
        num_batches = math.ceil(total_sentences / max_batch_size)
        embeddings_list = []

        for batch_idx in range(num_batches):
            start_idx = batch_idx * max_batch_size
            end_idx = min(start_idx + max_batch_size, total_sentences)
            batch_sentences = sentences[start_idx:end_idx]
            batch_embeddings = self.model.encode(batch_sentences, convert_to_tensor=True)

            # Depending on settings, `encode` may return a numpy array; normalize to a tensor.
            if not isinstance(batch_embeddings, torch.Tensor):
                batch_embeddings = torch.tensor(batch_embeddings)

            embeddings_list.append(batch_embeddings.cpu().numpy())

        # Stack the per-batch arrays into a single (num_sentences, dim) array.
        return np.vstack(embeddings_list)


custom_model = CustomModel(model)
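
# Optional sanity check of the wrapper before a full run (a sketch: the
# printed embedding dimension depends on the underlying model, and the
# `task_name` value here is just an illustrative label).
sample_embeddings = custom_model.encode(["hello world"], task_name="debug")
print(sample_embeddings.shape)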

# Select the benchmark to evaluate on; a benchmark bundles a set of tasks.
tasks = mteb.get_benchmark("MTEB(eng, classic)")
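
# Alternatively, select individual tasks instead of a whole benchmark (a
# sketch; the task name below is an example and availability depends on the
# installed mteb version):
# tasks = mteb.get_tasks(tasks=["Banking77Classification"])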

evaluation = mteb.MTEB(tasks=tasks)

# Run the evaluation; per-task result files are written under the output folder.
results = evaluation.run(custom_model, output_folder="results/model_results")
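
# Optionally inspect the returned scores in-process (a sketch; this assumes
# each result object exposes `task_name` and `scores` attributes, which may
# vary across mteb versions):
for result in results:
    print(result.task_name, result.scores)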