# plugin.py
import json
from typing import Any, Dict, List
from pymemri.data.itembase import Edge, Item
from pymemri.data.schema import CategoricalPrediction, EmailMessage, Message
from pymemri.data.schema import Model as ModelItem
from pymemri.plugin.pluginbase import PluginBase
from .model import Model
from .utils import item_to_data
class ClassifierPlugin(PluginBase):
    """Plugin that wraps a classifier model, runs it over Pod items, and
    writes the resulting `CategoricalPrediction` items (plus edges) back."""

    schema_classes = [Message, CategoricalPrediction, ModelItem]

    def __init__(
        self,
        item_type: str = "Message",
        item_service: str = None,
        model_name: str = "test_abcd123_model",
        model_version: str = "0.1",
        isMock: bool = True,
        **kwargs,
    ):
        """
        ClassifierPlugin is a plugin that wraps any classifier and handles all
        communication with the Pod and conversion from/to `Item`s.

        Args:
            item_type (str, optional): The Item type this plugin should make predictions on. Defaults to "Message".
            item_service (str, optional): The service of Items this plugin should make predictions on. Defaults to None.
            model_name (str, optional): Name of the model the plugin should use. Defaults to "test_abcd123_model".
            model_version (str, optional): Version of the model the plugin should use. Defaults to "0.1".
            isMock (bool, optional): If True, restricts the query to mock items. Defaults to True.
        """
        super().__init__(**kwargs)
        self.batch_size = 512
        self.model_name = model_name
        self.model_version = model_version
        # Initialized here so sync_to_pod never hits an AttributeError when
        # load_model skips model-item creation (model_name is None).
        self.model_item = None
        self.query = {"type": item_type}
        if item_service is not None:
            self.query["service"] = item_service
        if isMock:
            self.query["isMock"] = True

    def run(self):
        """Run `self.model` on all data in `self.client.search(self.query)`"""
        print("Loading model...")
        self.load_model()
        print("Start predicting...")
        for i, item_batch in enumerate(
            self.client.search_paginate(self.query, limit=self.batch_size)
        ):
            print(f"Predicting batch {i:<4}")
            item_batch = self.filter_items(item_batch)
            prepared_batch = self.prepare_batch(item_batch)
            predictions = self.model.predict(prepared_batch)
            prediction_items = [self.prediction_to_item(p) for p in predictions]
            self.sync_to_pod(item_batch, prediction_items)
        print("Done")

    def load_model(self):
        """Initialize `self.model`, and find or create the corresponding
        `Model` item in the Pod (stored on `self.model_item`)."""
        self.model = Model(client=self.client)
        self.model_item = None
        # Without model name, do not create a model item
        if self.model_name is None:
            return
        # Search in pod for existing models with same name and version,
        # add a new model if it does not exist.
        model_items = self.client.search(
            {"type": "Model", "name": self.model_name, "version": self.model_version}
        )
        if model_items:
            self.model_item = model_items[0]
        else:
            self.model_item = ModelItem(
                name=self.model_name, version=self.model_version
            )
            self.client.create(self.model_item)

    def filter_items(self, items: List[Item]) -> List[Item]:
        """Keep only Message/EmailMessage items with non-empty content.

        Raises:
            NotImplementedError: If an item is not a Message or EmailMessage.
        """
        result = []
        for item in items:
            if not isinstance(item, (EmailMessage, Message)):
                raise NotImplementedError()
            if item.content:
                result.append(item)
        return result

    def prepare_batch(self, batch: List[Item]) -> List[Any]:
        """Prepare a list of items for the model. See `utils.item_to_data` for more information.

        Args:
            batch (List[Item]): List of Items from the Pod.

        Returns:
            List[Any]: List of prepared data.
        """
        return [item_to_data(item, self.client) for item in batch]

    def prediction_to_item(
        self, prediction: List[Dict[str, Any]]
    ) -> CategoricalPrediction:
        """Converts a prediction returned by self.model to a CategoricalPrediction
        that can be added to the Pod.

        Args:
            prediction (List[Dict[str, float]]): List of predictions. For the
                correct format, see `.model.Model`.

        Returns:
            CategoricalPrediction: Formatted `CategoricalPrediction`.
        """
        # Get the label with the highest score
        max_label = max(prediction, key=lambda p: p["score"])["label"]
        return CategoricalPrediction(
            probs=json.dumps(prediction),
            value=max_label,
            source=f"{self.model.name}:{self.model.version}",
        )

    def sync_to_pod(self, items: List[Item], predictions: List[CategoricalPrediction]):
        """For each (item, prediction) pair, add the prediction to the Pod and
        create a 'label' edge between the Item and its Prediction.

        Args:
            items (List[Item]): Items the predictions were made on.
            predictions (List[CategoricalPrediction]): One prediction per item.
        """
        # Create edges between item and predictions
        edges = [
            Edge(item, prediction, "label")
            for item, prediction in zip(items, predictions)
        ]
        # Create edges between predictions and model
        if self.model_item is not None:
            edges += [
                Edge(prediction, self.model_item, "model")
                for prediction in predictions
            ]
        self.client.bulk_action(create_items=predictions, create_edges=edges)