Source code for deeppavlov.models.classifiers.proba2labels

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from logging import getLogger

import numpy as np

from deeppavlov.core.common.errors import ConfigError
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component

log = getLogger(__name__)


@register('proba2labels')
class Proba2Labels(Component):
    """
    Convert probability vectors into label indices.

    Exactly one selection strategy is applied per call, chosen in this
    order of precedence:

    1. ``confidence_threshold`` -- keep every index whose probability is
       strictly greater than the threshold (multi-label use case);
    2. ``max_proba`` -- keep the single index with maximal probability;
    3. ``top_n`` -- keep the ``top_n`` indices with the highest
       probabilities, ordered from most to least probable.

    Args:
        max_proba: whether to choose the label with maximal probability
        confidence_threshold: boundary probability value for a sample to belong
            to the class (best used for multi-label classification)
        top_n: how many top labels with the highest probabilities to return
        is_binary: when ``True`` and ``confidence_threshold`` is set, every
            element of the input is compared to the threshold directly and
            mapped to ``0`` or ``1`` instead of being searched for indices
        **kwargs: accepted for configuration compatibility and ignored

    Attributes:
        max_proba: whether to choose the label with maximal probability
        confidence_threshold: boundary probability value for a sample to belong
            to the class (best used for multi-label classification)
        top_n: how many top labels with the highest probabilities to return
        is_binary: whether thresholding produces a 0/1 flag per input element
    """

    def __init__(self,
                 max_proba: bool = None,
                 confidence_threshold: float = None,
                 top_n: int = None,
                 is_binary: bool = False,
                 **kwargs) -> None:
        """Store the label-selection settings; extra keyword arguments are ignored."""
        self.max_proba = max_proba
        self.confidence_threshold = confidence_threshold
        self.top_n = top_n
        self.is_binary = is_binary

    def __call__(self, *args, **kwargs) -> list:
        """
        Process probabilities to labels.

        Args:
            *args: every positional argument is a list of vectors with
                probability distributions (presumably a list of scalar
                probabilities when ``is_binary`` is set -- confirm against callers)
            **kwargs: ignored

        Returns:
            list of labels (single-label classification) or list of lists of labels
            (multi-label classification) when exactly one argument is given;
            otherwise a list holding one such result per argument (multitask setting).
            An argument that is empty or consists only of ``None`` yields ``[]``.

        Raises:
            ConfigError: if an argument has to be processed but none of
                ``confidence_threshold``, ``max_proba`` or ``top_n`` is set
        """
        answer = []
        # Lazy %-formatting: the (possibly large) batch is stringified only
        # when DEBUG logging is actually enabled.
        log.debug('input %s', args)
        for data in args:
            if all(k is None for k in data):
                answer.append([])
            # NOTE(review): truthiness check -- a threshold of 0 / 0.0 is
            # treated as "not set" and falls through to the next strategy.
            elif self.confidence_threshold:
                if self.is_binary:
                    answer.append([int(el > self.confidence_threshold) for el in data])
                else:
                    answer.append([list(np.where(np.array(d) > self.confidence_threshold)[0])
                                   for d in data])
            elif self.max_proba:
                answer.append([np.argmax(d) for d in data])
            elif self.top_n:
                # argsort is ascending, so reverse it before taking the first top_n indices
                answer.append([np.argsort(d)[::-1][:self.top_n] for d in data])
            else:
                raise ConfigError("Proba2Labels requires one of three arguments: bool `max_proba` or "
                                  "float `confidence_threshold` for multi-label classification or "
                                  "integer `top_n` for choosing several labels with the highest probabilities")
        if len(args) == 1:  # only one argument
            answer = answer[0]
        log.debug('output %s', answer)
        return answer