# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logging import getLogger
import numpy as np
from deeppavlov.core.common.errors import ConfigError
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
# Module-level logger, named after this module.
log = getLogger(__name__)
@register('proba2labels')
class Proba2Labels(Component):
    """
    Convert vectors of class probabilities into label indices.

    For every input exactly one strategy is applied, chosen in this priority order:

    1. ``confidence_threshold`` -- keep every index whose probability is strictly greater
       than the threshold (multi-label), or, when ``is_binary`` is set, map every element
       to ``1``/``0`` depending on whether it exceeds the threshold;
    2. ``max_proba`` -- keep the single index with the highest probability;
    3. ``top_n`` -- keep the ``top_n`` indices with the highest probabilities,
       ordered from most to least probable.

    Args:
        max_proba: whether to choose the label with maximal probability
        confidence_threshold: boundary probability value above which a sample is considered
            to belong to the class (best used for multi-label classification)
        top_n: how many top labels with the highest probabilities to return
        is_binary: whether, in ``confidence_threshold`` mode, every element of the input is
            compared with the threshold directly and turned into ``0`` or ``1``
        **kwargs: accepted for interface compatibility and ignored

    Attributes:
        max_proba: whether to choose the label with maximal probability
        confidence_threshold: boundary probability value above which a sample is considered
            to belong to the class (best used for multi-label classification)
        top_n: how many top labels with the highest probabilities to return
        is_binary: whether thresholding produces a single ``0``/``1`` per element
    """

    def __init__(self,
                 max_proba: bool = None,
                 confidence_threshold: float = None,
                 top_n: int = None,
                 is_binary: bool = False,
                 **kwargs) -> None:
        """Store the label-selection settings on the instance."""
        self.max_proba = max_proba
        self.confidence_threshold = confidence_threshold
        self.top_n = top_n
        self.is_binary = is_binary

    def __call__(self,
                 *args,
                 **kwargs):
        """
        Process probabilities to labels.

        Args:
            *args: every positional argument is a list of vectors with probability
                distributions (or, with ``is_binary``, a list of values that are compared
                with the threshold directly -- presumably scalars, confirm against callers)
            **kwargs: ignored

        Returns:
            list of labels (single-label classification) or list of lists of labels
            (multi-label classification) when one argument is given; otherwise a list with
            one such result per argument (multitask setting). An argument whose elements
            are all ``None`` (including an empty one) yields an empty list.

        Raises:
            ConfigError: if none of ``confidence_threshold``, ``max_proba`` and ``top_n``
                is set to a truthy value and an argument contains non-``None`` elements.
        """
        answer = []
        # Lazy %-formatting: the (possibly large) inputs are only rendered when DEBUG is on.
        log.debug('input %s', args)
        for data in args:
            if all(k is None for k in data):
                # Nothing to convert for this argument.
                answer.append([])
            # NOTE: the settings are checked by truthiness, so a threshold of 0
            # (like ``top_n=0``) is treated as "not set" and falls through.
            elif self.confidence_threshold:
                if self.is_binary:
                    answer.append([int(el > self.confidence_threshold) for el in data])
                else:
                    answer.append([list(np.where(np.array(d) > self.confidence_threshold)[0])
                                   for d in data])
            elif self.max_proba:
                answer.append([np.argmax(d) for d in data])
            elif self.top_n:
                # argsort is ascending, so reverse it before taking the first ``top_n``.
                answer.append([np.argsort(d)[::-1][:self.top_n] for d in data])
            else:
                raise ConfigError("Proba2Labels requires one of three arguments: bool `max_proba` or "
                                  "float `confidence_threshold` for multi-label classification or "
                                  "integer `top_n` for choosing several labels with the highest probabilities")
        if len(args) == 1:  # only one argument
            answer = answer[0]
        log.debug('output %s', answer)
        return answer