Source code for deeppavlov.models.classifiers.logreg_classifier

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Tuple, Union

import numpy as np
from scipy.sparse import vstack
from scipy.sparse import csr_matrix
from sklearn.linear_model import LogisticRegression

from deeppavlov.core.common.registry import register
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.common.file import save_pickle
from deeppavlov.core.common.file import load_pickle
from deeppavlov.core.commands.utils import expand_path, make_all_dirs
from deeppavlov.core.models.serializable import Serializable

logger = get_logger(__name__)


[docs]@register("logreg_classifier") class LogregClassifier(Estimator, Serializable): """ Logistic Regression Classifier Parameters: top_n: how many top answers classifier'll return for input vectorized question c: regularization strength in logistic regression model penalty: regularization penalty type in logistic regression model save_path: path to save the model load_path: path to load the model Returns: None """ def __init__(self, top_n: int = 1, c: int = 1, penalty: str = 'l2', save_path: str = None, load_path: str = None, **kwargs) -> None: self.save_path = save_path self.load_path = load_path self.top_n = top_n self.c = c self.penalty = penalty if kwargs['mode'] != 'train': self.load()
    def __call__(self, q_vects: List) -> Tuple[List[str], List[int]]:
        """Find the most similar answers for input vectorized questions.

        Parameters:
            q_vects: vectorized questions

        Returns:
            Tuple of answers and scores
        """
        probs = self.logreg.predict_proba(q_vects)
        # indices of the top_n classes by probability (ascending); reversed below to descending order
        answer_ids = np.argsort(probs)[:, -self.top_n:]

        answers = []
        scores = []
        for i in range(len(answer_ids)):
            answers.append([self.logreg.classes_[id] for id in answer_ids[i, ::-1]])
            scores.append([np.round(probs[i, id], 2) for id in answer_ids[i, ::-1]])

        return answers, scores
    def fit(self, x_train_vects: Tuple[Union[csr_matrix, List]], y_train: Tuple[str]) -> None:
        """Train the classifier.

        Parameters:
            x_train_vects: vectorized questions of the train dataset
            y_train: answers of the train dataset

        Returns:
            None
        """
        if isinstance(x_train_vects, tuple):
            if len(x_train_vects) != 0:
                if isinstance(x_train_vects[0], csr_matrix):
                    x_train_features = vstack(list(x_train_vects))
                elif isinstance(x_train_vects[0], np.ndarray):
                    x_train_features = np.vstack(list(x_train_vects))
                else:
                    raise NotImplementedError('This type of vectors is not supported')
            else:
                raise ValueError("Train vectors can't be empty")
        else:
            x_train_features = x_train_vects

        self.logreg = LogisticRegression(C=self.c, penalty=self.penalty)
        self.logreg.fit(x_train_features, list(y_train))
    def save(self) -> None:
        """Save classifier parameters"""
        logger.info("Saving faq_logreg_model to {}".format(self.save_path))
        path = expand_path(self.save_path)
        make_all_dirs(path)
        save_pickle(self.logreg, path)
    def load(self) -> None:
        """Load classifier parameters"""
        logger.info("Loading faq_logreg_model from {}".format(self.load_path))
        self.logreg = load_pickle(expand_path(self.load_path))
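
Below is a minimal usage sketch showing how the classifier can be trained and queried directly, outside the DeepPavlov config pipeline. The TF-IDF vectorizer and the toy question/answer pairs are illustrative assumptions and are not part of this module.

from sklearn.feature_extraction.text import TfidfVectorizer

# Toy FAQ data (hypothetical, for illustration only)
questions = ["how do i reset my password", "what are your opening hours"]
answers = ["Use the 'Forgot password' link.", "We are open from 9am to 6pm."]

# Vectorize the questions; fit_transform returns a csr_matrix with one row per question
vectorizer = TfidfVectorizer()
x_train = vectorizer.fit_transform(questions)

# 'mode' is read from **kwargs in __init__; passing 'train' skips loading a saved model
clf = LogregClassifier(top_n=1, c=1.0, penalty='l2', mode='train')
clf.fit(tuple(x_train[i] for i in range(x_train.shape[0])), tuple(answers))

# Query with a new vectorized question; returns top_n answers and rounded probabilities
q_vects = vectorizer.transform(["when are you open"])
pred_answers, pred_scores = clf(q_vects)
print(pred_answers, pred_scores)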