Source code for deeppavlov.models.ranking.bilstm_gru_siamese_network

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from logging import getLogger

from keras import backend as K
from keras.layers import Input, GlobalMaxPooling1D, Lambda, Dense, GRU
from keras.models import Model

from deeppavlov.core.common.registry import register
from deeppavlov.models.ranking.bilstm_siamese_network import BiLSTMSiameseNetwork

log = getLogger(__name__)

@register('bilstm_gru_nn')
class BiLSTMGRUSiameseNetwork(BiLSTMSiameseNetwork):
    """The class implementing a siamese neural network with BiLSTM, GRU and max pooling.

    A GRU is used to take into account the multi-turn dialogue ``context``.

    Args:
        len_vocab: The size of the vocabulary used to build the embedding layer.
        seed: Random seed.
        shared_weights: Whether to use shared weights in the model to encode ``contexts`` and ``responses``.
        embedding_dim: Dimensionality of token (word) embeddings.
        reccurent: The type of the RNN cell. Possible values are ``lstm`` and ``bilstm``.
        hidden_dim: Dimensionality of the hidden state of the RNN cell. If ``reccurent`` equals ``bilstm``,
            ``hidden_dim`` should be doubled to get the actual dimensionality.
        max_pooling: Whether to use a max-pooling operation to get the ``context`` (``response``) vector
            representation. If ``False``, the last hidden state of the RNN will be used.
        triplet_loss: Whether to use a model with triplet loss.
            If ``False``, a model with crossentropy loss will be used.
        margin: The margin parameter for triplet loss. Only required if ``triplet_loss`` is set to ``True``.
        hard_triplets: Whether to use hard triplets sampling to train the model,
            i.e. to choose negative samples close to positive ones.
            If set to ``False``, random sampling will be used.
            Only required if ``triplet_loss`` is set to ``True``.
    """

    def create_model(self) -> Model:
        inputs = []
        if self.use_matrix:
            # Inputs are matrices of token ids; embeddings are looked up inside the model.
            for _ in range(self.num_context_turns + 1):
                inputs.append(Input(shape=(self.max_sequence_length,)))
            context = inputs[:self.num_context_turns]
            response = inputs[-1]
            emb_layer = self.embedding_layer()
            emb_c = [emb_layer(el) for el in context]
            emb_r = emb_layer(response)
        else:
            # Inputs are pre-computed token embeddings.
            for _ in range(self.num_context_turns + 1):
                inputs.append(Input(shape=(self.max_sequence_length, self.embedding_dim,)))
            context = inputs[:self.num_context_turns]
            response = inputs[-1]
            emb_c = context
            emb_r = response
        # Encode each context turn and the response with the shared recurrent encoder.
        lstm_layer = self.lstm_layer()
        lstm_c = [lstm_layer(el) for el in emb_c]
        lstm_r = lstm_layer(emb_r)
        # Max-pool over time to get a fixed-size vector per utterance.
        pooling_layer = GlobalMaxPooling1D(name="pooling")
        lstm_c = [pooling_layer(el) for el in lstm_c]
        lstm_r = pooling_layer(lstm_r)
        # Stack the per-turn vectors and run a GRU over the dialogue turns.
        lstm_c = [Lambda(lambda x: K.expand_dims(x, 1))(el) for el in lstm_c]
        lstm_c = Lambda(lambda x: K.concatenate(x, 1))(lstm_c)
        gru_layer = GRU(2 * self.hidden_dim, name="gru")
        gru_c = gru_layer(lstm_c)
        if self.triplet_mode:
            dist = Lambda(self._pairwise_distances)([gru_c, lstm_r])
        else:
            dist = Lambda(self._diff_mult_dist)([gru_c, lstm_r])
            dist = Dense(1, activation='sigmoid', name="score_model")(dist)
        model = Model(context + [response], dist)
        return model

    def create_score_model(self) -> Model:
        cr = self.model.inputs
        if self.triplet_mode:
            emb_c = self.model.get_layer("gru").output
            emb_r = self.model.get_layer("pooling").get_output_at(-1)
            dist_score = Lambda(lambda x: self.euclidian_dist(x), name="score_model")
            score = dist_score([emb_c, emb_r])
        else:
            score = self.model.get_layer("score_model").output
            score = Lambda(lambda x: 1. - K.squeeze(x, -1))(score)
        score = Lambda(lambda x: 1. - x)(score)
        model = Model(cr, score)
        return model

    def create_context_model(self) -> Model:
        m = Model(self.model.inputs[:-1],
                  self.model.get_layer("gru").output)
        return m

    def create_response_model(self) -> Model:
        m = Model(self.model.inputs[-1],
                  self.model.get_layer("pooling").get_output_at(-1))
        return m
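
A minimal sketch of how the assembled model is fed at prediction time. Nothing below is part of the module: the hyperparameter values and the ``network`` instance are hypothetical, and only the input layout (``num_context_turns`` context tensors followed by one response tensor, as wired in ``create_model``) follows from the source above.

# Illustrative sketch only; values are assumed, not taken from the library.
import numpy as np

num_context_turns = 10     # assumed value of self.num_context_turns
max_sequence_length = 50   # assumed value of self.max_sequence_length
len_vocab = 1000           # assumed vocabulary size
batch_size = 2

# With ``use_matrix=True`` each input is a batch of token-id sequences
# of shape (batch_size, max_sequence_length).
context = [np.random.randint(0, len_vocab, size=(batch_size, max_sequence_length))
           for _ in range(num_context_turns)]
response = np.random.randint(0, len_vocab, size=(batch_size, max_sequence_length))

# ``network`` would be a configured BiLSTMGRUSiameseNetwork instance:
# model = network.create_model()
# scores = model.predict(context + [response])
# In crossentropy mode the output has shape (batch_size, 1) and holds
# sigmoid scores; in triplet mode the model outputs pairwise distances instead.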