# Source code for deeppavlov.dataset_readers.intent_catcher_reader

# Copyright 2018 Neural Networks and Deep Learning lab, MIPT
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from json import load
from logging import getLogger
from pathlib import Path
from typing import Dict, List, Tuple

from deeppavlov.core.common.registry import register
from deeppavlov.core.data.dataset_reader import DatasetReader

# Name the logger after the module's dotted import path rather than its
# filesystem path, so that it takes part in the package logger hierarchy and
# inherits handlers/levels configured on parent loggers.
log = getLogger(__name__)


@register('intent_catcher_reader')
class IntentCatcherReader(DatasetReader):
    """Reader for Intent Catcher dataset in json format.

    Every split is stored in its own JSON file whose top-level object maps an
    intent label to a collection of phrases belonging to that label.
    """

    def read(self, data_path: str, *args, **kwargs) -> Dict[str, List[Tuple[str, str]]]:
        """Read the train, valid and test splits found under ``data_path``.

        Args:
            data_path: directory that contains the dataset files.
            *args: unused positional arguments.
            **kwargs: optional ``train``, ``valid`` and ``test`` keys that
                override the file name of the corresponding split (the
                default is ``"<split>.json"``). Passing ``None`` for
                ``valid`` or ``test`` skips that split.

        Returns:
            A dictionary with the keys ``"train"``, ``"valid"`` and ``"test"``,
            each holding a list of ``(phrase, label)`` tuples. Splits that are
            skipped or whose file is missing are left as empty lists.

        Raises:
            Exception: if the train file does not exist under ``data_path``.
        """
        data_types = ["train", "valid", "test"]

        # The train split is mandatory: fail early when it is missing.
        train_file = kwargs.get('train', 'train.json')
        if not Path(data_path, train_file).exists():
            raise Exception(
                "data path {} does not exist or is empty!".format(data_path))

        data = {data_type: [] for data_type in data_types}
        for data_type in data_types:
            file_name = kwargs.get(data_type, '{}.json'.format(data_type))
            if file_name is None:
                continue

            file_path = Path(data_path).joinpath(file_name)
            if not file_path.exists():
                # Missing splits are tolerated, they simply stay empty.
                log.warning("Cannot find {} file".format(file_path))
                continue

            # JSON is read as UTF-8 explicitly so that non-ASCII phrases do
            # not depend on the platform's locale encoding.
            with open(file_path, encoding='utf-8') as fp:
                phrases_by_label = load(fp)

            for label, phrases in phrases_by_label.items():
                data[data_type].extend((phrase, label) for phrase in phrases)

        return data