# Source code for deeppavlov.models.kbqa.wiki_parser_online

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from logging import getLogger
from typing import Dict, List, Optional, Tuple, Union

import requests

from deeppavlov.core.common.registry import register

log = getLogger(__name__)


@register('wiki_parser_online')
class WikiParserOnline:
    """Extract relations and labels of entities by querying a Wikidata SPARQL endpoint over HTTP."""

    # Number of times a query is sent before giving up.
    _MAX_ATTEMPTS = 5

    def __init__(self, url: str, timeout: float = 0.5, **kwargs) -> None:
        """

        Args:
            url: address of the query service; every query is sent to it with ``requests.get``
            timeout: timeout passed to ``requests.get`` for each single attempt
            **kwargs: accepted for compatibility and ignored
        """
        self.url = url
        self.timeout = timeout

    def get_answer(self, query: str) -> Union[List[Dict[str, Dict[str, str]]], bool]:
        """Send a SPARQL query to the service, retrying when an attempt fails.

        The query is sent up to five times. The first attempt that yields a parsed JSON response
        ends the loop, whatever that response contains.

        Args:
            query: text of the SPARQL query

        Returns:
            ``response['results']['bindings']`` when the response has a "results" key,
            ``response['boolean']`` when it has a "boolean" key instead, and an empty list
            when it has neither or when every attempt failed.
        """
        data = []
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                response = requests.get(self.url, params={'query': query, 'format': 'json'},
                                        timeout=self.timeout).json()
                if "results" in response:
                    data = response['results']['bindings']
                elif "boolean" in response:
                    data = response['boolean']
                break
            except Exception as e:
                # Best-effort lookup: a failed attempt (network error, timeout, unparsable or
                # malformed response) is retried rather than propagated, but it is logged and
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                log.warning("Query attempt %d of %d to %s failed: %r",
                            attempt, self._MAX_ATTEMPTS, self.url, e)

        return data

    def find_label(self, entity: str, question: str) -> Optional[str]:
        """Turn an entity identifier or a literal value into a human-readable string.

        Args:
            entity: entity id ("Q..."), entity URI ("http://www.wikidata.org/entity/Q...") or a literal;
                double quotes are stripped before processing
            question: not used by this implementation

        Returns:
            For a Q-entity: the first English label returned by the service, or ``None`` when no
            label was obtained. For a value ending with "T00:00:00Z": the part before that suffix.
            Otherwise the (unquoted) value itself.
        """
        entity = str(entity).replace('"', '')
        if entity.startswith("http://www.wikidata.org/entity/Q"):
            # Reduce the full URI to the bare id, e.g. ".../entity/Q42" -> "Q42".
            entity = entity.split('/')[-1]

        if entity.startswith("Q"):
            query = f"SELECT DISTINCT ?label WHERE {{ wd:{entity} rdfs:label ?label . FILTER (lang(?label) = 'en') }}"
            labels = self.get_answer(query)
            if labels:
                return labels[0]["label"]["value"]
            # No label available (empty result or all attempts failed).
            return None

        if entity.endswith("T00:00:00Z"):
            return entity.split('T00:00:00Z')[0]

        return entity

    def find_rels(self, entity: str, direction: str, rel_type: str = "no_type") -> List[str]:
        """Find the relations (properties) attached to an entity.

        Args:
            entity: entity id, inserted into the query as ``wd:{entity}``
            direction: "forw" to look for triples where the entity is the subject;
                any other value looks for triples where the entity is the object
            rel_type: when not "no_type", only relations starting with
                "http://www.wikidata.org/prop/{rel_type}" are kept; otherwise only those
                starting with "http://www.wikidata.org/prop/P"

        Returns:
            List of relation URIs that passed the prefix filter; the empty answer of
            ``get_answer`` is returned unchanged when nothing was found.
        """
        if direction == "forw":
            query = f"SELECT DISTINCT ?rel WHERE {{ wd:{entity} ?rel ?obj . }}"
        else:
            query = f"SELECT DISTINCT ?rel WHERE {{ ?subj ?rel wd:{entity} . }}"

        rels = self.get_answer(query)
        if rels:
            if rel_type != "no_type":
                start_str = f"http://www.wikidata.org/prop/{rel_type}"
            else:
                start_str = "http://www.wikidata.org/prop/P"
            rel_values = [rel["rel"]["value"] for rel in rels]
            rels = [value for value in rel_values if value.startswith(start_str)]
        return rels