Source code for fairgraph.queries

"""
This module provides Python classes to assist in writing Knowledge Graph queries.
"""

# Copyright 2018-2024 CNRS

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations
from typing import Optional, List, Any, Dict, Union
from uuid import UUID
from datetime import date, datetime
import logging
from warnings import warn

from openminds.base import IRI, EmbeddedMetadata, LinkedMetadata, Node
from .utility import as_list, expand_uri


logger = logging.getLogger("fairgraph")


class PathElement:
    """
    A single element in a multi-element query path, carrying optional
    ``reverse`` and ``type_filter`` settings for that step.

    Args:
        uri (str): The URI for this path step.
        reverse (bool): Whether to follow this link in reverse. Defaults to False.
        type_filter (str or list of str, optional): Type URI(s) to filter on at this step.
    """

    def __init__(self, uri: str, reverse: bool = False, type_filter: Optional[Union[str, List[str]]] = None):
        self.uri = uri
        self.reverse = reverse
        self.type_filter = type_filter

    def __repr__(self):
        return f"PathElement('{self.uri}', reverse={self.reverse}, type_filter={self.type_filter!r})"

    def serialize(self) -> Union[str, Dict[str, Any]]:
        if not self.reverse and not self.type_filter:
            return self.uri
        d: Dict[str, Any] = {"@id": self.uri}
        if self.type_filter:
            if isinstance(self.type_filter, (list, tuple)):
                d["typeFilter"] = [{"@id": t} for t in self.type_filter]
            else:
                d["typeFilter"] = {"@id": self.type_filter}
        if self.reverse:
            d["reverse"] = True
        return d


[docs] class Filter: """ A filter for querying Knowledge Graph nodes. Args: operation (str): The operation for the filter. Options are: ("CONTAINS", "EQUALS", "IS_EMPTY", "STARTS_WITH", "ENDS_WITH", "REGEX") parameter (str, optional): A parameter name for the filter. value (str, optional): The value to filter on. Methods: serialize: Returns a dictionary containing the serialized filter. """ def __init__(self, operation: str, parameter: Optional[str] = None, value: Optional[str] = None): self.operation = operation self.parameter = parameter self.value = value def __repr__(self): repr = f"Filter(operation='{self.operation}'" if self.parameter: repr += f", parameter='{self.parameter}'" if self.value: repr += f", value='{self.value}'" return repr + ")"
[docs] def serialize(self): data = {"op": self.operation} if self.parameter: data["parameter"] = self.parameter if self.value: data["value"] = self.value return data
[docs] class QueryProperty: """ A property for a Knowledge Graph query. Args: path (URI): The path of the property as a URI. name (str, optional): The name of the property to be used in the returned results. filter (Filter, optional): A filter based on the property. sorted (bool, optional): Whether to sort the results based on the property. Defaults to False. required (bool, optional): Whether the property is required. Defaults to False. ensure_order (bool, optional): Whether to ensure the ordering of results is maintained. Defaults to False. properties (List[QueryProperty], optional): A list of sub-properties. type_filter (URI, optional): Ensure that only objects that match the given type URI are returned. reverse (bool, optional): Whether the link defined by the path should be followed in the reverse direction. Defaults to False. expect_single (bool, optional): Whether to expect a single element in the result. Defaults to False. Methods: add_property: Adds a sub-property to the QueryProperty object. serialize: Returns a dictionary containing the serialized QueryProperty. Example: >>> p = QueryProperty( ... "https://openminds.ebrains.eu/vocab/fullName", ... name="full_name", ... filter=Filter("CONTAINS", parameter="name"), ... sorted=True, ... required=True ... ) """ def __init__( self, path: Union[str, List[Union[str, PathElement]]], name: Optional[str] = None, filter: Optional[Filter] = None, sorted: bool = False, required: bool = False, ensure_order: bool = False, properties: Optional[List[QueryProperty]] = None, type_filter: Optional[Union[str, List[str]]] = None, reverse: bool = False, expect_single: bool = False, ): self.path = path self.name = name self.filter = filter self.sorted = sorted self.required = required self.ensure_order = ensure_order self.properties = properties or [] self.type_filter = type_filter self.reverse = reverse self.expect_single = expect_single # Normalize path to a list of PathElement for clean serialization if isinstance(path, str): self._path_elements = [PathElement(path, reverse=reverse, type_filter=type_filter)] else: has_path_elements = any(isinstance(p, PathElement) for p in path) if has_path_elements and (reverse or type_filter is not None): raise ValueError( "Cannot use top-level 'reverse' or 'type_filter' when path contains PathElement objects; " "set those on the PathElement directly." ) normalized = [] for i, p in enumerate(path): if isinstance(p, PathElement): normalized.append(p) elif i == 0 and not has_path_elements: # backwards compat: top-level reverse/type_filter apply to first element normalized.append(PathElement(p, reverse=reverse, type_filter=type_filter)) else: normalized.append(PathElement(p)) self._path_elements = normalized for prop in self.properties: if prop.sorted: raise ValueError("Sorting is only allowed on the root level of a query.") def __repr__(self): return f"QueryProperty({self.path}, name={self.name}, reverse={self.reverse})"
[docs] def add_property(self, prop: QueryProperty): assert isinstance(prop, QueryProperty) if prop.sorted: raise ValueError("Sorting is only allowed on the root level of a query.") self.properties.append(prop)
[docs] def serialize(self) -> Dict[str, Any]: serialized_elements = [pe.serialize() for pe in self._path_elements] if isinstance(self.path, str): data: Dict[str, Any] = {"path": serialized_elements[0]} else: data: Dict[str, Any] = {"path": serialized_elements} if self.name: data["propertyName"] = self.name if self.filter: data["filter"] = self.filter.serialize() if self.sorted: data["sort"] = True if self.required: data["required"] = True if self.ensure_order: data["ensureOrder"] = True if self.properties: data["structure"] = [prop.serialize() for prop in self.properties] if self.expect_single: data["singleValue"] = "FIRST" return data
[docs] class Query: """ A Python representation of an EBRAINS Knowledge Graph query, which can be serialized to the JSON-LD used by the kg-core query API. Args: node_type (URI): The URI of the node type to query. label (str, optional): A label for this query. space (Optional[str], optional): The KG space to query. properties (Optional[List[QueryProperty]], optional): A list of QueryProperty objects representing the properties to include in the results. Methods: add_property: Adds a QueryProperty object to the list of properties to include. serialize: Returns a JSON-LD representation of the query, suitable for sending to the KG. Example: >>> q = Query( ... node_type="https://openminds.ebrains.eu/core/ModelVersion", ... label="fg-testing-modelversion", ... space="model", ... properties=[ ... QueryProperty("@type"), ... QueryProperty( ... "https://openminds.ebrains.eu/vocab/fullName", ... name="vocab:fullName", ... filter=Filter("CONTAINS", parameter="name"), ... sorted=True, ... required=True, ... ), ... QueryProperty( ... "https://openminds.ebrains.eu/vocab/versionIdentifier", ... name="vocab:versionIdentifier", ... filter=Filter("EQUALS", parameter="version"), ... required=True, ... ), ... QueryProperty( ... "https://openminds.ebrains.eu/vocab/format", ... name="vocab:format", ... ensure_order=True, ... properties=[ ... QueryProperty("@id", filter=Filter("EQUALS", parameter="format")), ... QueryProperty("@type"), ... ], ... ) ... ) """ def __init__( self, node_type: str, label: Optional[str] = None, space: Optional[str] = None, properties: Optional[List[QueryProperty]] = None, ): self.node_type = node_type self.label = label self.space = space self.properties = [QueryProperty("@id", filter=Filter("EQUALS", parameter="id"))] if properties: self.properties.extend(properties) if space: found = False for property in self.properties: if property.path == "https://core.kg.ebrains.eu/vocab/meta/space": property.filter = Filter("EQUALS", value=self.space) found = True if not found: self.properties.append( QueryProperty( "https://core.kg.ebrains.eu/vocab/meta/space", name="query:space", filter=Filter("EQUALS", value=self.space), ) )
[docs] def add_property(self, prop: QueryProperty): assert isinstance(prop, QueryProperty) self.properties.append(prop)
[docs] def serialize(self) -> Dict[str, Any]: query = { "@context": { "@vocab": "https://core.kg.ebrains.eu/vocab/query/", "query": "https://schema.hbp.eu/myQuery/", "propertyName": {"@id": "propertyName", "@type": "@id"}, "merge": {"@type": "@id", "@id": "merge"}, "path": {"@id": "path", "@type": "@id"}, }, "meta": { "type": self.node_type, "description": "Automatically generated by fairgraph", }, "structure": [prop.serialize() for prop in self.properties], } if self.label: query["meta"]["name"] = self.label return query
# todo: I think only one property can have "sort": True - need to check this def _get_query_property_name(property, possible_classes): if isinstance(property.path, str): property_name = property.path else: assert isinstance(property.path, list) found_match = False for cls in possible_classes: for path in property.path: for prop in cls.properties: if path == prop.path: property_name = path found_match = True break if found_match: break if found_match: break assert found_match return property_name def get_query_properties( property, context, follow_links: Optional[Dict[str, Any]] = None, with_reverse_properties: Optional[bool] = False ) -> List[QueryProperty]: """ Generate one or more QueryProperty instances for this property, for use in constructing a KG query definition. """ expanded_path = expand_uri(property.path, context) properties = [] if any(issubclass(_type, EmbeddedMetadata) for _type in property.types): if not all(issubclass(_type, EmbeddedMetadata) for _type in property.types): warn(f"Mixed types in {property}") return properties for cls in property.types: if len(property.types) > 1: property_name = f"{property.path}__{cls.__name__}" assert isinstance(cls.type_, str) type_filter = cls.type_ else: property_name = property.path type_filter = None properties.append( QueryProperty( expanded_path, name=property_name, reverse=bool(property.reverse), type_filter=type_filter, ensure_order=property.multiple, expect_single=property.is_link and not property.multiple, properties=cls.generate_query_properties(follow_links, with_reverse_properties), ) ) elif any(issubclass(_type, LinkedMetadata) for _type in property.types): assert all(issubclass(_type, LinkedMetadata) for _type in property.types) if follow_links is not None: for cls in property.types: property_name = _get_query_property_name(property, possible_classes=[cls]) if len(property.types) > 1: property_name = f"{property_name}__{cls.__name__}" assert isinstance(cls.type_, str) type_filter = cls.type_ else: type_filter = None properties.append( QueryProperty( expanded_path, name=property_name, reverse=bool(property.reverse), type_filter=type_filter, ensure_order=property.multiple, expect_single=property.is_link and not property.multiple, properties=[ QueryProperty("@id"), *cls.generate_query_properties(follow_links, with_reverse_properties), ], ) ) else: if isinstance(property.path, str): property_name = property.path properties.append( QueryProperty( expanded_path, name=property_name, reverse=bool(property.reverse), type_filter=None, ensure_order=property.multiple, expect_single=property.is_link and not property.multiple, properties=[ QueryProperty("@id"), QueryProperty("@type"), ], ) ) else: assert isinstance(property.path, list) logger.warning(f"Cannot yet handle case where property.path is a list: {property}") else: assert not property.reverse properties.append( QueryProperty( expanded_path, name=property.path, reverse=bool(property.reverse), ensure_order=property.multiple, expect_single=property.is_link and not property.multiple, ) ) return properties def get_query_filter_property(property, context, filter: Any) -> QueryProperty: """ Generate a QueryProperty instance containing a filter, for use in constructing a KG query definition. """ assert filter is not None if isinstance(filter, dict): # we pass the filter through to the next level filter_obj = None else: # we have a filter value for this property if property.types[0] in (int, float, bool, datetime, date): op = "EQUALS" else: op = "CONTAINS" filter_obj = Filter(op, value=filter) expanded_path = expand_uri(property.path, context) if any(issubclass(_type, Node) for _type in property.types): assert all(issubclass(_type, Node) for _type in property.types) prop = QueryProperty(expanded_path, name=f"Q{property.name}", required=True, reverse=property.reverse) if filter_obj: if filter_obj.value.startswith("https://kg.ebrains.eu/api/instances"): filter_path = "@id" else: filter_path = "http://schema.org/identifier" prop.properties.append(QueryProperty(filter_path, filter=filter_obj)) else: for cls in property.types: child_properties = cls.generate_query_filter_properties(filter) if child_properties: # if the class has properties with the appropriate name # we add them, then break to avoid adding the same # prop twice prop.properties.extend(child_properties) break else: prop = QueryProperty(expanded_path, name=f"Q{property.name}", filter=filter_obj, required=True) return prop def get_filter_value(property, value: Any) -> Union[str, List[str]]: """ Normalize a value for use in a KG query Example: >>> import fairgraph.openminds.core as omcore >>> person = omcore.Person.from_uuid("045f846f-f010-4db8-97b9-b95b20970bf2", kg_client) >>> prop = Property(name='custodians', types=(omcore.Organization, omcore.Person), ... path="vocab:custodian", multiple=True) >>> get_filter_value(prop, person) https://kg.ebrains.eu/api/instances/045f846f-f010-4db8-97b9-b95b20970bf2 """ from .kgproxy import KGProxy def is_valid(val): if isinstance(val, str): try: val = UUID(val) except ValueError: pass if isinstance(val, str) and IRI in property.types: return True return isinstance(val, (IRI, UUID, *property.types)) or ( isinstance(val, KGProxy) and not set(val.classes).isdisjoint(property.types) ) if isinstance(value, list) and len(value) > 0: valid_type = all(is_valid(item) for item in value) have_multiple = True else: valid_type = is_valid(value) have_multiple = False if not valid_type: if property.name == "hash": # bit of a hack filter_value = value elif isinstance(value, str) and value.startswith("http"): # for @id filter_value = value else: raise TypeError("{} must be of type {}, not {}".format(property.name, property.types, type(value))) filter_items = [] for item in as_list(value): if isinstance(item, IRI): filter_item = item.value elif isinstance(item, (date, datetime)): filter_item = item.isoformat() elif hasattr(item, "id"): filter_item = item.id elif isinstance(item, UUID): # todo: consider using client.uri_from_uuid() # would require passing client as arg filter_item = f"https://kg.ebrains.eu/api/instances/{item}" elif isinstance(item, str) and "+" in item: # workaround for KG bug invalid_char_index = item.index("+") if invalid_char_index < 3: raise ValueError(f"Cannot use {item} as filter, contains invalid characters") filter_item = item[:invalid_char_index] warn(f"Truncating filter value {item} --> {filter_item}") else: filter_item = item filter_items.append(filter_item) if have_multiple: return filter_items else: return filter_items[0]