# Source: openspg/python/knext/core/schema/schema_ml.py
#
# 1237 lines
# 51 KiB
# Python

# -*- coding: utf-8 -*-
# Copyright 2023 Ant Group CO., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
import re
from enum import Enum
from knext.core.schema import Schema
from knext.core.schema.model import (
EntityType,
ConceptType,
EventType,
StandardType,
Property,
Relation,
)
from knext.core.schema.model.base import (
HypernymPredicateEnum,
BasicTypeEnum,
ConstraintTypeEnum,
AlterOperationEnum,
SpgTypeEnum,
PropertyGroupEnum,
)
class IndentLevel(Enum):
    """Nesting depth of a schema-script line, from type header (0) to sub-property meta (5)."""

    Type = 0  # entity/concept/event/standard type or subtype definition
    TypeMeta = 1  # description/properties/relations of a type
    Predicate = 2  # property/relation name of a type
    PredicateMeta = 3  # description/constraint/rule of a property/relation
    SubProperty = 4  # property about a property
    SubPropertyMeta = 5  # constraint of a sub property
class RegisterUnit(Enum):
    """Keys of the parsing register: the kinds of object the parser can currently be inside."""

    Type = "type"
    Property = "property"
    Relation = "relation"
    SubProperty = "subProperty"
class SPGSchemaMarkLang:
"""
SPG Schema Mark Language Parser
Feature1: parse schema script and then alter the schema of project
Feature2: export schema script from a project

The names declared directly below are class-level defaults. The mutable ones
(sets, dicts, the list) are one shared object for every instance unless an
instance rebinds the name on itself.
"""
# Names of basic/standard types; __init__ fills this from the schema session
internal_type = set()
# Property names that every type already has; __init__ adds the properties of "Thing"
entity_internal_property = set()
event_internal_property = {"eventTime"}
concept_internal_property = {"stdId", "alias"}
# Type-class keywords accepted after the colon in a type definition (see parse_type)
keyword_type = {"EntityType", "ConceptType", "EventType", "StandardType"}
# Semantic relation category -> predicate names allowed in that category
# (validated in check_semantic_relation)
semantic_rel = {
"SYNANT": [
"synonym",
"antonym",
"symbolOf",
"distinctFrom",
"definedAs",
"locatedNear",
"similarTo",
"etymologicallyRelatedTo",
],
"CAU": ["leadTo", "causes", "obstructedBy", "createdBy", "causesDesire"],
"SEQ": [
"happenedBefore",
"hasSubevent",
"hasFirstSubevent",
"hasLastSubevent",
"hasPrerequisite",
],
"IND": ["belongTo"],
"INC": ["isPartOf", "hasA", "madeOf", "derivedFrom", "hasContext"],
"USE": ["usedFor", "capableOf", "receivesAction", "motivatedByGoal"],
}
# Chinese display name of each semantic predicate; used as name_zh for the
# relations generated by the relateTo keyword in parse_type_meta
semantic_rel_zh = {
"synonym": "同义",
"antonym": "反义",
"symbolOf": "象征",
"distinctFrom": "区别于",
"definedAs": "定义为",
"locatedNear": "位置临近",
"similarTo": "类似于",
"etymologicallyRelatedTo": "词源相关",
"leadTo": "导致",
"causes": "引起",
"obstructedBy": "受阻于",
"createdBy": "由...创建",
"causesDesire": "引发欲望",
"happenedBefore": "先于...发生",
"hasSubevent": "拥有子事件",
"hasFirstSubevent": "以...作为开始",
"hasLastSubevent": "以...作为结束",
"hasPrerequisite": "有前提条件",
"belongTo": "属于",
"isPartOf": "是...的一部分",
"hasA": "拥有",
"madeOf": "由…制成",
"derivedFrom": "源自于",
"hasContext": "有上下文",
"usedFor": "用于",
"capableOf": "能够",
"receivesAction": "接受动作",
"motivatedByGoal": "目标驱动",
}
# The object currently being parsed for each register unit (None when not inside one);
# maintained by save_register
parsing_register = {
RegisterUnit.Type: None,
RegisterUnit.Property: None,
RegisterUnit.Relation: None,
RegisterUnit.SubProperty: None,
}
# Indent width last seen at each parsing level (one slot per IndentLevel member);
# written by load_script and used there to resolve dedents
indent_level_pos = [None, None, None, None, None, None]
# State of a multi-line "[[ ... ]]" rule: the predicate receiving the text and
# whether the quote is still open
rule_quote_predicate = None
rule_quote_open = False
# Parsing level of the line being processed and indent width of the previous parsed line
current_parsing_level = 0
last_indent_level = 0
# Namespace declared by the "namespace X" line; None until that line is parsed
namespace = None
# Types parsed from the script, keyed by type name (namespace-qualified where applicable)
types = {}
def __init__(self, filename):
    """
    Create a parser for the schema script `filename` and parse it immediately.

    The built-in property names of "Thing" and the names of all basic/standard
    types are fetched through `Schema` before the script is loaded.

    :param filename: path of the schema script to parse
    """
    self.schema_file = filename
    self.current_line_num = 0

    # Give every parser its own copy of the mutable parsing state. The class
    # only declares defaults; mutating those shared objects through `self`
    # would leak parsed types and register contents from one parser instance
    # into the next.
    cls = type(self)
    self.internal_type = set(cls.internal_type)
    self.entity_internal_property = set(cls.entity_internal_property)
    self.event_internal_property = set(cls.event_internal_property)
    self.concept_internal_property = set(cls.concept_internal_property)
    self.parsing_register = dict.fromkeys(cls.parsing_register)
    self.indent_level_pos = [None] * len(cls.indent_level_pos)
    self.rule_quote_predicate = None
    self.rule_quote_open = False
    self.current_parsing_level = 0
    self.last_indent_level = 0
    self.namespace = None
    self.types = {}

    schema = Schema()
    thing = schema.query_spg_type("Thing")
    for prop in thing.properties:
        self.entity_internal_property.add(prop)
        self.event_internal_property.add(prop)
        self.concept_internal_property.add(prop)

    session = schema.create_session()
    for type_name in session._spg_types:
        spg_type = session.get(type_name)
        if spg_type.spg_type_enum in [
            SpgTypeEnum.Basic,
            SpgTypeEnum.Standard,
        ]:
            self.internal_type.add(spg_type.name)

    self.load_script()
def save_register(self, element: RegisterUnit, value):
    """
    Record `value` as the object currently being parsed for `element` and
    clear the register slots that no longer apply.

    A new type clears property, relation and sub property; a new property
    clears relation and sub property; a new relation clears property and
    sub property. Storing a sub property clears nothing.
    """
    register = self.parsing_register
    register[element] = value

    if element == RegisterUnit.Type:
        stale_units = (
            RegisterUnit.Property,
            RegisterUnit.Relation,
            RegisterUnit.SubProperty,
        )
    elif element == RegisterUnit.Property:
        stale_units = (RegisterUnit.Relation, RegisterUnit.SubProperty)
    elif element == RegisterUnit.Relation:
        stale_units = (RegisterUnit.Property, RegisterUnit.SubProperty)
    else:
        stale_units = ()

    for unit in stale_units:
        register[unit] = None
def adjust_parsing_level(self, step):
    """
    Move the parser to another indent level and reset the register entries
    of the level that was left.

    :param step: 0 jumps back to the type level, a negative value moves that
                 many levels outwards, 1 moves one level inwards; any other
                 positive value leaves the level unchanged
    """
    if step == 0:
        self.current_parsing_level = IndentLevel.Type.value
        # type parsing is finished, forget the type and everything below it
        self.save_register(RegisterUnit.Type, None)
        return

    if step < 0:
        level = max(0, self.current_parsing_level + step)
        self.current_parsing_level = level
        register = self.parsing_register
        if level == IndentLevel.PredicateMeta.value:
            # sub property parsing is finished
            self.save_register(RegisterUnit.SubProperty, None)
        elif level == IndentLevel.Predicate.value:
            # predicate parsing is finished: replace the active property or
            # relation with a "_" placeholder of the same kind
            if register[RegisterUnit.Property] is not None:
                placeholder = Property(name="_", object_type_name="Thing")
                self.save_register(RegisterUnit.Property, placeholder)
            elif register[RegisterUnit.Relation] is not None:
                placeholder = Relation(name="_", object_type_name="Thing")
                self.save_register(RegisterUnit.Relation, placeholder)
        return

    if step == 1:
        assert self.current_parsing_level + 1 < len(IndentLevel), self.error_msg(
            "Invalid indentation (too many levels?)"
        )
        self.current_parsing_level += 1
def error_msg(self, msg):
    """Return `msg` prefixed with the number of the script line being parsed."""
    return f"Line# {self.current_line_num}: {msg}"
def get_type_name_with_ns(self, type_name: str):
    """
    Return `type_name` qualified with the script namespace.

    A name that already contains a dot is returned unchanged.
    """
    already_qualified = "." in type_name
    return type_name if already_qualified else f"{self.namespace}.{type_name}"
def parse_type(self, expression):
    """
    Parse an unindented line of the schema script.

    Three forms are accepted:

    * ``namespace A``    - declares the namespace (allowed once)
    * ``A(B): C``        - defines type A with display name B and type class C
                           (one of the keywords in ``keyword_type``)
    * ``A(B) -> C:``     - defines A as a sub type of the already defined
                           entity/event type C

    The parsed type is stored in ``self.types`` and becomes the current type
    of the parsing register.

    :param expression: the stripped script line
    :raises AssertionError: when the definition violates a schema rule
    :raises Exception: when the line matches none of the three forms
    """
    namespace_match = re.match(r"^namespace\s+([a-zA-Z0-9]+)$", expression)
    if namespace_match:
        assert self.namespace is None, self.error_msg(
            "Duplicated namespace define, please ensure define it only once"
        )
        self.namespace = namespace_match.group(1)
        return

    type_match = re.match(
        r"^([a-zA-Z0-9\.]+)\((\w+)\):\s*?([a-zA-Z0-9,]+)$", expression
    )
    if type_match:
        assert self.namespace is not None, self.error_msg(
            "Missing namespace, please define namespace at the first"
        )
        type_name = type_match.group(1)
        type_name_zh = type_match.group(2).strip()
        type_class = type_match.group(3).strip()
        assert type_class in self.keyword_type, self.error_msg(
            f"{type_class} is illegal, please define it before current line"
        )
        spg_type = None
        if type_class == "EntityType":
            spg_type = EntityType(
                name=self.get_type_name_with_ns(type_name), name_zh=type_name_zh
            )
        elif type_class == "ConceptType":
            spg_type = ConceptType(
                name=self.get_type_name_with_ns(type_name),
                name_zh=type_name_zh,
                hypernym_predicate=HypernymPredicateEnum.IsA,
            )
        elif type_class == "EventType":
            spg_type = EventType(
                name=self.get_type_name_with_ns(type_name), name_zh=type_name_zh
            )
        elif type_class == "StandardType":
            # standard types keep the name exactly as written (no namespace)
            spg_type = StandardType(name=f"{type_name}", name_zh=type_name_zh)
            spg_type.spreadable = False
            assert type_name.startswith("STD."), self.error_msg(
                "The name of standard type must start with STD."
            )
        ns_type_name = self.get_type_name_with_ns(type_name)
        assert ns_type_name not in self.types, self.error_msg(
            f'Type "{type_name}" is duplicated in the schema'
        )
        self.types[ns_type_name] = spg_type
        self.save_register(RegisterUnit.Type, spg_type)
        return

    sub_type_match = re.match(
        r"^([a-zA-Z0-9]+)\((\w+)\)\s*?->\s*?([a-zA-Z0-9\.]+):$", expression
    )
    if sub_type_match:
        assert self.namespace is not None, self.error_msg(
            "Missing namespace, please define namespace at the first"
        )
        type_name = sub_type_match.group(1)
        type_name_zh = sub_type_match.group(2).strip()
        type_class = sub_type_match.group(3).strip()
        if "." not in type_class:
            ns_type_class = self.get_type_name_with_ns(type_class)
        else:
            ns_type_class = type_class
        assert (
            type_class not in self.keyword_type
            and type_class not in self.internal_type
        ), self.error_msg(f"{type_class} is not a valid inheritable type")
        assert ns_type_class in self.types, self.error_msg(
            f"{type_class} not found, please define it first"
        )
        parent_spg_type = self.types[ns_type_class]
        assert parent_spg_type.spg_type_enum in [
            SpgTypeEnum.Entity,
            SpgTypeEnum.Event,
        ], self.error_msg(
            f'"{type_class}" cannot be inherited, only entity/event type can be inherited.'
        )

        ns_type_name = f"{self.namespace}.{type_name}"
        # Same duplicate check as for plain type definitions; without it a
        # repeated sub type silently replaces the earlier definition.
        assert ns_type_name not in self.types, self.error_msg(
            f'Type "{type_name}" is duplicated in the schema'
        )

        # The sub type takes the kind of its parent. `type_class` is the
        # parent's *name* here (and was asserted above not to be a keyword),
        # so the kind must be read from the parent type itself.
        if parent_spg_type.spg_type_enum == SpgTypeEnum.Event:
            spg_type = EventType(name=ns_type_name, name_zh=type_name_zh)
        else:
            spg_type = EntityType(name=ns_type_name, name_zh=type_name_zh)
        # NOTE(review): this resets the name to the unqualified form right
        # after constructing it qualified; kept as in the original - confirm
        # this is what the model classes expect.
        spg_type.name = type_name
        spg_type.name_zh = type_name_zh
        spg_type.parent_type_name = ns_type_class
        self.types[ns_type_name] = spg_type
        self.save_register(RegisterUnit.Type, spg_type)
        return

    raise Exception(
        self.error_msg(
            "unrecognized expression, expect namespace A or A(B):C or A(B)->C"
        )
    )
def parse_type_meta(self, expression):
    """
    Parse a meta line of the current SPG type.

    Accepted keys: ``desc``, ``properties``, ``relations``,
    ``hypernymPredicate`` (concept types only), ``regular`` and ``spreadable``
    (standard types only) and ``relateTo`` (concept types only).

    :param expression: the stripped script line, e.g. ``desc: some text``
    :raises AssertionError: when the key is unknown or not allowed for the
                            kind of the current type
    """
    match = re.match(
        r"^(desc|properties|relations|hypernymPredicate|regular|spreadable|relateTo):\s*?(.*)$",
        expression,
    )
    assert match, self.error_msg(
        "Unrecognized expression, expect desc:|properties:|relations:"
    )

    type_meta = match.group(1)
    meta_value = match.group(2).strip()
    current_type = self.parsing_register[RegisterUnit.Type]

    if type_meta == "desc" and len(meta_value) > 0:
        current_type.desc = meta_value

    elif type_meta == "properties":
        assert current_type.spg_type_enum != SpgTypeEnum.Concept, self.error_msg(
            "Concept type does not allow defining properties."
        )
        # placeholder marks "inside the properties section" until a real
        # property line is parsed
        self.save_register(
            RegisterUnit.Property, Property(name="_", object_type_name="Thing")
        )

    elif type_meta == "relations":
        self.save_register(
            RegisterUnit.Relation, Relation(name="_", object_type_name="Thing")
        )

    elif type_meta == "hypernymPredicate":
        # One entry per accepted keyword, so each keyword maps to its own
        # enum member ("locateAt" was previously unreachable).
        hypernym_predicates = {
            "isA": HypernymPredicateEnum.IsA,
            "locateAt": HypernymPredicateEnum.LocateAt,
            "mannerOf": HypernymPredicateEnum.MannerOf,
        }
        assert meta_value in hypernym_predicates, self.error_msg(
            "Invalid hypernym predicate, expect isA or locateAt or mannerOf"
        )
        assert current_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            "Hypernym predicate is available for concept type only"
        )
        current_type.hypernym_predicate = hypernym_predicates[meta_value]

    elif type_meta == "regular":
        assert current_type.spg_type_enum == SpgTypeEnum.Standard, self.error_msg(
            "Regular is available for standard type only"
        )
        current_type.constraint = {"REGULAR": meta_value}

    elif type_meta == "spreadable":
        assert current_type.spg_type_enum == SpgTypeEnum.Standard, self.error_msg(
            "Spreadable is available for standard type only"
        )
        assert meta_value == "True" or meta_value == "False", self.error_msg(
            "Spreadable only accept True or False as its value"
        )
        current_type.spreadable = meta_value == "True"

    elif type_meta == "relateTo":
        assert current_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            "RelateTo definition is available for concept type only"
        )
        for concept in meta_value.split(","):
            c = self.get_type_name_with_ns(concept.strip())
            assert (
                c in self.types
                and self.types[c].spg_type_enum == SpgTypeEnum.Concept
            ), self.error_msg(
                f"{concept.strip()} is not a concept type, "
                f"concept type only allow relationships defined between concept types"
            )
            # add every semantic predicate except the inductive ("IND")
            # category as a relation to the target concept
            for k in self.semantic_rel:
                if k == "IND":
                    continue
                for p in self.semantic_rel[k]:
                    predicate = Relation(
                        name=p, name_zh=self.semantic_rel_zh[p], object_type_name=c
                    )
                    current_type.add_relation(predicate)
    return
def check_semantic_relation(self, predicate_name, predicate_class):
    """
    Check if the definition of a semantic relation is correct.

    :param predicate_name: ``CATEGORY#predicate`` as written in the script,
                           e.g. ``CAU#leadTo``
    :param predicate_class: name of the object type, with or without namespace
    :raises AssertionError: when the category or predicate is unknown, the
                            object type is not defined, or the subject/object
                            kinds are not allowed for the category
    """
    name_arr = predicate_name.split("#")
    short_name = name_arr[0]
    pred_name = name_arr[1]

    # list the categories from the table itself so the message cannot drift
    # from what is actually accepted
    categories = "/".join(self.semantic_rel)
    assert short_name in self.semantic_rel, self.error_msg(
        f"{short_name} is incorrect, expect {categories}"
    )
    assert pred_name in self.semantic_rel[short_name], self.error_msg(
        f'{pred_name} is incorrect, expect {" / ".join(self.semantic_rel[short_name])}'
    )

    subject_type = self.parsing_register[RegisterUnit.Type]

    predicate_class_ns = predicate_class
    if "." not in predicate_class:
        predicate_class_ns = f"{self.namespace}.{predicate_class}"
    assert predicate_class_ns in self.types, self.error_msg(
        f"{predicate_class} is illegal"
    )
    object_type = self.types[predicate_class_ns]

    if short_name == "SYNANT":
        assert subject_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            "Only concept types could define synonym/antonym relation"
        )
        assert subject_type.name == predicate_class_ns, self.error_msg(
            "Synonymy/antonym relation should be self-referential"
        )
    elif short_name == "CAU":
        # both concept and event types are accepted on either side
        assert subject_type.spg_type_enum in [
            SpgTypeEnum.Concept,
            SpgTypeEnum.Event,
        ], self.error_msg("Only concept/event types could define causal relation")
        assert object_type.spg_type_enum in [
            SpgTypeEnum.Concept,
            SpgTypeEnum.Event,
        ], self.error_msg(
            f'"{predicate_class}" must be a concept/event type to conform to the definition of causal relation'
        )
    elif short_name == "SEQ":
        assert subject_type.spg_type_enum in [
            SpgTypeEnum.Event,
            SpgTypeEnum.Concept,
        ], self.error_msg(
            "Only concept/event types could define sequential relation"
        )
        # [-1] instead of [1]: building the message must not raise
        # IndexError when the subject name carries no namespace
        assert (
            subject_type.spg_type_enum == object_type.spg_type_enum
        ), self.error_msg(
            f'"{predicate_class}" should keep the same type with "{subject_type.name.split(".")[-1]}"'
        )
    elif short_name == "IND":
        assert subject_type.spg_type_enum in [
            SpgTypeEnum.Entity,
            SpgTypeEnum.Event,
        ], self.error_msg("Only entity/event types could define inductive relation")
        assert object_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            f'"{predicate_class}" must be a concept type to conform to the definition of inductive relation'
        )
    elif short_name == "INC":
        assert subject_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            "Only concept types could define inclusive relation"
        )
        assert subject_type.name == predicate_class_ns, self.error_msg(
            "Inclusive relation should be self-referential"
        )
    elif short_name == "USE":
        assert subject_type.spg_type_enum == SpgTypeEnum.Concept, self.error_msg(
            "Only concept types could define usage relation"
        )
def parse_predicate(self, expression):
"""
parse the property/relation definition of SPG type

The line is expected to look like ``name(displayName): ObjectType``. It is
validated against the current type in the parsing register and then added as
a sub property, a property or a relation, depending on which register slots
are set. This method is also used for sub property lines (see
parsing_dispatch).
"""
match = re.match(
r"^([a-zA-Z0-9#]+)\(([\w\.]+)\):\s*?([a-zA-Z0-9,\.]+)$", expression
)
assert match, self.error_msg(
"Unrecognized expression, expect pattern like english(Chinese):Type"
)
predicate_name = match.group(1)
predicate_name_zh = match.group(2).strip()
predicate_class = match.group(3).strip()
cur_type = self.parsing_register[RegisterUnit.Type]
type_name = cur_type.name
# Concept types may only declare semantic predicates written as CATEGORY#name
if cur_type.spg_type_enum == SpgTypeEnum.Concept:
assert "#" in predicate_name, self.error_msg(
"Concept type only accept following categories of property/relation: #INC/#CAU/#SYNANT/#IND/#USE/#SEQ"
)
if "#" in predicate_name:
# validate the semantic relation, then keep only the part after '#'
self.check_semantic_relation(predicate_name, predicate_class)
predicate_name = predicate_name.split("#")[1]
else:
# a reserved semantic predicate name must not be used without its category prefix
for semantic_short in self.semantic_rel.values():
assert predicate_name not in semantic_short, self.error_msg(
f"{predicate_name} is a semantic predicate, please add the semantic prefix"
)
# NOTE(review): this check uses the raw predicate_class; a comma-separated
# list (see the multi-type subject handling below) does not look like it can
# pass it - confirm.
assert (
f"{self.namespace}.{predicate_class}" in self.types
or predicate_class in self.internal_type
), self.error_msg(f"{predicate_class} is illegal")
assert predicate_name not in self.entity_internal_property, self.error_msg(
f"property {predicate_name} is the default property of type"
)
# restrictions on which kind of type the predicate may point to
if predicate_class not in self.internal_type:
predicate_type = self.types[f"{self.namespace}.{predicate_class}"]
if predicate_type is not None:
if cur_type.spg_type_enum == SpgTypeEnum.Concept:
assert (
predicate_type.spg_type_enum == SpgTypeEnum.Concept
), self.error_msg(
"Concept type only allow relationships that point to themselves"
)
elif cur_type.spg_type_enum == SpgTypeEnum.Entity:
assert (
predicate_type.spg_type_enum != SpgTypeEnum.Event
), self.error_msg(
"Relationships of entity types are not allowed to point to event types; "
"instead, they are only permitted to point from event types to entity types, "
"adhering to the principle of moving from dynamic to static."
)
# duplicate check: against the sub properties of the active relation if there
# is one, otherwise against the properties of the current type
if self.parsing_register[RegisterUnit.Relation] is not None:
assert (
predicate_name
not in self.parsing_register[RegisterUnit.Relation].sub_properties
), self.error_msg(
f'Property "{predicate_name}" is duplicated under the relation '
f"{self.parsing_register[RegisterUnit.Relation].name}"
)
else:
assert (
predicate_name
not in self.parsing_register[RegisterUnit.Type].properties
), self.error_msg(
f'Property "{predicate_name}" is duplicated under the type {type_name[type_name.index(".") + 1:]}'
)
if predicate_class == "ConceptType":
assert not self.is_internal_property(
predicate_name, SpgTypeEnum.Concept
), self.error_msg(
f"property {predicate_name} is the default property of ConceptType"
)
if predicate_class == "EventType":
assert not self.is_internal_property(
predicate_name, SpgTypeEnum.Event
), self.error_msg(
f"property {predicate_name} is the default property of EventType"
)
# qualify the object type with the namespace unless it is already qualified
# or is a basic type
if (
"." not in predicate_class
and predicate_class not in BasicTypeEnum.__members__
):
predicate_class = f"{self.namespace}.{predicate_class}"
if self.parsing_register[RegisterUnit.SubProperty]:
# predicate is sub property
predicate = Property(name=predicate_name, object_type_name=predicate_class)
if self.parsing_register[RegisterUnit.Property] is not None:
self.parsing_register[RegisterUnit.Property].add_sub_property(predicate)
elif self.parsing_register[RegisterUnit.Relation] is not None:
self.parsing_register[RegisterUnit.Relation].add_sub_property(predicate)
self.save_register(RegisterUnit.SubProperty, predicate)
elif self.parsing_register[RegisterUnit.Property]:
# predicate is property
predicate = Property(name=predicate_name, object_type_name=predicate_class)
# the "subject" property of an event type gets the Subject property group
if (
self.parsing_register[RegisterUnit.Type].spg_type_enum
== SpgTypeEnum.Event
and predicate_name == "subject"
):
assert predicate_class not in self.internal_type, self.error_msg(
f"The subject of event type only allows entity/concept type"
)
predicate.property_group = PropertyGroupEnum.Subject
if "," in predicate_class:
# multi-types for subject
predicate.object_type_name = "Text"
subject_types = predicate_class.split(",")
for subject_type in subject_types:
subject_type = subject_type.strip()
assert (
subject_type not in BasicTypeEnum.__members__
), self.error_msg(
f"{predicate_class} is illegal for subject in event type"
)
if "." not in subject_type:
# NOTE(review): this interpolates predicate_class (the whole comma-separated
# list), not the single subject_type being processed - looks unintended, confirm.
subject_type = f"{self.namespace}.{predicate_class}"
assert subject_type in self.types, self.error_msg(
f"{predicate_class} is illegal"
)
subject_predicate = Property(
name=f"subject{subject_type}", object_type_name=subject_type
)
subject_predicate.property_group = PropertyGroupEnum.Subject
self.parsing_register[RegisterUnit.Type].add_property(
subject_predicate
)
self.parsing_register[RegisterUnit.Type].add_property(predicate)
self.save_register(RegisterUnit.Property, predicate)
else:
# predicate is relation
assert predicate_class in self.types, self.error_msg(
f"{predicate_class} is illegal"
)
# relations are looked up by "<name>_<object type>"
assert (
f"{predicate_name}_{predicate_class}"
not in self.parsing_register[RegisterUnit.Type].relations
), self.error_msg(
f'Relation "{match.group()}" is duplicated under the type {type_name[type_name.index(".") + 1:]}'
if self.parsing_register[RegisterUnit.Type].spg_type_enum
!= SpgTypeEnum.Concept
else f'Relation "{match.group()}" is already defined by keyword relateTo '
f'under the {type_name[type_name.index(".") + 1:]}'
)
predicate = Relation(name=predicate_name, object_type_name=predicate_class)
self.parsing_register[RegisterUnit.Type].add_relation(predicate)
self.save_register(RegisterUnit.Relation, predicate)
predicate.name_zh = predicate_name_zh
def parse_property_meta(self, expression):
    """
    Parse a meta line of a property or sub property.

    Accepted keys: ``desc``, ``properties``, ``constraint`` and ``rule``.
    ``desc`` and ``constraint`` apply to the active sub property if there is
    one, otherwise to the active property.

    :param expression: the stripped script line, e.g. ``constraint: NotNull``
    :raises AssertionError: when the key is not recognised
    """
    match = re.match(r"^(desc|properties|constraint|rule):\s*?(.*)$", expression)
    assert match, self.error_msg(
        "Unrecognized expression, expect desc:|properties:|constraint:|rule:"
    )

    property_meta = match.group(1)
    # `\s*?` is lazy, so the blanks after the colon end up inside group 2;
    # strip them (as parse_type_meta does) so "desc: text" stores "text"
    meta_value = match.group(2).strip()

    sub_property = self.parsing_register[RegisterUnit.SubProperty]
    target = (
        sub_property
        if sub_property is not None
        else self.parsing_register[RegisterUnit.Property]
    )

    if property_meta == "desc" and len(meta_value) > 0:
        if target is not None:
            target.desc = meta_value

    elif property_meta == "constraint":
        if target is not None:
            self.parse_constraint_for_property(meta_value, target)

    elif property_meta == "properties":
        # placeholder marks "inside the sub property section"
        self.save_register(
            RegisterUnit.SubProperty, Property(name="_", object_type_name="Thing")
        )

    elif property_meta == "rule":
        self.parse_predicate_rule(meta_value, RegisterUnit.Property)
def parse_relation_meta(self, expression):
    """
    Parse a meta line of the active relation.

    Accepted keys: ``desc``, ``properties`` and ``rule``.

    :param expression: the stripped script line, e.g. ``desc: some text``
    :raises AssertionError: when the key is not recognised
    """
    match = re.match(r"^(desc|properties|rule):\s*?(.*)$", expression)
    assert match, self.error_msg(
        "Unrecognized expression, expect desc:|properties:|rule:"
    )

    property_meta = match.group(1)
    # `\s*?` is lazy, so the blanks after the colon end up inside group 2;
    # strip them (as parse_type_meta does) so "desc: text" stores "text"
    meta_value = match.group(2).strip()

    if property_meta == "desc" and len(meta_value) > 0:
        self.parsing_register[RegisterUnit.Relation].desc = meta_value

    elif property_meta == "properties":
        # placeholder marks "inside the sub property section"
        self.save_register(
            RegisterUnit.SubProperty, Property(name="_", object_type_name="Thing")
        )

    elif property_meta == "rule":
        self.parse_predicate_rule(meta_value, RegisterUnit.Relation)
def parsing_dispatch(self, expression, parsing_level):
    """
    Hand `expression` to the parser that is responsible for `parsing_level`.

    Predicate and sub property lines share one parser; at the predicate-meta
    level the parser is chosen by whether a property is currently registered.
    Levels outside IndentLevel are ignored.
    """
    if parsing_level == IndentLevel.Type.value:
        handler = self.parse_type
    elif parsing_level == IndentLevel.TypeMeta.value:
        handler = self.parse_type_meta
    elif parsing_level in (
        IndentLevel.Predicate.value,
        IndentLevel.SubProperty.value,
    ):
        handler = self.parse_predicate
    elif parsing_level == IndentLevel.PredicateMeta.value:
        inside_property = self.parsing_register[RegisterUnit.Property] is not None
        handler = (
            self.parse_property_meta if inside_property else self.parse_relation_meta
        )
    elif parsing_level == IndentLevel.SubPropertyMeta.value:
        handler = self.parse_property_meta
    else:
        return

    handler(expression)
def parse_predicate_rule(self, rule, key):
    """
    Parse the logic rule of a property/relation.

    A rule that starts with ``[[`` opens a multi-line rule: the predicate is
    remembered in ``rule_quote_predicate``, ``rule_quote_open`` is set, and
    the following script lines are appended by ``load_script`` until ``]]``.
    Any other rule is stored as-is on the predicate.

    :param rule: rule text with leading whitespace already removed
    :param key: register slot (property or relation) that owns the rule
    """
    if not rule.startswith("[["):
        self.parsing_register[key].logical_rule = rule
        return

    self.rule_quote_predicate = self.parsing_register[key]
    self.rule_quote_open = True
    # Keep everything after the opening "[[" (a slice, not the single
    # character at index 2). The newline keeps this text on its own line,
    # because the following lines are appended verbatim.
    first_line = rule[2:].lstrip()
    self.rule_quote_predicate.logical_rule = first_line + "\n" if first_line else ""
def parse_constraint_for_property(self, expression, prop):
    """
    Parse the constraint definition of a property and add each constraint
    to `prop`.

    ``Enum="a,b"`` and ``Regular="..."`` carry a quoted value; ``MultiValue``
    and ``NotNull`` are bare flags separated by commas. Keywords are matched
    case-insensitively.

    :param expression: text after ``constraint:``
    :param prop: the property that receives the constraints
    """
    if len(expression) == 0:
        return

    pattern = re.compile(r"(Enum|Regular)\s*?=\s*?\"([^\"]+)\"", re.IGNORECASE)
    for kind, value in pattern.findall(expression):
        kind = kind.lower()
        if kind == "enum":
            enum_values = [ev.strip() for ev in value.split(",")]
            prop.add_constraint(ConstraintTypeEnum.Enum, enum_values)
        elif kind == "regular":
            prop.add_constraint(ConstraintTypeEnum.Regular, value)

    # Remove the quoted constraints with the same case-insensitive pattern
    # that extracted them; otherwise a lower-case `regular="a,notnull"` would
    # survive and its value would be read as bare flags below.
    remainder = pattern.sub("", expression)
    for cons in remainder.split(","):
        flag = cons.strip().lower()
        if flag == "multivalue":
            prop.add_constraint(ConstraintTypeEnum.MultiValue)
        elif flag == "notnull":
            prop.add_constraint(ConstraintTypeEnum.NotNull)
def complete_rule(self, rule):
    """
    Return `rule` with a generated Define statement (when it has none) and
    with the namespace added to unqualified type names.

    A rule that does not start with ``Define (`` is wrapped in
    ``Define (s:<type>)-[p:<predicate>]->(o:<object type>) { ... }`` built
    from the parsing register. Afterwards every ``(alias:Type)`` whose type
    has no dot and is not integer/text/float gets the namespace prefix.
    """
    define_header = re.compile(r"Define\s*\(", re.IGNORECASE)
    if not define_header.match(rule.strip()):
        register = self.parsing_register
        subject_name = register[RegisterUnit.Type].name
        predicate = None
        if register[RegisterUnit.Property] is not None:
            predicate = register[RegisterUnit.Property]
        elif register[RegisterUnit.Relation] is not None:
            predicate = register[RegisterUnit.Relation]
        head = (
            f"Define (s:{subject_name})-[p:{predicate.name}]->(o:{predicate.object_type_name})"
            + " {\n"
        )
        rule = head + rule
        rule += "\n}"

    typed_node = re.compile(r"\(([\w\s]*?:)([\w\s\.]+)\)", re.IGNORECASE)
    # findall runs once on the rule as it is now; replacements are then
    # applied in match order
    for alias, node_type in typed_node.findall(rule):
        if "." in node_type or node_type.lower() in ["integer", "text", "float"]:
            continue
        rule = rule.replace(
            f"({alias}{node_type})",
            f"({alias}{self.namespace}.{node_type.strip()})",
        )
    return rule.strip()
def load_script(self):
    """
    Load and then parse the script file.

    Blank lines and lines starting with ``#`` are skipped. While a
    multi-line ``[[ ... ]]`` rule is open, lines are appended to that rule.
    Every other line is dispatched to the parser of the indent level derived
    from its leading whitespace.

    :raises AssertionError: when a dedented line does not line up with any
                            previously seen indent level, or when a parser
                            rejects the line
    """
    # read everything up front so the file handle is closed before parsing
    with open(self.schema_file, "r", encoding="utf-8") as file:
        lines = file.read().splitlines()

    for line in lines:
        self.current_line_num += 1
        strip_line = line.strip()
        # normalise tabs to spaces so the indent width can be measured
        line = line.replace("\t", " ")
        if strip_line == "" or strip_line.startswith("#"):
            # skip empty or comments line
            continue

        if self.rule_quote_open:
            # process the multi-line assignment [[ .... ]]
            right_strip_line = line.rstrip()
            if strip_line.endswith("]]"):
                self.rule_quote_open = False
                if len(right_strip_line) > 2:
                    self.rule_quote_predicate.logical_rule += right_strip_line[
                        : len(right_strip_line) - 2
                    ]
                self.rule_quote_predicate.logical_rule = self.complete_rule(
                    self.rule_quote_predicate.logical_rule
                )
            else:
                self.rule_quote_predicate.logical_rule += line + "\n"
            continue

        indent_count = len(line) - len(line.lstrip())
        if indent_count == 0:
            # the line without indent is namespace definition or a type definition
            self.adjust_parsing_level(0)
        elif indent_count > self.last_indent_level:
            # the line is the sub definition of the previous line
            self.adjust_parsing_level(1)
        elif indent_count < self.last_indent_level:
            # finish current indent parsing: find the level that was
            # previously seen at this indent width
            backward_step = None
            for i in range(0, len(self.indent_level_pos)):
                if indent_count == self.indent_level_pos[i]:
                    backward_step = i - self.current_parsing_level
                    break
            # `is not None`: only "no level found" is an error; a step of 0
            # is handled by the check below
            assert backward_step is not None, self.error_msg(
                f"Invalid indentation, please align with the previous definition"
            )
            if backward_step != 0:
                self.adjust_parsing_level(backward_step)

        self.parsing_dispatch(strip_line, self.current_parsing_level)
        self.last_indent_level = indent_count
        self.indent_level_pos[self.current_parsing_level] = indent_count
def is_internal_property(self, prop: Property, spg_type: SpgTypeEnum):
    """
    Tell whether `prop` is one of the built-in properties of `spg_type`.

    Entity and standard types share one set; concept and event types have
    their own. Returns None for any other type kind.
    """
    if spg_type in (SpgTypeEnum.Entity, SpgTypeEnum.Standard):
        internal_properties = self.entity_internal_property
    elif spg_type == SpgTypeEnum.Concept:
        internal_properties = self.concept_internal_property
    elif spg_type == SpgTypeEnum.Event:
        internal_properties = self.event_internal_property
    else:
        return None
    return prop in internal_properties
def sync_schema(self):
    """Run diff_and_sync with print_only=False and return its result."""
    result = self.diff_and_sync(False)
    return result
def print_diff(self):
    """Run diff_and_sync with print_only=True; its result is discarded."""
    print_only = True
    self.diff_and_sync(print_only)
def diff_sub_property(self, new, old, old_type_name, old_property, new_property):
    """
    Compare the sub properties of a property/relation and mark the changes
    on the old definition.

    Removed sub properties are flagged for deletion, new ones are added to
    `old_property`, a changed object type is handled as delete + add, and
    any other difference is handled as an update. Each change is printed.

    :param new: sub properties parsed from the script, keyed by name
    :param old: sub properties of the existing definition, keyed by name
    :param old_type_name: name of the type that owns the property
    :param old_property: existing property/relation that owns `old`
    :param new_property: parsed property/relation that owns `new`
    :return: True when at least one sub property was changed
    :raises AssertionError: when a change is needed but the type is inherited
                            by another type, or the property itself is inherited
    """
    need_update = False
    inherited_type = self.get_inherited_type(old_type_name)
    inherited_msg = (
        f'"{old_type_name}" was inherited by other type, such as "{inherited_type}". '
        "Prohibit property alteration!"
    )

    for prop in old:
        if old_property.inherited or prop in new:
            continue
        assert inherited_type is None, self.error_msg(inherited_msg)
        old[prop].alter_operation = AlterOperationEnum.Delete
        need_update = True
        print(f"Delete sub property: [{old_type_name}] {old_property.name}.{prop}")

    for prop, o in new.items():
        if prop not in old:
            if new_property.inherited:
                # Nothing to compare against and nothing to create for an
                # inherited property; falling through to `old[prop]` would
                # raise KeyError.
                continue
            assert inherited_type is None, self.error_msg(inherited_msg)
            old_property.add_sub_property(o)
            need_update = True
            print(
                f"Create sub property: [{old_type_name}] {old_property.name}.{prop}"
            )
        elif old[prop].object_type_name != o.object_type_name:
            assert inherited_type is None, self.error_msg(inherited_msg)
            assert not old_property.inherited, self.error_msg(
                f"[{old_type_name}] {old_property.name}.{prop} is inherited sub property, deny modify"
            )
            # the object type cannot be altered in place: delete and re-add
            old[prop].alter_operation = AlterOperationEnum.Delete
            old_property.add_sub_property(o)
            need_update = True
            print(
                f"Recreate sub property: [{old_type_name}] {old_property.name}.{prop}"
            )
        elif old[prop] != o:
            assert inherited_type is None, self.error_msg(inherited_msg)
            assert not old_property.inherited, self.error_msg(
                f"[{old_type_name}] {old_property.name}.{prop} is inherited property, deny modify"
            )
            old[prop].overwritten_by(o)
            old[prop].alter_operation = AlterOperationEnum.Update
            need_update = True
            print(f"Update property: [{old_type_name}] {old_property.name}.{prop}")
    return need_update
def get_inherited_type(self, type_name):
    """
    Return the name of the first parsed type whose parent is `type_name`,
    or None when no parsed type inherits from it.
    """
    children = (
        name
        for name, spg_type in self.types.items()
        if spg_type.parent_type_name == type_name
    )
    return next(children, None)
def diff_and_sync(self, print_only):
    """
    Get the schema diff and then sync to graph storage

    Compares the types parsed from the schema script (``self.types``) with the
    types held by a newly created schema session, registers every type,
    property and relation creation/update/deletion on that session and prints
    one line per detected change.

    :param print_only: when truthy the changes are only printed and
        ``session.commit()`` is not called
    :return: True if the session holds at least one altered type, else False
    :raises AssertionError: when the script attempts a prohibited alteration:
        changing the kind or the parent of a type, the hypernym predicate of a
        concept type, the spreadable value of a standard type, deleting the
        subject property of an event type, modifying an inherited
        property/relation, or altering properties of a type that another type
        inherits from.
        NOTE(review): these checks are ``assert`` statements, so they are
        skipped when Python runs with ``-O``.
    """
    schema = Schema()
    session = schema.create_session()

    # generate the delete list of spg type
    for spg_type in session._spg_types:
        if spg_type in self.internal_type:
            continue
        if spg_type not in self.types:
            session.delete_type(session.get(spg_type))
            print(f"Delete type: {spg_type}")

    for spg_type in self.types:
        # generate the creation list of spg type
        if spg_type not in session._spg_types:
            session.create_type(self.types[spg_type])
            print(f"Create type: {spg_type}")
            for rel in self.types[spg_type].relations:
                print(f'Create relation: [{spg_type}] {rel.split("_")[0]}')
            continue

        # generate the update list
        new_type = self.types[spg_type]
        old_type = session.get(spg_type)
        assert (
            new_type.spg_type_enum == old_type.spg_type_enum
            and new_type.parent_type_name == old_type.parent_type_name
        ), self.error_msg(
            f"Cannot alter the type definition or its parent type of {new_type.name}. "
            "if you still want to make change, please delete it first then re-create it."
        )

        need_update = False
        if new_type.desc != old_type.desc:
            old_type.desc = new_type.desc
            need_update = True
        if new_type.name_zh != old_type.name_zh:
            old_type.name_zh = new_type.name_zh
            need_update = True

        if new_type.spg_type_enum == SpgTypeEnum.Concept:
            assert (
                new_type.hypernym_predicate == old_type.hypernym_predicate
            ), self.error_msg(
                f"Cannot alter the hypernym predicate of {new_type.name}. "
                "if you still want to make change, please delete it first then re-create it."
            )

        if new_type.spg_type_enum == SpgTypeEnum.Standard:
            assert old_type.spreadable == new_type.spreadable, self.error_msg(
                f"Cannot alter the spreadable value of {new_type.name}. "
                f"if you still want to make change, "
                "please delete the definition first and then re-create it."
            )
            if old_type.constraint != new_type.constraint:
                old_type.constraint = new_type.constraint
                need_update = True
                print(f"Update standard type constraint: {spg_type}")

        # properties that exist in storage but were removed from the script
        for prop in old_type.properties:
            if (
                not old_type.properties[prop].inherited
                and prop not in new_type.properties
                and not self.is_internal_property(prop, new_type.spg_type_enum)
            ):
                assert (
                    prop != "subject"
                    and old_type.properties[prop].property_group
                    != PropertyGroupEnum.Subject
                ), self.error_msg(
                    "The subject property of event type cannot be deleted"
                )
                old_type.properties[prop].alter_operation = AlterOperationEnum.Delete
                need_update = True
                print(f"Delete property: [{new_type.name}] {prop}")

        # The lookup depends only on the type name, so do it once per type
        # instead of once per property.
        inherited_type = self.get_inherited_type(new_type.name)

        # properties that were added or modified in the script
        for prop, new_prop in new_type.properties.items():
            if (
                prop not in old_type.properties
                and not self.is_internal_property(prop, new_type.spg_type_enum)
                and not new_prop.inherited
            ):
                assert inherited_type is None, self.error_msg(
                    f'"{new_type.name} was inherited by other type, such as "{inherited_type}". Prohibit property alteration!'
                )
                old_type.add_property(new_prop)
                need_update = True
                print(f"Create property: [{new_type.name}] {prop}")
            elif (
                old_type.properties[prop].object_type_name
                != new_prop.object_type_name
            ):
                # the value type changed: drop the old property, add the new one
                assert inherited_type is None, self.error_msg(
                    f'"{new_type.name} was inherited by other type, such as "{inherited_type}". Prohibit property alteration!'
                )
                assert not old_type.properties[prop].inherited, self.error_msg(
                    f"{new_type.name}] {prop} is inherited property, deny modify"
                )
                old_type.properties[prop].alter_operation = AlterOperationEnum.Delete
                old_type.add_property(new_prop)
                need_update = True
                print(f"Recreate property: [{new_type.name}] {prop}")
            elif old_type.properties[prop].sub_properties != new_prop.sub_properties:
                # Keep the result in its own variable: assigning it straight to
                # need_update would discard changes already detected for this
                # type whenever the sub property diff reports no change.
                sub_changed = self.diff_sub_property(
                    new_prop.sub_properties,
                    old_type.properties[prop].sub_properties,
                    old_type.name,
                    old_type.properties[prop],
                    new_prop,
                )
                if sub_changed:
                    old_type.properties[
                        prop
                    ].alter_operation = AlterOperationEnum.Update
                    need_update = True
            elif old_type.properties[prop] != new_prop:
                assert inherited_type is None, self.error_msg(
                    f'"{new_type.name} was inherited by other type, such as "{inherited_type}". Prohibit property alteration!'
                )
                assert not old_type.properties[prop].inherited, self.error_msg(
                    f"{new_type.name}] {prop} is inherited property, deny modify"
                )
                old_type.properties[prop].overwritten_by(new_prop)
                old_type.properties[prop].alter_operation = AlterOperationEnum.Update
                need_update = True
                print(f"Update property: [{new_type.name}] {prop}")

        # relations that were added or modified in the script
        for relation in new_type.relations:
            # the predicate name is the part of the relation key before "_"
            p_name = relation.split("_")[0]
            if (
                relation not in old_type.relations
                or old_type.relations[relation].object_type_name
                != new_type.relations[relation].object_type_name
            ):
                old_type.add_relation(new_type.relations[relation])
                need_update = True
                print(f"Create relation: [{new_type.name}] {p_name}")
            elif (
                old_type.relations[relation].sub_properties
                != new_type.relations[relation].sub_properties
            ):
                # same reasoning as for properties: never reset need_update
                sub_changed = self.diff_sub_property(
                    new_type.relations[relation].sub_properties,
                    old_type.relations[relation].sub_properties,
                    old_type.name,
                    old_type.relations[relation],
                    new_type.relations[relation],
                )
                if sub_changed:
                    old_type.relations[
                        relation
                    ].alter_operation = AlterOperationEnum.Update
                    need_update = True
            elif old_type.relations[relation] != new_type.relations[relation]:
                assert not old_type.relations[relation].inherited, self.error_msg(
                    f"{new_type.name}] {p_name} is inherited relation, deny modify"
                )
                old_type.relations[relation].overwritten_by(
                    new_type.relations[relation]
                )
                old_type.relations[relation].alter_operation = AlterOperationEnum.Update
                need_update = True
                print(f"Update relation: [{new_type.name}] {relation}")

        # relations that exist in storage but were removed from the script
        for relation, o in old_type.relations.items():
            p_name = relation.split("_")[0]
            if o.inherited or p_name in new_type.properties or o.is_dynamic:
                # skip the inherited and semantic relation
                continue
            if relation in new_type.relations:
                continue
            if new_type.spg_type_enum == SpgTypeEnum.Concept and p_name in [
                "isA",
                "locateAt",
            ]:
                # these two predicates of a concept type are never deleted here
                continue
            old_type.relations[relation].alter_operation = AlterOperationEnum.Delete
            need_update = True
            print(f"Delete relation: [{new_type.name}] {p_name}")

        if need_update:
            session.update_type(old_type)

    if not print_only:
        session.commit()
    return bool(session._alter_spg_types)
def export_schema_python(self, filename):
    """
    Export the schema helper class in python
    You can import the exported class in your code to obtain the code prompt in IDE

    The generated module contains one class named after ``self.namespace``.
    For every type of the schema session (except names starting with "STD."
    and names listed in ``self.internal_type``) it holds a nested class with a
    ``__typename__`` attribute and one string attribute per property, plus an
    instance of that nested class created in ``__init__``.

    :param filename: path of the python file to write (overwritten if present)
    :raises AssertionError: when ``self.namespace`` is empty
    """
    schema = Schema()
    session = schema.create_session()
    assert len(self.namespace) > 0, "Schema is invalid"

    header = [
        "# ATTENTION!\n",
        "# This file is generated by Schema automatically, ",
        "it will be refreshed after schema has been committed\n",
        "# PLEASE DO NOT MODIFY THIS FILE!!!\n#\n\n",
        f"class {self.namespace}:\n",
        "\tdef __init__(self):\n",
    ]
    init_lines = []
    nested_classes = []
    for spg_type in sorted(session._spg_types):
        if spg_type.startswith("STD.") or spg_type in self.internal_type:
            continue
        # type names look like "<namespace>.<name>"; keep the part after the dot
        type_name = spg_type.split(".")[1]
        init_lines.append(f"\t\tself.{type_name} = self.{type_name}()\n")
        nested_classes.append(f"\n\tclass {type_name}:\n")
        nested_classes.append(f'\t\t__typename__ = "{spg_type}"\n')
        nested_classes.extend(
            f'\t\t{prop} = "{prop}"\n' for prop in session.get(spg_type).properties
        )
        nested_classes.append("\n\t\tdef __init__(self):\n\t\t\tpass\n")

    if not init_lines:
        # Without any exported type the generated __init__ would have an empty
        # body, which is a syntax error in the exported module.
        init_lines.append("\t\tpass\n")

    content = "".join(header + init_lines + nested_classes) + "\n"
    # Python source files are UTF-8 by default, so do not rely on the locale
    # encoding of the machine that runs the export.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)