/**
 * Converts an entity-type parse node into an entity label.
 *
 * The first child of the node decides how the name is assembled:
 *  - a plain identifier is used as the name directly;
 *  - a prefixed name is rebuilt as first identifier, then the literal text of the
 *    period token, then the second identifier.
 *
 * @param ctx the entity-type node produced by the parser
 * @return an [[EntityLabelType]] carrying the assembled name
 */
def parseEntityType(ctx: Entity_typeContext): LabelType =
  ctx.getChild(0) match {
    case plain: IdentifierContext =>
      EntityLabelType(parseIdentifier(plain))
    case qualified: Prefix_nameContext =>
      // Evaluate the three pieces left to right, exactly as they appear in the source text.
      val prefixPart = parseIdentifier(qualified.identifier(0))
      val separator = qualified.period().getText
      val namePart = parseIdentifier(qualified.identifier(1))
      EntityLabelType(prefixPart + separator + namePart)
  }
// Checks that a back-quoted type name (`Action`) inside GraphStructure is accepted as an
// entity type and ends up as the type name "Action" on node A.
it("test key word") {
  val dsl =
    """
      |GraphStructure {
      | A [`Action`]
      | B [Finish]
      | A->B [endAction]
      |}
      |Rule {
      |}
      |Action {
      | get(A.name,B.name)
      |}
      |""".stripMargin

  val kgParser = new LexerInit().initKGReasonerParser(dsl)
  val patterns = new PatternParser()

  // Walk down to the graph-structure definition node of the old-style DSL.
  val structureCtx = kgParser
    .kg_dsl()
    .base_job()
    .kgdsl_old_define()
    .the_graph_structure()
    .graph_structure_define()
  val block = patterns.parseGraphStructureDefine(structureCtx)
  print(block.pretty)

  block.isInstanceOf[MatchBlock] should equal(true)

  val matchBlock = block.asInstanceOf[MatchBlock]
  val nodeA = matchBlock.patterns("unresolved_default_path").graphPattern.getNode("A")
  (nodeA == null) should equal(false)
  nodeA.typeNames.contains("Action") should equal(true)
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. + */ + +package com.antgroup.openspg.reasoner.udf.builtin.udf; + +import com.antgroup.openspg.reasoner.udf.model.UdfDefine; +import com.antgroup.openspg.reasoner.udf.model.UdfOperatorTypeEnum; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class ReMatch { + + /** + * regex like match + * + * @param inputStr + * @param regex + * @return + */ + @UdfDefine(name = "regex_match", udfType = UdfOperatorTypeEnum.OPERATOR) + public String regexMatch(String inputStr, String regex) { + if (inputStr == null) { + return null; + } + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(inputStr); + if (matcher.find()) { + return matcher.group(); + } + return null; + } +} diff --git a/reasoner/udf/src/test/java/com/antgroup/openspg/reasoner/udf/test/UdfTest.java b/reasoner/udf/src/test/java/com/antgroup/openspg/reasoner/udf/test/UdfTest.java index 17bc9179..a329f2bf 100644 --- a/reasoner/udf/src/test/java/com/antgroup/openspg/reasoner/udf/test/UdfTest.java +++ b/reasoner/udf/src/test/java/com/antgroup/openspg/reasoner/udf/test/UdfTest.java @@ -54,6 +54,16 @@ public class UdfTest { DateUtils.timeZone = TimeZone.getTimeZone("Asia/Shanghai"); } + @Test + public void testReMatch() { + UdfMng mng = UdfMngFactory.getUdfMng(); + IUdfMeta udfMeta = + mng.getUdfMeta("regex_match", Lists.newArrayList(KTString$.MODULE$, KTString$.MODULE$)); + Object rst = + udfMeta.invoke("Hello, my email address is example@example.com", "\\b\\w+@\\w+\\.\\w+\\b"); + Assert.assertEquals(rst, "example@example.com"); + } + @Test public void testJsonGet() { UdfMng mng = UdfMngFactory.getUdfMng();