datahub/docs/demo/graph-onboarding.md
Mars Lan adf975ee18
build: build GitHub Pages from /docs directory (#1752)
- Move non-README top-level MD files to /docs
- Update all absolute links to files in /docs to relative links
- Add a placeholder front page for GitHub Pages
2020-07-26 10:41:32 -07:00

7.4 KiB

Onboarding to GMA Graph - Adding a new relationship type

The steps for this are already detailed in How to onboard to GMA graph?

For this exercise, we'll add a new relationship type, FollowedBy, which is extracted from the Follow aspect. To do that, we first need to add the Follow aspect.

1. Onboard Follow aspect

For details, refer to How to add a new metadata aspect?

1.1 Model new aspect

  • Follow.pdl
namespace com.linkedin.common

/**
 * Follow information of an entity, modeled as the list of follow actions recorded for it.
 */
record Follow {

  /**
   * List of followers of an entity. Each element is a FollowAction.
   */
  followers: array[FollowAction]
}
  • FollowAction.pdl
namespace com.linkedin.common

/**
 * Follow Action of an entity: a single follower plus an optional audit stamp.
 */
record FollowAction {

  /**
   * Follower (a corp user or a corp group, as allowed by FollowerType) of an entity.
   */
  follower: FollowerType

  /**
   * Audit stamp containing who last modified the record and when. This field is optional.
   */
  lastModified: optional AuditStamp
}
  • FollowerType.pdl
namespace com.linkedin.common

/**
 * A union of all supported follower types: a corp user URN (member "corpUser")
 * or a corp group URN (member "corpGroup").
 */
typeref FollowerType = union[
  corpUser: CorpuserUrn,
  corpGroup: CorpGroupUrn
]

1.2 Update aspect union for dataset

namespace com.linkedin.metadata.aspect

import com.linkedin.common.Follow
import com.linkedin.common.InstitutionalMemory
import com.linkedin.common.Ownership
import com.linkedin.common.Status
import com.linkedin.dataset.DatasetDeprecation
import com.linkedin.dataset.DatasetProperties
import com.linkedin.dataset.UpstreamLineage
import com.linkedin.schema.SchemaMetadata

/**
 * A union of all supported metadata aspects for a Dataset.
 * Every member is one of the aspect records imported at the top of this file.
 */
typeref DatasetAspect = union[
  DatasetProperties,
  DatasetDeprecation,
  Follow,
  UpstreamLineage,
  InstitutionalMemory,
  Ownership,
  Status,
  SchemaMetadata
]

2. Create FollowedBy relationship

namespace com.linkedin.metadata.relationship

/**
 * A generic model for the Followed-By relationship.
 * As declared by the pairings below, the source is always a dataset URN and the
 * destination is either a corp user URN or a corp group URN.
 */
@pairings = [ {
  "destination" : "com.linkedin.common.urn.CorpuserUrn",
  "source" : "com.linkedin.common.urn.DatasetUrn"
}, {
  "destination" : "com.linkedin.common.urn.CorpGroupUrn",
  "source" : "com.linkedin.common.urn.DatasetUrn"
} ]
record FollowedBy includes BaseRelationship {
}

3. Build the repo to generate Java classes for newly added models

./gradlew build -Prest.model.compatibility=ignore

You can verify that the API definitions for the /dataset endpoint of GMS, as well as the MXE schemas, are automatically updated to include the new model changes.

4. Create FollowedBy relationship builder from Follow aspect

package com.linkedin.metadata.builders.graph.relationship;

import com.linkedin.common.Follow;
import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.builders.graph.GraphBuilder;
import com.linkedin.metadata.relationship.FollowedBy;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;

import static com.linkedin.metadata.dao.internal.BaseGraphWriterDAO.RemovalOption.*;


/**
 * Relationship builder that derives {@link FollowedBy} edges from a {@link Follow} aspect.
 */
public class FollowedByBuilderFromFollow extends BaseRelationshipBuilder<Follow> {

  public FollowedByBuilderFromFollow() {
    super(Follow.class);
  }

  /**
   * Produces one {@link FollowedBy} edge for every follower in the aspect that is a corp user
   * or a corp group, using the given URN as the source of each edge.
   *
   * @param urn URN used as the source of every generated edge
   * @param follow aspect whose followers become the edge destinations
   * @return a single-element list wrapping all generated edges together with the
   *     {@code REMOVE_ALL_EDGES_FROM_SOURCE} removal option
   */
  @Nonnull
  @Override
  public <URN extends Urn> List<GraphBuilder.RelationshipUpdates> buildRelationships(@Nonnull URN urn,
      @Nonnull Follow follow) {
    final List<FollowedBy> edges = follow.getFollowers()
        .stream()
        .map(action -> action.getFollower())
        .map(follower -> {
          if (follower.isCorpUser()) {
            return new FollowedBy().setSource(urn).setDestination(follower.getCorpUser());
          }
          if (follower.isCorpGroup()) {
            return new FollowedBy().setSource(urn).setDestination(follower.getCorpGroup());
          }
          // Neither a corp user nor a corp group: no edge is produced for this follower.
          return null;
        })
        .filter(Objects::nonNull)
        .collect(Collectors.toList());

    final GraphBuilder.RelationshipUpdates updates =
        new GraphBuilder.RelationshipUpdates(edges, REMOVE_ALL_EDGES_FROM_SOURCE);
    return Collections.singletonList(updates);
  }
}

5. Update set of relationship builders for dataset by adding FollowedByBuilderFromFollow

package com.linkedin.metadata.builders.graph;

import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.builders.graph.relationship.BaseRelationshipBuilder;
import com.linkedin.metadata.builders.graph.relationship.DownstreamOfBuilderFromUpstreamLineage;
import com.linkedin.metadata.builders.graph.relationship.FollowedByBuilderFromFollow;
import com.linkedin.metadata.builders.graph.relationship.OwnedByBuilderFromOwnership;
import com.linkedin.metadata.entity.DatasetEntity;
import com.linkedin.metadata.snapshot.DatasetSnapshot;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nonnull;

/**
 * Graph builder for {@link DatasetSnapshot}: turns a snapshot into a {@link DatasetEntity}
 * and registers the relationship builders that apply to datasets.
 */
public class DatasetGraphBuilder extends BaseGraphBuilder<DatasetSnapshot> {
  // Element type is left raw to match the original declaration, which is what the
  // super constructor is called with.
  private static final Set<BaseRelationshipBuilder> RELATIONSHIP_BUILDERS = createRelationshipBuilders();

  public DatasetGraphBuilder() {
    super(DatasetSnapshot.class, RELATIONSHIP_BUILDERS);
  }

  /**
   * Creates the unmodifiable set of relationship builders used for datasets.
   *
   * <p>A plain {@link HashSet} is populated explicitly instead of using double-brace
   * initialization, which would define an extra anonymous {@code HashSet} subclass.
   *
   * @return unmodifiable set containing one instance of each dataset relationship builder
   */
  private static Set<BaseRelationshipBuilder> createRelationshipBuilders() {
    final Set<BaseRelationshipBuilder> builders = new HashSet<>();
    builders.add(new DownstreamOfBuilderFromUpstreamLineage());
    builders.add(new FollowedByBuilderFromFollow());
    builders.add(new OwnedByBuilderFromOwnership());
    return Collections.unmodifiableSet(builders);
  }

  /**
   * Builds the single graph entity for a dataset snapshot.
   *
   * @param snapshot snapshot whose URN supplies the entity's urn, name, platform and origin
   * @return a single-element list containing the populated {@link DatasetEntity}
   */
  @Nonnull
  @Override
  protected List<? extends RecordTemplate> buildEntities(@Nonnull DatasetSnapshot snapshot) {
    final DatasetUrn urn = snapshot.getUrn();
    final DatasetEntity entity = new DatasetEntity().setUrn(urn)
        .setName(urn.getDatasetNameEntity())
        .setPlatform(urn.getPlatformEntity())
        .setOrigin(urn.getOriginEntity());

    // Defined outside this class; presumably marks the entity as removed based on the
    // snapshot's contents. TODO confirm against BaseGraphBuilder.
    setRemovedProperty(snapshot, entity);

    return Collections.singletonList(entity);
  }
}

6. Rebuild & restart all containers with new changes

These are all the code changes we need to make to enable linking datasets and corp users (and groups as well). Now we can rebuild and start all Docker images.

./docker/rebuild-all/rebuild-all.sh

7. That's it. Let's test our new feature!

Let's ingest a user first

curl 'http://localhost:8080/corpUsers?action=ingest' -X POST -H 'X-RestLi-Protocol-Version:2.0.0' --data '
{
  "snapshot": {
    "aspects": [{
      "com.linkedin.identity.CorpUserInfo": {
        "active": true,
        "displayName": "Foo Bar",
        "fullName": "Foo Bar",
        "email": "fbar@linkedin.com"
      }
    }],
    "urn": "urn:li:corpuser:fbar"
  }
}'

And now let's ingest a dataset with two aspects: Ownership & Follow

curl 'http://localhost:8080/datasets?action=ingest' -X POST -H 'X-RestLi-Protocol-Version:2.0.0' --data '
{
  "snapshot": {
    "aspects": [{
      "com.linkedin.common.Ownership": {
        "owners": [{
            "owner": "urn:li:corpuser:fbar",
            "type": "DATAOWNER"
        }],
        "lastModified": {
            "time": 0,
            "actor": "urn:li:corpuser:fbar"
        }
      }
    },
    {
      "com.linkedin.common.Follow": {
        "followers": [{
          "follower": {
            "corpUser": "urn:li:corpuser:fbar"
          }
        }]
      }
    }],
    "urn": "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"
  }
}'

The Ownership aspect creates an OwnedBy edge between the user and dataset nodes; that already existed. Now that we have added the Follow aspect, we will also see a FollowedBy edge between the same user and dataset nodes.

You can confirm this by connecting to Neo4j browser on http://localhost:7474/browser