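"""Populate a test Feast feature store for the Feast ingestion integration test.

The script registers two dummy entities and three feature tables against a Feast
core instance, then listens on TCP port 6789 so the waiting test script knows the
setup has finished and ingestion can begin.
"""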
import socket

from feast import Client
from feast.data_format import ParquetFormat
from feast.data_source import FileSource
from feast.entity import Entity
from feast.feature import Feature
from feast.feature_table import FeatureTable
from feast.value_type import ValueType

if __name__ == "__main__":
    test_client = Client(core_url="testfeast:6565")

    # create dummy entity since Feast demands it
    entity_1 = Entity(
        name="dummy_entity_1",
        description="Dummy entity 1",
        value_type=ValueType.STRING,
        labels={"key": "val"},
    )

    # create dummy entity since Feast demands it
    entity_2 = Entity(
        name="dummy_entity_2",
        description="Dummy entity 2",
        value_type=ValueType.INT32,
        labels={"key": "val"},
    )

    # commit entities
    test_client.apply([entity_1, entity_2])

    # dummy file source
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    # first feature table for testing, with all of Feast's datatypes
    table_1 = FeatureTable(
        name="test_feature_table_all_feature_dtypes",
        features=[
            Feature(name="test_BYTES_feature", dtype=ValueType.BYTES),
            Feature(name="test_STRING_feature", dtype=ValueType.STRING),
            Feature(name="test_INT32_feature", dtype=ValueType.INT32),
            Feature(name="test_INT64_feature", dtype=ValueType.INT64),
            Feature(name="test_DOUBLE_feature", dtype=ValueType.DOUBLE),
            Feature(name="test_FLOAT_feature", dtype=ValueType.FLOAT),
            Feature(name="test_BOOL_feature", dtype=ValueType.BOOL),
            Feature(name="test_BYTES_LIST_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="test_STRING_LIST_feature", dtype=ValueType.STRING_LIST),
            Feature(name="test_INT32_LIST_feature", dtype=ValueType.INT32_LIST),
            Feature(name="test_INT64_LIST_feature", dtype=ValueType.INT64_LIST),
            Feature(name="test_DOUBLE_LIST_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="test_FLOAT_LIST_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="test_BOOL_LIST_feature", dtype=ValueType.BOOL_LIST),
        ],
        entities=["dummy_entity_1", "dummy_entity_2"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    # second feature table for testing, with just a single feature
    table_2 = FeatureTable(
        name="test_feature_table_single_feature",
        features=[
            Feature(name="test_BYTES_feature", dtype=ValueType.BYTES),
        ],
        entities=["dummy_entity_1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    # third feature table for testing, no labels
    table_3 = FeatureTable(
        name="test_feature_table_no_labels",
        features=[
            Feature(name="test_BYTES_feature", dtype=ValueType.BYTES),
        ],
        entities=["dummy_entity_2"],
        labels={},
        batch_source=batch_source,
    )

    # commit the tables to the feature store
    test_client.apply([table_1, table_2, table_3])

    print("make_tests.py setup finished")

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # listen on port 6789 once done so the test script knows when to start ingestion
    server_address = ("localhost", 6789)
    sock.bind(server_address)
    sock.listen(1)

    print("make_tests.py listening on 6789")
    while True:
        # Wait for a connection
        connection, client_address = sock.accept()