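# make_tests.py: seeds a Feast Core instance with test entities and feature
# tables for DataHub's Feast ingestion tests, then holds a TCP port open to
# signal that setup is complete.
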
import socket

from feast import Client
from feast.data_format import ParquetFormat
from feast.data_source import FileSource
from feast.entity import Entity
from feast.feature import Feature
from feast.feature_table import FeatureTable
from feast.value_type import ValueType

if __name__ == "__main__":

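    # "testfeast" is presumably the Feast Core service's hostname on the test
    # docker network; 6565 is Feast Core's default gRPC port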
    test_client = Client(core_url="testfeast:6565")

    # create dummy entity since Feast demands it
    entity_1 = Entity(
        name="dummy_entity_1",
        description="Dummy entity 1",
        value_type=ValueType.STRING,
        labels={"key": "val"},
    )

    # create a second dummy entity with a different value type
    entity_2 = Entity(
        name="dummy_entity_2",
        description="Dummy entity 2",
        value_type=ValueType.INT32,
        labels={"key": "val"},
    )

    # commit entities
    test_client.apply([entity_1, entity_2])
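    # (in DataHub's metadata model, Feast entities map to MLPrimaryKey)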

    # dummy file source
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
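    # (Feast expects each feature table to declare a batch source; the URL and
    # column names above appear to be placeholders for these metadata-only tests)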

    # first feature table for testing, with all of Feast's feature datatypes
    table_1 = FeatureTable(
        name="test_feature_table_all_feature_dtypes",
        features=[
            Feature(name="test_BYTES_feature", dtype=ValueType.BYTES),
            Feature(name="test_STRING_feature", dtype=ValueType.STRING),
            Feature(name="test_INT32_feature", dtype=ValueType.INT32),
            Feature(name="test_INT64_feature", dtype=ValueType.INT64),
            Feature(name="test_DOUBLE_feature", dtype=ValueType.DOUBLE),
            Feature(name="test_FLOAT_feature", dtype=ValueType.FLOAT),
            Feature(name="test_BOOL_feature", dtype=ValueType.BOOL),
            Feature(name="test_BYTES_LIST_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="test_STRING_LIST_feature", dtype=ValueType.STRING_LIST),
            Feature(name="test_INT32_LIST_feature", dtype=ValueType.INT32_LIST),
            Feature(name="test_INT64_LIST_feature", dtype=ValueType.INT64_LIST),
            Feature(name="test_DOUBLE_LIST_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="test_FLOAT_LIST_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="test_BOOL_LIST_feature", dtype=ValueType.BOOL_LIST),
        ],
        entities=["dummy_entity_1", "dummy_entity_2"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    # second feature table for testing, with just a single feature
    table_2 = FeatureTable(
        name="test_feature_table_single_feature",
        features=[Feature(name="test_BYTES_feature", dtype=ValueType.BYTES)],
        entities=["dummy_entity_1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    # third feature table for testing, with no labels
    table_3 = FeatureTable(
        name="test_feature_table_no_labels",
        features=[Feature(name="test_BYTES_feature", dtype=ValueType.BYTES)],
        entities=["dummy_entity_2"],
        labels={},
        batch_source=batch_source,
    )

    # commit the tables to the feature store
    test_client.apply([table_1, table_2, table_3])
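    # (the ingestion tests compare the metadata extracted from this setup
    # against checked-in golden files)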

    print("make_tests.py setup finished")

    # listen on port 6789 once done so the test script knows when to start ingestion
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_address = ("localhost", 6789)
    sock.bind(server_address)
    sock.listen(1)

    print("make_tests.py listening on 6789")

    while True:
        # Wait for a connection
        connection, client_address = sock.accept()
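
        # A harness waiting for this signal might poll the port before
        # starting ingestion; a minimal sketch (hostname is a placeholder):
        #
        #     import socket, time
        #     while True:
        #         try:
        #             socket.create_connection(("<feast-setup-host>", 6789), timeout=5).close()
        #             break
        #         except OSError:
        #             time.sleep(1)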