mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-29 09:52:40 +00:00
feat(ingest): update bigquery demo data (#2607)
This commit is contained in:
parent
f2580f7bd7
commit
dbe42e07f6
@ -11,7 +11,7 @@ source:
|
|||||||
- "bigquery-public-data\\.covid19.*"
|
- "bigquery-public-data\\.covid19.*"
|
||||||
deny:
|
deny:
|
||||||
# Except for tables that end with an underscore.
|
# Except for tables that end with an underscore.
|
||||||
- ".*_$"
|
- ".*_\$" # also escapes the variable expansion on $
|
||||||
|
|
||||||
sink:
|
sink:
|
||||||
type: "file"
|
type: "file"
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -92,4 +92,13 @@ bigquery-public-data.covid19_usafacts.summary,,USAFacts,"bigquery-public-data.co
|
|||||||
bigquery-public-data.covid19_weathersource_com.county_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.county_day_history
|
bigquery-public-data.covid19_weathersource_com.county_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.county_day_history
|
||||||
bigquery-public-data.covid19_weathersource_com.county_day_history,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
bigquery-public-data.covid19_weathersource_com.county_day_history,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
||||||
bigquery-public-data.covid19_weathersource_com.postal_code_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
bigquery-public-data.covid19_weathersource_com.postal_code_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
||||||
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
|
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
|
||||||
|
bigquery-public-data.covid19_tracking.city_level_cases_and_deaths,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.covid_racial_data_tracker,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.national_testing_and_outcomes,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_facility_level_long_term_care,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_level_aggregate_long_term_care,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_level_cumulative_long_term_care,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_level_current_outbreak_long_term_care,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_screenshots,x,,
|
||||||
|
bigquery-public-data.covid19_tracking.state_testing_and_outcomes,x,,
|
||||||
|
@ -193,7 +193,13 @@ if __name__ == "__main__":
|
|||||||
if not directive.drop
|
if not directive.drop
|
||||||
}
|
}
|
||||||
|
|
||||||
assert all(dataset.proposedSnapshot.urn in all_dataset_urns for dataset in datasets)
|
missing_dataset_directives = [
|
||||||
|
dataset.proposedSnapshot.urn
|
||||||
|
for dataset in datasets
|
||||||
|
if dataset.proposedSnapshot.urn not in all_dataset_urns
|
||||||
|
]
|
||||||
|
assert not missing_dataset_directives
|
||||||
|
|
||||||
filtered_dataset_mces = [
|
filtered_dataset_mces = [
|
||||||
dataset for dataset in datasets if dataset.proposedSnapshot.urn in allowed_urns
|
dataset for dataset in datasets if dataset.proposedSnapshot.urn in allowed_urns
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,15 +1,13 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -euxo pipefail
|
||||||
|
|
||||||
# This script will use the YML files in examples/demo_data to generate
|
# This script will use the YML files in examples/demo_data to generate
|
||||||
# all_covid19_datasets.json, directives.csv, and finally demo_data.json.
|
# all_covid19_datasets.json, directives.csv, and finally demo_data.json.
|
||||||
|
|
||||||
set -euxo pipefail
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
FILE="$DIR/bigquery_covid19_to_file.yml"
|
|
||||||
if [ ! -f "$FILE" ]; then
|
# Fetch public COVID-19 datasets from BigQuery.
|
||||||
# Fetch public COVID-19 datasets from BigQuery.
|
datahub ingest -c "$DIR/bigquery_covid19_to_file.yml"
|
||||||
datahub ingest -c $FILE
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Pull the directives CSV from Google sheets.
|
# Pull the directives CSV from Google sheets.
|
||||||
# See https://docs.google.com/spreadsheets/d/17c5SBiXEw5PuV7oEkC2uQnX55C6TPZTnr6XRQ6X-Qy0/edit#gid=0.
|
# See https://docs.google.com/spreadsheets/d/17c5SBiXEw5PuV7oEkC2uQnX55C6TPZTnr6XRQ6X-Qy0/edit#gid=0.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user