mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-29 01:42:08 +00:00
feat(ingest): update bigquery demo data (#2607)
This commit is contained in:
parent
f2580f7bd7
commit
dbe42e07f6
@ -11,7 +11,7 @@ source:
|
||||
- "bigquery-public-data\\.covid19.*"
|
||||
deny:
|
||||
# Except for tables that end with an underscore.
|
||||
- ".*_$"
|
||||
- ".*_\$" # also escapes the variable expansion on $
|
||||
|
||||
sink:
|
||||
type: "file"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -92,4 +92,13 @@ bigquery-public-data.covid19_usafacts.summary,,USAFacts,"bigquery-public-data.co
|
||||
bigquery-public-data.covid19_weathersource_com.county_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.county_day_history
|
||||
bigquery-public-data.covid19_weathersource_com.county_day_history,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
||||
bigquery-public-data.covid19_weathersource_com.postal_code_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
|
||||
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
|
||||
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
|
||||
bigquery-public-data.covid19_tracking.city_level_cases_and_deaths,x,,
|
||||
bigquery-public-data.covid19_tracking.covid_racial_data_tracker,x,,
|
||||
bigquery-public-data.covid19_tracking.national_testing_and_outcomes,x,,
|
||||
bigquery-public-data.covid19_tracking.state_facility_level_long_term_care,x,,
|
||||
bigquery-public-data.covid19_tracking.state_level_aggregate_long_term_care,x,,
|
||||
bigquery-public-data.covid19_tracking.state_level_cumulative_long_term_care,x,,
|
||||
bigquery-public-data.covid19_tracking.state_level_current_outbreak_long_term_care,x,,
|
||||
bigquery-public-data.covid19_tracking.state_screenshots,x,,
|
||||
bigquery-public-data.covid19_tracking.state_testing_and_outcomes,x,,
|
||||
|
@ -193,7 +193,13 @@ if __name__ == "__main__":
|
||||
if not directive.drop
|
||||
}
|
||||
|
||||
assert all(dataset.proposedSnapshot.urn in all_dataset_urns for dataset in datasets)
|
||||
missing_dataset_directives = [
|
||||
dataset.proposedSnapshot.urn
|
||||
for dataset in datasets
|
||||
if dataset.proposedSnapshot.urn not in all_dataset_urns
|
||||
]
|
||||
assert not missing_dataset_directives
|
||||
|
||||
filtered_dataset_mces = [
|
||||
dataset for dataset in datasets if dataset.proposedSnapshot.urn in allowed_urns
|
||||
]
|
||||
|
||||
@ -1,15 +1,13 @@
|
||||
#!/bin/bash
|
||||
set -euxo pipefail
|
||||
|
||||
# This script will use the YML files in examples/demo_data to generate
|
||||
# all_covid19_datasets.json, directives.csv, and finally demo_data.json.
|
||||
|
||||
set -euxo pipefail
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
FILE="$DIR/bigquery_covid19_to_file.yml"
|
||||
if [ ! -f "$FILE" ]; then
|
||||
# Fetch public COVID-19 datasets from BigQuery.
|
||||
datahub ingest -c $FILE
|
||||
fi
|
||||
|
||||
# Fetch public COVID-19 datasets from BigQuery.
|
||||
datahub ingest -c "$DIR/bigquery_covid19_to_file.yml"
|
||||
|
||||
# Pull the directives CSV from Google sheets.
|
||||
# See https://docs.google.com/spreadsheets/d/17c5SBiXEw5PuV7oEkC2uQnX55C6TPZTnr6XRQ6X-Qy0/edit#gid=0.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user