feat(ingest): update bigquery demo data (#2607)

This commit is contained in:
Harshal Sheth 2021-05-25 18:52:10 -07:00 committed by GitHub
parent f2580f7bd7
commit dbe42e07f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 11541 additions and 5470 deletions

View File

@ -11,7 +11,7 @@ source:
- "bigquery-public-data\\.covid19.*"
deny:
# Except for tables that end with an underscore.
- ".*_$"
- ".*_\$" # also escapes the variable expansion on $
sink:
type: "file"

File diff suppressed because it is too large Load Diff

View File

@ -92,4 +92,13 @@ bigquery-public-data.covid19_usafacts.summary,,USAFacts,"bigquery-public-data.co
bigquery-public-data.covid19_weathersource_com.county_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.county_day_history
bigquery-public-data.covid19_weathersource_com.county_day_history,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
bigquery-public-data.covid19_weathersource_com.postal_code_day_forecast,,Weather Source,bigquery-public-data.covid19_weathersource_com.postal_code_day_history
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
bigquery-public-data.covid19_weathersource_com.postal_code_day_history,,Weather Source,
bigquery-public-data.covid19_tracking.city_level_cases_and_deaths,x,,
bigquery-public-data.covid19_tracking.covid_racial_data_tracker,x,,
bigquery-public-data.covid19_tracking.national_testing_and_outcomes,x,,
bigquery-public-data.covid19_tracking.state_facility_level_long_term_care,x,,
bigquery-public-data.covid19_tracking.state_level_aggregate_long_term_care,x,,
bigquery-public-data.covid19_tracking.state_level_cumulative_long_term_care,x,,
bigquery-public-data.covid19_tracking.state_level_current_outbreak_long_term_care,x,,
bigquery-public-data.covid19_tracking.state_screenshots,x,,
bigquery-public-data.covid19_tracking.state_testing_and_outcomes,x,,
1 table drop owners depends_on
92 bigquery-public-data.covid19_weathersource_com.county_day_forecast Weather Source bigquery-public-data.covid19_weathersource_com.county_day_history
93 bigquery-public-data.covid19_weathersource_com.county_day_history Weather Source bigquery-public-data.covid19_weathersource_com.postal_code_day_history
94 bigquery-public-data.covid19_weathersource_com.postal_code_day_forecast Weather Source bigquery-public-data.covid19_weathersource_com.postal_code_day_history
95 bigquery-public-data.covid19_weathersource_com.postal_code_day_history Weather Source
96 bigquery-public-data.covid19_tracking.city_level_cases_and_deaths x
97 bigquery-public-data.covid19_tracking.covid_racial_data_tracker x
98 bigquery-public-data.covid19_tracking.national_testing_and_outcomes x
99 bigquery-public-data.covid19_tracking.state_facility_level_long_term_care x
100 bigquery-public-data.covid19_tracking.state_level_aggregate_long_term_care x
101 bigquery-public-data.covid19_tracking.state_level_cumulative_long_term_care x
102 bigquery-public-data.covid19_tracking.state_level_current_outbreak_long_term_care x
103 bigquery-public-data.covid19_tracking.state_screenshots x
104 bigquery-public-data.covid19_tracking.state_testing_and_outcomes x

View File

@ -193,7 +193,13 @@ if __name__ == "__main__":
if not directive.drop
}
assert all(dataset.proposedSnapshot.urn in all_dataset_urns for dataset in datasets)
missing_dataset_directives = [
dataset.proposedSnapshot.urn
for dataset in datasets
if dataset.proposedSnapshot.urn not in all_dataset_urns
]
assert not missing_dataset_directives
filtered_dataset_mces = [
dataset for dataset in datasets if dataset.proposedSnapshot.urn in allowed_urns
]

View File

@ -1,15 +1,13 @@
#!/bin/bash
set -euxo pipefail
# This script will use the YML files in examples/demo_data to generate
# all_covid19_datasets.json, directives.csv, and finally demo_data.json.
set -euxo pipefail
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
FILE="$DIR/bigquery_covid19_to_file.yml"
if [ ! -f "$FILE" ]; then
# Fetch public COVID-19 datasets from BigQuery.
datahub ingest -c $FILE
fi
# Fetch public COVID-19 datasets from BigQuery.
datahub ingest -c "$DIR/bigquery_covid19_to_file.yml"
# Pull the directives CSV from Google Sheets.
# See https://docs.google.com/spreadsheets/d/17c5SBiXEw5PuV7oEkC2uQnX55C6TPZTnr6XRQ6X-Qy0/edit#gid=0.