728x90
아래와 같은 예시 JSON 파일을 빅쿼리에 넣기 위한 DDL과 DML 입니다.
{
"type": "sales_report",
"source": "ecommerce_platform",
"crawl_id": "ecommerce_daily_2024-05-29T08:10:00+00:00",
"index_keys": {
"sale_type": "daily",
"product_category": "electronics",
"region": "US",
"sale_period_type": "daily",
"start_datetime": "2024-05-29T00:00:00",
"end_datetime": "2024-05-29T23:59:59"
},
"data": [
{
"rank": 1,
"product_id": "12345",
"product_name": "Smartphone X12",
"brand": "BrandA",
"quantity_sold": 875,
"revenue": 875540
},
{
"rank": 2,
"product_id": "12346",
"product_name": "Laptop Pro 15",
"brand": "BrandB",
"quantity_sold": 635,
"revenue": 635170
},
{
"rank": 3,
"product_id": "12347",
"product_name": "Smartwatch 4",
"brand": "BrandC",
"quantity_sold": 577,
"revenue": 577540
},
{
"rank": 4,
"product_id": "12348",
"product_name": "E-Reader ReadLight",
"brand": "BrandD",
"quantity_sold": 1021,
"revenue": 1021890
},
{
"rank": 5,
"product_id": "12349",
"product_name": "Tablet S10",
"brand": "BrandE",
"quantity_sold": 593,
"revenue": 593450
}
]
}
CREATE TABLE `ecommerce.ecommerce_staging_data.integrated_product_sales` (
`platform` STRING,
`sale_type` STRING,
`product_category` STRING,
`region` STRING,
`sale_period_type` STRING,
`sale_start_datetime` TIMESTAMP,
`sale_end_datetime` TIMESTAMP,
`quantity_sold` INT64,
`product_name` STRING,
`brand` STRING,
`product_id` STRING,
`sale_details` ARRAY<STRUCT<key STRING, value STRING>>,
ingestion_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP() NOT NULL OPTIONS(description='데이터 수집 타임스탬프')
)
insert into `ecommerce.ecommerce_staging_data.integrated_product_sales`
(
platform,
sale_type,
product_category,
region,
sale_period_type,
sale_start_datetime,
sale_end_datetime,
quantity_sold,
product_name,
brand,
product_id,
sale_details
)
WITH raw_data AS (
select
JSON_EXTRACT_SCALAR(full_data, '$.platform') AS platform,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_type') AS sale_type,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.product_category') AS product_category,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.region') AS region,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_period_type') AS sale_period_type,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_start_datetime') AS sale_start_datetime,
JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_end_datetime') AS sale_end_datetime,
JSON_EXTRACT_ARRAY(full_data, '$.data') AS json_array
from
`ecommercedev_ecommerce_raw_data.raw_product_sales`
where
platform = 'online_store'
AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_type') = 'flash_sale'
AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.product_category') = 'electronics'
AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.region') = 'US'
AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_period_type') = 'daily'
AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_start_datetime') = '2024-05-29T00:00:00'
)
select
platform,
sale_type,
product_category,
region,
sale_period_type,
datetime(sale_start_datetime) sale_start_datetime,
datetime(sale_end_datetime) sale_end_datetime,
CAST(JSON_EXTRACT_SCALAR(sale, '$.quantity_sold') AS INT64) AS quantity_sold,
JSON_EXTRACT_SCALAR(sale, '$.product_name') AS product_name,
JSON_EXTRACT_SCALAR(sale, '$.brand') AS brand,
JSON_EXTRACT_SCALAR(sale, '$.product_id') AS product_id,
CAST(NULL AS ARRAY<STRUCT<key STRING, value STRING>>) AS sale_details
from
raw_data,
unnest(json_array) as sale
728x90
'데이터베이스(Database) > BigQuery' 카테고리의 다른 글
시간대별로 데이터를 고르게 샘플링 하는 방법 (2) | 2024.09.09 |
---|