데이터베이스(Database)/BigQuery

Bigquery 테이블 생성과 입력 테스트

leebaro 2024. 6. 10.
728x90

 

아래와 같은 예시 JSON 파일을 빅쿼리에 넣기 위한 DDL과 DML 입니다.

 

{
    "type": "sales_report",
    "source": "ecommerce_platform",
    "crawl_id": "ecommerce_daily_2024-05-29T08:10:00+00:00",
    "index_keys": {
        "sale_type": "daily",
        "product_category": "electronics",
        "region": "US",
        "sale_period_type": "daily",
        "start_datetime": "2024-05-29T00:00:00",
        "end_datetime": "2024-05-29T23:59:59"
    },
    "data": [
        {
            "rank": 1,
            "product_id": "12345",
            "product_name": "Smartphone X12",
            "brand": "BrandA",
            "quantity_sold": 875,
            "revenue": 875540
        },
        {
            "rank": 2,
            "product_id": "12346",
            "product_name": "Laptop Pro 15",
            "brand": "BrandB",
            "quantity_sold": 635,
            "revenue": 635170
        },
        {
            "rank": 3,
            "product_id": "12347",
            "product_name": "Smartwatch 4",
            "brand": "BrandC",
            "quantity_sold": 577,
            "revenue": 577540
        },
        {
            "rank": 4,
            "product_id": "12348",
            "product_name": "E-Reader ReadLight",
            "brand": "BrandD",
            "quantity_sold": 1021,
            "revenue": 1021890
        },
        {
            "rank": 5,
            "product_id": "12349",
            "product_name": "Tablet S10",
            "brand": "BrandE",
            "quantity_sold": 593,
            "revenue": 593450
        }
    ]
}

 

 

 

 

CREATE TABLE `ecommerce.ecommerce_staging_data.integrated_product_sales` (
  `platform` STRING,
  `sale_type` STRING,
  `product_category` STRING,
  `region` STRING,
  `sale_period_type` STRING,
  `sale_start_datetime` TIMESTAMP,
  `sale_end_datetime` TIMESTAMP,
  `quantity_sold` INT64,
  `product_name` STRING,
  `brand` STRING,
  `product_id` STRING,
  `sale_details` ARRAY<STRUCT<key STRING, value STRING>>,
   ingestion_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP()  NOT NULL OPTIONS(description='데이터 수집 타임스탬프')
)

 

 

insert into `ecommerce.ecommerce_staging_data.integrated_product_sales`
(
        platform,
        sale_type,
        product_category,
        region,
        sale_period_type,
        sale_start_datetime,
        sale_end_datetime,
        quantity_sold,
        product_name,
        brand,
        product_id,
        sale_details
    )
WITH raw_data AS (
  select 
      JSON_EXTRACT_SCALAR(full_data, '$.platform') AS platform,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_type') AS sale_type,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.product_category') AS product_category,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.region') AS region,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_period_type') AS sale_period_type,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_start_datetime') AS sale_start_datetime,
      JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_end_datetime') AS sale_end_datetime,
      JSON_EXTRACT_ARRAY(full_data, '$.data') AS json_array
  from
    `ecommercedev_ecommerce_raw_data.raw_product_sales`
  where
    platform = 'online_store'  
    AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_type') = 'flash_sale'
    AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.product_category') = 'electronics'
    AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.region') = 'US'
    AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_period_type') = 'daily'
    AND JSON_EXTRACT_SCALAR(full_data, '$.index_keys.sale_start_datetime') = '2024-05-29T00:00:00'
)
select
  platform,
  sale_type,
  product_category,
  region,
  sale_period_type,
  datetime(sale_start_datetime) sale_start_datetime,
  datetime(sale_end_datetime) sale_end_datetime,
  CAST(JSON_EXTRACT_SCALAR(sale, '$.quantity_sold') AS INT64) AS quantity_sold,
  JSON_EXTRACT_SCALAR(sale, '$.product_name') AS product_name,
  JSON_EXTRACT_SCALAR(sale, '$.brand') AS brand,
  JSON_EXTRACT_SCALAR(sale, '$.product_id') AS product_id,
  CAST(NULL AS ARRAY<STRUCT<key STRING, value STRING>>) AS sale_details
from
raw_data,
unnest(json_array) as sale

 

 

728x90