BigQuery
- Table usage (via referenced_tables in INFORMATION_SCHEMA.JOBS_BY_PROJECT)
SELECT
  date(creation_time) creation_date,
  job_id,
  max(total_slot_ms) total_slot_ms,
  max(total_bytes_billed) total_bytes_billed,
  -- ARRAY_AGG(CONCAT(reference.project_id, '.', reference.dataset_id, '.', reference.table_id)
  --   ORDER BY reference.project_id, reference.dataset_id, reference.table_id) AS table_combination,
  ARRAY_TO_STRING(
    ARRAY_AGG(
      CONCAT(reference.project_id, '.', reference.dataset_id, '.', reference.table_id)
      ORDER BY reference.project_id, reference.dataset_id, reference.table_id
    ),
    ','
  ) AS table_combination_str
FROM
  `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT,
  UNNEST(referenced_tables) AS reference
WHERE
  date(creation_time) between '2024-12-01' and '2024-12-31'
  and JOBS_BY_PROJECT.project_id = 'your project id'
  and (total_bytes_billed > 0 or total_slot_ms > 0)
  and array_length(referenced_tables) > 0
  -- and query is not null
  -- and parent_job_id is null
GROUP BY ALL
- Table usage (tables parsed from the query text with regular expressions)
WITH ExtractedTables AS (
  select
    creation_date,
    job_id,
    total_slot_ms,
    total_bytes_billed,
    case
      when array_length(extracted_tables) > 0 then extracted_tables
      when array_length(extracted_tables_2) > 0 then extracted_tables_2
      else ['no table extracted']
    end as extracted_tables
  from (
    SELECT
      date(creation_time) creation_date,
      job_id,
      total_slot_ms,
      total_bytes_billed,
      -- fully qualified project.dataset.table (or project:dataset.table) references
      REGEXP_EXTRACT_ALL(
        replace(query, '`', ''),
        r'((?:[a-zA-Z_-][a-zA-Z0-9_-]+[\.:]){2,3}[a-zA-Z0-9_-]+)'
      ) extracted_tables,
      -- dataset.table references that directly follow a FROM keyword
      REGEXP_EXTRACT_ALL(
        replace(query, '`', ''),
        r'(?i)from\s+([a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+)'
      ) extracted_tables_2
    FROM
      `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
    WHERE
      date(creation_time) between '2024-12-01' and '2024-12-31'
      and JOBS_BY_PROJECT.project_id = 'your project id'
      and (total_bytes_billed > 0 or total_slot_ms > 0)
      -- and array_length(referenced_tables) > 0
      and query is not null
      and parent_job_id is null
  )
)
SELECT
  creation_date,
  job_id,
  max(total_slot_ms) total_slot_ms,
  max(total_bytes_billed) total_bytes_billed,
  -- ARRAY_AGG(DISTINCT table_name ORDER BY table_name) AS table_combination,
  ARRAY_TO_STRING(
    ARRAY_AGG(DISTINCT table_name ORDER BY table_name),
    ','
  ) AS table_combination_str
FROM
  ExtractedTables,
  UNNEST(extracted_tables) AS table_name
GROUP BY ALL
- pivot table
- select date, m_name, m_value from test.kpi_daily unpivot(m_value for m_name in (b2c_signup_cnt, b2b_signup_cnt))
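- The snippet above unpivots wide columns into rows. Going the other way, a minimal PIVOT sketch, assuming a hypothetical long-format table test.kpi_daily_long with columns date, m_name, m_value:
- select * from test.kpi_daily_long pivot(sum(m_value) for m_name in ('b2c_signup_cnt', 'b2b_signup_cnt'))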
- Format an array (bigQuery)
- format("%T", array_agg(date order by date))
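- A quick self-contained check of what %T produces, using inline sample dates:
- select format('%T', array_agg(d order by d)) from unnest([date '2024-01-01', date '2024-01-02']) d -- returns [DATE "2024-01-01", DATE "2024-01-02"]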
Get the last value (bigQuery)
select array_agg(value order by time desc)[OFFSET(0)] value_last
from (
  select 1 time, 33 value
  union all select 2 time, 22 value
  union all select 3 time, 11 value
)
Unnest multiple arrays (bigQuery)
WITH data AS (
  SELECT 1 n, ['a', 'b'] AS r, [1, 2] b, ['a1', 'b2'] c
  UNION ALL SELECT 2, ['c', 'd', 'e'], [3, 4, 5], ['c3', 'd4', 'e5']
  UNION ALL SELECT 3, ['f'], [6], ['f6']
)
SELECT n, r, b, c
FROM data,
  UNNEST(r) r WITH OFFSET pos1,
  UNNEST(b) b WITH OFFSET pos2,
  UNNEST(c) c WITH OFFSET pos3
WHERE pos1 = pos2 AND pos2 = pos3
Create dataset, execute a query and write to a table, export table to CSV in GCS (bigQuery)
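A minimal sketch of that flow in SQL, assuming hypothetical names my_dataset, my_table, and gs://my-bucket; EXPORT DATA writes the CSV shards to GCS:
-- create the dataset (name is hypothetical)
CREATE SCHEMA IF NOT EXISTS my_dataset;
-- execute a query and write the result to a table
CREATE OR REPLACE TABLE my_dataset.my_table AS
SELECT ein FROM `bigquery-public-data.irs_990.irs_990_2012`;
-- export the table to CSV files in GCS (bucket is hypothetical)
EXPORT DATA OPTIONS (
  uri = 'gs://my-bucket/my_table/*.csv',
  format = 'CSV',
  overwrite = true,
  header = true
) AS
SELECT * FROM my_dataset.my_table;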
Using the BigQuery Storage API to download large results (bigQuery)
- https://cloud.google.com/bigquery/docs/pandas-gbq-migration#using_the_to_download_large_results
import pandas

sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"
# Use the BigQuery Storage API to download results more quickly.
df = pandas.read_gbq(sql, dialect='standard', use_bqstorage_api=True)
Readability (bigQuery)
- format("%'d", 1000) = '1,000' (the ' grouping flag inserts the thousands separator; plain %d prints 1000)
Login streak (bigQuery)
with tmp as (
  select 'a' usn, date('2019-01-01') login_date
  union all select 'a', date('2019-01-02')
  union all select 'a', date('2019-01-04')
  union all select 'a', date('2019-01-05')
  union all select 'a', date('2019-01-06')
  union all select 'b', date('2019-01-02')
  union all select 'b', date('2019-01-03')
),
tmp_user_min_login_date as (
  select usn, min(login_date) start_login_date
  from tmp
  group by usn
)
select usn, min(login_date) std_dt, max(login_date) end_dt, count(*) cnt
from (
  -- gaps-and-islands: within a streak of consecutive days, the row number
  -- and the date offset grow in lockstep, so their sum (num) stays constant
  select
    ta.usn,
    login_date,
    row_number() over (order by ta.usn, login_date)
      + date_diff(start_login_date, login_date, day) num
  from tmp ta
  inner join tmp_user_min_login_date tb on ta.usn = tb.usn
)
group by usn, num
having cnt > 1
order by usn, num desc
Column number of a table (bigQuery)
select array_length(regexp_extract_all(to_json_string(`netmarble-gameservice-ai.rmt_stonemmotw_ml.feature_20190517`), '":')) total_columns
from `netmarble-gameservice-ai.rmt_stonemmotw_ml.feature_20190517`
limit 1
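A simpler alternative for counting columns, assuming you can query the dataset's INFORMATION_SCHEMA:
select count(*) total_columns
from `netmarble-gameservice-ai.rmt_stonemmotw_ml`.INFORMATION_SCHEMA.COLUMNS
where table_name = 'feature_20190517'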