How To Fill Irregularly Missing Time-series Values With Linear Interepolation By Each User In BigQuery?
I have data which has missing time series values irregulaly for each users, and I'd like to convert it with a certain interval with liner interpolation using BigQuery Standard SQL.
Solution 1:
Below is for BigQuery SQL
#standardSQL
select name, time,
ifnull(value, start_value
+ (end_value - start_value) / timestamp_diff(end_tick, start_tick, minute) * timestamp_diff(time, start_tick, minute)
) as value_interpolated
from (
select name, time, value,
first_value(tick ignore nulls ) over win1 as start_tick,
first_value(value ignore nulls) over win1 as start_value,
first_value(tick ignore nulls ) over win2 as end_tick,
first_value(value ignore nulls) over win2 as end_value,
from (
select name, time, t.time as tick, value
from (
select name, generate_timestamp_array(min(time), max(time), interval 1 minute) times
from `project.dataset.table`
group by name
)
cross join unnest(times) time
left join `project.dataset.table` t
using(name, time)
)
window
win1 as (partition by name order by time desc rows between current row and unbounded following),
win2 as (partition by name order by time rows between current row and unbounded following)
)
if to apply to sample data from your question - output is
Solution 2:
This is not very different from your previous question. Starting from the accepted answer, you could do:
select name, time,
ifnull(value, start_value + (end_value - start_value) / (end_tick - start_tick) * (time - start_tick)) as value_interpolated
from (
select name, time, value,
first_value(tick ignore nulls ) over win1 as start_tick,
first_value(value ignore nulls) over win1 as start_value,
first_value(tick ignore nulls ) over win2 as end_tick,
first_value(value ignore nulls) over win2 as end_value,
from (
select name, time, t.time as tick, value
from (
select name, generate_array(min(time), max(time)) times
from `project.dataset.table`
group by name
)
cross join unnest(times) time
left join `project.dataset.table` t using(name, time)
)
window
win1 as (partition by name order by time desc rows between current row and unbounded following),
win2 as (partition by name order by time rows between current row and unbounded following)
)
Post a Comment for "How To Fill Irregularly Missing Time-series Values With Linear Interepolation By Each User In BigQuery?"