Note
Access to this page requires authorization. You can try signing in or changing directories.
Access to this page requires authorization. You can try changing directories.
Switch services using the Version drop-down list. Learn more about navigation.
Applies to: ✅ Azure Data Explorer ✅ Azure Monitor ✅ Microsoft Sentinel
Detect anomalous points in a daily series with monthly seasonality.
The function series_monthly_decompose_anomalies_fl() is a user-defined function (UDF) that detects anomalies in multiple time series that have monthly seasonality. The function is built on top of series_decompose_anomalies(). The challenge is that the length of a month is variable between 28 to 31 days, so building a baseline by using series_decompose_anomalies() out of the box detects fixed seasonality thus fails to match spikes or other patterns that occur in the 1st or other day in each month.
Syntax
series_monthly_decompose_anomalies_fl(threshold)
Learn more about syntax conventions.
Parameters
| Name | Type | Required | Description |
|---|---|---|---|
| threshold | real |
Anomaly threshold. Default is 1.5. |
Function definition
You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows:
Define the function using the following let statement. No permissions are required.
Important
A let statement can't run on its own. It must be followed by a tabular expression statement. To run a working example of series_clean_anomalies_fl(), see Example.
let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
let _tbl=materialize(tbl
| extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
| extend _vdoy = 31*(_moy-1)+_dom // virtual day of year (assuming all months have 31 days)
);
let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
let keys = _tbl | summarize by _key | extend dummy=1;
let years = _tbl | summarize by _year | extend dummy=1;
let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
| join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
vdoys
| join kind=leftouter _tbl on _key, _year, _vdoy
| project-away _key1, _year1, _moy1, _vdoy1
| extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
| partition by _key (as T
| where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and
_vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
)
| join kind=inner median_tbl on _key, $left._vdom == $right._dom
| extend _vval = coalesce(_val, p50)
//| order by _key asc, _vadoy asc // for debugging
| make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
| extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
| mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
| project-away _vadoy
| project-rename _val=_vval
| where isnotnull(_date)
};
// Write your query to use the function here.
Example
The input table must contain _key, _date and _val columns. The query builds a set of time series of _val per each _key and adds anomalies, score and baseline columns.
To use a query-defined function, invoke it after the embedded function definition.
let series_monthly_decompose_anomalies_fl=(tbl:(_key:string, _date:datetime, _val:real), threshold:real=1.5)
{
let _tbl=materialize(tbl
| extend _year=getyear(_date), _dom = dayofmonth(_date), _moy=monthofyear(_date), _doy=dayofyear(_date)
| extend _vdoy = 31*(_moy-1)+_dom // virtual day of year (assuming all months have 31 days)
);
let median_tbl = _tbl | summarize p50=percentiles(_val, 50) by _key, _dom;
let keys = _tbl | summarize by _key | extend dummy=1;
let years = _tbl | summarize by _year | extend dummy=1;
let vdoys = range _vdoy from 0 to 31*12-1 step 1 | extend _moy=_vdoy/31+1, _vdom=_vdoy%31+1, _vdoy=_vdoy+1 | extend dummy=1
| join kind=fullouter years on dummy | join kind=fullouter keys on dummy | project-away dummy, dummy1, dummy2;
vdoys
| join kind=leftouter _tbl on _key, _year, _vdoy
| project-away _key1, _year1, _moy1, _vdoy1
| extend _adoy=31*12*_year+_doy, _vadoy = 31*12*_year+_vdoy
| partition by _key (as T
| where _vadoy >= toscalar(T | summarize (_adoy, _vadoy)=arg_min(_adoy, _vadoy) | project _vadoy) and
_vadoy <= toscalar(T | summarize (_adoy, _vadoy)=arg_max(_adoy, _vadoy) | project _vadoy)
)
| join kind=inner median_tbl on _key, $left._vdom == $right._dom
| extend _vval = coalesce(_val, p50)
//| order by _key asc, _vadoy asc // for debugging
| make-series _vval=avg(_vval), _date=any(_date) default=datetime(null) on _vadoy step 1 by _key
| extend (anomalies, score, baseline) = series_decompose_anomalies(_vval, threshold, 31)
| mv-expand _date to typeof(datetime), _vval to typeof(real), _vadoy to typeof(long), anomalies to typeof(int), score to typeof(real), baseline to typeof(real)
| project-away _vadoy
| project-rename _val=_vval
| where isnotnull(_date)
};
demo_monthly_ts
| project _key=key, _date=ts, _val=val
| invoke series_monthly_decompose_anomalies_fl()
| project-rename key=_key, ts=_date, val=_val
| render anomalychart with(anomalycolumns=anomalies, xcolumn=ts, ycolumns=val)
Output
Series A with monthly anomalies:
Series B with monthly anomalies: