tokenize_fl()

Switch services using the Version drop-down list. Learn more about navigation.
Applies to: ✅ Azure Data Explorer ✅ Azure Monitor ✅ Microsoft Sentinel

The function tokenize_fl() is a user-defined function (UDF) that tokenizes semi-structured text strings, such as log lines, into separate columns. It uses a regex-based approach to split text on non-alphanumeric delimiters while preserving common semantic characters like periods, hyphens, colons, slashes, and equals signs.

Syntax

T | invoke tokenize_fl(text_col)

Learn more about syntax conventions.

Parameters

Name Type Required Description
text_col string ✔️ The name of the string column containing the text to tokenize.

Function definition

You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows:

Define the function using the following let statement. No permissions are required.

Important

A let statement can't run on its own. It must be followed by a tabular expression statement. To run a working example of tokenize_fl(), see Example.

// tokenize_fl(): splits the string in the column named by `text_col` into tokens
// and returns the input rows widened with one column per token (col1, col2, ...).
// A token is a maximal run of characters in [\w.\-:/=]; anything else delimits.
// Parameters:
//   tbl      - input tabular expression (any schema).
//   text_col - name of the string column to tokenize.
let tokenize_fl=(tbl:(*), text_col:string)
{
    // Delimiter pattern: one or more characters that are NOT word characters,
    // periods, hyphens, colons, slashes, or equals signs.
    let universal_regex = @"[^\w\.\-:/=]+";             //  Universal Token Splitter
    tbl
    // Tag each input row with an id so tokenized columns can be joined back.
    | serialize _rid=row_number()
    | join (tbl
    // Read the target column by name; falls back to '' if the column is absent.
    | extend _text = column_ifexists(text_col, '')
    // Collapse every delimiter run to the sentinel "$$$". '$' itself matches the
    // delimiter class, so the sentinel cannot collide with surviving text.
    | extend norm = replace_regex(_text, universal_regex, "$$$")
    | extend tokens = split(norm, "$$$")
    // NOTE(review): row_number() here runs over a second, independent
    // serialization of tbl; correct row alignment assumes both passes enumerate
    // rows in the same order — confirm for sources without a stable order.
    | serialize _rid=row_number()
    // One output row per token; i is the zero-based token index.
    | mv-expand with_itemindex = i tokens
    // Re-pack each row's tokens as a bag {"col1": t0, "col2": t1, ...}.
    | summarize cols = make_bag(bag_pack(strcat("col", tostring(i + 1)), tokens)) by _rid
    // Expand the bag into real columns col1..colN.
    | evaluate bag_unpack(cols)
    ) on _rid
    // Drop the helper join keys from the result.
    | project-away _rid, _rid1
};
// Write your query to use the function here.

Example

The following example uses the invoke operator to run the function.

To use a query-defined function, invoke it after the embedded function definition.

// tokenize_fl(): splits the string in the column named by `text_col` into tokens
// and returns the input rows widened with one column per token (col1, col2, ...).
// A token is a maximal run of characters in [\w.\-:/=]; anything else delimits.
let tokenize_fl=(tbl:(*), text_col:string)
{
    // Delimiter pattern: one or more characters that are NOT word characters,
    // periods, hyphens, colons, slashes, or equals signs.
    let universal_regex = @"[^\w\.\-:/=]+";             //  Universal Token Splitter
    tbl
    // Tag each input row with an id so tokenized columns can be joined back.
    | serialize _rid=row_number()
    | join (tbl
    // Read the target column by name; falls back to '' if the column is absent.
    | extend _text = column_ifexists(text_col, '')
    // Collapse every delimiter run to the sentinel "$$$". '$' itself matches the
    // delimiter class, so the sentinel cannot collide with surviving text.
    | extend norm = replace_regex(_text, universal_regex, "$$$")
    | extend tokens = split(norm, "$$$")
    // NOTE(review): row_number() here runs over a second, independent
    // serialization of tbl; correct row alignment assumes both passes enumerate
    // rows in the same order — confirm for sources without a stable order.
    | serialize _rid=row_number()
    // One output row per token; i is the zero-based token index.
    | mv-expand with_itemindex = i tokens
    // Re-pack each row's tokens as a bag {"col1": t0, "col2": t1, ...}.
    | summarize cols = make_bag(bag_pack(strcat("col", tostring(i + 1)), tokens)) by _rid
    // Expand the bag into real columns col1..colN.
    | evaluate bag_unpack(cols)
    ) on _rid
    // Drop the helper join keys from the result.
    | project-away _rid, _rid1
};
// Sample input: two semi-structured log lines in a single string column 's'.
let tbl = datatable(s:string)
[
  "INFO 2025-12-11T11:22:33Z device=cam01 temp=32.8C ip=10.0.0.5/24",
  "WARN user=adi path=/var/log/syslog error:disk-full id=aa:bb:cc:01",
];
// Tokenize column 's'; each row gains one column per token (col1, col2, ...).
tbl
| invoke tokenize_fl('s')

Output

s col1 col2 col3 col4 col5
INFO 2025-12-11T11:22:33Z device=cam01 temp=32.8C ip=10.0.0.5/24 INFO 2025-12-11T11:22:33Z device=cam01 temp=32.8C ip=10.0.0.5/24
WARN user=adi path=/var/log/syslog error:disk-full id=aa:bb:cc:01 WARN user=adi path=/var/log/syslog error:disk-full id=aa:bb:cc:01