Rockset

    String Functions

    This page covers functions to manipulate strings in Rockset.

    String functions can be used in any part of a query where a string value is in scope, for example in the SELECT, WHERE, and HAVING clauses to name a few.

    Basic Functions

    CONCAT

    CONCAT(x, ...)

    Returns the concatenation of the strings given in the arguments. Supports variable number of arguments. null arguments are ignored.

    SELECT
        CONCAT('foo')
    'foo'
    SELECT
        CONCAT('foo', '')
    'foo'
    SELECT
        CONCAT('foo', 'bar')
    'foobar'
    SELECT
        CONCAT('foo', 'bar', 'baz')
    'foobarbaz'
    SELECT
        CONCAT('foo', 3)
    Error: 'Arguments to CONCAT() must be all bytes or all string'
    SELECT
        CONCAT('foo', null, 'bar')
    'foobar'

    Operator form equivalent to CONCAT, except that an argument of null will result in null.

    SELECT
        'foo' || ''
    'foo'
    SELECT
        'foo' || 'bar'
    'foobar'
    SELECT
        'foo' || 'bar' || 'baz'
    'foobarbaz'
    SELECT
        'foo' || 3
    Error: 'Arguments to CONCAT() must be all bytes or all string'
    SELECT
        'foo' || null || 'bar'
    null

    FORMAT

    FORMAT(format_str, ...)

    Formats the arguments according to the format string. Will work with integers, floating point numbers, and strings. Follows the format string syntax of fmt: https://fmt.dev/latest/syntax.html

    SELECT
        FORMAT('{} + {} + {}', 'one', 'two', 'three')
    'one + two + three'
    SELECT
        FORMAT('{:.2f}', 3.14159)
    '3.14'
    SELECT
        FORMAT('hex: {:#x}', 97)
    'hex: 0x61'

    JSON_FORMAT

    JSON_FORMAT(x)

    Converts given JSON to string.

    SELECT
        JSON_FORMAT(ARRAY_CREATE(3, 6, 9))
    '[3,6,9]'
    SELECT
        JSON_FORMAT('hello')
    '"hello"'
    SELECT
        JSON_FORMAT(true)
    'true'
    SELECT
        JSON_FORMAT(
            OBJECT(
                ARRAY_CREATE('name', 'age', 'city'),
                ARRAY_CREATE('John', 30, 'New York')
            )
        )
    '{"name":"John","city":"New York","age":30}'
    -- Format `null`.
    SELECT
        JSON_FORMAT(null)
    'null'
    -- Format `undefined`.
    SELECT
        JSON_FORMAT(undefined)
    '{"__rockset_type":"undefined"}'

    JSON_PARSE

    JSON_PARSE(x)

    Parses given string as JSON.

    SELECT
        JSON_PARSE('[3, 6, 9]')
    [3, 6, 9]
    SELECT
        JSON_PARSE('"hello"')
    'hello'
    SELECT
        JSON_PARSE('true')
    true
    SELECT
        JSON_PARSE(
            '{ "name": "John", "age": 30, "city": "New York" }'
        )
    {"name": "John", "age": 30, "city": "New York"}
    SELECT
        JSON_PARSE(
            '{ "name": "John", "age": 30, "city": "New York" }'
        ).city
    'New York'
    -- Parse `null`.
    SELECT
        TYPEOF(JSON_PARSE('null'))
    'null'
    -- Parse `undefined`.
    SELECT
        TYPEOF(JSON_PARSE('{ "__rockset_type": "undefined" }'))
    'undefined'
    SELECT
        JSON_PARSE('hello')
    Error: 'json parse error on line 0 near `hello`: expected json value'

    PG_ARRAY_PARSE

    PG_ARRAY_PARSE(x, typestr)

    Parses a string encoded using Postgres array output syntax. typestr should be a string holding the Postgres array type, such as 'integer[]' or 'text[]'. Int, float, and string arrays of any depth can be decoded.

    SELECT
        PG_ARRAY_PARSE('{foo,bar,NULL,null}', 'text[]')
    ["foo", "bar", null, null]
    SELECT
        PG_ARRAY_PARSE('{"foo \\","\"bar\"","NULL"}', 'char[]')
    ["foo \\", "\"bar\"", "NULL"]
    SELECT
        PG_ARRAY_PARSE('{{1,2},{3,4}}', 'integer[][]')
    [[1, 2], [3, 4]]
    SELECT
        PG_ARRAY_PARSE('{{{Infinity}},{{-Infinity}}}', 'float[][]')
    [[[Infinity]], [[-Infinity]]]

    Supported types for the second argument of PG_ARRAY_PARSE:

    • bigint[]
    • char[]
    • double precision[]
    • double[]
    • float[]
    • int64[]
    • int[]
    • integer[]
    • smallint[]
    • string[]
    • text[]
    • varchar[]

    DEC128_FORMAT

    DEC128_FORMAT(low, high)

    Converts from the IEEE 754 decimal128 floating point format to a string using scientific notation. low and high should be integers holding the bottom and top 64 bits of the binary representation, respectively.

    SELECT
        DEC128_FORMAT(10, 3476215962376601600)
    "+10E-1"
    SELECT
        DEC128_FORMAT(5000, 3475653012423180288)
    "+5000E-2"
    /* Casting to float may lose precision, but allows arithmetic */
    SELECT
        CAST(
            DEC128_FORMAT(5000, 3475653012423180288) AS float
        )
    50
    SELECT
        DEC128_FORMAT(0, 8646911284551352320)
    "+Inf"
    SELECT
        DEC128_FORMAT(0, 8935141660703064064)
    "+NaN"

    LENGTH

    LENGTH(obj)

    Returns number of elements in obj.

    SELECT
        LENGTH('foo')
    3
    SELECT
        LENGTH('')
    0
    SELECT
        LENGTH(null)
    null

    LOWER

    LOWER(x)

    Returns locale-independent lowercase string x.

    SELECT
        LOWER('foo')
    'foo'
    SELECT
        LOWER('FOO')
    'foo'
    SELECT
        LOWER('123')
    '123'
    SELECT
        LOWER('')
    ''
    SELECT
        LOWER(null)
    null

    LPAD

    LPAD(string, targetLength, padString)

    Pads string from the left to targetLength using padString. If targetLength is less than the length of string, the result will be truncated to the targetLength.

    SELECT
        LPAD('foo', 6, '!$')
    '!$!foo'
    SELECT
        LPAD('foo', 5, '!')
    '!!foo'
    SELECT
        LPAD('foobar', 3, '!')
    'foo'

    LTRIM

    LTRIM(string [, characters])

    Remove the longest string containing only characters from characters (a space by default) from the start of string

    SELECT
        LTRIM(' foo')
    'foo'
    SELECT
        LTRIM('foo ')
    'foo '
    SELECT
        LTRIM('
        foo')
    'foo'
    SELECT
        LTRIM('yyyyzabc', 'xyz')
    'abc'

    NORMALIZE

    NORMALIZE(string[, form])

    Returns Unicode-normalized form of string. form is an identifier and must be one of NFC, NFD, NFKC, NFKD, which are the four Unicode normalization methods; NFC is default.

    Note that NORMALIZE uses special syntax; form is an identifier, not a string.

    -- no quotes around NFC!
    SELECT
        NORMALIZE('hello', NFC)
    'hello'

    REPLACE

    REPLACE(string, search[, replacement])

    Returns a string with all instances of search replaced with replacement in string. replacement is optional, which if not specified removes all instance of search from string. When search is an empty string, string is not modified in any way.

    SELECT
        REPLACE('foobar', 'bar')
    'foo'
    SELECT
        REPLACE('foobar', 'bar', '')
    'foo'
    SELECT
        REPLACE('foobar', 'bar', 'baz')
    'foobaz'
    SELECT
        REPLACE('barfoobar', 'bar', 'baz')
    'bazfoobaz'
    SELECT
        REPLACE('foo', 'bar', '')
    'foo'
    SELECT
        REPLACE('foo', '', 'bar')
    'foo'
    SELECT
        REPLACE('foo', 'bar', null)
    null

    RPAD

    RPAD(string, targetLength, padString)

    Pads string from the right to targetLength using padString. If targetLength is less than the length of string, the result will be truncated to the targetLength.

    SELECT
        RPAD('foo', 6, '!$')
    'foo!$!'
    SELECT
        RPAD('foo', 5, '!')
    'foo!!'
    SELECT
        RPAD('foobar', 3, '!')
    'foo'

    RTRIM

    RTRIM(string[, characters])

    Remove the longest string containing only characters from characters (a space by default) from the end of string

    SELECT
        RTRIM('foo ')
    'foo'
    SELECT
        RTRIM(' foo')
    ' foo'
    SELECT
        RTRIM('  foo  ')
    '  foo'
    SELECT
        RTRIM('trimxxxx', 'x')
    'trim'

    SPLIT

    SPLIT(string, delimiter[, limit])

    Splits string on delimiter and returns an array. With limit, only the first limit - 1 delimiters are split upon, thereby returning an array of size at most limit. The last element in the array always contains everything left in the string in the case where there are >= limit occurrences of the delimiter in the string. limit must be a positive number.

    SELECT
        SPLIT('foo.bar', '.')
    ["foo", "bar"]
    SELECT
        SPLIT('foo.bar.', '.')
    ["foo", "bar", ""]
    SELECT
        SPLIT('foo.bar', 'foo')
    ["", ".bar"]
    SELECT
        SPLIT('foo.bar', '.', 1)
    ["foo.bar"]
    SELECT
        SPLIT('foo.bar', '.', 2)
    ["foo", "bar"]
    SELECT
        SPLIT('foo.bar', '.', 3)
    ["foo", "bar"]
    SELECT
        SPLIT('foo.bar.baz', '.', 2)
    ["foo", "bar.baz"]
    SELECT
        SPLIT('foo.bar.baz', '.', 3)
    ["foo", "bar", "baz"]
    SELECT
        SPLIT('foo.bar.baz', '.')
    ["foo", "bar", "baz"]

    STRPOS

    STRPOS(string, substring)

    Returns the starting position of the first instance of substring in string. Positions start with 1. If not found, 0 is returned.

    SELECT
        STRPOS('foo', 'foo')
    1
    SELECT
        STRPOS('foo', 'bar')
    0
    SELECT
        STRPOS('foo bar baz', 'bar')
    5

    -- STRPOS of empty string is always 1

    SELECT
        STRPOS('foo', '')
    1
    SELECT
        STRPOS('foo', null)
    null

    SUBSTR

    SUBSTR(string, start[, length])

    Returns substring of string starting at character at index given by start (1-based index) and of length length. If length is not given, returns the substring starting at start until the end of string. If start is negative, it rolls over to the end of the string and counts backwards from there.

    SELECT
        SUBSTR('abcdef', 2, 3)
    'bcd'
    SELECT
        SUBSTR('abcdef', 2)
    'bcdef'
    SELECT
        SUBSTR('abcdef', -1)
    'f'
    SELECT
        SUBSTR('abcdef', -4, 2)
    'cd'

    SUFFIXES

    SUFFIXES(string, truncate_length)

    Returns an array of all suffixes of string where each returned suffix is truncated to truncate_length. If not specified the default truncate_length used is 100. The max length allowed for the input string is 100,000.

    SELECT
        SUFFIXES('hello')
    ["hello", "ello", "llo", "lo", "o"]
    SELECT
        SUFFIXES('hello', 2)
    ["he", "el", "ll", "lo", "o"]
    SELECT
        SUFFIXES('hello', 3)
    ["hel", "ell", "llo", "lo", "o"]
    SELECT
        SUFFIXES(null)
    null

    NGRAMS

    NGRAMS(string, n_length)

    Returns an array of all ngrams of string of length n_length. The max length allowed for the input string is 100,000.

    SELECT
        NGRAMS('hello', 2)
    ["he", "el", "ll", "lo"]
    SELECT
        NGRAMS('hello', 1)
    ["h", "e", "l", "l", "o"]
    SELECT
        NGRAMS(null, 1)
    null
    NGRAMS(string, min_length, max_length)

    Returns an array of all ngrams of string that are between min_length and max_length in size. The max length allowed for the input string is 100,000.

    SELECT
        NGRAMS('hello', 1, 3)
    ["h", "he", "hel", "e", "el", "ell", "l", "ll", "llo", "l", "lo", "o"]
    SELECT
        NGRAMS('hello', 1, 1)
    ["h", "e", "l", "l", "o"]
    SELECT
        NGRAMS(null, 1, 1)
    null

    TRIM

    TRIM(string)

    Returns string with leading and trailing whitespace removed.

    SELECT
        TRIM(' foo ')
    'foo'
    SELECT
        TRIM('
        foo
     ')
    'foo'

    UPPER

    UPPER(x)

    Returns locale-independent uppercase string x.

    SELECT
        UPPER('foo')
    'FOO'
    SELECT
        UPPER('FOO')
    'FOO'
    SELECT
        UPPER('123')
    '123'
    SELECT
        UPPER('')
    ''
    SELECT
        UPPER(null)
    null

    Encoding Functions

    FROM_BASE64

    FROM_BASE64(s)

    Decodes the base64 string s into a bytes value.

    SELECT
        FROM_BASE64('Zm9v')
    'Zm9v'

    FROM_HEX

    FROM_HEX(s)

    Decodes the hex string s into a bytes value.

    SELECT
        FROM_HEX('666f6f')
    'Zm9v'
    SELECT
        FROM_HEX('626172')
    'YmFy'

    FROM_UTF8

    FROM_UTF8(b)

    If b is a bytes value that represents a valid UTF-8 string, return it as a string. Otherwise, raise an error.

    SELECT
        FROM_UTF8(bytes 'foo')
    'foo'
    SELECT
        FROM_UTF8(bytes 'bar')
    'bar'

    TO_BASE64

    TO_BASE64(b)

    Encodes the bytes value b into a base64 string representation.

    SELECT
        TO_BASE64(bytes 'foo')
    'Zm9v'

    TO_HEX

    TO_HEX(b)

    Encodes the bytes value b into a hex string representation.

    SELECT
        TO_HEX(bytes 'foo')
    '666f6f'
    SELECT
        TO_HEX(bytes 'bar')
    '626172'

    TO_UTF8

    TO_UTF8(s)

    Return the bytes UTF-8 representation of the string value s.

    SELECT
        TO_UTF8('foo')
    'Zm9v'
    SELECT
        TO_UTF8('bar')
    'YmFy'

    URL_ENCODE

    URL_ENCODE(value)

    Encodes the string into a percent-encoded ASCII text representation

    SELECT
        URL_ENCODE('foo bar?')
    'foo%20bar%3f'

    URL_DECODE

    URL_ENCODE(value)

    Encodes the string into a percent-encoded ASCII text representation

    SELECT
        URL_DECODE('foo%20bar%3f')
    'foo bar?'

    Regular Expression Functions

    Rockset uses RE2 syntax for specifying regular expressions. Named capturing groups is not supported, only numbered groups.

    REGEXP_EXTRACT

    REGEXP_EXTRACT(string, pattern[, group])

    Returns the first match of pattern in string, or null if the pattern does not match. If group is specified and greater than zero, returns the groupth capturing group; if group is not specified or is zero, returns the full match.

    SELECT
        REGEXP_EXTRACT('foo bar baz', '[a-z]+')
    'foo'
    SELECT
        REGEXP_EXTRACT('foo bar baz', '.*bar')
    'foo bar'
    SELECT
        REGEXP_EXTRACT('foo bar baz', ' [a-z]+ ')
    ' bar '
    SELECT
        REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 1)
    'foo'
    SELECT
        REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 2)
    'baz'

    REGEXP_EXTRACT_ALL

    REGEXP_EXTRACT_ALL(string, pattern[, group])

    Returns an array containing all matches of pattern in string. If group is specified and greater than zero, extracts the groupth capturing group from each match; if group is not specified or is zero, returns the full matches.

    SELECT
        REGEXP_EXTRACT_ALL('foo bar baz', '[a-z]+')
    ["foo", "bar", "baz"]
    SELECT
        REGEXP_EXTRACT_ALL('foo bar baz', 'ba([a-z])', 1)
    ["r", "z"]
    SELECT
        REGEXP_EXTRACT_ALL('foo bar baz', '([a-z])([a-z]+)', 2)
    ["oo", "ar", "az"]

    REGEXP_LIKE

    REGEXP_LIKE(string, pattern)

    Returns true if string matches the regular expression pattern, false otherwise.

    SELECT
        REGEXP_LIKE('foo bar baz', '.*')
    true
    SELECT
        REGEXP_LIKE('foo bar baz', '^$')
    false
    SELECT
        REGEXP_LIKE('foo bar baz', '^foo')
    true
    SELECT
        REGEXP_LIKE('foo bar baz', '[a-zA-Z0-9 ]+')
    true
    SELECT
        REGEXP_LIKE('foo bar baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
    false
    SELECT
        REGEXP_LIKE('foo@bar.baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
    true

    REGEXP_REPLACE

    REGEXP_REPLACE(string, pattern[, replacement])

    Returns string with all places where pattern matches replaced with replacement (or erased if replacement is not specified). replacement may use \1 .. \9 escape sequences to refer to capturing groups, or \0 to refer to the entire match.

    SELECT
        REGEXP_REPLACE('foo bar baz', ' ', '')
    'foobarbaz'
    SELECT
        REGEXP_REPLACE('foo bar baz', 'b[a-z]+ ', '')
    'foo baz'
    SELECT
        REGEXP_REPLACE('foo bar baz', '.* b', 'b')
    'baz'
    SELECT
        REGEXP_REPLACE(
            'John A. Doe',
            '([A-Z][a-z]+) ([A-Z]\.?) ([A-Z][a-z]+)',
            '\3, \1 \2'
        )
    'Doe, John A.'

    REGEXP_SPLIT

    REGEXP_SPLIT(string, pattern)

    Returns an array with the components of string when split by pattern.

    SELECT
        REGEXP_SPLIT('foo bar baz', ' ')
    ["foo", "bar", "baz"]
    SELECT
        REGEXP_SPLIT('foo bar baz', ' +')
    ["foo", "bar", "baz"]
    SELECT
        REGEXP_SPLIT('foo123bar456baz', '[0-9]+')
    ["foo", "bar", "baz"]

    SPLIT_PART

    SPLIT_PART(string, delimiter, index)

    Splits string on delimiter and returns the field index. Field indexes start with 1. If the index is larger than than the number of fields, then null is returned.

    SELECT
        SPLIT_PART('a:b:c:d', ':', 2)
    'b'
    SELECT
        SPLIT_PART('a:b:c', ',', 3)
    null
    SELECT
        SPLIT_PART('a:b:c:d', ':', 10)
    null
    SELECT
        SPLIT_PART('a::c:d', ':', 2)
    ''
    SELECT
        SPLIT_PART('a:b:c', ',', 1)
    'a:b:c'

    CHR

    CHR(n)

    Returns a single character string made up of the character whose Unicode code point is n.

    SELECT
        CHR(42)
    '*'

    ASCII

    ASCII(string)

    Returns the Unicode code point value of the first character in string, or zero if the string is empty.

    SELECT
        ASCII('*')
    42

    REVERSE

    REVERSE(string)

    Returns string with its characters in reverse order.

    SELECT
        REVERSE('abcd')
    'dcba'