String Functions
This page covers functions to manipulate strings in Rockset.
String functions can be used in any part of a query where a string value is in scope, for example in the SELECT, WHERE, and HAVING clauses to name a few.
Basic Functions
CONCAT
CONCAT(x, ...)
Returns the concatenation of the strings given in the arguments. Supports variable number of arguments. null
arguments are ignored.
SELECT
CONCAT('foo')
'foo'
SELECT
CONCAT('foo', '')
'foo'
SELECT
CONCAT('foo', 'bar')
'foobar'
SELECT
CONCAT('foo', 'bar', 'baz')
'foobarbaz'
SELECT
CONCAT('foo', null, 'bar')
'foobar'
Operator form equivalent to CONCAT
, except that an argument of null
will result in null
.
SELECT
'foo' || ''
'foo'
SELECT
'foo' || 'bar'
'foobar'
SELECT
'foo' || 'bar' || 'baz'
'foobarbaz'
SELECT
'foo' || null || 'bar'
null
FORMAT
FORMAT(format_str, ...)
Formats the arguments according to the format string. Will work with integers, floating point numbers, and strings. Follows the format string syntax of fmt
: https://fmt.dev/latest/syntax.html
SELECT
FORMAT('{} + {} + {}', 'one', 'two', 'three')
'one + two + three'
SELECT
FORMAT('{:.2f}', 3.14159)
'3.14'
SELECT
FORMAT('hex: {:#x}', 97)
'hex: 0x61'
JSON_FORMAT
JSON_FORMAT(x)
Converts given JSON to string.
SELECT
JSON_FORMAT(ARRAY_CREATE(3, 6, 9))
'[3,6,9]'
SELECT
JSON_FORMAT('hello')
'"hello"'
SELECT
JSON_FORMAT(true)
'true'
SELECT
JSON_FORMAT(
OBJECT(
ARRAY_CREATE('name', 'age', 'city'),
ARRAY_CREATE('John', 30, 'New York')
)
)
'{"name":"John","city":"New York","age":30}'
-- Format `null`.
SELECT
JSON_FORMAT(null)
'null'
-- Format `undefined`.
SELECT
JSON_FORMAT(undefined)
'{"__rockset_type":"undefined"}'
JSON_PARSE
JSON_PARSE(x)
Parses given string as JSON.
SELECT
JSON_PARSE('[3, 6, 9]')
[3, 6, 9]
SELECT
JSON_PARSE('"hello"')
'hello'
SELECT
JSON_PARSE('true')
true
SELECT
JSON_PARSE(
'{ "name": "John", "age": 30, "city": "New York" }'
)
{"name": "John", "age": 30, "city": "New York"}
SELECT
JSON_PARSE(
'{ "name": "John", "age": 30, "city": "New York" }'
).city
'New York'
-- Parse `null`.
SELECT
TYPEOF(JSON_PARSE('null'))
'null'
-- Parse `undefined`.
SELECT
TYPEOF(JSON_PARSE('{ "__rockset_type": "undefined" }'))
'undefined'
SELECT
JSON_PARSE('hello')
Error: 'json parse error on line 0 near `hello`: expected json value'
PG_ARRAY_PARSE
PG_ARRAY_PARSE(x, typestr)
Parses a string encoded using Postgres array output syntax. typestr
should be a string holding the Postgres array type, such as 'integer[]'
or 'text[]'
. Int, float, and string arrays of any depth can be decoded.
SELECT
PG_ARRAY_PARSE('{foo,bar,NULL,null}', 'text[]')
["foo", "bar", null, null]
SELECT
PG_ARRAY_PARSE('{"foo \\","\"bar\"","NULL"}', 'char[]')
["foo \\", "\"bar\"", "NULL"]
SELECT
PG_ARRAY_PARSE('{{1,2},{3,4}}', 'integer[][]')
[[1, 2], [3, 4]]
SELECT
PG_ARRAY_PARSE('{{{Infinity}},{{-Infinity}}}', 'float[][]')
[[[Infinity]], [[-Infinity]]]
Supported types for the second argument of PG_ARRAY_PARSE
:
bigint[]
char[]
double precision[]
double[]
float[]
int64[]
int[]
integer[]
smallint[]
string[]
text[]
varchar[]
DEC128_FORMAT
DEC128_FORMAT(low, high)
Converts from the IEEE 754 decimal128 floating point format to a string using scientific notation. low
and high
should be integers holding the bottom and top 64 bits of the binary representation, respectively.
SELECT
DEC128_FORMAT(10, 3476215962376601600)
"+10E-1"
SELECT
DEC128_FORMAT(5000, 3475653012423180288)
"+5000E-2"
/* Casting to float may lose precision, but allows arithmetic */
SELECT
CAST(
DEC128_FORMAT(5000, 3475653012423180288) AS float
)
50
SELECT
DEC128_FORMAT(0, 8646911284551352320)
"+Inf"
SELECT
DEC128_FORMAT(0, 8935141660703064064)
"+NaN"
LENGTH
LENGTH(obj)
Returns number of elements in obj
.
SELECT
LENGTH('foo')
3
SELECT
LENGTH('')
0
SELECT
LENGTH(null)
null
LOWER
LOWER(x)
Returns locale-independent lowercase string x
.
SELECT
LOWER('foo')
'foo'
SELECT
LOWER('FOO')
'foo'
SELECT
LOWER('123')
'123'
SELECT
LOWER('')
''
SELECT
LOWER(null)
null
LPAD
LPAD(string, targetLength, padString)
Pads string
from the left to targetLength
using padString
. If targetLength
is less than the length of string
, the result will be truncated to the targetLength
.
SELECT
LPAD('foo', 6, '!$')
'!$!foo'
SELECT
LPAD('foo', 5, '!')
'!!foo'
SELECT
LPAD('foobar', 3, '!')
'foo'
LTRIM
LTRIM(string [, characters])
Remove the longest string containing only characters from characters
(a space by default) from the start of string
SELECT
LTRIM(' foo')
'foo'
SELECT
LTRIM('foo ')
'foo '
SELECT
LTRIM('
foo')
'foo'
SELECT
LTRIM('yyyyzabc', 'xyz')
'abc'
NORMALIZE
NORMALIZE(string[, form])
Returns Unicode-normalized form of string
. form
is an identifier and must be one of NFC
, NFD
, NFKC
, NFKD
, which are the four Unicode normalization methods; NFC
is default.
Note that NORMALIZE
uses special syntax; form
is an identifier, not a string.
-- no quotes around NFC!
SELECT
NORMALIZE('hello', NFC)
'hello'
REPLACE
REPLACE(string, search[, replacement])
Returns a string with all instances of search
replaced with replacement
in string
. replacement
is optional, which if not specified removes all instance of search
from string
. When search
is an empty string, string
is not modified in any way.
SELECT
REPLACE('foobar', 'bar')
'foo'
SELECT
REPLACE('foobar', 'bar', '')
'foo'
SELECT
REPLACE('foobar', 'bar', 'baz')
'foobaz'
SELECT
REPLACE('barfoobar', 'bar', 'baz')
'bazfoobaz'
SELECT
REPLACE('foo', 'bar', '')
'foo'
SELECT
REPLACE('foo', '', 'bar')
'foo'
SELECT
REPLACE('foo', 'bar', null)
null
RPAD
RPAD(string, targetLength, padString)
Pads string
from the right to targetLength
using padString
. If targetLength
is less than the length of string
, the result will be truncated to the targetLength
.
SELECT
RPAD('foo', 6, '!$')
'foo!$!'
SELECT
RPAD('foo', 5, '!')
'foo!!'
SELECT
RPAD('foobar', 3, '!')
'foo'
RTRIM
RTRIM(string[, characters])
Remove the longest string containing only characters from characters
(a space by default) from the end of string
SELECT
RTRIM('foo ')
'foo'
SELECT
RTRIM(' foo')
' foo'
SELECT
RTRIM(' foo ')
' foo'
SELECT
RTRIM('trimxxxx', 'x')
'trim'
SPLIT
SPLIT(string, delimiter[, limit])
Splits string
on delimiter
and returns an array. With limit
, only the first limit - 1
delimiters are split upon, thereby returning an array of size at most limit
. The last element in the array always contains everything left in the string in the case where there are >= limit
occurrences of the delimiter in the string. limit
must be a positive number.
SELECT
SPLIT('foo.bar', '.')
["foo", "bar"]
SELECT
SPLIT('foo.bar.', '.')
["foo", "bar", ""]
SELECT
SPLIT('foo.bar', 'foo')
["", ".bar"]
SELECT
SPLIT('foo.bar', '.', 1)
["foo.bar"]
SELECT
SPLIT('foo.bar', '.', 2)
["foo", "bar"]
SELECT
SPLIT('foo.bar', '.', 3)
["foo", "bar"]
SELECT
SPLIT('foo.bar.baz', '.', 2)
["foo", "bar.baz"]
SELECT
SPLIT('foo.bar.baz', '.', 3)
["foo", "bar", "baz"]
SELECT
SPLIT('foo.bar.baz', '.')
["foo", "bar", "baz"]
STRPOS
STRPOS(string, substring)
Returns the starting position of the first instance of substring
in string
. Positions start with 1. If not found, 0 is returned.
SELECT
STRPOS('foo', 'foo')
1
SELECT
STRPOS('foo', 'bar')
0
SELECT
STRPOS('foo bar baz', 'bar')
5
-- STRPOS of empty string is always 1
SELECT
STRPOS('foo', '')
1
SELECT
STRPOS('foo', null)
null
SUBSTR
SUBSTR(string, start[, length])
Returns substring of string
starting at character at index given by start
(1-based index) and of length length
. If length
is not given, returns the substring starting at start
until the end of string
. If start
is negative, it rolls over to the end of the string and counts backwards from there.
SELECT
SUBSTR('abcdef', 2, 3)
'bcd'
SELECT
SUBSTR('abcdef', 2)
'bcdef'
SELECT
SUBSTR('abcdef', -1)
'f'
SELECT
SUBSTR('abcdef', -4, 2)
'cd'
SUFFIXES
SUFFIXES(string, truncate_length)
Returns an array of all suffixes of string
where each returned suffix is truncated to truncate_length
. If not specified, the default truncate_length
used is 100. The max length allowed for the input string
is 100,000.
SELECT
SUFFIXES('hello')
["hello", "ello", "llo", "lo", "o"]
SELECT
SUFFIXES('hello', 2)
["he", "el", "ll", "lo", "o"]
SELECT
SUFFIXES('hello', 3)
["hel", "ell", "llo", "lo", "o"]
SELECT
SUFFIXES(null)
null
PREFIXES
PREFIXES(string, max_length)
Returns an array of all prefixes of string
 whose length is less than or equal to max_length
. If not specified, the default max_length
used is 100. The max length allowed for the input string
is 100,000.
SELECT
PREFIXES('hello')
["h", "he", "hel", "hell", "hello"]
SELECT
PREFIXES('hello', 2)
["h", "he"]
SELECT
PREFIXES(null, 1)
null
NGRAMS
NGRAMS(string, n_length)
Returns an array of all ngrams of string
of length n_length
. The max length allowed for the input string
is 100,000.
SELECT
NGRAMS('hello', 2)
["he", "el", "ll", "lo"]
SELECT
NGRAMS('hello', 1)
["h", "e", "l", "l", "o"]
SELECT
NGRAMS(null, 1)
null
NGRAMS(string, min_length, max_length)
Returns an array of all ngrams of string
that are between min_length
and max_length
in size. The max length allowed for the input string
is 100,000.
SELECT
NGRAMS('hello', 1, 3)
["h", "he", "hel", "e", "el", "ell", "l", "ll", "llo", "l", "lo", "o"]
SELECT
NGRAMS('hello', 1, 1)
["h", "e", "l", "l", "o"]
SELECT
NGRAMS(null, 1, 1)
null
TRIM
TRIM(string)
Returns string
with leading and trailing whitespace removed.
SELECT
TRIM(' foo ')
'foo'
SELECT
TRIM('
foo
')
'foo'
UPPER
UPPER(x)
Returns locale-independent uppercase string x
.
SELECT
UPPER('foo')
'FOO'
SELECT
UPPER('FOO')
'FOO'
SELECT
UPPER('123')
'123'
SELECT
UPPER('')
''
SELECT
UPPER(null)
null
Encoding Functions
FROM_BASE64
FROM_BASE64(s)
Decodes the base64 string s
into a bytes
value.
SELECT
FROM_BASE64('Zm9v')
'Zm9v'
FROM_HEX
FROM_HEX(s)
Decodes the hex string s
into a bytes
value.
SELECT
FROM_HEX('666f6f')
'Zm9v'
SELECT
FROM_HEX('626172')
'YmFy'
FROM_UTF8
FROM_UTF8(b)
If b
is a bytes
value that represents a valid UTF-8 string, return it as a string
. Otherwise, raise an error.
SELECT
FROM_UTF8(bytes 'foo')
'foo'
SELECT
FROM_UTF8(bytes 'bar')
'bar'
TO_BASE64
TO_BASE64(b)
Encodes the bytes
value b
into a base64 string
representation.
SELECT
TO_BASE64(bytes 'foo')
'Zm9v'
TO_HEX
TO_HEX(b)
Encodes the bytes
value b
into a hex string
representation.
SELECT
TO_HEX(bytes 'foo')
'666f6f'
SELECT
TO_HEX(bytes 'bar')
'626172'
TO_UTF8
TO_UTF8(s)
Return the bytes
UTF-8 representation of the string
value s
.
SELECT
TO_UTF8('foo')
'Zm9v'
SELECT
TO_UTF8('bar')
'YmFy'
URL_ENCODE
URL_ENCODE(value)
Encodes the string into a percent-encoded ASCII text representation
SELECT
URL_ENCODE('foo bar?')
'foo%20bar%3f'
URL_DECODE
URL_ENCODE(value)
Encodes the string into a percent-encoded ASCII text representation
SELECT
URL_DECODE('foo%20bar%3f')
'foo bar?'
Regular Expression Functions
Rockset uses RE2 syntax for specifying regular expressions. Named capturing groups is not supported, only numbered groups.
REGEXP_EXTRACT
REGEXP_EXTRACT(string, pattern[, group])
Returns the first match of pattern
in string
, or null
if the pattern does not match. If group
is specified and greater than zero, returns the group
th capturing group; if group
is not specified or is zero, returns the full match.
SELECT
REGEXP_EXTRACT('foo bar baz', '[a-z]+')
'foo'
SELECT
REGEXP_EXTRACT('foo bar baz', '.*bar')
'foo bar'
SELECT
REGEXP_EXTRACT('foo bar baz', ' [a-z]+ ')
' bar '
SELECT
REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 1)
'foo'
SELECT
REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 2)
'baz'
REGEXP_EXTRACT_ALL
REGEXP_EXTRACT_ALL(string, pattern[, group])
Returns an array containing all matches of pattern
in string
. If group
is specified and greater than zero, extracts the group
th capturing group from each match; if group
is not specified or is zero, returns the full matches.
SELECT
REGEXP_EXTRACT_ALL('foo bar baz', '[a-z]+')
["foo", "bar", "baz"]
SELECT
REGEXP_EXTRACT_ALL('foo bar baz', 'ba([a-z])', 1)
["r", "z"]
SELECT
REGEXP_EXTRACT_ALL('foo bar baz', '([a-z])([a-z]+)', 2)
["oo", "ar", "az"]
REGEXP_LIKE
REGEXP_LIKE(string, pattern)
Returns true
if string
matches the regular expression pattern
, false
otherwise.
SELECT
REGEXP_LIKE('foo bar baz', '.*')
true
SELECT
REGEXP_LIKE('foo bar baz', '^$')
false
SELECT
REGEXP_LIKE('foo bar baz', '^foo')
true
SELECT
REGEXP_LIKE('foo bar baz', '[a-zA-Z0-9 ]+')
true
SELECT
REGEXP_LIKE('foo bar baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
false
SELECT
REGEXP_LIKE('foo@bar.baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
true
REGEXP_REPLACE
REGEXP_REPLACE(string, pattern[, replacement])
Returns string
with all places where pattern
matches replaced with replacement
(or erased if replacement
is not specified). replacement
may use \1
.. \9
escape sequences to refer to capturing groups, or \0
to refer to the entire match.
SELECT
REGEXP_REPLACE('foo bar baz', ' ', '')
'foobarbaz'
SELECT
REGEXP_REPLACE('foo bar baz', 'b[a-z]+ ', '')
'foo baz'
SELECT
REGEXP_REPLACE('foo bar baz', '.* b', 'b')
'baz'
SELECT
REGEXP_REPLACE(
'John A. Doe',
'([A-Z][a-z]+) ([A-Z]\.?) ([A-Z][a-z]+)',
'\3, \1 \2'
)
'Doe, John A.'
REGEXP_SPLIT
REGEXP_SPLIT(string, pattern)
Returns an array with the components of string
when split by pattern
.
SELECT
REGEXP_SPLIT('foo bar baz', ' ')
["foo", "bar", "baz"]
SELECT
REGEXP_SPLIT('foo bar baz', ' +')
["foo", "bar", "baz"]
SELECT
REGEXP_SPLIT('foo123bar456baz', '[0-9]+')
["foo", "bar", "baz"]
SPLIT_PART
SPLIT_PART(string, delimiter, index)
Splits string
on delimiter
and returns the field index
. Field indexes start with 1
. If the index is larger than than the number of fields, then null
is returned.
SELECT
SPLIT_PART('a:b:c:d', ':', 2)
'b'
SELECT
SPLIT_PART('a:b:c', ',', 3)
null
SELECT
SPLIT_PART('a:b:c:d', ':', 10)
null
SELECT
SPLIT_PART('a::c:d', ':', 2)
''
SELECT
SPLIT_PART('a:b:c', ',', 1)
'a:b:c'
CHR
CHR(n)
Returns a single character string made up of the character whose Unicode code point is n
.
SELECT
CHR(42)
'*'
ASCII
ASCII(string)
Returns the Unicode code point value of the first character in string
, or zero if the string is empty.
SELECT
ASCII('*')
42
REVERSE
REVERSE(string)
Returns string
with its characters in reverse order.
SELECT
REVERSE('abcd')
'dcba'