SQL functions
Function | Example | Result | Type |
ADD_MONTHS | SELECT ADD_MONTHS(‘2020-06-30’, 1) FROM <TABLE> LIMIT 1 | 2020-07-31T00:00:00.000Z | timestamp |
ABS | SELECT ABS(-1) FROM <TABLE> LIMIT 1 | 1 | integer |
ASCII | SELECT ascii(‘222’) from <TABLE> LIMIT 1 | 50 | integer |
ASIN | SELECT ASIN(0) from <TABLE> LIMIT 1 | 0 | float |
CHAR | SELECT char(65) from <TABLE> LIMIT 1 | “A” | string |
CHR | SELECT char(65) from <TABLE> LIMIT 1 | “A” | string |
DATE_DIFF
|
SELECT datediff(‘2009-07-31’, ‘2009-07-30’)from <TABLE> LIMIT 1; | 1 | integer |
SELECT datediff(‘2009-07-30’, ‘2009-07-31’) from <TABLE> LIMIT 1; | -1 | integer | |
DATE_FORMAT | SELECT date_format(‘2020-04-08’, ‘y’) from <TABLE> LIMIT 1 | 2020 | string |
DATE_TRUNC
|
SELECT date_trunc(‘YEAR’, ‘2015-03-05T09:32:05.359’) from <TABLE> 1; | 2015-01-01T00:00:00.000-08:00 | timestamp |
SELECT date_trunc(‘MM’, ‘2015-03-05T09:32:05.359’) from <TABLE> LIMIT 1; | 2015-03-01T00:00:00.000-07:00 | timestamp | |
SELECT date_trunc(‘DD’, ‘2015-03-05T09:32:05.359’) from <TABLE> LIMIT 1; | 2015-03-05T00:00:00.000-07:00 | timestamp | |
SELECT date_trunc(‘HOUR’, ‘2015-03-05T09:32:05.359’) from <TABLE> LIMIT 1; | 2015-03-05T09:00:00.000-07:00 | timestamp | |
DAYOFWEEK | SELECT dayofweek(‘2009-07-30’) from <TABLE> LIMIT 1; | 5 | integer |
DAYOFYEAR | SELECT dayofyear(‘2016-04-09’) from <TABLE> LIMIT 1; | 100 | integer |
FIND_IN_SET | SELECT find_in_set(‘ab’,’abc,b,ab,c,def’) from <TABLE> LIMIT 1; | 3 | integer |
FORMAT_NUMBER
|
SELECT format_number(12332.123456, 4) from <TABLE> LIMIT 1; | 12,332.12 | string |
SELECT format_number(12332.123456, ‘##################.###’) from <TABLE> LIMIT 1; | 12332.123 | string | |
FROM_UNIXTIME | SELECT from_unixtime(0, ‘yyyy-MM-dd HH:mm:ss’) from <TABLE> LIMIT 1 | 1969-12-31 16:00:00 | string |
FROM_UTC_TIMESTAMP | SELECT from_utc_timestamp(‘2016-08-31’, ‘Asia/Seoul’) from <TABLE> LIMIT 1 | 2016-08-31T09:00:00.000-07:00 | timestamp |
GREATEST
|
SELECT greatest(10, 9, 2, 4, 3) from FORECAST_CANONICAL LIMIT 1 | 10 | integer |
SELECT greatest(“abc”, “bcd”) from FORECAST_CANONICAL LIMIT 1 | bcd | string | |
SELECT greatest(TRUE, FALSE) from FORECAST_CANONICAL LIMIT 1 | TRUE | boolean | |
SELECT greatest(FALSE, NULL) from FORECAST_CANONICAL LIMIT 1 | FALSE | boolean | |
SELECT greatest(TRUE, NULL) from FORECAST_CANONICAL LIMIT 1 | TRUE | boolean | |
HOUR | SELECT hour(‘2009-07-30 12:58:59’) from <TABLE> LIMIT 1 | 12 | integer |
INITCAP | SELECT initcap(‘sPark sql’) from <TABLE> LIMIT 1 | Spark Sql | string |
LAST_DAY | SELECT last_day(‘2009-01-12’) from <TABLE> LIMIT 1 | 2009-01-31T00:00:00.000Z | timestamp |
LEFT | SELECT left(‘Spark SQL’, 3) from <TABLE> LIMIT 1 | Spa | string |
MAX
|
SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col); | FAILS | |
SELECT max(AVG_MISDEMEANOR) from MISDEMEANOR_RATE; | 11598.6 | ||
MINUTE | SELECT minute(‘2009-07-30 12:58:59’) FROM <TABLE>; | 58 | int |
MD5 | SELECT md5(‘Xcalar’) FROM <TABLE>; | d5997d139a9944f3ea8d2533c0929175 | string |
MOD
|
SELECT 2 % 1.8 FROM <TABLE>; | 0.2 | float |
SELECT MOD(2, 1.8) FROM <TABLE>; | 0.2 | float | |
MONTH | SELECT month(‘2016-07-30’) FROM <TABLE>; | 7 | int |
MONTHS_BETWEEN | SELECT months_between(‘1997-02-28 10:30:00’, ‘1996-10-30’) FROM <TABLE>; | 3.94959677 | float |
NANVL | SELECT nanvl(cast(‘NaN’ as double), 123) FROM <TABLE>; | 123 | float |
NEGATIVE | SELECT negative(1) FROM <TABLE>; | -1 | int |
NEXT_DAY | SELECT next_day(‘2015-01-14’, ‘TU’) FROM <TABLE>; | 2015-01-20T00:00:00.000Z | timestamp |
NOW | SELECT now() FROM MISDEMEANOR_RATE; | 2020-11-13T17:20:41.292Z | timestamp |
NULLIF | SELECT nullif(2, 2) FROM <TABLE>; | FNF | |
NVL2 | SELECT nvl2(NULL, 2, 1) FROM <TABLE>; | 1 | int |
OCTET_LENGTH | SELECT octet_length(‘Xcalar’) FROM <TABLE>; | 6 | int |
PERCENT_RANK | SELECT PERCENT_RANK(PRECINCT) FROM MISDEMEANOR_RATE; | column of floats ([0.77,…]) | float |
PI | SELECT pi() FROM <TABLE>; | 3.14…. | float |
PMOD | SELECT pmod(-10, 3) FROM <TABLE>; | 2 | int |
POSITION | SELECT position(‘bar’, ‘foobarbar’) FROM <TABLE>; | 4 | int |
POW | SELECT pow(2, 3) FROM <TABLE>; | 8 | float |
POWER | SELECT power(2, 3) FROM <TABLE>; | 8 | float |
PRINTF | SELECT printf(“Hello World %d %s”, 100, “days”) FROM <TABLE>; | Hello World 100 days | string |
QUARTER | SELECT quarter(‘2016-08-31’) FROM <TABLE>; | 3 | int |
RADIANS | SELECT radians(180) FROM <TABLE>; | 3.14… | float |
RAND
|
SELECT rand() FROM <TABLE>; | col of floats | float |
SELECT rand(0) FROM <TABLE>; | col of floats | float | |
SELECT rand(null) FROM <TABLE>; | col of floats | float | |
RANK | SELECT RANK(COL) FROM <TABLE>; | column of integers | int |
REGEXP_EXTRACT | SELECT regexp_extract(‘100-200’, ‘(\\d+)-(\\d+)’, 1) FROM <TABLE>; | 100 | string |
REGEXP_REPLACE | SELECT regexp_replace(‘100-200’, ‘(\\d+)’, ‘num’) FROM <TABLE>; | num-num | string |
REPEAT | SELECT repeat(‘123’, 2) FROM <TABLE>; | 123123 | string |
REPLACE | SELECT replace(‘ABCabc’, ‘abc’, ‘DEF’) FROM <TABLE>; | ABCDEF | string |
RIGHT | SELECT right(‘Spark SQL’, 3) FROM <TABLE>; | SQL | string |
RINT | SELECT rint(12.3456) FROM <TABLE>; | 12 | float |
RLIKE | SELECT ‘SystemDriveUsersJohn’ rlike ‘SystemDriveUsers.*’ FROM <TABLE>; | TRUE | boolean |
ROUND | SELECT round(2.5, 0) FROM <TABLE>; | 3 | float |
RPAD | SELECT rpad(‘hi’, 5, ‘??’) FROM <TABLE>; | hi??? | string |
RTRIM | SELECT rtrim(‘ SparkSQL ‘) FROM <TABLE>; | SparkSQL | string |
SCHEMA_OF_JSON | SELECT schema_of_json(‘[{“col”:0}]’) FROM <TABLE>; | array<struct<col:bigint>> | string |
SECOND | SELECT second(‘2009-07-30 12:58:59’) FROM <TABLE>; | 59 | int |
SHA | SELECT sha(‘Spark’) FROM <TABLE>; | 19ba73209de2712b78b800260417130a49538513 | string |
SHA1 | SELECT sha1(‘Xcalar’) FROM <TABLE>; | 19ba73209de2712b78b800260417130a49538513 | string |
SHIFTLEFT | SELECT shiftleft(2, 1) FROM <TABLE>; | 4 | int |
SHIFTRIGHT | SELECT shiftright(4, 1) FROM <TABLE>; | 2 | int |
SHIFTRIGHTUNSIGNED | SELECT shiftrightunsigned(4, 1) FROM <TABLE>; | 2 | int |
SIGN | SELECT sign(40) FROM <TABLE>; | 1 | int |
SIGNUM | SELECT signum(40) FROM <TABLE>; | 1 | int |
SIN | SELECT sin(0) FROM <TABLE>; | 0 | int |
SINH | SELECT sinh(0) FROM <TABLE>; | 0 | int |
SIZE | SELECT size(array(‘b’, ‘d’, ‘c’, ‘a’)) FROM <TABLE>; | 4 | int |
SMALLINT | SELECT smallint(COL) FROM <TABLE>; | COL of INTs | INT |
SOUNDEX | SELECT soundex(‘Miller’) FROM <TABLE>; | M460 | string |
SPACE | SELECT concat(space(2), ‘1’) FROM <TABLE>; | 1 | |
SQRT | SELECT sqrt(4) FROM <TABLE>; | 2 | int |
STD | SELECT std(COL) from <TABLE>; | 20.9444…. | float |
STDDEV | SELECT stddev(COL) from <TABLE>; | 20.9444…. | float |
STDDEV_POP | SELECT stddev_pop(COL) from <TABLE>; | 20.93240805 | float |
STDDEV_SAMP | SELECT stddev_samp(COL) from <TABLE>; | 20.94440714 | float |
STRING | SELECT STRING(1) FROM <TABLE>; | 1 | string |
SUBSTR | SELECT substr(‘Spark SQL’, 5) FROM <TABLE>; | k SQL | string |
SUBSTRING | SELECT substring(‘Spark SQL’, -3) FROM <TABLE>; | SQL | string |
SUBSTRING_INDEX | SELECT substring_index(‘www.apache.org’, ‘.’, 2) FROM <TABLE>; | www.apache | string |
SUM | SELECT SUM(COL) FROM <TABLE>; | 59657 | int |
TAN | SELECT tan(0) FROM <TABLE>; | 0 | float |
TANH | SELECT tanh(0) FROM <TABLE>; | 0 | float |
TO_DATE | SELECT to_date(‘2009-07-30 04:17:52’) FROM <TABLE>; | 2009-07-30T00:00:00.000Z | timestamp |
TO_JSON | SELECT to_json(named_struct(‘a’, 1, ‘b’, 2)) FROM <TABLE>; | {“a”:1,”b”:2} | string |
TO_TIMESTAMP | SELECT to_timestamp(‘2016-12-31 00:12:00’) FROM <TABLE>; | 2016-12-31T00:12:00.000Z | timestamp |
TO_UNIX_TIMESTAMP | SELECT to_unix_timestamp(‘2016-04-08’, ‘yyyy-MM-dd’) FROM <TABLE>; | 1460073600 | int |
TO_UTC_TIMESTAMP | SELECT to_utc_timestamp(‘2016-08-31’, ‘Asia/Seoul’) FROM <TABLE>; | 2016-08-30T15:00:00.000Z | timestamp |
TRANSLATE | SELECT translate(‘AaBbCc’, ‘abc’, ‘123’) FROM <TABLE>; | A1B2C3 | string |
TRIM | SELECT trim(TRAILING ‘SL’ FROM ‘SSparkSQLS’) FROM <TABLE>; | SSparkSQ | string |