sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
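
# Illustrative only (not part of the module): because DIV0 / ZEROIFNULL /
# NULLIFZERO are lowered to a generic exp.If node, other dialects can render
# them as portable conditionals. Exact output may vary across sqlglot versions.
#
#   import sqlglot
#   sqlglot.transpile("SELECT DIV0(x, y) FROM t", read="snowflake", write="duckdb")[0]
#   # -> something like "SELECT CASE WHEN y = 0 THEN 0 ELSE x / y END FROM t"
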
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc
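
# Illustrative only: _map_date_part canonicalizes Snowflake's date-part aliases
# via DATE_PART_MAPPING, so spellings like 'wk' and 'week' parse to one unit.
# Hypothetical demo; output shape may vary by sqlglot version.
#
#   from sqlglot import parse_one
#   parse_one("SELECT DATEDIFF('wk', a, b)", read="snowflake").sql(dialect="snowflake")
#   # -> "SELECT DATEDIFF(WEEK, a, b)"
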
def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
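
    # Illustrative only: TIME_MAPPING translates Snowflake format tokens into
    # strftime-style tokens, which is what lets formatted TO_TIMESTAMP calls
    # transpile. Hypothetical demo; exact output may vary by sqlglot version.
    #
    #   import sqlglot
    #   sqlglot.transpile(
    #       "SELECT TO_TIMESTAMP('2020-01-02', 'YYYY-MM-DD')",
    #       read="snowflake",
    #       write="duckdb",
    #   )[0]
    #   # -> something like "SELECT STRPTIME('2020-01-02', '%Y-%m-%d')"
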
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
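
    # Illustrative only: even with identifier quoting forced on, DUAL is left
    # bare so it keeps its special meaning. Hypothetical demo; output may vary
    # by sqlglot version.
    #
    #   from sqlglot import parse_one
    #   parse_one("SELECT a FROM dual", read="snowflake").sql(
    #       dialect="snowflake", identify=True
    #   )
    #   # -> FROM dual stays unquoted, while ordinary table names would be quoted
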
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTER = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind == "TAG":
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression
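
        # Illustrative only: the FUNCTIONS table maps Snowflake spellings onto
        # canonical AST nodes, e.g. IFF -> exp.If and ARRAY_GENERATE_RANGE ->
        # exp.GenerateSeries (with its exclusive end rewritten as inclusive).
        # Hypothetical demo; output may vary by sqlglot version.
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT ARRAY_GENERATE_RANGE(0, 5)", read="snowflake", write="postgres"
        #   )[0]
        #   # -> something like "SELECT GENERATE_SERIES(0, 5 - 1)"
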
        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            casts = []
            json_path = []

            while self._match(TokenType.COLON):
                path = super()._parse_column_ops(self._parse_field(any_token=True))

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                while isinstance(path, exp.Cast):
                    casts.append(path.to)
                    path = path.this

                if path:
                    json_path.append(path.sql(dialect="snowflake", copy=False))

            if json_path:
                this = self.expression(
                    exp.JSONExtract,
                    this=this,
                    expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                )

                while casts:
                    this = self.expression(exp.Cast, this=this, to=casts.pop())

            return this
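
        # Illustrative only: the ':' extraction above means a trailing '::' cast
        # is hoisted over the whole GET_PATH call instead of being applied to the
        # path segment. Hypothetical demo; output may vary by sqlglot version.
        #
        #   from sqlglot import parse_one
        #   parse_one("SELECT payload:user.name::VARCHAR FROM t", read="snowflake")
        #   # parses roughly as CAST(GET_PATH(payload, 'user.name') AS VARCHAR)
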
        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)
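
        # Illustrative only: the staged-file path above is what lets queries over
        # stages parse, including the wrapped FILE_FORMAT/PATTERN options.
        # Hypothetical demo:
        #
        #   from sqlglot import parse_one
        #   parse_one(
        #       "SELECT $1 FROM @my_stage (FILE_FORMAT => 'csv', PATTERN => '.*[.]csv')",
        #       read="snowflake",
        #   )
        #   # the stage reference becomes an exp.Table carrying format/pattern args
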
        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # This will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because staged tables like @foo and @bar
            # can be joined in a query with a comma separator; the same goes for a
            # closing paren, which may terminate an enclosing subquery
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
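
    # Illustrative only: a few tokenizer consequences worth noting. MINUS is
    # tokenized like EXCEPT, '//' opens a line comment, and '$$...$$' delimits a
    # raw string. Hypothetical demo; output may vary by sqlglot version.
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake")[0]
    #   # -> "SELECT 1 EXCEPT SELECT 2"
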
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }
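
        # Illustrative only: TRANSFORMS drives generation into Snowflake syntax,
        # e.g. exp.GroupConcat renders as LISTAGG and exp.If as IFF. Hypothetical
        # demo; output may vary by sqlglot version.
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t",
        #       read="mysql",
        #       write="snowflake",
        #   )[0]
        #   # -> something like "SELECT LISTAGG(x, ',') FROM t"
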
        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
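
        # Illustrative only: unnest_sql rewrites UNNEST into Snowflake's
        # TABLE(FLATTEN(...)) form, aliasing all six FLATTEN output columns.
        # Hypothetical demo; output may vary by sqlglot version.
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT * FROM UNNEST([1, 2]) AS t(x)", read="duckdb", write="snowflake"
        #   )[0]
        #   # -> roughly "SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2]))
        #   #     AS t(seq, key, path, index, x, this)"
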
        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments is not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
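
# Illustrative only (not part of the module): end-to-end, the Tokenizer, Parser,
# and Generator above compose through the public API. Hypothetical demo; output
# may vary by sqlglot version.
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake", write="spark"
#   )[0]
#   # -> something like "SELECT IF(x > 0, 'pos', 'neg') FROM t"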
self.expressions(expression, key="file_format", flat=True, sep=" ") 1079 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1080 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1081 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1082 tag = self.expressions(expression, key="tag", flat=True) 1083 tag = f" TAG {tag}" if tag else "" 1084 1085 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
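For instance, Snowflake resolves unquoted identifiers case-insensitively and stores them uppercased, while quoted identifiers stay case-sensitive. A minimal sketch of this behavior via the dialect's normalize_identifier (the printed values assume the uppercase strategy used by this dialect):

import sqlglot
from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# Unquoted identifiers are case-insensitive in Snowflake, so they normalize to uppercase
print(dialect.normalize_identifier(exp.to_identifier("foo")).name)  # FOO

# Quoted identifiers are case-sensitive and are left untouched
print(dialect.normalize_identifier(exp.to_identifier("foo", quoted=True)).name)  # foo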
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
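A hedged sketch of where this flag takes effect: the rewrite above is applied during qualification, for example through sqlglot.optimizer.qualify (the exact quoting and table qualification in the output may vary by version):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Under the Snowflake dialect, HAVING c resolves against the CTE alias column,
# so the SUM(a) projection is aliased as c, as in the rewrite shown above
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))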
Associates this dialect's time formats with their equivalent Python strftime formats.
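For example, transpiling a Snowflake format string routes through this mapping; a small sketch, where the DuckDB output shown is indicative rather than guaranteed verbatim:

import sqlglot

# 'yyyy-mm-dd hh24:mi:ss' is translated through TIME_MAPPING into the
# strftime-style '%Y-%m-%d %H:%M:%S' used when generating the target dialect
print(
    sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-02 03:04:05', 'yyyy-mm-dd hh24:mi:ss')",
        read="snowflake",
        write="duckdb",
    )[0]
)
# e.g. SELECT STRPTIME('2024-01-02 03:04:05', '%Y-%m-%d %H:%M:%S')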
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
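A small usage sketch of the DUAL carve-out above (the table and column names are illustrative; identify=True forces quoting of every other identifier):

import sqlglot

# Every identifier is quoted under identify=True, but DUAL survives unquoted
# because Snowflake would otherwise treat "DUAL" as a user-defined table name
print(sqlglot.transpile("SELECT col FROM DUAL", read="snowflake", identify=True)[0])
# e.g. SELECT "col" FROM DUAL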
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTER = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind == "TAG":
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            casts = []
            json_path = []

            while self._match(TokenType.COLON):
                path = super()._parse_column_ops(self._parse_field(any_token=True))

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                while isinstance(path, exp.Cast):
                    casts.append(path.to)
                    path = path.this

                if path:
                    json_path.append(path.sql(dialect="snowflake", copy=False))

            if json_path:
                this = self.expression(
                    exp.JSONExtract,
                    this=this,
                    expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

            return this

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
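A small usage sketch of the Snowflake-specific parsing above, using the colon extraction operator handled by _parse_column_ops (the table and column names are illustrative, and the DuckDB output is indicative rather than guaranteed verbatim):

import sqlglot

# ':' is parsed into a JSONExtract node, so the path survives transpilation
# to dialects that use a different extraction syntax
print(sqlglot.transpile("SELECT payload:user.name FROM events", read="snowflake", write="duckdb")[0])
# e.g. SELECT payload -> '$.user.name' FROM events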
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
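For example, the Snowflake-specific TRY_CAST handling shown in the class source above can be observed directly; a minimal sketch, with indicative outputs in the comments:

import sqlglot

# TRY_CAST is only defined for string operands in Snowflake, so a non-text
# operand is generated as a plain CAST instead
print(sqlglot.parse_one("TRY_CAST('a' AS INT)").sql("snowflake"))  # TRY_CAST('a' AS INT)
print(sqlglot.parse_one("TRY_CAST(1 AS INT)").sql("snowflake"))    # CAST(1 AS INT)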
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql