# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 
62 63 64def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 65 arg = seq_get(args, 0) 66 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 67 68 69def build_lower(args: t.List) -> exp.Lower | exp.Hex: 70 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 73 74 75def build_upper(args: t.List) -> exp.Upper | exp.Hex: 76 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 79 80 81def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 82 def _builder(args: t.List, dialect: Dialect) -> E: 83 expression = expr_type( 84 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 85 ) 86 if len(args) > 2 and expr_type is exp.JSONExtract: 87 expression.set("expressions", args[2:]) 88 89 return expression 90 91 return _builder 92 93 94def build_mod(args: t.List) -> exp.Mod: 95 this = seq_get(args, 0) 96 expression = seq_get(args, 1) 97 98 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 99 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 100 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 101 102 return exp.Mod(this=this, expression=expression) 103 104 105class _Parser(type): 106 def __new__(cls, clsname, bases, attrs): 107 klass = super().__new__(cls, clsname, bases, attrs) 108 109 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 110 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 111 112 return klass 113 114 115class Parser(metaclass=_Parser): 116 """ 117 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 
118 119 Args: 120 error_level: The desired error level. 121 Default: ErrorLevel.IMMEDIATE 122 error_message_context: The amount of context to capture from a query string when displaying 123 the error message (in number of characters). 124 Default: 100 125 max_errors: Maximum number of error messages to include in a raised ParseError. 126 This is only relevant if error_level is ErrorLevel.RAISE. 127 Default: 3 128 """ 129 130 FUNCTIONS: t.Dict[str, t.Callable] = { 131 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 132 "CONCAT": lambda args, dialect: exp.Concat( 133 expressions=args, 134 safe=not dialect.STRICT_STRING_CONCAT, 135 coalesce=dialect.CONCAT_COALESCE, 136 ), 137 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 138 expressions=args, 139 safe=not dialect.STRICT_STRING_CONCAT, 140 coalesce=dialect.CONCAT_COALESCE, 141 ), 142 "DATE_TO_DATE_STR": lambda args: exp.Cast( 143 this=seq_get(args, 0), 144 to=exp.DataType(this=exp.DataType.Type.TEXT), 145 ), 146 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 147 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 148 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 149 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 150 "LIKE": build_like, 151 "LOG": build_logarithm, 152 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 153 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 154 "MOD": build_mod, 155 "TIME_TO_TIME_STR": lambda args: exp.Cast( 156 this=seq_get(args, 0), 157 to=exp.DataType(this=exp.DataType.Type.TEXT), 158 ), 159 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 160 this=exp.Cast( 161 this=seq_get(args, 0), 162 to=exp.DataType(this=exp.DataType.Type.TEXT), 163 ), 164 start=exp.Literal.number(1), 165 length=exp.Literal.number(10), 166 ), 167 "VAR_MAP": build_var_map, 168 "LOWER": build_lower, 
169 "UPPER": build_upper, 170 "HEX": build_hex, 171 "TO_HEX": build_hex, 172 } 173 174 NO_PAREN_FUNCTIONS = { 175 TokenType.CURRENT_DATE: exp.CurrentDate, 176 TokenType.CURRENT_DATETIME: exp.CurrentDate, 177 TokenType.CURRENT_TIME: exp.CurrentTime, 178 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 179 TokenType.CURRENT_USER: exp.CurrentUser, 180 } 181 182 STRUCT_TYPE_TOKENS = { 183 TokenType.NESTED, 184 TokenType.OBJECT, 185 TokenType.STRUCT, 186 } 187 188 NESTED_TYPE_TOKENS = { 189 TokenType.ARRAY, 190 TokenType.LOWCARDINALITY, 191 TokenType.MAP, 192 TokenType.NULLABLE, 193 *STRUCT_TYPE_TOKENS, 194 } 195 196 ENUM_TYPE_TOKENS = { 197 TokenType.ENUM, 198 TokenType.ENUM8, 199 TokenType.ENUM16, 200 } 201 202 AGGREGATE_TYPE_TOKENS = { 203 TokenType.AGGREGATEFUNCTION, 204 TokenType.SIMPLEAGGREGATEFUNCTION, 205 } 206 207 TYPE_TOKENS = { 208 TokenType.BIT, 209 TokenType.BOOLEAN, 210 TokenType.TINYINT, 211 TokenType.UTINYINT, 212 TokenType.SMALLINT, 213 TokenType.USMALLINT, 214 TokenType.INT, 215 TokenType.UINT, 216 TokenType.BIGINT, 217 TokenType.UBIGINT, 218 TokenType.INT128, 219 TokenType.UINT128, 220 TokenType.INT256, 221 TokenType.UINT256, 222 TokenType.MEDIUMINT, 223 TokenType.UMEDIUMINT, 224 TokenType.FIXEDSTRING, 225 TokenType.FLOAT, 226 TokenType.DOUBLE, 227 TokenType.CHAR, 228 TokenType.NCHAR, 229 TokenType.VARCHAR, 230 TokenType.NVARCHAR, 231 TokenType.BPCHAR, 232 TokenType.TEXT, 233 TokenType.MEDIUMTEXT, 234 TokenType.LONGTEXT, 235 TokenType.MEDIUMBLOB, 236 TokenType.LONGBLOB, 237 TokenType.BINARY, 238 TokenType.VARBINARY, 239 TokenType.JSON, 240 TokenType.JSONB, 241 TokenType.INTERVAL, 242 TokenType.TINYBLOB, 243 TokenType.TINYTEXT, 244 TokenType.TIME, 245 TokenType.TIMETZ, 246 TokenType.TIMESTAMP, 247 TokenType.TIMESTAMP_S, 248 TokenType.TIMESTAMP_MS, 249 TokenType.TIMESTAMP_NS, 250 TokenType.TIMESTAMPTZ, 251 TokenType.TIMESTAMPLTZ, 252 TokenType.TIMESTAMPNTZ, 253 TokenType.DATETIME, 254 TokenType.DATETIME64, 255 TokenType.DATE, 256 TokenType.DATE32, 257 
TokenType.INT4RANGE, 258 TokenType.INT4MULTIRANGE, 259 TokenType.INT8RANGE, 260 TokenType.INT8MULTIRANGE, 261 TokenType.NUMRANGE, 262 TokenType.NUMMULTIRANGE, 263 TokenType.TSRANGE, 264 TokenType.TSMULTIRANGE, 265 TokenType.TSTZRANGE, 266 TokenType.TSTZMULTIRANGE, 267 TokenType.DATERANGE, 268 TokenType.DATEMULTIRANGE, 269 TokenType.DECIMAL, 270 TokenType.UDECIMAL, 271 TokenType.BIGDECIMAL, 272 TokenType.UUID, 273 TokenType.GEOGRAPHY, 274 TokenType.GEOMETRY, 275 TokenType.HLLSKETCH, 276 TokenType.HSTORE, 277 TokenType.PSEUDO_TYPE, 278 TokenType.SUPER, 279 TokenType.SERIAL, 280 TokenType.SMALLSERIAL, 281 TokenType.BIGSERIAL, 282 TokenType.XML, 283 TokenType.YEAR, 284 TokenType.UNIQUEIDENTIFIER, 285 TokenType.USERDEFINED, 286 TokenType.MONEY, 287 TokenType.SMALLMONEY, 288 TokenType.ROWVERSION, 289 TokenType.IMAGE, 290 TokenType.VARIANT, 291 TokenType.OBJECT, 292 TokenType.OBJECT_IDENTIFIER, 293 TokenType.INET, 294 TokenType.IPADDRESS, 295 TokenType.IPPREFIX, 296 TokenType.IPV4, 297 TokenType.IPV6, 298 TokenType.UNKNOWN, 299 TokenType.NULL, 300 TokenType.NAME, 301 TokenType.TDIGEST, 302 *ENUM_TYPE_TOKENS, 303 *NESTED_TYPE_TOKENS, 304 *AGGREGATE_TYPE_TOKENS, 305 } 306 307 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 308 TokenType.BIGINT: TokenType.UBIGINT, 309 TokenType.INT: TokenType.UINT, 310 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 311 TokenType.SMALLINT: TokenType.USMALLINT, 312 TokenType.TINYINT: TokenType.UTINYINT, 313 TokenType.DECIMAL: TokenType.UDECIMAL, 314 } 315 316 SUBQUERY_PREDICATES = { 317 TokenType.ANY: exp.Any, 318 TokenType.ALL: exp.All, 319 TokenType.EXISTS: exp.Exists, 320 TokenType.SOME: exp.Any, 321 } 322 323 RESERVED_TOKENS = { 324 *Tokenizer.SINGLE_TOKENS.values(), 325 TokenType.SELECT, 326 } - {TokenType.IDENTIFIER} 327 328 DB_CREATABLES = { 329 TokenType.DATABASE, 330 TokenType.DICTIONARY, 331 TokenType.MODEL, 332 TokenType.SCHEMA, 333 TokenType.SEQUENCE, 334 TokenType.STORAGE_INTEGRATION, 335 TokenType.TABLE, 336 TokenType.TAG, 337 TokenType.VIEW, 338 
} 339 340 CREATABLES = { 341 TokenType.COLUMN, 342 TokenType.CONSTRAINT, 343 TokenType.FOREIGN_KEY, 344 TokenType.FUNCTION, 345 TokenType.INDEX, 346 TokenType.PROCEDURE, 347 *DB_CREATABLES, 348 } 349 350 # Tokens that can represent identifiers 351 ID_VAR_TOKENS = { 352 TokenType.VAR, 353 TokenType.ANTI, 354 TokenType.APPLY, 355 TokenType.ASC, 356 TokenType.ASOF, 357 TokenType.AUTO_INCREMENT, 358 TokenType.BEGIN, 359 TokenType.BPCHAR, 360 TokenType.CACHE, 361 TokenType.CASE, 362 TokenType.COLLATE, 363 TokenType.COMMAND, 364 TokenType.COMMENT, 365 TokenType.COMMIT, 366 TokenType.CONSTRAINT, 367 TokenType.COPY, 368 TokenType.DEFAULT, 369 TokenType.DELETE, 370 TokenType.DESC, 371 TokenType.DESCRIBE, 372 TokenType.DICTIONARY, 373 TokenType.DIV, 374 TokenType.END, 375 TokenType.EXECUTE, 376 TokenType.ESCAPE, 377 TokenType.FALSE, 378 TokenType.FIRST, 379 TokenType.FILTER, 380 TokenType.FINAL, 381 TokenType.FORMAT, 382 TokenType.FULL, 383 TokenType.IDENTIFIER, 384 TokenType.IS, 385 TokenType.ISNULL, 386 TokenType.INTERVAL, 387 TokenType.KEEP, 388 TokenType.KILL, 389 TokenType.LEFT, 390 TokenType.LOAD, 391 TokenType.MERGE, 392 TokenType.NATURAL, 393 TokenType.NEXT, 394 TokenType.OFFSET, 395 TokenType.OPERATOR, 396 TokenType.ORDINALITY, 397 TokenType.OVERLAPS, 398 TokenType.OVERWRITE, 399 TokenType.PARTITION, 400 TokenType.PERCENT, 401 TokenType.PIVOT, 402 TokenType.PRAGMA, 403 TokenType.RANGE, 404 TokenType.RECURSIVE, 405 TokenType.REFERENCES, 406 TokenType.REFRESH, 407 TokenType.REPLACE, 408 TokenType.RIGHT, 409 TokenType.ROLLUP, 410 TokenType.ROW, 411 TokenType.ROWS, 412 TokenType.SEMI, 413 TokenType.SET, 414 TokenType.SETTINGS, 415 TokenType.SHOW, 416 TokenType.TEMPORARY, 417 TokenType.TOP, 418 TokenType.TRUE, 419 TokenType.TRUNCATE, 420 TokenType.UNIQUE, 421 TokenType.UNPIVOT, 422 TokenType.UPDATE, 423 TokenType.USE, 424 TokenType.VOLATILE, 425 TokenType.WINDOW, 426 *CREATABLES, 427 *SUBQUERY_PREDICATES, 428 *TYPE_TOKENS, 429 *NO_PAREN_FUNCTIONS, 430 } 431 432 
INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 433 434 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 435 TokenType.ANTI, 436 TokenType.APPLY, 437 TokenType.ASOF, 438 TokenType.FULL, 439 TokenType.LEFT, 440 TokenType.LOCK, 441 TokenType.NATURAL, 442 TokenType.OFFSET, 443 TokenType.RIGHT, 444 TokenType.SEMI, 445 TokenType.WINDOW, 446 } 447 448 ALIAS_TOKENS = ID_VAR_TOKENS 449 450 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 451 452 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 453 454 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 455 456 FUNC_TOKENS = { 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.CURRENT_DATE, 460 TokenType.CURRENT_DATETIME, 461 TokenType.CURRENT_TIMESTAMP, 462 TokenType.CURRENT_TIME, 463 TokenType.CURRENT_USER, 464 TokenType.FILTER, 465 TokenType.FIRST, 466 TokenType.FORMAT, 467 TokenType.GLOB, 468 TokenType.IDENTIFIER, 469 TokenType.INDEX, 470 TokenType.ISNULL, 471 TokenType.ILIKE, 472 TokenType.INSERT, 473 TokenType.LIKE, 474 TokenType.MERGE, 475 TokenType.OFFSET, 476 TokenType.PRIMARY_KEY, 477 TokenType.RANGE, 478 TokenType.REPLACE, 479 TokenType.RLIKE, 480 TokenType.ROW, 481 TokenType.UNNEST, 482 TokenType.VAR, 483 TokenType.LEFT, 484 TokenType.RIGHT, 485 TokenType.SEQUENCE, 486 TokenType.DATE, 487 TokenType.DATETIME, 488 TokenType.TABLE, 489 TokenType.TIMESTAMP, 490 TokenType.TIMESTAMPTZ, 491 TokenType.TRUNCATE, 492 TokenType.WINDOW, 493 TokenType.XOR, 494 *TYPE_TOKENS, 495 *SUBQUERY_PREDICATES, 496 } 497 498 CONJUNCTION = { 499 TokenType.AND: exp.And, 500 TokenType.OR: exp.Or, 501 } 502 503 EQUALITY = { 504 TokenType.EQ: exp.EQ, 505 TokenType.NEQ: exp.NEQ, 506 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 507 } 508 509 COMPARISON = { 510 TokenType.GT: exp.GT, 511 TokenType.GTE: exp.GTE, 512 TokenType.LT: exp.LT, 513 TokenType.LTE: exp.LTE, 514 } 515 516 BITWISE = { 517 TokenType.AMP: exp.BitwiseAnd, 518 TokenType.CARET: exp.BitwiseXor, 519 TokenType.PIPE: exp.BitwiseOr, 520 } 521 522 TERM = { 523 
TokenType.DASH: exp.Sub, 524 TokenType.PLUS: exp.Add, 525 TokenType.MOD: exp.Mod, 526 TokenType.COLLATE: exp.Collate, 527 } 528 529 FACTOR = { 530 TokenType.DIV: exp.IntDiv, 531 TokenType.LR_ARROW: exp.Distance, 532 TokenType.SLASH: exp.Div, 533 TokenType.STAR: exp.Mul, 534 } 535 536 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 537 538 TIMES = { 539 TokenType.TIME, 540 TokenType.TIMETZ, 541 } 542 543 TIMESTAMPS = { 544 TokenType.TIMESTAMP, 545 TokenType.TIMESTAMPTZ, 546 TokenType.TIMESTAMPLTZ, 547 *TIMES, 548 } 549 550 SET_OPERATIONS = { 551 TokenType.UNION, 552 TokenType.INTERSECT, 553 TokenType.EXCEPT, 554 } 555 556 JOIN_METHODS = { 557 TokenType.ASOF, 558 TokenType.NATURAL, 559 TokenType.POSITIONAL, 560 } 561 562 JOIN_SIDES = { 563 TokenType.LEFT, 564 TokenType.RIGHT, 565 TokenType.FULL, 566 } 567 568 JOIN_KINDS = { 569 TokenType.INNER, 570 TokenType.OUTER, 571 TokenType.CROSS, 572 TokenType.SEMI, 573 TokenType.ANTI, 574 } 575 576 JOIN_HINTS: t.Set[str] = set() 577 578 LAMBDAS = { 579 TokenType.ARROW: lambda self, expressions: self.expression( 580 exp.Lambda, 581 this=self._replace_lambda( 582 self._parse_conjunction(), 583 {node.name for node in expressions}, 584 ), 585 expressions=expressions, 586 ), 587 TokenType.FARROW: lambda self, expressions: self.expression( 588 exp.Kwarg, 589 this=exp.var(expressions[0].name), 590 expression=self._parse_conjunction(), 591 ), 592 } 593 594 COLUMN_OPERATORS = { 595 TokenType.DOT: None, 596 TokenType.DCOLON: lambda self, this, to: self.expression( 597 exp.Cast if self.STRICT_CAST else exp.TryCast, 598 this=this, 599 to=to, 600 ), 601 TokenType.ARROW: lambda self, this, path: self.expression( 602 exp.JSONExtract, 603 this=this, 604 expression=self.dialect.to_json_path(path), 605 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 606 ), 607 TokenType.DARROW: lambda self, this, path: self.expression( 608 exp.JSONExtractScalar, 609 this=this, 610 expression=self.dialect.to_json_path(path), 611 
only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 612 ), 613 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 614 exp.JSONBExtract, 615 this=this, 616 expression=path, 617 ), 618 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 619 exp.JSONBExtractScalar, 620 this=this, 621 expression=path, 622 ), 623 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 624 exp.JSONBContains, 625 this=this, 626 expression=key, 627 ), 628 } 629 630 EXPRESSION_PARSERS = { 631 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 632 exp.Column: lambda self: self._parse_column(), 633 exp.Condition: lambda self: self._parse_conjunction(), 634 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 635 exp.Expression: lambda self: self._parse_expression(), 636 exp.From: lambda self: self._parse_from(joins=True), 637 exp.Group: lambda self: self._parse_group(), 638 exp.Having: lambda self: self._parse_having(), 639 exp.Identifier: lambda self: self._parse_id_var(), 640 exp.Join: lambda self: self._parse_join(), 641 exp.Lambda: lambda self: self._parse_lambda(), 642 exp.Lateral: lambda self: self._parse_lateral(), 643 exp.Limit: lambda self: self._parse_limit(), 644 exp.Offset: lambda self: self._parse_offset(), 645 exp.Order: lambda self: self._parse_order(), 646 exp.Ordered: lambda self: self._parse_ordered(), 647 exp.Properties: lambda self: self._parse_properties(), 648 exp.Qualify: lambda self: self._parse_qualify(), 649 exp.Returning: lambda self: self._parse_returning(), 650 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 651 exp.Table: lambda self: self._parse_table_parts(), 652 exp.TableAlias: lambda self: self._parse_table_alias(), 653 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 654 exp.Where: lambda self: self._parse_where(), 655 exp.Window: lambda self: self._parse_named_window(), 656 exp.With: lambda self: self._parse_with(), 657 "JOIN_TYPE": lambda 
self: self._parse_join_parts(), 658 } 659 660 STATEMENT_PARSERS = { 661 TokenType.ALTER: lambda self: self._parse_alter(), 662 TokenType.BEGIN: lambda self: self._parse_transaction(), 663 TokenType.CACHE: lambda self: self._parse_cache(), 664 TokenType.COMMENT: lambda self: self._parse_comment(), 665 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 666 TokenType.COPY: lambda self: self._parse_copy(), 667 TokenType.CREATE: lambda self: self._parse_create(), 668 TokenType.DELETE: lambda self: self._parse_delete(), 669 TokenType.DESC: lambda self: self._parse_describe(), 670 TokenType.DESCRIBE: lambda self: self._parse_describe(), 671 TokenType.DROP: lambda self: self._parse_drop(), 672 TokenType.INSERT: lambda self: self._parse_insert(), 673 TokenType.KILL: lambda self: self._parse_kill(), 674 TokenType.LOAD: lambda self: self._parse_load(), 675 TokenType.MERGE: lambda self: self._parse_merge(), 676 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 677 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 678 TokenType.REFRESH: lambda self: self._parse_refresh(), 679 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 680 TokenType.SET: lambda self: self._parse_set(), 681 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 682 TokenType.UNCACHE: lambda self: self._parse_uncache(), 683 TokenType.UPDATE: lambda self: self._parse_update(), 684 TokenType.USE: lambda self: self.expression( 685 exp.Use, 686 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 687 this=self._parse_table(schema=False), 688 ), 689 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 690 } 691 692 UNARY_PARSERS = { 693 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 694 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 695 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, 
this=self._parse_unary()), 696 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 697 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 698 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 699 } 700 701 STRING_PARSERS = { 702 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 703 exp.RawString, this=token.text 704 ), 705 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 706 exp.National, this=token.text 707 ), 708 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 709 TokenType.STRING: lambda self, token: self.expression( 710 exp.Literal, this=token.text, is_string=True 711 ), 712 TokenType.UNICODE_STRING: lambda self, token: self.expression( 713 exp.UnicodeString, 714 this=token.text, 715 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 716 ), 717 } 718 719 NUMERIC_PARSERS = { 720 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 721 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 722 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 723 TokenType.NUMBER: lambda self, token: self.expression( 724 exp.Literal, this=token.text, is_string=False 725 ), 726 } 727 728 PRIMARY_PARSERS = { 729 **STRING_PARSERS, 730 **NUMERIC_PARSERS, 731 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 732 TokenType.NULL: lambda self, _: self.expression(exp.Null), 733 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 734 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 735 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 736 TokenType.STAR: lambda self, _: self.expression( 737 exp.Star, 738 **{ 739 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 740 "replace": 
self._parse_star_op("REPLACE"), 741 "rename": self._parse_star_op("RENAME"), 742 }, 743 ), 744 } 745 746 PLACEHOLDER_PARSERS = { 747 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 748 TokenType.PARAMETER: lambda self: self._parse_parameter(), 749 TokenType.COLON: lambda self: ( 750 self.expression(exp.Placeholder, this=self._prev.text) 751 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 752 else None 753 ), 754 } 755 756 RANGE_PARSERS = { 757 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 758 TokenType.GLOB: binary_range_parser(exp.Glob), 759 TokenType.ILIKE: binary_range_parser(exp.ILike), 760 TokenType.IN: lambda self, this: self._parse_in(this), 761 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 762 TokenType.IS: lambda self, this: self._parse_is(this), 763 TokenType.LIKE: binary_range_parser(exp.Like), 764 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 765 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 766 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 767 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 768 } 769 770 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 771 "ALLOWED_VALUES": lambda self: self.expression( 772 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 773 ), 774 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 775 "AUTO": lambda self: self._parse_auto_property(), 776 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 777 "BACKUP": lambda self: self.expression( 778 exp.BackupProperty, this=self._parse_var(any_token=True) 779 ), 780 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 781 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 782 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 783 "CHECKSUM": lambda self: self._parse_checksum(), 784 "CLUSTER BY": lambda self: 
self._parse_cluster(), 785 "CLUSTERED": lambda self: self._parse_clustered_by(), 786 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 787 exp.CollateProperty, **kwargs 788 ), 789 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 790 "CONTAINS": lambda self: self._parse_contains_property(), 791 "COPY": lambda self: self._parse_copy_property(), 792 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 793 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 794 "DEFINER": lambda self: self._parse_definer(), 795 "DETERMINISTIC": lambda self: self.expression( 796 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 797 ), 798 "DISTKEY": lambda self: self._parse_distkey(), 799 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 800 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 801 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 802 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 803 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 804 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 805 "FREESPACE": lambda self: self._parse_freespace(), 806 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 807 "HEAP": lambda self: self.expression(exp.HeapProperty), 808 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 809 "IMMUTABLE": lambda self: self.expression( 810 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 811 ), 812 "INHERITS": lambda self: self.expression( 813 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 814 ), 815 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 816 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 817 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 818 
"LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 819 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 820 "LIKE": lambda self: self._parse_create_like(), 821 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 822 "LOCK": lambda self: self._parse_locking(), 823 "LOCKING": lambda self: self._parse_locking(), 824 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 825 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 826 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 827 "MODIFIES": lambda self: self._parse_modifies_property(), 828 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 829 "NO": lambda self: self._parse_no_property(), 830 "ON": lambda self: self._parse_on_property(), 831 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 832 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 833 "PARTITION": lambda self: self._parse_partitioned_of(), 834 "PARTITION BY": lambda self: self._parse_partitioned_by(), 835 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 836 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 837 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 838 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 839 "READS": lambda self: self._parse_reads_property(), 840 "REMOTE": lambda self: self._parse_remote_with_connection(), 841 "RETURNS": lambda self: self._parse_returns(), 842 "STRICT": lambda self: self.expression(exp.StrictProperty), 843 "ROW": lambda self: self._parse_row(), 844 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 845 "SAMPLE": lambda self: self.expression( 846 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 847 ), 848 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 849 "SETTINGS": lambda self: 
self.expression( 850 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 851 ), 852 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 853 "SORTKEY": lambda self: self._parse_sortkey(), 854 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 855 "STABLE": lambda self: self.expression( 856 exp.StabilityProperty, this=exp.Literal.string("STABLE") 857 ), 858 "STORED": lambda self: self._parse_stored(), 859 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 860 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 861 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 862 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 863 "TO": lambda self: self._parse_to_table(), 864 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 865 "TRANSFORM": lambda self: self.expression( 866 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 867 ), 868 "TTL": lambda self: self._parse_ttl(), 869 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 870 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 871 "VOLATILE": lambda self: self._parse_volatile_property(), 872 "WITH": lambda self: self._parse_with_property(), 873 } 874 875 CONSTRAINT_PARSERS = { 876 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 877 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 878 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 879 "CHARACTER SET": lambda self: self.expression( 880 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 881 ), 882 "CHECK": lambda self: self.expression( 883 exp.CheckColumnConstraint, 884 this=self._parse_wrapped(self._parse_conjunction), 885 enforced=self._match_text_seq("ENFORCED"), 886 ), 887 "COLLATE": lambda self: self.expression( 888 exp.CollateColumnConstraint, this=self._parse_var() 889 
), 890 "COMMENT": lambda self: self.expression( 891 exp.CommentColumnConstraint, this=self._parse_string() 892 ), 893 "COMPRESS": lambda self: self._parse_compress(), 894 "CLUSTERED": lambda self: self.expression( 895 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 896 ), 897 "NONCLUSTERED": lambda self: self.expression( 898 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 899 ), 900 "DEFAULT": lambda self: self.expression( 901 exp.DefaultColumnConstraint, this=self._parse_bitwise() 902 ), 903 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 904 "EPHEMERAL": lambda self: self.expression( 905 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 906 ), 907 "EXCLUDE": lambda self: self.expression( 908 exp.ExcludeColumnConstraint, this=self._parse_index_params() 909 ), 910 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 911 "FORMAT": lambda self: self.expression( 912 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 913 ), 914 "GENERATED": lambda self: self._parse_generated_as_identity(), 915 "IDENTITY": lambda self: self._parse_auto_increment(), 916 "INLINE": lambda self: self._parse_inline(), 917 "LIKE": lambda self: self._parse_create_like(), 918 "NOT": lambda self: self._parse_not_constraint(), 919 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 920 "ON": lambda self: ( 921 self._match(TokenType.UPDATE) 922 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 923 ) 924 or self.expression(exp.OnProperty, this=self._parse_id_var()), 925 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 926 "PERIOD": lambda self: self._parse_period_for_system_time(), 927 "PRIMARY KEY": lambda self: self._parse_primary_key(), 928 "REFERENCES": lambda self: self._parse_references(match=False), 929 "TITLE": lambda self: self.expression( 930 
exp.TitleColumnConstraint, this=self._parse_var_or_string() 931 ), 932 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 933 "UNIQUE": lambda self: self._parse_unique(), 934 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 935 "WITH": lambda self: self.expression( 936 exp.Properties, expressions=self._parse_wrapped_properties() 937 ), 938 } 939 940 ALTER_PARSERS = { 941 "ADD": lambda self: self._parse_alter_table_add(), 942 "ALTER": lambda self: self._parse_alter_table_alter(), 943 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 944 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 945 "DROP": lambda self: self._parse_alter_table_drop(), 946 "RENAME": lambda self: self._parse_alter_table_rename(), 947 "SET": lambda self: self._parse_alter_table_set(), 948 } 949 950 ALTER_ALTER_PARSERS = { 951 "DISTKEY": lambda self: self._parse_alter_diststyle(), 952 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 953 "SORTKEY": lambda self: self._parse_alter_sortkey(), 954 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 955 } 956 957 SCHEMA_UNNAMED_CONSTRAINTS = { 958 "CHECK", 959 "EXCLUDE", 960 "FOREIGN KEY", 961 "LIKE", 962 "PERIOD", 963 "PRIMARY KEY", 964 "UNIQUE", 965 } 966 967 NO_PAREN_FUNCTION_PARSERS = { 968 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 969 "CASE": lambda self: self._parse_case(), 970 "IF": lambda self: self._parse_if(), 971 "NEXT": lambda self: self._parse_next_value_for(), 972 } 973 974 INVALID_FUNC_NAME_TOKENS = { 975 TokenType.IDENTIFIER, 976 TokenType.STRING, 977 } 978 979 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 980 981 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 982 983 FUNCTION_PARSERS = { 984 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 985 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 986 "DECODE": lambda self: 
self._parse_decode(), 987 "EXTRACT": lambda self: self._parse_extract(), 988 "JSON_OBJECT": lambda self: self._parse_json_object(), 989 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 990 "JSON_TABLE": lambda self: self._parse_json_table(), 991 "MATCH": lambda self: self._parse_match_against(), 992 "OPENJSON": lambda self: self._parse_open_json(), 993 "POSITION": lambda self: self._parse_position(), 994 "PREDICT": lambda self: self._parse_predict(), 995 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 996 "STRING_AGG": lambda self: self._parse_string_agg(), 997 "SUBSTRING": lambda self: self._parse_substring(), 998 "TRIM": lambda self: self._parse_trim(), 999 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1000 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1001 } 1002 1003 QUERY_MODIFIER_PARSERS = { 1004 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1005 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1006 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1007 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1008 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1009 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1010 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1011 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1012 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1013 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1014 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1015 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1016 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1017 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1018 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1019 
TokenType.CLUSTER_BY: lambda self: ( 1020 "cluster", 1021 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1022 ), 1023 TokenType.DISTRIBUTE_BY: lambda self: ( 1024 "distribute", 1025 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1026 ), 1027 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1028 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1029 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1030 } 1031 1032 SET_PARSERS = { 1033 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1034 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1035 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1036 "TRANSACTION": lambda self: self._parse_set_transaction(), 1037 } 1038 1039 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1040 1041 TYPE_LITERAL_PARSERS = { 1042 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1043 } 1044 1045 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1046 1047 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1048 1049 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1050 1051 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1052 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1053 "ISOLATION": ( 1054 ("LEVEL", "REPEATABLE", "READ"), 1055 ("LEVEL", "READ", "COMMITTED"), 1056 ("LEVEL", "READ", "UNCOMITTED"), 1057 ("LEVEL", "SERIALIZABLE"), 1058 ), 1059 "READ": ("WRITE", "ONLY"), 1060 } 1061 1062 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1063 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1064 ) 1065 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1066 1067 CREATE_SEQUENCE: OPTIONS_TYPE = { 1068 "SCALE": ("EXTEND", "NOEXTEND"), 1069 "SHARD": ("EXTEND", "NOEXTEND"), 1070 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 
1071 **dict.fromkeys( 1072 ( 1073 "SESSION", 1074 "GLOBAL", 1075 "KEEP", 1076 "NOKEEP", 1077 "ORDER", 1078 "NOORDER", 1079 "NOCACHE", 1080 "CYCLE", 1081 "NOCYCLE", 1082 "NOMINVALUE", 1083 "NOMAXVALUE", 1084 "NOSCALE", 1085 "NOSHARD", 1086 ), 1087 tuple(), 1088 ), 1089 } 1090 1091 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1092 1093 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1094 1095 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1096 1097 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1098 1099 CLONE_KEYWORDS = {"CLONE", "COPY"} 1100 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1101 1102 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1103 1104 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1105 1106 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1107 1108 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1109 1110 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1111 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1112 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1113 1114 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1115 1116 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1117 1118 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1119 1120 DISTINCT_TOKENS = {TokenType.DISTINCT} 1121 1122 NULL_TOKENS = {TokenType.NULL} 1123 1124 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1125 1126 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1127 1128 STRICT_CAST = True 1129 1130 PREFIXED_PIVOT_COLUMNS = False 1131 IDENTIFY_PIVOT_STRINGS = False 1132 1133 LOG_DEFAULTS_TO_LN = False 1134 1135 # Whether ADD is present for each column added by ALTER TABLE 1136 
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated into a ParseError.
            dialect: The dialect (name or instance) to resolve via Dialect.get_or_raise.
        """
        # Imported locally to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clear all per-parse mutable state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1233 """ 1234 return self._parse( 1235 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1236 ) 1237 1238 def parse_into( 1239 self, 1240 expression_types: exp.IntoType, 1241 raw_tokens: t.List[Token], 1242 sql: t.Optional[str] = None, 1243 ) -> t.List[t.Optional[exp.Expression]]: 1244 """ 1245 Parses a list of tokens into a given Expression type. If a collection of Expression 1246 types is given instead, this method will try to parse the token list into each one 1247 of them, stopping at the first for which the parsing succeeds. 1248 1249 Args: 1250 expression_types: The expression type(s) to try and parse the token list into. 1251 raw_tokens: The list of tokens. 1252 sql: The original SQL string, used to produce helpful debug messages. 1253 1254 Returns: 1255 The target Expression. 1256 """ 1257 errors = [] 1258 for expression_type in ensure_list(expression_types): 1259 parser = self.EXPRESSION_PARSERS.get(expression_type) 1260 if not parser: 1261 raise TypeError(f"No parser registered for {expression_type}") 1262 1263 try: 1264 return self._parse(parser, raw_tokens, sql) 1265 except ParseError as e: 1266 e.errors[0]["into_expression"] = expression_type 1267 errors.append(e) 1268 1269 raise ParseError( 1270 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1271 errors=merge_errors(errors), 1272 ) from errors[-1] 1273 1274 def _parse( 1275 self, 1276 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1277 raw_tokens: t.List[Token], 1278 sql: t.Optional[str] = None, 1279 ) -> t.List[t.Optional[exp.Expression]]: 1280 self.reset() 1281 self.sql = sql or "" 1282 1283 total = len(raw_tokens) 1284 chunks: t.List[t.List[Token]] = [[]] 1285 1286 for i, token in enumerate(raw_tokens): 1287 if token.token_type == TokenType.SEMICOLON: 1288 if token.comments: 1289 chunks.append([token]) 1290 1291 if i < total - 1: 1292 chunks.append([]) 1293 else: 1294 chunks[-1].append(token) 1295 1296 expressions = [] 1297 1298 for 
tokens in chunks: 1299 self._index = -1 1300 self._tokens = tokens 1301 self._advance() 1302 1303 expressions.append(parse_method(self)) 1304 1305 if self._index < len(self._tokens): 1306 self.raise_error("Invalid expression / Unexpected token") 1307 1308 self.check_errors() 1309 1310 return expressions 1311 1312 def check_errors(self) -> None: 1313 """Logs or raises any found errors, depending on the chosen error level setting.""" 1314 if self.error_level == ErrorLevel.WARN: 1315 for error in self.errors: 1316 logger.error(str(error)) 1317 elif self.error_level == ErrorLevel.RAISE and self.errors: 1318 raise ParseError( 1319 concat_messages(self.errors, self.max_errors), 1320 errors=merge_errors(self.errors), 1321 ) 1322 1323 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1324 """ 1325 Appends an error in the list of recorded errors or raises it, depending on the chosen 1326 error level setting. 1327 """ 1328 token = token or self._curr or self._prev or Token.string("") 1329 start = token.start 1330 end = token.end + 1 1331 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1332 highlight = self.sql[start:end] 1333 end_context = self.sql[end : end + self.error_message_context] 1334 1335 error = ParseError.new( 1336 f"{message}. Line {token.line}, Col: {token.col}.\n" 1337 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1338 description=message, 1339 line=token.line, 1340 col=token.col, 1341 start_context=start_context, 1342 highlight=highlight, 1343 end_context=end_context, 1344 ) 1345 1346 if self.error_level == ErrorLevel.IMMEDIATE: 1347 raise error 1348 1349 self.errors.append(error) 1350 1351 def expression( 1352 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1353 ) -> E: 1354 """ 1355 Creates a new, validated Expression. 1356 1357 Args: 1358 exp_class: The expression class to instantiate. 1359 comments: An optional list of comments to attach to the expression. 
1360 kwargs: The arguments to set for the expression along with their respective values. 1361 1362 Returns: 1363 The target expression. 1364 """ 1365 instance = exp_class(**kwargs) 1366 instance.add_comments(comments) if comments else self._add_comments(instance) 1367 return self.validate_expression(instance) 1368 1369 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1370 if expression and self._prev_comments: 1371 expression.add_comments(self._prev_comments) 1372 self._prev_comments = None 1373 1374 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1375 """ 1376 Validates an Expression, making sure that all its mandatory arguments are set. 1377 1378 Args: 1379 expression: The expression to validate. 1380 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1381 1382 Returns: 1383 The validated expression. 1384 """ 1385 if self.error_level != ErrorLevel.IGNORE: 1386 for error_message in expression.error_messages(args): 1387 self.raise_error(error_message) 1388 1389 return expression 1390 1391 def _find_sql(self, start: Token, end: Token) -> str: 1392 return self.sql[start.start : end.end + 1] 1393 1394 def _is_connected(self) -> bool: 1395 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1396 1397 def _advance(self, times: int = 1) -> None: 1398 self._index += times 1399 self._curr = seq_get(self._tokens, self._index) 1400 self._next = seq_get(self._tokens, self._index + 1) 1401 1402 if self._index > 0: 1403 self._prev = self._tokens[self._index - 1] 1404 self._prev_comments = self._prev.comments 1405 else: 1406 self._prev = None 1407 self._prev_comments = None 1408 1409 def _retreat(self, index: int) -> None: 1410 if index != self._index: 1411 self._advance(index - self._index) 1412 1413 def _warn_unsupported(self) -> None: 1414 if len(self._tokens) <= 1: 1415 return 1416 1417 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1418 # interested in emitting a warning for the one being currently processed. 1419 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1420 1421 logger.warning( 1422 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1423 ) 1424 1425 def _parse_command(self) -> exp.Command: 1426 self._warn_unsupported() 1427 return self.expression( 1428 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1429 ) 1430 1431 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1432 """ 1433 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1434 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1435 the parser state accordingly 1436 """ 1437 index = self._index 1438 error_level = self.error_level 1439 1440 self.error_level = ErrorLevel.IMMEDIATE 1441 try: 1442 this = parse_method() 1443 except ParseError: 1444 this = None 1445 finally: 1446 if not this or retreat: 1447 self._retreat(index) 1448 self.error_level = error_level 1449 1450 return this 1451 1452 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1453 start = self._prev 1454 exists = self._parse_exists() if allow_exists else None 1455 1456 self._match(TokenType.ON) 1457 1458 materialized = self._match_text_seq("MATERIALIZED") 1459 kind = self._match_set(self.CREATABLES) and self._prev 1460 if not kind: 1461 return self._parse_as_command(start) 1462 1463 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1464 this = self._parse_user_defined_function(kind=kind.token_type) 1465 elif kind.token_type == TokenType.TABLE: 1466 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1467 elif kind.token_type == TokenType.COLUMN: 1468 this = self._parse_column() 1469 else: 1470 this = self._parse_id_var() 1471 1472 
        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a (possibly schema-qualified) table reference into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause, including its optional actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL expression may be followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: dispatch on the leading token, else fall back to an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Dialect-specific commands are wrapped as opaque Command nodes
        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse DROP <kind> ...; falls back to a Command if the kind isn't a creatable."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse CREATE [OR REPLACE] [UNIQUE] <kind> ...; falls back to a Command on failure."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the creatable
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Leftover tokens (other than a closing paren/comma) mean we failed to fully parse
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if no token was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property; falls back to key = value or sequence options."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; rewind and try sequence options instead
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS ..., including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        # Unquoted identifiers are normalized into plain Var nodes
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse an optional '=' / AS followed by the property's value."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into a Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate VOLATILE: right after CREATE/REPLACE/UNIQUE it's a table property,
        # otherwise it's a function stability marker
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and
pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1885 return exp.VolatileProperty() 1886 1887 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1888 1889 def _parse_retention_period(self) -> exp.Var: 1890 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1891 number = self._parse_number() 1892 number_str = f"{number} " if number else "" 1893 unit = self._parse_var(any_token=True) 1894 return exp.var(f"{number_str}{unit}") 1895 1896 def _parse_system_versioning_property( 1897 self, with_: bool = False 1898 ) -> exp.WithSystemVersioningProperty: 1899 self._match(TokenType.EQ) 1900 prop = self.expression( 1901 exp.WithSystemVersioningProperty, 1902 **{ # type: ignore 1903 "on": True, 1904 "with": with_, 1905 }, 1906 ) 1907 1908 if self._match_text_seq("OFF"): 1909 prop.set("on", False) 1910 return prop 1911 1912 self._match(TokenType.ON) 1913 if self._match(TokenType.L_PAREN): 1914 while self._curr and not self._match(TokenType.R_PAREN): 1915 if self._match_text_seq("HISTORY_TABLE", "="): 1916 prop.set("this", self._parse_table_parts()) 1917 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1918 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1919 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1920 prop.set("retention_period", self._parse_retention_period()) 1921 1922 self._match(TokenType.COMMA) 1923 1924 return prop 1925 1926 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1927 self._match(TokenType.EQ) 1928 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1929 prop = self.expression(exp.DataDeletionProperty, on=on) 1930 1931 if self._match(TokenType.L_PAREN): 1932 while self._curr and not self._match(TokenType.R_PAREN): 1933 if self._match_text_seq("FILTER_COLUMN", "="): 1934 prop.set("filter_column", self._parse_column()) 1935 elif self._match_text_seq("RETENTION_PERIOD", "="): 1936 
prop.set("retention_period", self._parse_retention_period()) 1937 1938 self._match(TokenType.COMMA) 1939 1940 return prop 1941 1942 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1943 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1944 prop = self._parse_system_versioning_property(with_=True) 1945 self._match_r_paren() 1946 return prop 1947 1948 if self._match(TokenType.L_PAREN, advance=False): 1949 return self._parse_wrapped_properties() 1950 1951 if self._match_text_seq("JOURNAL"): 1952 return self._parse_withjournaltable() 1953 1954 if self._match_texts(self.VIEW_ATTRIBUTES): 1955 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1956 1957 if self._match_text_seq("DATA"): 1958 return self._parse_withdata(no=False) 1959 elif self._match_text_seq("NO", "DATA"): 1960 return self._parse_withdata(no=True) 1961 1962 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1963 return self._parse_serde_properties(with_=True) 1964 1965 if not self._next: 1966 return None 1967 1968 return self._parse_withisolatedloading() 1969 1970 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1971 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1972 self._match(TokenType.EQ) 1973 1974 user = self._parse_id_var() 1975 self._match(TokenType.PARAMETER) 1976 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1977 1978 if not user or not host: 1979 return None 1980 1981 return exp.DefinerProperty(this=f"{user}@{host}") 1982 1983 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1984 self._match(TokenType.TABLE) 1985 self._match(TokenType.EQ) 1986 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1987 1988 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1989 return self.expression(exp.LogProperty, no=no) 1990 1991 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1992 return 
    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse `CHECKSUM = {ON|OFF|DEFAULT}`."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY expression list, parenthesized when `wrapped`."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse `CLUSTERED BY (cols) [SORTED BY (ords)] INTO <n> BUCKETS`."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse `COPY GRANTS`; backtrack when GRANTS doesn't follow COPY."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse `FREESPACE = <n> [PERCENT]`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse `MERGEBLOCKRATIO [= <n> [PERCENT]]` or its NO/DEFAULT forms."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse `DATABLOCKSIZE = <n> [BYTES|KBYTES|KILOBYTES]` with modifier flags."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse `BLOCKCOMPRESSION = {ALWAYS|MANUAL|NEVER|DEFAULT} [AUTOTEMP(...)]`."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [target]`; backtrack on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, locked object, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse `PARTITION BY <exprs>`; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM ... TO ..., or
        WITH (MODULUS ..., REMAINDER ...); raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords, not expressions, in bound specs.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> {DEFAULT | FOR VALUES <bound-spec>}`."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY [=] <schema or bracketed expression>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the `[AND [NO] STATISTICS]` tail of `WITH [NO] DATA`."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL` tail of `CONTAINS SQL`."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of `MODIFIES SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse properties introduced by `NO`: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse properties introduced by `ON`: COMMIT row behavior or a target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of `READS SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY(<identifier>)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [{INCLUDING|EXCLUDING} <option>]*`."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `[COMPOUND] SORTKEY(<ids>)`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <value>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <table parts>`."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a type, TABLE [<...>], or NULL ON NULL INPUT."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement, including style keywords and properties."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A following dot means the "style" word was really a table-name part.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INSERT keyword already consumed)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse `KILL [CONNECTION|QUERY] <id>`."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` / `ON DUPLICATE KEY ...` conflict handling."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <exprs> [INTO <target>]`."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the `FORMAT ...` tail of `ROW FORMAT ...`."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse `[WITH] SERDEPROPERTIES (...)`; backtrack when absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive `ROW FORMAT SERDE ...` or `ROW FORMAT DELIMITED ...`."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (UPDATE keyword already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<exprs>)`."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row: parenthesized list or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized/nested forms,
        VALUES, and a leading FROM (DuckDB)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may not raise under a lenient error level.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None
        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `alias [AS] [NOT MATERIALIZED|MATERIALIZED] (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [(col, ...)]`; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty column list means the paren wasn't an alias column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, parsing trailing pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (e.g. `FROM t, t.arr`) into explicit
        UNNEST joins, using normalized names to detect references to FROM tables."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and other query modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Fold a LIMIT-embedded offset (and LIMIT BY exprs) into
                            # a standalone Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint lists until an empty parse.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: `[FINAL|RUNNING] <expr>`."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a `MATCH_RECOGNIZE (...)` clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
2913 order = self._parse_order() 2914 2915 measures = ( 2916 self._parse_csv(self._parse_match_recognize_measure) 2917 if self._match_text_seq("MEASURES") 2918 else None 2919 ) 2920 2921 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2922 rows = exp.var("ONE ROW PER MATCH") 2923 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2924 text = "ALL ROWS PER MATCH" 2925 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2926 text += " SHOW EMPTY MATCHES" 2927 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2928 text += " OMIT EMPTY MATCHES" 2929 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2930 text += " WITH UNMATCHED ROWS" 2931 rows = exp.var(text) 2932 else: 2933 rows = None 2934 2935 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2936 text = "AFTER MATCH SKIP" 2937 if self._match_text_seq("PAST", "LAST", "ROW"): 2938 text += " PAST LAST ROW" 2939 elif self._match_text_seq("TO", "NEXT", "ROW"): 2940 text += " TO NEXT ROW" 2941 elif self._match_text_seq("TO", "FIRST"): 2942 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2943 elif self._match_text_seq("TO", "LAST"): 2944 text += f" TO LAST {self._advance_any().text}" # type: ignore 2945 after = exp.var(text) 2946 else: 2947 after = None 2948 2949 if self._match_text_seq("PATTERN"): 2950 self._match_l_paren() 2951 2952 if not self._curr: 2953 self.raise_error("Expecting )", self._curr) 2954 2955 paren = 1 2956 start = self._curr 2957 2958 while self._curr and paren > 0: 2959 if self._curr.token_type == TokenType.L_PAREN: 2960 paren += 1 2961 if self._curr.token_type == TokenType.R_PAREN: 2962 paren -= 1 2963 2964 end = self._prev 2965 self._advance() 2966 2967 if paren > 0: 2968 self.raise_error("Expecting )", self._curr) 2969 2970 pattern = exp.var(self._find_sql(start, end)) 2971 else: 2972 pattern = None 2973 2974 define = ( 2975 self._parse_csv(self._parse_name_as_expression) 2976 if self._match_text_seq("DEFINE") 2977 else None 2978 ) 2979 2980 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY into an exp.Lateral node."""
        # cross_apply is tri-state: True for CROSS APPLY, False for OUTER
        # APPLY, and None when neither matched (i.e. plain LATERAL).
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to an UNNEST call, a function call, or
            # a (possibly dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the optional (method, side, kind) tokens prefixing a JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause, including comma joins and APPLY forms."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        # If no JOIN keyword followed the modifiers, rewind and discard them.
        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Handle nested joins, e.g. `a JOIN b JOIN c ON ...`: the ON/USING
            # may belong to this join even with other joins in between.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given (or `anonymous` is set), the index name was
        already consumed and only the ON <table> part plus parameters remain.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name ([catalog.][db.]table) into exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # The reference names a database, so shift each part up one level.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or plain table.

        The alternatives are tried in order; the first match is returned, with
        trailing modifiers (alias, hints, pivots, sample, joins) attached.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects (e.g. Hive) put the TABLESAMPLE before the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table clauses (FOR TIMESTAMP/VERSION AS OF, BETWEEN, ...)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3368 this = "TIMESTAMP" 3369 elif self._match(TokenType.VERSION_SNAPSHOT): 3370 this = "VERSION" 3371 else: 3372 return None 3373 3374 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3375 kind = self._prev.text.upper() 3376 start = self._parse_bitwise() 3377 self._match_texts(("TO", "AND")) 3378 end = self._parse_bitwise() 3379 expression: t.Optional[exp.Expression] = self.expression( 3380 exp.Tuple, expressions=[start, end] 3381 ) 3382 elif self._match_text_seq("CONTAINED", "IN"): 3383 kind = "CONTAINED IN" 3384 expression = self.expression( 3385 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3386 ) 3387 elif self._match(TokenType.ALL): 3388 kind = "ALL" 3389 expression = None 3390 else: 3391 self._match_text_seq("AS", "OF") 3392 kind = "AS OF" 3393 expression = self._parse_type() 3394 3395 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3396 3397 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3398 if not self._match(TokenType.UNNEST): 3399 return None 3400 3401 expressions = self._parse_wrapped_csv(self._parse_equality) 3402 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3403 3404 alias = self._parse_table_alias() if with_alias else None 3405 3406 if alias: 3407 if self.dialect.UNNEST_COLUMN_ONLY: 3408 if alias.args.get("columns"): 3409 self.raise_error("Unexpected extra column alias in unnest.") 3410 3411 alias.set("columns", [alias.this]) 3412 alias.set("this", None) 3413 3414 columns = alias.args.get("columns") or [] 3415 if offset and len(expressions) < len(columns): 3416 offset = columns.pop() 3417 3418 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3419 self._match(TokenType.ALIAS) 3420 offset = self._parse_id_var( 3421 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3422 ) or exp.to_identifier("offset") 3423 3424 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3425 3426 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3427 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3428 if not is_derived and not self._match_text_seq("VALUES"): 3429 return None 3430 3431 expressions = self._parse_csv(self._parse_value) 3432 alias = self._parse_table_alias() 3433 3434 if is_derived: 3435 self._match_r_paren() 3436 3437 return self.expression( 3438 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3439 ) 3440 3441 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3442 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3443 as_modifier and self._match_text_seq("USING", "SAMPLE") 3444 ): 3445 return None 3446 3447 bucket_numerator = None 3448 bucket_denominator = None 3449 bucket_field = None 3450 percent = None 3451 size = None 3452 seed = None 3453 3454 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3455 matched_l_paren = self._match(TokenType.L_PAREN) 3456 3457 if self.TABLESAMPLE_CSV: 3458 num = None 3459 expressions = self._parse_csv(self._parse_primary) 3460 else: 3461 expressions = None 3462 num = ( 3463 self._parse_factor() 3464 if self._match(TokenType.NUMBER, advance=False) 3465 else self._parse_primary() or self._parse_placeholder() 3466 ) 3467 3468 if self._match_text_seq("BUCKET"): 3469 bucket_numerator = self._parse_number() 3470 self._match_text_seq("OUT", "OF") 3471 bucket_denominator = bucket_denominator = self._parse_number() 3472 self._match(TokenType.ON) 3473 bucket_field = self._parse_field() 3474 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3475 percent = num 3476 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3477 size = num 3478 else: 3479 percent = num 3480 3481 if matched_l_paren: 3482 self._match_r_paren() 3483 3484 if self._match(TokenType.L_PAREN): 3485 method = self._parse_var(upper=True) 3486 seed = 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield JOIN clauses until one fails to parse."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement (ON / USING / GROUP BY)."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the mandatory IN (...) value list of a PIVOT's FOR clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, computing output column names for PIVOT."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # The PIVOT/UNPIVOT keyword was something else (e.g. an alias);
            # rewind so the caller can reparse it.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the generated column names (aggregation x IN-value),
            # honoring the dialect's prefix/suffix and quoting conventions.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each PIVOT aggregation, used to build column names."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; skip_where_token means WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with optional ALL/DISTINCT, GROUPING SETS, ROLLUP, CUBE, TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        # Accumulate grouping elements until none of the modifier forms match.
        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_conjunction()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to a later clause; rewind it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle hierarchical queries (START WITH ... CONNECT BY ...)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a unary operator inside CONNECT BY, so register a
        # temporary parser for it and remove it afterwards.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also appear after CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` (used by DEFINE and INTERPOLATE clauses)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE (...) ORDER BY modifier."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into exp_class."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the null ordering is implicit, derive it from the dialect's
        # default so transpilation stays semantically faithful.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, TOP (n), or FETCH FIRST/NEXT n ROWS."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL's LIMIT offset, count form.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's LIMIT ... BY expression list."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is tri-state: True for NOWAIT, False for SKIP LOCKED, and a
            # duration expression for WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold UNION/EXCEPT/INTERSECT chains onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY, LIMIT) from the last
                # SELECT so they apply to the whole union.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (and `:=` property assignments)."""
        this = self._parse_equality()

        if self._match(TokenType.COLON_EQ):
            this = self.expression(
                exp.PropertyEQ,
                this=this,
                comments=self._prev_comments,
                expression=self._parse_conjunction(),
            )

        while self._match_set(self.CONJUNCTION):
            this = self.expression(
                self.CONJUNCTION[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_equality(),
            )
        return this

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level binary operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level binary operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] [DISTINCT FROM] NULL/TRUE/FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # The IS was something else (e.g. part of another construct);
            # rewind to just before it.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST, (list|subquery), or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN ... AND ... predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional ESCAPE '<char>' clause to a LIKE-style predicate."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing to the INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval` was an identifier (e.g. `interval IS NULL`); rewind.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, ||, ??, and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level binary operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level binary operators."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
this.args["typed"] = self.dialect.TYPED_DIVISION 4146 this.args["safe"] = self.dialect.SAFE_DIVISION 4147 4148 return this 4149 4150 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4151 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4152 4153 def _parse_unary(self) -> t.Optional[exp.Expression]: 4154 if self._match_set(self.UNARY_PARSERS): 4155 return self.UNARY_PARSERS[self._prev.token_type](self) 4156 return self._parse_at_time_zone(self._parse_type()) 4157 4158 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4159 interval = parse_interval and self._parse_interval() 4160 if interval: 4161 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4162 while True: 4163 index = self._index 4164 self._match(TokenType.PLUS) 4165 4166 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4167 self._retreat(index) 4168 break 4169 4170 interval = self.expression( # type: ignore 4171 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4172 ) 4173 4174 return interval 4175 4176 index = self._index 4177 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4178 this = self._parse_column() 4179 4180 if data_type: 4181 if isinstance(this, exp.Literal): 4182 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4183 if parser: 4184 return parser(self, this, data_type) 4185 return self.expression(exp.Cast, this=this, to=data_type) 4186 if not data_type.expressions: 4187 self._retreat(index) 4188 return self._parse_column() 4189 return self._parse_column_ops(data_type) 4190 4191 return this and self._parse_column_ops(this) 4192 4193 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4194 this = self._parse_type() 4195 if not this: 4196 return None 4197 4198 if isinstance(this, exp.Column) and not this.table: 4199 this = exp.var(this.name.upper()) 4200 4201 return self.expression( 4202 exp.DataTypeParam, this=this, 
expression=self._parse_var(any_token=True) 4203 ) 4204 4205 def _parse_types( 4206 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4207 ) -> t.Optional[exp.Expression]: 4208 index = self._index 4209 4210 this: t.Optional[exp.Expression] = None 4211 prefix = self._match_text_seq("SYSUDTLIB", ".") 4212 4213 if not self._match_set(self.TYPE_TOKENS): 4214 identifier = allow_identifiers and self._parse_id_var( 4215 any_token=False, tokens=(TokenType.VAR,) 4216 ) 4217 if identifier: 4218 tokens = self.dialect.tokenize(identifier.name) 4219 4220 if len(tokens) != 1: 4221 self.raise_error("Unexpected identifier", self._prev) 4222 4223 if tokens[0].token_type in self.TYPE_TOKENS: 4224 self._prev = tokens[0] 4225 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4226 type_name = identifier.name 4227 4228 while self._match(TokenType.DOT): 4229 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4230 4231 this = exp.DataType.build(type_name, udt=True) 4232 else: 4233 self._retreat(self._index - 1) 4234 return None 4235 else: 4236 return None 4237 4238 type_token = self._prev.token_type 4239 4240 if type_token == TokenType.PSEUDO_TYPE: 4241 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4242 4243 if type_token == TokenType.OBJECT_IDENTIFIER: 4244 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4245 4246 nested = type_token in self.NESTED_TYPE_TOKENS 4247 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4248 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4249 expressions = None 4250 maybe_func = False 4251 4252 if self._match(TokenType.L_PAREN): 4253 if is_struct: 4254 expressions = self._parse_csv(self._parse_struct_types) 4255 elif nested: 4256 expressions = self._parse_csv( 4257 lambda: self._parse_types( 4258 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4259 ) 4260 ) 4261 elif type_token in self.ENUM_TYPE_TOKENS: 4262 expressions = 
self._parse_csv(self._parse_equality) 4263 elif is_aggregate: 4264 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4265 any_token=False, tokens=(TokenType.VAR,) 4266 ) 4267 if not func_or_ident or not self._match(TokenType.COMMA): 4268 return None 4269 expressions = self._parse_csv( 4270 lambda: self._parse_types( 4271 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4272 ) 4273 ) 4274 expressions.insert(0, func_or_ident) 4275 else: 4276 expressions = self._parse_csv(self._parse_type_size) 4277 4278 if not expressions or not self._match(TokenType.R_PAREN): 4279 self._retreat(index) 4280 return None 4281 4282 maybe_func = True 4283 4284 values: t.Optional[t.List[exp.Expression]] = None 4285 4286 if nested and self._match(TokenType.LT): 4287 if is_struct: 4288 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4289 else: 4290 expressions = self._parse_csv( 4291 lambda: self._parse_types( 4292 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4293 ) 4294 ) 4295 4296 if not self._match(TokenType.GT): 4297 self.raise_error("Expecting >") 4298 4299 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4300 values = self._parse_csv(self._parse_conjunction) 4301 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4302 4303 if type_token in self.TIMESTAMPS: 4304 if self._match_text_seq("WITH", "TIME", "ZONE"): 4305 maybe_func = False 4306 tz_type = ( 4307 exp.DataType.Type.TIMETZ 4308 if type_token in self.TIMES 4309 else exp.DataType.Type.TIMESTAMPTZ 4310 ) 4311 this = exp.DataType(this=tz_type, expressions=expressions) 4312 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4313 maybe_func = False 4314 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4315 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4316 maybe_func = False 4317 elif type_token == TokenType.INTERVAL: 4318 unit = 
self._parse_var(upper=True) 4319 if unit: 4320 if self._match_text_seq("TO"): 4321 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4322 4323 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4324 else: 4325 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4326 4327 if maybe_func and check_func: 4328 index2 = self._index 4329 peek = self._parse_string() 4330 4331 if not peek: 4332 self._retreat(index) 4333 return None 4334 4335 self._retreat(index2) 4336 4337 if not this: 4338 if self._match_text_seq("UNSIGNED"): 4339 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4340 if not unsigned_type_token: 4341 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4342 4343 type_token = unsigned_type_token or type_token 4344 4345 this = exp.DataType( 4346 this=exp.DataType.Type[type_token.value], 4347 expressions=expressions, 4348 nested=nested, 4349 values=values, 4350 prefix=prefix, 4351 ) 4352 elif expressions: 4353 this.set("expressions", expressions) 4354 4355 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4356 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4357 4358 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4359 converter = self.TYPE_CONVERTER.get(this.this) 4360 if converter: 4361 this = converter(t.cast(exp.DataType, this)) 4362 4363 return this 4364 4365 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4366 index = self._index 4367 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4368 self._match(TokenType.COLON) 4369 column_def = self._parse_column_def(this) 4370 4371 if type_required and ( 4372 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4373 ): 4374 self._retreat(index) 4375 return self._parse_types() 4376 4377 return column_def 4378 4379 def 
_parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4380 if not self._match_text_seq("AT", "TIME", "ZONE"): 4381 return this 4382 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4383 4384 def _parse_column(self) -> t.Optional[exp.Expression]: 4385 this = self._parse_column_reference() 4386 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4387 4388 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4389 this = self._parse_field() 4390 if ( 4391 not this 4392 and self._match(TokenType.VALUES, advance=False) 4393 and self.VALUES_FOLLOWED_BY_PAREN 4394 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4395 ): 4396 this = self._parse_id_var() 4397 4398 if isinstance(this, exp.Identifier): 4399 # We bubble up comments from the Identifier to the Column 4400 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4401 4402 return this 4403 4404 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4405 this = self._parse_bracket(this) 4406 4407 while self._match_set(self.COLUMN_OPERATORS): 4408 op_token = self._prev.token_type 4409 op = self.COLUMN_OPERATORS.get(op_token) 4410 4411 if op_token == TokenType.DCOLON: 4412 field = self._parse_types() 4413 if not field: 4414 self.raise_error("Expected type") 4415 elif op and self._curr: 4416 field = self._parse_column_reference() 4417 else: 4418 field = self._parse_field(any_token=True, anonymous_func=True) 4419 4420 if isinstance(field, exp.Func) and this: 4421 # bigquery allows function calls like x.y.count(...) 4422 # SAFE.SUBSTR(...) 
4423 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4424 this = exp.replace_tree( 4425 this, 4426 lambda n: ( 4427 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4428 if n.table 4429 else n.this 4430 ) 4431 if isinstance(n, exp.Column) 4432 else n, 4433 ) 4434 4435 if op: 4436 this = op(self, this, field) 4437 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4438 this = self.expression( 4439 exp.Column, 4440 this=field, 4441 table=this.this, 4442 db=this.args.get("table"), 4443 catalog=this.args.get("db"), 4444 ) 4445 else: 4446 this = self.expression(exp.Dot, this=this, expression=field) 4447 this = self._parse_bracket(this) 4448 return this 4449 4450 def _parse_primary(self) -> t.Optional[exp.Expression]: 4451 if self._match_set(self.PRIMARY_PARSERS): 4452 token_type = self._prev.token_type 4453 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4454 4455 if token_type == TokenType.STRING: 4456 expressions = [primary] 4457 while self._match(TokenType.STRING): 4458 expressions.append(exp.Literal.string(self._prev.text)) 4459 4460 if len(expressions) > 1: 4461 return self.expression(exp.Concat, expressions=expressions) 4462 4463 return primary 4464 4465 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4466 return exp.Literal.number(f"0.{self._prev.text}") 4467 4468 if self._match(TokenType.L_PAREN): 4469 comments = self._prev_comments 4470 query = self._parse_select() 4471 4472 if query: 4473 expressions = [query] 4474 else: 4475 expressions = self._parse_expressions() 4476 4477 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4478 4479 if not this and self._match(TokenType.R_PAREN, advance=False): 4480 this = self.expression(exp.Tuple) 4481 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4482 this = self._parse_subquery(this=this, parse_alias=False) 4483 elif isinstance(this, exp.Subquery): 4484 this = self._parse_subquery( 4485 
this=self._parse_set_operations(this), parse_alias=False 4486 ) 4487 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4488 this = self.expression(exp.Tuple, expressions=expressions) 4489 else: 4490 this = self.expression(exp.Paren, this=this) 4491 4492 if this: 4493 this.add_comments(comments) 4494 4495 self._match_r_paren(expression=this) 4496 return this 4497 4498 return None 4499 4500 def _parse_field( 4501 self, 4502 any_token: bool = False, 4503 tokens: t.Optional[t.Collection[TokenType]] = None, 4504 anonymous_func: bool = False, 4505 ) -> t.Optional[exp.Expression]: 4506 if anonymous_func: 4507 field = ( 4508 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4509 or self._parse_primary() 4510 ) 4511 else: 4512 field = self._parse_primary() or self._parse_function( 4513 anonymous=anonymous_func, any_token=any_token 4514 ) 4515 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4516 4517 def _parse_function( 4518 self, 4519 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4520 anonymous: bool = False, 4521 optional_parens: bool = True, 4522 any_token: bool = False, 4523 ) -> t.Optional[exp.Expression]: 4524 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4525 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4526 fn_syntax = False 4527 if ( 4528 self._match(TokenType.L_BRACE, advance=False) 4529 and self._next 4530 and self._next.text.upper() == "FN" 4531 ): 4532 self._advance(2) 4533 fn_syntax = True 4534 4535 func = self._parse_function_call( 4536 functions=functions, 4537 anonymous=anonymous, 4538 optional_parens=optional_parens, 4539 any_token=any_token, 4540 ) 4541 4542 if fn_syntax: 4543 self._match(TokenType.R_BRACE) 4544 4545 return func 4546 4547 def _parse_function_call( 4548 self, 4549 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4550 anonymous: bool = False, 4551 optional_parens: bool = True, 4552 any_token: bool = 
False, 4553 ) -> t.Optional[exp.Expression]: 4554 if not self._curr: 4555 return None 4556 4557 comments = self._curr.comments 4558 token_type = self._curr.token_type 4559 this = self._curr.text 4560 upper = this.upper() 4561 4562 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4563 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4564 self._advance() 4565 return self._parse_window(parser(self)) 4566 4567 if not self._next or self._next.token_type != TokenType.L_PAREN: 4568 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4569 self._advance() 4570 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4571 4572 return None 4573 4574 if any_token: 4575 if token_type in self.RESERVED_TOKENS: 4576 return None 4577 elif token_type not in self.FUNC_TOKENS: 4578 return None 4579 4580 self._advance(2) 4581 4582 parser = self.FUNCTION_PARSERS.get(upper) 4583 if parser and not anonymous: 4584 this = parser(self) 4585 else: 4586 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4587 4588 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4589 this = self.expression(subquery_predicate, this=self._parse_select()) 4590 self._match_r_paren() 4591 return this 4592 4593 if functions is None: 4594 functions = self.FUNCTIONS 4595 4596 function = functions.get(upper) 4597 4598 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4599 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4600 4601 if alias: 4602 args = self._kv_to_prop_eq(args) 4603 4604 if function and not anonymous: 4605 if "dialect" in function.__code__.co_varnames: 4606 func = function(args, dialect=self.dialect) 4607 else: 4608 func = function(args) 4609 4610 func = self.validate_expression(func, args) 4611 if not self.dialect.NORMALIZE_FUNCTIONS: 4612 func.meta["name"] = this 4613 4614 this = func 4615 else: 4616 if token_type == TokenType.IDENTIFIER: 4617 this = exp.Identifier(this=this, quoted=True) 
4618 this = self.expression(exp.Anonymous, this=this, expressions=args) 4619 4620 if isinstance(this, exp.Expression): 4621 this.add_comments(comments) 4622 4623 self._match_r_paren(this) 4624 return self._parse_window(this) 4625 4626 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4627 transformed = [] 4628 4629 for e in expressions: 4630 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4631 if isinstance(e, exp.Alias): 4632 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4633 4634 if not isinstance(e, exp.PropertyEQ): 4635 e = self.expression( 4636 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4637 ) 4638 4639 if isinstance(e.this, exp.Column): 4640 e.this.replace(e.this.this) 4641 4642 transformed.append(e) 4643 4644 return transformed 4645 4646 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4647 return self._parse_column_def(self._parse_id_var()) 4648 4649 def _parse_user_defined_function( 4650 self, kind: t.Optional[TokenType] = None 4651 ) -> t.Optional[exp.Expression]: 4652 this = self._parse_id_var() 4653 4654 while self._match(TokenType.DOT): 4655 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4656 4657 if not self._match(TokenType.L_PAREN): 4658 return this 4659 4660 expressions = self._parse_csv(self._parse_function_parameter) 4661 self._match_r_paren() 4662 return self.expression( 4663 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4664 ) 4665 4666 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4667 literal = self._parse_primary() 4668 if literal: 4669 return self.expression(exp.Introducer, this=token.text, expression=literal) 4670 4671 return self.expression(exp.Identifier, this=token.text) 4672 4673 def _parse_session_parameter(self) -> exp.SessionParameter: 4674 kind = None 4675 this = self._parse_id_var() or self._parse_primary() 4676 4677 if 
this and self._match(TokenType.DOT): 4678 kind = this.name 4679 this = self._parse_var() or self._parse_primary() 4680 4681 return self.expression(exp.SessionParameter, this=this, kind=kind) 4682 4683 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4684 index = self._index 4685 4686 if self._match(TokenType.L_PAREN): 4687 expressions = t.cast( 4688 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4689 ) 4690 4691 if not self._match(TokenType.R_PAREN): 4692 self._retreat(index) 4693 else: 4694 expressions = [self._parse_id_var()] 4695 4696 if self._match_set(self.LAMBDAS): 4697 return self.LAMBDAS[self._prev.token_type](self, expressions) 4698 4699 self._retreat(index) 4700 4701 this: t.Optional[exp.Expression] 4702 4703 if self._match(TokenType.DISTINCT): 4704 this = self.expression( 4705 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4706 ) 4707 else: 4708 this = self._parse_select_or_expression(alias=alias) 4709 4710 return self._parse_limit( 4711 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4712 ) 4713 4714 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4715 index = self._index 4716 if not self._match(TokenType.L_PAREN): 4717 return this 4718 4719 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4720 # expr can be of both types 4721 if self._match_set(self.SELECT_START_TOKENS): 4722 self._retreat(index) 4723 return this 4724 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4725 self._match_r_paren() 4726 return self.expression(exp.Schema, this=this, expressions=args) 4727 4728 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4729 return self._parse_column_def(self._parse_field(any_token=True)) 4730 4731 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4732 # column defs are not really columns, they're identifiers 4733 if isinstance(this, exp.Column): 4734 this = this.this 4735 4736 kind = self._parse_types(schema=True) 4737 4738 if self._match_text_seq("FOR", "ORDINALITY"): 4739 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4740 4741 constraints: t.List[exp.Expression] = [] 4742 4743 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4744 ("ALIAS", "MATERIALIZED") 4745 ): 4746 persisted = self._prev.text.upper() == "MATERIALIZED" 4747 constraints.append( 4748 self.expression( 4749 exp.ComputedColumnConstraint, 4750 this=self._parse_conjunction(), 4751 persisted=persisted or self._match_text_seq("PERSISTED"), 4752 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4753 ) 4754 ) 4755 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4756 self._match(TokenType.ALIAS) 4757 constraints.append( 4758 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4759 ) 4760 4761 while True: 4762 constraint = self._parse_column_constraint() 4763 if not constraint: 4764 break 4765 constraints.append(constraint) 4766 4767 if not kind and not constraints: 4768 return this 4769 4770 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4771 4772 def _parse_auto_increment( 4773 self, 4774 ) -> exp.GeneratedAsIdentityColumnConstraint | 
exp.AutoIncrementColumnConstraint: 4775 start = None 4776 increment = None 4777 4778 if self._match(TokenType.L_PAREN, advance=False): 4779 args = self._parse_wrapped_csv(self._parse_bitwise) 4780 start = seq_get(args, 0) 4781 increment = seq_get(args, 1) 4782 elif self._match_text_seq("START"): 4783 start = self._parse_bitwise() 4784 self._match_text_seq("INCREMENT") 4785 increment = self._parse_bitwise() 4786 4787 if start and increment: 4788 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4789 4790 return exp.AutoIncrementColumnConstraint() 4791 4792 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4793 if not self._match_text_seq("REFRESH"): 4794 self._retreat(self._index - 1) 4795 return None 4796 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4797 4798 def _parse_compress(self) -> exp.CompressColumnConstraint: 4799 if self._match(TokenType.L_PAREN, advance=False): 4800 return self.expression( 4801 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4802 ) 4803 4804 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4805 4806 def _parse_generated_as_identity( 4807 self, 4808 ) -> ( 4809 exp.GeneratedAsIdentityColumnConstraint 4810 | exp.ComputedColumnConstraint 4811 | exp.GeneratedAsRowColumnConstraint 4812 ): 4813 if self._match_text_seq("BY", "DEFAULT"): 4814 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4815 this = self.expression( 4816 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4817 ) 4818 else: 4819 self._match_text_seq("ALWAYS") 4820 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4821 4822 self._match(TokenType.ALIAS) 4823 4824 if self._match_text_seq("ROW"): 4825 start = self._match_text_seq("START") 4826 if not start: 4827 self._match(TokenType.END) 4828 hidden = self._match_text_seq("HIDDEN") 4829 return 
self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4830 4831 identity = self._match_text_seq("IDENTITY") 4832 4833 if self._match(TokenType.L_PAREN): 4834 if self._match(TokenType.START_WITH): 4835 this.set("start", self._parse_bitwise()) 4836 if self._match_text_seq("INCREMENT", "BY"): 4837 this.set("increment", self._parse_bitwise()) 4838 if self._match_text_seq("MINVALUE"): 4839 this.set("minvalue", self._parse_bitwise()) 4840 if self._match_text_seq("MAXVALUE"): 4841 this.set("maxvalue", self._parse_bitwise()) 4842 4843 if self._match_text_seq("CYCLE"): 4844 this.set("cycle", True) 4845 elif self._match_text_seq("NO", "CYCLE"): 4846 this.set("cycle", False) 4847 4848 if not identity: 4849 this.set("expression", self._parse_range()) 4850 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4851 args = self._parse_csv(self._parse_bitwise) 4852 this.set("start", seq_get(args, 0)) 4853 this.set("increment", seq_get(args, 1)) 4854 4855 self._match_r_paren() 4856 4857 return this 4858 4859 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4860 self._match_text_seq("LENGTH") 4861 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4862 4863 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4864 if self._match_text_seq("NULL"): 4865 return self.expression(exp.NotNullColumnConstraint) 4866 if self._match_text_seq("CASESPECIFIC"): 4867 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4868 if self._match_text_seq("FOR", "REPLICATION"): 4869 return self.expression(exp.NotForReplicationColumnConstraint) 4870 return None 4871 4872 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4873 if self._match(TokenType.CONSTRAINT): 4874 this = self._parse_id_var() 4875 else: 4876 this = None 4877 4878 if self._match_texts(self.CONSTRAINT_PARSERS): 4879 return self.expression( 4880 exp.ColumnConstraint, 4881 this=this, 4882 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4883 ) 4884 4885 return this 4886 4887 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4888 if not self._match(TokenType.CONSTRAINT): 4889 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4890 4891 return self.expression( 4892 exp.Constraint, 4893 this=self._parse_id_var(), 4894 expressions=self._parse_unnamed_constraints(), 4895 ) 4896 4897 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4898 constraints = [] 4899 while True: 4900 constraint = self._parse_unnamed_constraint() or self._parse_function() 4901 if not constraint: 4902 break 4903 constraints.append(constraint) 4904 4905 return constraints 4906 4907 def _parse_unnamed_constraint( 4908 self, constraints: t.Optional[t.Collection[str]] = None 4909 ) -> t.Optional[exp.Expression]: 4910 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4911 constraints or self.CONSTRAINT_PARSERS 4912 ): 4913 return None 4914 4915 constraint = self._prev.text.upper() 4916 if constraint not in self.CONSTRAINT_PARSERS: 4917 self.raise_error(f"No parser found for schema constraint {constraint}.") 4918 4919 return self.CONSTRAINT_PARSERS[constraint](self) 4920 4921 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4922 self._match_text_seq("KEY") 4923 return self.expression( 4924 exp.UniqueColumnConstraint, 4925 this=self._parse_schema(self._parse_id_var(any_token=False)), 4926 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4927 on_conflict=self._parse_on_conflict(), 4928 ) 4929 4930 def _parse_key_constraint_options(self) -> t.List[str]: 4931 options = [] 4932 while True: 4933 if not self._curr: 4934 break 4935 4936 if self._match(TokenType.ON): 4937 action = None 4938 on = self._advance_any() and self._prev.text 4939 4940 if self._match_text_seq("NO", "ACTION"): 4941 action = "NO ACTION" 4942 elif self._match_text_seq("CASCADE"): 4943 
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option keyword follows, so the option list is done.
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause (FK target table plus key-constraint options).

        When `match` is True, a REFERENCES token must be present or None is returned;
        otherwise parsing proceeds unconditionally.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here; any referenced column list
        # is captured inside the table parsed with schema=True below.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE/UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Keyed by the event kind: "delete" or "update".
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook for dialects that allow more than plain fields in PRIMARY KEY (...).
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>); retreat if absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list (and not in properties) -> column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # NOTE(review): `is_map` is unused in this base implementation — presumably
        # honored by dialect overrides; confirm before removing.
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a [...] or {...} suffix: subscript, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indices for the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # a : b inside brackets becomes a Slice node.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # `ELSE interval END` can mis-parse as an Interval with unit END; undo
            # that by treating the ELSE value as a column named "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call IF(...) or as IF <cond> THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index =
            self._index - 1

            # Some dialects treat a statement-initial bare IF as a command.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreat if absent."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) — a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is carried through on the node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type-string') form.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            # A FORMAT cast to a temporal type is rewritten to StrToDate/StrToTime
            # with the format translated through the dialect's time mappings.
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name -> user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A trailing odd argument is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> <sep> [VALUE] <value> pair of JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` when followed by FORMAT JSON; otherwise a no-op.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
        # NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value pairs (or *),
        NULL handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside JSON_TABLE's COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(<json_column_def>, ...) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<doc> [, <path>] [ON ERROR/EMPTY handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH(<cols>) AGAINST (<string> [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(<doc> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column in the WITH clause: <name> <type> [<path>] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS arguments; supports both `x IN y` and csv forms."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Argument order differs per dialect, hence `haystack_first`.
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT(MODEL <model>, TABLE <table> [, <params>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint arguments are table references, e.g. BROADCAST(t1, t2).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR with no FROM: the start position defaults to 1.
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects), the first operand is the
            # characters to trim and the second is the target string.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN <column> qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER (...) specification itself."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-anchor the IGNORE/RESPECT NULLS wrapper around the whole aggregate.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows, so there is no window spec to parse.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments now live on the Window node, not the wrapped function.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a frame spec: UNBOUNDED / CURRENT ROW / <expr>, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        When `explicit` is True, only an alias introduced by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier."""
        expression =
        self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected -> quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword token into an exp.Var (optionally uppercased)."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token unless it is reserved.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the dialect's placeholder parsers; rewind if none produced a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse star modifiers like EXCEPT/REPLACE, wrapped or unwrapped."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found at the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain driven by the `expressions` token map."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional` allows them to be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears inside DDL, e.g. CREATE TABLE ... AS <select>.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if
        self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. ISOLATION LEVEL READ COMMITTED.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is parsed for both statements but only attached to
        # Commit; Rollback carries just the savepoint — confirm this is intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        # Default the drop kind to COLUMN when the statement didn't specify one.
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD ... part of ALTER TABLE: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | COMMENT | TYPE}."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY <col>}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER [COMPOUND] SORTKEY {(<cols>) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP ... part of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN <old> TO <new> | TO <new table name>]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return
self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6023 6024 def _parse_alter_table_set(self) -> exp.AlterSet: 6025 alter_set = self.expression(exp.AlterSet) 6026 6027 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6028 "TABLE", "PROPERTIES" 6029 ): 6030 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6031 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6032 alter_set.set("expressions", [self._parse_conjunction()]) 6033 elif self._match_texts(("LOGGED", "UNLOGGED")): 6034 alter_set.set("option", exp.var(self._prev.text.upper())) 6035 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6036 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6037 elif self._match_text_seq("LOCATION"): 6038 alter_set.set("location", self._parse_field()) 6039 elif self._match_text_seq("ACCESS", "METHOD"): 6040 alter_set.set("access_method", self._parse_field()) 6041 elif self._match_text_seq("TABLESPACE"): 6042 alter_set.set("tablespace", self._parse_field()) 6043 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6044 alter_set.set("file_format", [self._parse_field()]) 6045 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6046 alter_set.set("file_format", self._parse_wrapped_options()) 6047 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6048 alter_set.set("copy_options", self._parse_wrapped_options()) 6049 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6050 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6051 else: 6052 if self._match_text_seq("SERDE"): 6053 alter_set.set("serde", self._parse_field()) 6054 6055 alter_set.set("expressions", [self._parse_properties()]) 6056 6057 return alter_set 6058 6059 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6060 start = self._prev 6061 6062 if not self._match(TokenType.TABLE): 6063 return self._parse_as_command(start) 6064 6065 
exists = self._parse_exists() 6066 only = self._match_text_seq("ONLY") 6067 this = self._parse_table(schema=True) 6068 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6069 6070 if self._next: 6071 self._advance() 6072 6073 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6074 if parser: 6075 actions = ensure_list(parser(self)) 6076 options = self._parse_csv(self._parse_property) 6077 6078 if not self._curr and actions: 6079 return self.expression( 6080 exp.AlterTable, 6081 this=this, 6082 exists=exists, 6083 actions=actions, 6084 only=only, 6085 options=options, 6086 cluster=cluster, 6087 ) 6088 6089 return self._parse_as_command(start) 6090 6091 def _parse_merge(self) -> exp.Merge: 6092 self._match(TokenType.INTO) 6093 target = self._parse_table() 6094 6095 if target and self._match(TokenType.ALIAS, advance=False): 6096 target.set("alias", self._parse_table_alias()) 6097 6098 self._match(TokenType.USING) 6099 using = self._parse_table() 6100 6101 self._match(TokenType.ON) 6102 on = self._parse_conjunction() 6103 6104 return self.expression( 6105 exp.Merge, 6106 this=target, 6107 using=using, 6108 on=on, 6109 expressions=self._parse_when_matched(), 6110 ) 6111 6112 def _parse_when_matched(self) -> t.List[exp.When]: 6113 whens = [] 6114 6115 while self._match(TokenType.WHEN): 6116 matched = not self._match(TokenType.NOT) 6117 self._match_text_seq("MATCHED") 6118 source = ( 6119 False 6120 if self._match_text_seq("BY", "TARGET") 6121 else self._match_text_seq("BY", "SOURCE") 6122 ) 6123 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6124 6125 self._match(TokenType.THEN) 6126 6127 if self._match(TokenType.INSERT): 6128 _this = self._parse_star() 6129 if _this: 6130 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6131 else: 6132 then = self.expression( 6133 exp.Insert, 6134 this=self._parse_value(), 6135 expression=self._match_text_seq("VALUES") and 
self._parse_value(), 6136 ) 6137 elif self._match(TokenType.UPDATE): 6138 expressions = self._parse_star() 6139 if expressions: 6140 then = self.expression(exp.Update, expressions=expressions) 6141 else: 6142 then = self.expression( 6143 exp.Update, 6144 expressions=self._match(TokenType.SET) 6145 and self._parse_csv(self._parse_equality), 6146 ) 6147 elif self._match(TokenType.DELETE): 6148 then = self.expression(exp.Var, this=self._prev.text) 6149 else: 6150 then = None 6151 6152 whens.append( 6153 self.expression( 6154 exp.When, 6155 matched=matched, 6156 source=source, 6157 condition=condition, 6158 then=then, 6159 ) 6160 ) 6161 return whens 6162 6163 def _parse_show(self) -> t.Optional[exp.Expression]: 6164 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6165 if parser: 6166 return parser(self) 6167 return self._parse_as_command(self._prev) 6168 6169 def _parse_set_item_assignment( 6170 self, kind: t.Optional[str] = None 6171 ) -> t.Optional[exp.Expression]: 6172 index = self._index 6173 6174 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6175 return self._parse_set_transaction(global_=kind == "GLOBAL") 6176 6177 left = self._parse_primary() or self._parse_column() 6178 assignment_delimiter = self._match_texts(("=", "TO")) 6179 6180 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6181 self._retreat(index) 6182 return None 6183 6184 right = self._parse_statement() or self._parse_id_var() 6185 this = self.expression(exp.EQ, this=left, expression=right) 6186 6187 return self.expression(exp.SetItem, this=this, kind=kind) 6188 6189 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6190 self._match_text_seq("TRANSACTION") 6191 characteristics = self._parse_csv( 6192 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6193 ) 6194 return self.expression( 6195 exp.SetItem, 6196 expressions=characteristics, 6197 kind="TRANSACTION", 6198 **{"global": 
global_}, # type: ignore 6199 ) 6200 6201 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6202 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6203 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6204 6205 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6206 index = self._index 6207 set_ = self.expression( 6208 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6209 ) 6210 6211 if self._curr: 6212 self._retreat(index) 6213 return self._parse_as_command(self._prev) 6214 6215 return set_ 6216 6217 def _parse_var_from_options( 6218 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6219 ) -> t.Optional[exp.Var]: 6220 start = self._curr 6221 if not start: 6222 return None 6223 6224 option = start.text.upper() 6225 continuations = options.get(option) 6226 6227 index = self._index 6228 self._advance() 6229 for keywords in continuations or []: 6230 if isinstance(keywords, str): 6231 keywords = (keywords,) 6232 6233 if self._match_text_seq(*keywords): 6234 option = f"{option} {' '.join(keywords)}" 6235 break 6236 else: 6237 if continuations or continuations is None: 6238 if raise_unmatched: 6239 self.raise_error(f"Unknown option {option}") 6240 6241 self._retreat(index) 6242 return None 6243 6244 return exp.var(option) 6245 6246 def _parse_as_command(self, start: Token) -> exp.Command: 6247 while self._curr: 6248 self._advance() 6249 text = self._find_sql(start, self._prev) 6250 size = len(start.text) 6251 self._warn_unsupported() 6252 return exp.Command(this=text[:size], expression=text[size:]) 6253 6254 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6255 settings = [] 6256 6257 self._match_l_paren() 6258 kind = self._parse_id_var() 6259 6260 if self._match(TokenType.L_PAREN): 6261 while True: 6262 key = self._parse_id_var() 6263 value = self._parse_primary() 6264 6265 if not key and value is None: 6266 break 6267 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6268 self._match(TokenType.R_PAREN) 6269 6270 self._match_r_paren() 6271 6272 return self.expression( 6273 exp.DictProperty, 6274 this=this, 6275 kind=kind.this if kind else None, 6276 settings=settings, 6277 ) 6278 6279 def _parse_dict_range(self, this: str) -> exp.DictRange: 6280 self._match_l_paren() 6281 has_min = self._match_text_seq("MIN") 6282 if has_min: 6283 min = self._parse_var() or self._parse_primary() 6284 self._match_text_seq("MAX") 6285 max = self._parse_var() or self._parse_primary() 6286 else: 6287 max = self._parse_var() or self._parse_primary() 6288 min = exp.Literal.number(0) 6289 self._match_r_paren() 6290 return self.expression(exp.DictRange, this=this, min=min, max=max) 6291 6292 def _parse_comprehension( 6293 self, this: t.Optional[exp.Expression] 6294 ) -> t.Optional[exp.Comprehension]: 6295 index = self._index 6296 expression = self._parse_column() 6297 if not self._match(TokenType.IN): 6298 self._retreat(index - 1) 6299 return None 6300 iterator = self._parse_column() 6301 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6302 return self.expression( 6303 exp.Comprehension, 6304 this=this, 6305 expression=expression, 6306 iterator=iterator, 6307 condition=condition, 6308 ) 6309 6310 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6311 if self._match(TokenType.HEREDOC_STRING): 6312 return self.expression(exp.Heredoc, this=self._prev.text) 6313 6314 if not self._match_text_seq("$"): 6315 return None 6316 6317 tags = ["$"] 6318 tag_text = None 6319 6320 if self._is_connected(): 6321 self._advance() 6322 tags.append(self._prev.text.upper()) 6323 else: 6324 self.raise_error("No closing $ found") 6325 6326 if tags[-1] != "$": 6327 if self._is_connected() and self._match_text_seq("$"): 6328 tag_text = tags[-1] 6329 tags.append("$") 6330 else: 6331 self.raise_error("No closing $ found") 6332 6333 heredoc_start = self._curr 6334 6335 
while self._curr: 6336 if self._match_text_seq(*tags, advance=False): 6337 this = self._find_sql(heredoc_start, self._prev) 6338 self._advance(len(tags)) 6339 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6340 6341 self._advance() 6342 6343 self.raise_error(f"No closing {''.join(tags)} found") 6344 return None 6345 6346 def _find_parser( 6347 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6348 ) -> t.Optional[t.Callable]: 6349 if not self._curr: 6350 return None 6351 6352 index = self._index 6353 this = [] 6354 while True: 6355 # The current token might be multiple words 6356 curr = self._curr.text.upper() 6357 key = curr.split(" ") 6358 this.append(curr) 6359 6360 self._advance() 6361 result, trie = in_trie(trie, key) 6362 if result == TrieResult.FAILED: 6363 break 6364 6365 if result == TrieResult.EXISTS: 6366 subparser = parsers[" ".join(this)] 6367 return subparser 6368 6369 self._retreat(index) 6370 return None 6371 6372 def _match(self, token_type, advance=True, expression=None): 6373 if not self._curr: 6374 return None 6375 6376 if self._curr.token_type == token_type: 6377 if advance: 6378 self._advance() 6379 self._add_comments(expression) 6380 return True 6381 6382 return None 6383 6384 def _match_set(self, types, advance=True): 6385 if not self._curr: 6386 return None 6387 6388 if self._curr.token_type in types: 6389 if advance: 6390 self._advance() 6391 return True 6392 6393 return None 6394 6395 def _match_pair(self, token_type_a, token_type_b, advance=True): 6396 if not self._curr or not self._next: 6397 return None 6398 6399 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6400 if advance: 6401 self._advance(2) 6402 return True 6403 6404 return None 6405 6406 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6407 if not self._match(TokenType.L_PAREN, expression=expression): 6408 self.raise_error("Expecting (") 6409 6410 def _match_r_paren(self, expression: 
t.Optional[exp.Expression] = None) -> None: 6411 if not self._match(TokenType.R_PAREN, expression=expression): 6412 self.raise_error("Expecting )") 6413 6414 def _match_texts(self, texts, advance=True): 6415 if self._curr and self._curr.text.upper() in texts: 6416 if advance: 6417 self._advance() 6418 return True 6419 return None 6420 6421 def _match_text_seq(self, *texts, advance=True): 6422 index = self._index 6423 for text in texts: 6424 if self._curr and self._curr.text.upper() == text: 6425 self._advance() 6426 else: 6427 self._retreat(index) 6428 return None 6429 6430 if not advance: 6431 self._retreat(index) 6432 6433 return True 6434 6435 def _replace_lambda( 6436 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6437 ) -> t.Optional[exp.Expression]: 6438 if not node: 6439 return node 6440 6441 for column in node.find_all(exp.Column): 6442 if column.parts[0].name in lambda_variables: 6443 dot_or_id = column.to_dot() if column.table else column.this 6444 parent = column.parent 6445 6446 while isinstance(parent, exp.Dot): 6447 if not isinstance(parent.parent, exp.Dot): 6448 parent.replace(dot_or_id) 6449 break 6450 parent = parent.parent 6451 else: 6452 if column is node: 6453 node = dot_or_id 6454 else: 6455 column.replace(dot_or_id) 6456 return node 6457 6458 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6459 start = self._prev 6460 6461 # Not to be confused with TRUNCATE(number, decimals) function call 6462 if self._match(TokenType.L_PAREN): 6463 self._retreat(self._index - 2) 6464 return self._parse_function() 6465 6466 # Clickhouse supports TRUNCATE DATABASE as well 6467 is_database = self._match(TokenType.DATABASE) 6468 6469 self._match(TokenType.TABLE) 6470 6471 exists = self._parse_exists(not_=False) 6472 6473 expressions = self._parse_csv( 6474 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6475 ) 6476 6477 cluster = self._parse_on_property() if self._match(TokenType.ON) 
else None 6478 6479 if self._match_text_seq("RESTART", "IDENTITY"): 6480 identity = "RESTART" 6481 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6482 identity = "CONTINUE" 6483 else: 6484 identity = None 6485 6486 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6487 option = self._prev.text 6488 else: 6489 option = None 6490 6491 partition = self._parse_partition() 6492 6493 # Fallback case 6494 if self._curr: 6495 return self._parse_as_command(start) 6496 6497 return self.expression( 6498 exp.TruncateTable, 6499 expressions=expressions, 6500 is_database=is_database, 6501 exists=exists, 6502 cluster=cluster, 6503 identity=identity, 6504 option=option, 6505 partition=partition, 6506 ) 6507 6508 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6509 this = self._parse_ordered(self._parse_opclass) 6510 6511 if not self._match(TokenType.WITH): 6512 return this 6513 6514 op = self._parse_var(any_token=True) 6515 6516 return self.expression(exp.WithOperator, this=this, op=op) 6517 6518 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6519 opts = [] 6520 self._match(TokenType.EQ) 6521 self._match(TokenType.L_PAREN) 6522 while self._curr and not self._match(TokenType.R_PAREN): 6523 opts.append(self._parse_conjunction()) 6524 self._match(TokenType.COMMA) 6525 return opts 6526 6527 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6528 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6529 6530 options = [] 6531 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6532 option = self._parse_unquoted_field() 6533 value = None 6534 6535 # Some options are defined as functions with the values as params 6536 if not isinstance(option, exp.Func): 6537 prev = self._prev.text.upper() 6538 # Different dialects might separate options and values by white space, "=" and "AS" 6539 self._match(TokenType.EQ) 6540 self._match(TokenType.ALIAS) 6541 6542 if prev == "FILE_FORMAT" 
and self._match(TokenType.L_PAREN): 6543 # Snowflake FILE_FORMAT case 6544 value = self._parse_wrapped_options() 6545 else: 6546 value = self._parse_unquoted_field() 6547 6548 param = self.expression(exp.CopyParameter, this=option, expression=value) 6549 options.append(param) 6550 6551 if sep: 6552 self._match(sep) 6553 6554 return options 6555 6556 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6557 expr = self.expression(exp.Credentials) 6558 6559 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6560 expr.set("storage", self._parse_conjunction()) 6561 if self._match_text_seq("CREDENTIALS"): 6562 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6563 creds = ( 6564 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6565 ) 6566 expr.set("credentials", creds) 6567 if self._match_text_seq("ENCRYPTION"): 6568 expr.set("encryption", self._parse_wrapped_options()) 6569 if self._match_text_seq("IAM_ROLE"): 6570 expr.set("iam_role", self._parse_field()) 6571 if self._match_text_seq("REGION"): 6572 expr.set("region", self._parse_field()) 6573 6574 return expr 6575 6576 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6577 return self._parse_field() 6578 6579 def _parse_copy(self) -> exp.Copy | exp.Command: 6580 start = self._prev 6581 6582 self._match(TokenType.INTO) 6583 6584 this = ( 6585 self._parse_conjunction() 6586 if self._match(TokenType.L_PAREN, advance=False) 6587 else self._parse_table(schema=True) 6588 ) 6589 6590 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6591 6592 files = self._parse_csv(self._parse_file_location) 6593 credentials = self._parse_credentials() 6594 6595 self._match_text_seq("WITH") 6596 6597 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6598 6599 # Fallback case 6600 if self._curr: 6601 return self._parse_as_command(start) 6602 6603 return self.expression( 6604 exp.Copy, 6605 this=this, 6606 
kind=kind, 6607 credentials=credentials, 6608 files=files, 6609 params=params, 6610 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, else a VarMap from alternating key/value args."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    # Arguments alternate key, value, key, value, ...; an odd count raises IndexError,
    # matching the strict pairing contract.
    for idx in range(0, len(args), 2):
        keys.append(args[idx])
        values.append(args[idx + 1])

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from LOG(...) args, honoring the dialect's argument order."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument form: some dialects define LOG(x) as the natural logarithm.
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    if not dialect.LOG_BASE_FIRST:
        # The dialect puts the value first and the base second, so swap.
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder producing `expr_type` with a dialect-converted JSON path argument."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSON_EXTRACT accepts additional path arguments beyond the first two.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands so precedence survives re-rendering.

    e.g. MOD(a + 1, 7) -> (a + 1) % 7
    """

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Only binary nodes need explicit parens; atoms render unambiguously.
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
116class Parser(metaclass=_Parser): 117 """ 118 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 119 120 Args: 121 error_level: The desired error level. 122 Default: ErrorLevel.IMMEDIATE 123 error_message_context: The amount of context to capture from a query string when displaying 124 the error message (in number of characters). 125 Default: 100 126 max_errors: Maximum number of error messages to include in a raised ParseError. 127 This is only relevant if error_level is ErrorLevel.RAISE. 128 Default: 3 129 """ 130 131 FUNCTIONS: t.Dict[str, t.Callable] = { 132 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 133 "CONCAT": lambda args, dialect: exp.Concat( 134 expressions=args, 135 safe=not dialect.STRICT_STRING_CONCAT, 136 coalesce=dialect.CONCAT_COALESCE, 137 ), 138 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 139 expressions=args, 140 safe=not dialect.STRICT_STRING_CONCAT, 141 coalesce=dialect.CONCAT_COALESCE, 142 ), 143 "DATE_TO_DATE_STR": lambda args: exp.Cast( 144 this=seq_get(args, 0), 145 to=exp.DataType(this=exp.DataType.Type.TEXT), 146 ), 147 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 148 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 149 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 150 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 151 "LIKE": build_like, 152 "LOG": build_logarithm, 153 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 154 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 155 "MOD": build_mod, 156 "TIME_TO_TIME_STR": lambda args: exp.Cast( 157 this=seq_get(args, 0), 158 to=exp.DataType(this=exp.DataType.Type.TEXT), 159 ), 160 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 161 this=exp.Cast( 162 this=seq_get(args, 0), 163 
to=exp.DataType(this=exp.DataType.Type.TEXT), 164 ), 165 start=exp.Literal.number(1), 166 length=exp.Literal.number(10), 167 ), 168 "VAR_MAP": build_var_map, 169 "LOWER": build_lower, 170 "UPPER": build_upper, 171 "HEX": build_hex, 172 "TO_HEX": build_hex, 173 } 174 175 NO_PAREN_FUNCTIONS = { 176 TokenType.CURRENT_DATE: exp.CurrentDate, 177 TokenType.CURRENT_DATETIME: exp.CurrentDate, 178 TokenType.CURRENT_TIME: exp.CurrentTime, 179 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 180 TokenType.CURRENT_USER: exp.CurrentUser, 181 } 182 183 STRUCT_TYPE_TOKENS = { 184 TokenType.NESTED, 185 TokenType.OBJECT, 186 TokenType.STRUCT, 187 } 188 189 NESTED_TYPE_TOKENS = { 190 TokenType.ARRAY, 191 TokenType.LOWCARDINALITY, 192 TokenType.MAP, 193 TokenType.NULLABLE, 194 *STRUCT_TYPE_TOKENS, 195 } 196 197 ENUM_TYPE_TOKENS = { 198 TokenType.ENUM, 199 TokenType.ENUM8, 200 TokenType.ENUM16, 201 } 202 203 AGGREGATE_TYPE_TOKENS = { 204 TokenType.AGGREGATEFUNCTION, 205 TokenType.SIMPLEAGGREGATEFUNCTION, 206 } 207 208 TYPE_TOKENS = { 209 TokenType.BIT, 210 TokenType.BOOLEAN, 211 TokenType.TINYINT, 212 TokenType.UTINYINT, 213 TokenType.SMALLINT, 214 TokenType.USMALLINT, 215 TokenType.INT, 216 TokenType.UINT, 217 TokenType.BIGINT, 218 TokenType.UBIGINT, 219 TokenType.INT128, 220 TokenType.UINT128, 221 TokenType.INT256, 222 TokenType.UINT256, 223 TokenType.MEDIUMINT, 224 TokenType.UMEDIUMINT, 225 TokenType.FIXEDSTRING, 226 TokenType.FLOAT, 227 TokenType.DOUBLE, 228 TokenType.CHAR, 229 TokenType.NCHAR, 230 TokenType.VARCHAR, 231 TokenType.NVARCHAR, 232 TokenType.BPCHAR, 233 TokenType.TEXT, 234 TokenType.MEDIUMTEXT, 235 TokenType.LONGTEXT, 236 TokenType.MEDIUMBLOB, 237 TokenType.LONGBLOB, 238 TokenType.BINARY, 239 TokenType.VARBINARY, 240 TokenType.JSON, 241 TokenType.JSONB, 242 TokenType.INTERVAL, 243 TokenType.TINYBLOB, 244 TokenType.TINYTEXT, 245 TokenType.TIME, 246 TokenType.TIMETZ, 247 TokenType.TIMESTAMP, 248 TokenType.TIMESTAMP_S, 249 TokenType.TIMESTAMP_MS, 250 
TokenType.TIMESTAMP_NS, 251 TokenType.TIMESTAMPTZ, 252 TokenType.TIMESTAMPLTZ, 253 TokenType.TIMESTAMPNTZ, 254 TokenType.DATETIME, 255 TokenType.DATETIME64, 256 TokenType.DATE, 257 TokenType.DATE32, 258 TokenType.INT4RANGE, 259 TokenType.INT4MULTIRANGE, 260 TokenType.INT8RANGE, 261 TokenType.INT8MULTIRANGE, 262 TokenType.NUMRANGE, 263 TokenType.NUMMULTIRANGE, 264 TokenType.TSRANGE, 265 TokenType.TSMULTIRANGE, 266 TokenType.TSTZRANGE, 267 TokenType.TSTZMULTIRANGE, 268 TokenType.DATERANGE, 269 TokenType.DATEMULTIRANGE, 270 TokenType.DECIMAL, 271 TokenType.UDECIMAL, 272 TokenType.BIGDECIMAL, 273 TokenType.UUID, 274 TokenType.GEOGRAPHY, 275 TokenType.GEOMETRY, 276 TokenType.HLLSKETCH, 277 TokenType.HSTORE, 278 TokenType.PSEUDO_TYPE, 279 TokenType.SUPER, 280 TokenType.SERIAL, 281 TokenType.SMALLSERIAL, 282 TokenType.BIGSERIAL, 283 TokenType.XML, 284 TokenType.YEAR, 285 TokenType.UNIQUEIDENTIFIER, 286 TokenType.USERDEFINED, 287 TokenType.MONEY, 288 TokenType.SMALLMONEY, 289 TokenType.ROWVERSION, 290 TokenType.IMAGE, 291 TokenType.VARIANT, 292 TokenType.OBJECT, 293 TokenType.OBJECT_IDENTIFIER, 294 TokenType.INET, 295 TokenType.IPADDRESS, 296 TokenType.IPPREFIX, 297 TokenType.IPV4, 298 TokenType.IPV6, 299 TokenType.UNKNOWN, 300 TokenType.NULL, 301 TokenType.NAME, 302 TokenType.TDIGEST, 303 *ENUM_TYPE_TOKENS, 304 *NESTED_TYPE_TOKENS, 305 *AGGREGATE_TYPE_TOKENS, 306 } 307 308 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 309 TokenType.BIGINT: TokenType.UBIGINT, 310 TokenType.INT: TokenType.UINT, 311 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 312 TokenType.SMALLINT: TokenType.USMALLINT, 313 TokenType.TINYINT: TokenType.UTINYINT, 314 TokenType.DECIMAL: TokenType.UDECIMAL, 315 } 316 317 SUBQUERY_PREDICATES = { 318 TokenType.ANY: exp.Any, 319 TokenType.ALL: exp.All, 320 TokenType.EXISTS: exp.Exists, 321 TokenType.SOME: exp.Any, 322 } 323 324 RESERVED_TOKENS = { 325 *Tokenizer.SINGLE_TOKENS.values(), 326 TokenType.SELECT, 327 } - {TokenType.IDENTIFIER} 328 329 DB_CREATABLES = { 330 
TokenType.DATABASE, 331 TokenType.DICTIONARY, 332 TokenType.MODEL, 333 TokenType.SCHEMA, 334 TokenType.SEQUENCE, 335 TokenType.STORAGE_INTEGRATION, 336 TokenType.TABLE, 337 TokenType.TAG, 338 TokenType.VIEW, 339 } 340 341 CREATABLES = { 342 TokenType.COLUMN, 343 TokenType.CONSTRAINT, 344 TokenType.FOREIGN_KEY, 345 TokenType.FUNCTION, 346 TokenType.INDEX, 347 TokenType.PROCEDURE, 348 *DB_CREATABLES, 349 } 350 351 # Tokens that can represent identifiers 352 ID_VAR_TOKENS = { 353 TokenType.VAR, 354 TokenType.ANTI, 355 TokenType.APPLY, 356 TokenType.ASC, 357 TokenType.ASOF, 358 TokenType.AUTO_INCREMENT, 359 TokenType.BEGIN, 360 TokenType.BPCHAR, 361 TokenType.CACHE, 362 TokenType.CASE, 363 TokenType.COLLATE, 364 TokenType.COMMAND, 365 TokenType.COMMENT, 366 TokenType.COMMIT, 367 TokenType.CONSTRAINT, 368 TokenType.COPY, 369 TokenType.DEFAULT, 370 TokenType.DELETE, 371 TokenType.DESC, 372 TokenType.DESCRIBE, 373 TokenType.DICTIONARY, 374 TokenType.DIV, 375 TokenType.END, 376 TokenType.EXECUTE, 377 TokenType.ESCAPE, 378 TokenType.FALSE, 379 TokenType.FIRST, 380 TokenType.FILTER, 381 TokenType.FINAL, 382 TokenType.FORMAT, 383 TokenType.FULL, 384 TokenType.IDENTIFIER, 385 TokenType.IS, 386 TokenType.ISNULL, 387 TokenType.INTERVAL, 388 TokenType.KEEP, 389 TokenType.KILL, 390 TokenType.LEFT, 391 TokenType.LOAD, 392 TokenType.MERGE, 393 TokenType.NATURAL, 394 TokenType.NEXT, 395 TokenType.OFFSET, 396 TokenType.OPERATOR, 397 TokenType.ORDINALITY, 398 TokenType.OVERLAPS, 399 TokenType.OVERWRITE, 400 TokenType.PARTITION, 401 TokenType.PERCENT, 402 TokenType.PIVOT, 403 TokenType.PRAGMA, 404 TokenType.RANGE, 405 TokenType.RECURSIVE, 406 TokenType.REFERENCES, 407 TokenType.REFRESH, 408 TokenType.REPLACE, 409 TokenType.RIGHT, 410 TokenType.ROLLUP, 411 TokenType.ROW, 412 TokenType.ROWS, 413 TokenType.SEMI, 414 TokenType.SET, 415 TokenType.SETTINGS, 416 TokenType.SHOW, 417 TokenType.TEMPORARY, 418 TokenType.TOP, 419 TokenType.TRUE, 420 TokenType.TRUNCATE, 421 TokenType.UNIQUE, 422 
TokenType.UNPIVOT, 423 TokenType.UPDATE, 424 TokenType.USE, 425 TokenType.VOLATILE, 426 TokenType.WINDOW, 427 *CREATABLES, 428 *SUBQUERY_PREDICATES, 429 *TYPE_TOKENS, 430 *NO_PAREN_FUNCTIONS, 431 } 432 433 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 434 435 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 436 TokenType.ANTI, 437 TokenType.APPLY, 438 TokenType.ASOF, 439 TokenType.FULL, 440 TokenType.LEFT, 441 TokenType.LOCK, 442 TokenType.NATURAL, 443 TokenType.OFFSET, 444 TokenType.RIGHT, 445 TokenType.SEMI, 446 TokenType.WINDOW, 447 } 448 449 ALIAS_TOKENS = ID_VAR_TOKENS 450 451 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 452 453 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 454 455 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 456 457 FUNC_TOKENS = { 458 TokenType.COLLATE, 459 TokenType.COMMAND, 460 TokenType.CURRENT_DATE, 461 TokenType.CURRENT_DATETIME, 462 TokenType.CURRENT_TIMESTAMP, 463 TokenType.CURRENT_TIME, 464 TokenType.CURRENT_USER, 465 TokenType.FILTER, 466 TokenType.FIRST, 467 TokenType.FORMAT, 468 TokenType.GLOB, 469 TokenType.IDENTIFIER, 470 TokenType.INDEX, 471 TokenType.ISNULL, 472 TokenType.ILIKE, 473 TokenType.INSERT, 474 TokenType.LIKE, 475 TokenType.MERGE, 476 TokenType.OFFSET, 477 TokenType.PRIMARY_KEY, 478 TokenType.RANGE, 479 TokenType.REPLACE, 480 TokenType.RLIKE, 481 TokenType.ROW, 482 TokenType.UNNEST, 483 TokenType.VAR, 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.SEQUENCE, 487 TokenType.DATE, 488 TokenType.DATETIME, 489 TokenType.TABLE, 490 TokenType.TIMESTAMP, 491 TokenType.TIMESTAMPTZ, 492 TokenType.TRUNCATE, 493 TokenType.WINDOW, 494 TokenType.XOR, 495 *TYPE_TOKENS, 496 *SUBQUERY_PREDICATES, 497 } 498 499 CONJUNCTION = { 500 TokenType.AND: exp.And, 501 TokenType.OR: exp.Or, 502 } 503 504 EQUALITY = { 505 TokenType.EQ: exp.EQ, 506 TokenType.NEQ: exp.NEQ, 507 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 508 } 509 510 COMPARISON = { 511 TokenType.GT: exp.GT, 512 TokenType.GTE: exp.GTE, 513 TokenType.LT: 
        exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Exponentiation operators; empty here, filled in by dialects that
    # support an operator such as ** or ^.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    # Time-only data type tokens.
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    # All timestamp-like data type tokens (includes the time-only ones).
    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Join hint keywords; empty by default, populated by dialects.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: "x -> expr" builds a Lambda; "x => expr" builds a
    # keyword argument (Kwarg). The parser supplies the already-parsed
    # parameter expressions.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column: casts (::), JSON
    # extraction arrows, and JSONB operators. DOT maps to None because
    # member access is handled specially by the parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: maps a target Expression type to
    # the parser method that produces it.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self:
        seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: the first token of a statement picks
    # the parser method for the whole statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-literal token -> Expression constructors (each receives the
    # literal's Token so it can read token.text).
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-literal token -> Expression constructors.
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # Primary (atomic) expressions: all literals plus NULL, booleans,
    # introducers, session parameters and the star projection.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER:
        lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            # "except" is a Python keyword, hence the dict-splat spelling
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    # Bind-parameter syntaxes (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Infix predicates that take an already-parsed left-hand side
    # (BETWEEN, IN, IS, LIKE-family, etc.).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> parser for that property's value.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self:
        self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keyword -> parser for that constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> vs a bare ON <id> property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE sub-command dispatch (keyword after the table name).
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # ALTER TABLE ... ALTER <keyword> dispatch.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Tokens that can never start a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(a AS x)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression types that represent a key/value-style definition.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ,
        exp.Slice)

    # Functions with non-standard argument syntax that need a dedicated
    # parser (e.g. CAST(x AS t), EXTRACT(part FROM x), TRIM(... FROM ...)).
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier clauses: each parser returns a (modifier_key, node)
    # pair that is attached to the enclosing query expression.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scope keywords.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty by default, populated by dialects.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Post-processing for literals cast to specific types.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Dialect hooks for rewriting parsed DataType nodes; empty by default.
    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    # Tokens that can begin the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    # NOTE(review): "UNCOMITTED" below looks like a typo for "UNCOMMITTED"
    # (SQL standard spelling). Kept as-is since changing the string changes
    # runtime keyword matching -- confirm against dialect tests first.
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    # ON CONFLICT / OR <action> alternatives; "DO" additionally accepts
    # NOTHING or UPDATE as a follow-up keyword.
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    # CREATE SEQUENCE option keywords and their allowed follow-ups.
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds accepted by the USE statement.
    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    # INSERT OR <alternative> keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # ROWS is excluded so window frame clauses aren't read as aliases.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # FETCH clause keywords (ROW/ROWS/PERCENT) must not be read as vars.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    # Whether :: casting is strict (Cast) or lenient (TryCast).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG(x) means natural log (see build_logarithm).
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to IMMEDIATE, i.e. raise).
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: Dialect instance/name used for dialect-specific behavior.
        """
        # Imported lazily to avoid a circular import with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1234 """ 1235 return self._parse( 1236 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1237 ) 1238 1239 def parse_into( 1240 self, 1241 expression_types: exp.IntoType, 1242 raw_tokens: t.List[Token], 1243 sql: t.Optional[str] = None, 1244 ) -> t.List[t.Optional[exp.Expression]]: 1245 """ 1246 Parses a list of tokens into a given Expression type. If a collection of Expression 1247 types is given instead, this method will try to parse the token list into each one 1248 of them, stopping at the first for which the parsing succeeds. 1249 1250 Args: 1251 expression_types: The expression type(s) to try and parse the token list into. 1252 raw_tokens: The list of tokens. 1253 sql: The original SQL string, used to produce helpful debug messages. 1254 1255 Returns: 1256 The target Expression. 1257 """ 1258 errors = [] 1259 for expression_type in ensure_list(expression_types): 1260 parser = self.EXPRESSION_PARSERS.get(expression_type) 1261 if not parser: 1262 raise TypeError(f"No parser registered for {expression_type}") 1263 1264 try: 1265 return self._parse(parser, raw_tokens, sql) 1266 except ParseError as e: 1267 e.errors[0]["into_expression"] = expression_type 1268 errors.append(e) 1269 1270 raise ParseError( 1271 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1272 errors=merge_errors(errors), 1273 ) from errors[-1] 1274 1275 def _parse( 1276 self, 1277 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1278 raw_tokens: t.List[Token], 1279 sql: t.Optional[str] = None, 1280 ) -> t.List[t.Optional[exp.Expression]]: 1281 self.reset() 1282 self.sql = sql or "" 1283 1284 total = len(raw_tokens) 1285 chunks: t.List[t.List[Token]] = [[]] 1286 1287 for i, token in enumerate(raw_tokens): 1288 if token.token_type == TokenType.SEMICOLON: 1289 if token.comments: 1290 chunks.append([token]) 1291 1292 if i < total - 1: 1293 chunks.append([]) 1294 else: 1295 chunks[-1].append(token) 1296 1297 expressions = [] 1298 1299 for 
tokens in chunks: 1300 self._index = -1 1301 self._tokens = tokens 1302 self._advance() 1303 1304 expressions.append(parse_method(self)) 1305 1306 if self._index < len(self._tokens): 1307 self.raise_error("Invalid expression / Unexpected token") 1308 1309 self.check_errors() 1310 1311 return expressions 1312 1313 def check_errors(self) -> None: 1314 """Logs or raises any found errors, depending on the chosen error level setting.""" 1315 if self.error_level == ErrorLevel.WARN: 1316 for error in self.errors: 1317 logger.error(str(error)) 1318 elif self.error_level == ErrorLevel.RAISE and self.errors: 1319 raise ParseError( 1320 concat_messages(self.errors, self.max_errors), 1321 errors=merge_errors(self.errors), 1322 ) 1323 1324 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1325 """ 1326 Appends an error in the list of recorded errors or raises it, depending on the chosen 1327 error level setting. 1328 """ 1329 token = token or self._curr or self._prev or Token.string("") 1330 start = token.start 1331 end = token.end + 1 1332 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1333 highlight = self.sql[start:end] 1334 end_context = self.sql[end : end + self.error_message_context] 1335 1336 error = ParseError.new( 1337 f"{message}. Line {token.line}, Col: {token.col}.\n" 1338 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1339 description=message, 1340 line=token.line, 1341 col=token.col, 1342 start_context=start_context, 1343 highlight=highlight, 1344 end_context=end_context, 1345 ) 1346 1347 if self.error_level == ErrorLevel.IMMEDIATE: 1348 raise error 1349 1350 self.errors.append(error) 1351 1352 def expression( 1353 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1354 ) -> E: 1355 """ 1356 Creates a new, validated Expression. 1357 1358 Args: 1359 exp_class: The expression class to instantiate. 1360 comments: An optional list of comments to attach to the expression. 
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches any pending `_prev_comments` to `expression` and clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of `self.sql` spanned by the `start` and `end` tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """Whether `_prev` and `_curr` are adjacent in the source text (no whitespace between them)."""
        # NOTE(review): returns a falsy non-bool (None) when either token is missing,
        # despite the `bool` annotation — callers only use it in boolean context.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens forward, refreshing `_curr`, `_next`, `_prev` and `_prev_comments`."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back (or forward) to the given absolute token `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        """Warns that the current chunk contains unsupported syntax and will be parsed as a Command."""
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the previous token (uppercased) and the following string into an exp.Command."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any parse failure surfaces as a ParseError we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <name> ... statements."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown comment target: fall back to an opaque Command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly schema-qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses ClickHouse MergeTree TTL clauses (expr [DELETE|RECOMPRESS|TO DISK|TO VOLUME] ... [WHERE] [GROUP BY [SET]])."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: dispatches to STATEMENT_PARSERS / COMMANDS, else parses an expression or SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to Command when the dropped kind is not a known creatable."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; truthy only when the full sequence was consumed."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE|OR ALTER] [UNIQUE] <kind> ...; falls back to Command for unknown kinds."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # e.g. CREATE TABLE FUNCTION: skip TABLE so FUNCTION is matched as the creatable
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Trailing tokens we can't account for: treat the whole statement as a Command
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options (INCREMENT, MINVALUE, ...); returns None if nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the schema, passing matched modifiers to the property parser."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property, trying PROPERTY_PARSERS first, then `key = value`, then sequence options."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> (including Hive INPUTFORMAT/OUTPUTFORMAT pairs)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parses a field, downgrading unquoted identifiers to plain Var nodes."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `[= | AS] <value>` into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parses consecutive properties; returns None when nothing matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property after a creatable keyword vs a UDF stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parses T-SQL SYSTEM_VERSIONING = {OFF | ON [(HISTORY_TABLE = ..., ...)]}."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parses DATA_DELETION = {ON | OFF} [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]."""
        self._match(TokenType.EQ)
        # ON is the default when neither ON nor OFF is given
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatches the various WITH ... property forms (wrapped lists, JOURNAL, DATA, SERDEPROPERTIES, ...)."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Builds a JournalProperty from the modifiers matched by the property dispatcher."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM = {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parses a CLUSTER BY list of ordered expressions, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; backtracks over the already-consumed COPY token on mismatch."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE = <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either `= <number> [PERCENT]` or the NO/DEFAULT modifier forms."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses [MIN|MAX|DEFAULT] DATABLOCKSIZE [= <size> [BYTES|KBYTES|KILOBYTES]]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parses [NO] [CONCURRENT] ISOLATED LOADING [FOR ...]; backtracks fully on mismatch."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses LOCKING {DATABASE|TABLE|VIEW|ROW} [<name>] {FOR|IN} <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no object name; the named kinds may be followed by one
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses PARTITION BY <expr, ...>; returns an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parses a Postgres partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER r)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
self._match_text_seq("TO") 2195 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2196 elif self._match_text_seq("WITH", "(", "MODULUS"): 2197 this = self._parse_number() 2198 self._match_text_seq(",", "REMAINDER") 2199 expression = self._parse_number() 2200 self._match_r_paren() 2201 else: 2202 self.raise_error("Failed to parse partition bound spec.") 2203 2204 return self.expression( 2205 exp.PartitionBoundSpec, 2206 this=this, 2207 expression=expression, 2208 from_expressions=from_expressions, 2209 to_expressions=to_expressions, 2210 ) 2211 2212 # https://www.postgresql.org/docs/current/sql-createtable.html 2213 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2214 if not self._match_text_seq("OF"): 2215 self._retreat(self._index - 1) 2216 return None 2217 2218 this = self._parse_table(schema=True) 2219 2220 if self._match(TokenType.DEFAULT): 2221 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2222 elif self._match_text_seq("FOR", "VALUES"): 2223 expression = self._parse_partition_bound_spec() 2224 else: 2225 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2226 2227 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2228 2229 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2230 self._match(TokenType.EQ) 2231 return self.expression( 2232 exp.PartitionedByProperty, 2233 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2234 ) 2235 2236 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2237 if self._match_text_seq("AND", "STATISTICS"): 2238 statistics = True 2239 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2240 statistics = False 2241 else: 2242 statistics = None 2243 2244 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2245 2246 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2247 if self._match_text_seq("SQL"): 2248 return 
self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2249 return None 2250 2251 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2252 if self._match_text_seq("SQL", "DATA"): 2253 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2254 return None 2255 2256 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2257 if self._match_text_seq("PRIMARY", "INDEX"): 2258 return exp.NoPrimaryIndexProperty() 2259 if self._match_text_seq("SQL"): 2260 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2261 return None 2262 2263 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2264 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2265 return exp.OnCommitProperty() 2266 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2267 return exp.OnCommitProperty(delete=True) 2268 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2269 2270 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2271 if self._match_text_seq("SQL", "DATA"): 2272 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2273 return None 2274 2275 def _parse_distkey(self) -> exp.DistKeyProperty: 2276 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2277 2278 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2279 table = self._parse_table(schema=True) 2280 2281 options = [] 2282 while self._match_texts(("INCLUDING", "EXCLUDING")): 2283 this = self._prev.text.upper() 2284 2285 id_var = self._parse_id_var() 2286 if not id_var: 2287 return None 2288 2289 options.append( 2290 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2291 ) 2292 2293 return self.expression(exp.LikeProperty, this=table, expressions=options) 2294 2295 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2296 return self.expression( 2297 exp.SortKeyProperty, 
this=self._parse_wrapped_id_vars(), compound=compound 2298 ) 2299 2300 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2301 self._match(TokenType.EQ) 2302 return self.expression( 2303 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2304 ) 2305 2306 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2307 self._match_text_seq("WITH", "CONNECTION") 2308 return self.expression( 2309 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2310 ) 2311 2312 def _parse_returns(self) -> exp.ReturnsProperty: 2313 value: t.Optional[exp.Expression] 2314 null = None 2315 is_table = self._match(TokenType.TABLE) 2316 2317 if is_table: 2318 if self._match(TokenType.LT): 2319 value = self.expression( 2320 exp.Schema, 2321 this="TABLE", 2322 expressions=self._parse_csv(self._parse_struct_types), 2323 ) 2324 if not self._match(TokenType.GT): 2325 self.raise_error("Expecting >") 2326 else: 2327 value = self._parse_schema(exp.var("TABLE")) 2328 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2329 null = True 2330 value = None 2331 else: 2332 value = self._parse_types() 2333 2334 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2335 2336 def _parse_describe(self) -> exp.Describe: 2337 kind = self._match_set(self.CREATABLES) and self._prev.text 2338 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2339 if self._match(TokenType.DOT): 2340 style = None 2341 self._retreat(self._index - 2) 2342 this = self._parse_table(schema=True) 2343 properties = self._parse_properties() 2344 expressions = properties.expressions if properties else None 2345 return self.expression( 2346 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2347 ) 2348 2349 def _parse_insert(self) -> exp.Insert: 2350 comments = ensure_list(self._prev_comments) 2351 hint = self._parse_hint() 2352 overwrite = 
self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...] — writes to a
            # filesystem target instead of a table (Hive-style; row format grounded
            # by the _parse_row_format call below).
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # INSERT OR <alternative> (e.g. REPLACE/IGNORE), restricted to the
                # dialect's INSERT_ALTERNATIVES set.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            # Target is either a table (with optional partition spec) or, for
            # FUNCTION targets, a function call.
            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        # RETURNING may appear before or after the source expression, hence the
        # `returning or self._parse_returning()` fallback in the kwargs below.
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: an optional CONNECTION/QUERY kind, then an id expression."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... after an INSERT, or return None."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not 
conflict and not duplicate: 2415 return None 2416 2417 conflict_keys = None 2418 constraint = None 2419 2420 if conflict: 2421 if self._match_text_seq("ON", "CONSTRAINT"): 2422 constraint = self._parse_id_var() 2423 elif self._match(TokenType.L_PAREN): 2424 conflict_keys = self._parse_csv(self._parse_id_var) 2425 self._match_r_paren() 2426 2427 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2428 if self._prev.token_type == TokenType.UPDATE: 2429 self._match(TokenType.SET) 2430 expressions = self._parse_csv(self._parse_equality) 2431 else: 2432 expressions = None 2433 2434 return self.expression( 2435 exp.OnConflict, 2436 duplicate=duplicate, 2437 expressions=expressions, 2438 action=action, 2439 conflict_keys=conflict_keys, 2440 constraint=constraint, 2441 ) 2442 2443 def _parse_returning(self) -> t.Optional[exp.Returning]: 2444 if not self._match(TokenType.RETURNING): 2445 return None 2446 return self.expression( 2447 exp.Returning, 2448 expressions=self._parse_csv(self._parse_expression), 2449 into=self._match(TokenType.INTO) and self._parse_table_part(), 2450 ) 2451 2452 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2453 if not self._match(TokenType.FORMAT): 2454 return None 2455 return self._parse_row_format() 2456 2457 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2458 index = self._index 2459 with_ = with_ or self._match_text_seq("WITH") 2460 2461 if not self._match(TokenType.SERDE_PROPERTIES): 2462 self._retreat(index) 2463 return None 2464 return self.expression( 2465 exp.SerdeProperties, 2466 **{ # type: ignore 2467 "expressions": self._parse_wrapped_properties(), 2468 "with": with_, 2469 }, 2470 ) 2471 2472 def _parse_row_format( 2473 self, match_row: bool = False 2474 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2475 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2476 return None 2477 2478 
if self._match_text_seq("SERDE"): 2479 this = self._parse_string() 2480 2481 serde_properties = self._parse_serde_properties() 2482 2483 return self.expression( 2484 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2485 ) 2486 2487 self._match_text_seq("DELIMITED") 2488 2489 kwargs = {} 2490 2491 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2492 kwargs["fields"] = self._parse_string() 2493 if self._match_text_seq("ESCAPED", "BY"): 2494 kwargs["escaped"] = self._parse_string() 2495 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2496 kwargs["collection_items"] = self._parse_string() 2497 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2498 kwargs["map_keys"] = self._parse_string() 2499 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2500 kwargs["lines"] = self._parse_string() 2501 if self._match_text_seq("NULL", "DEFINED", "AS"): 2502 kwargs["null"] = self._parse_string() 2503 2504 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2505 2506 def _parse_load(self) -> exp.LoadData | exp.Command: 2507 if self._match_text_seq("DATA"): 2508 local = self._match_text_seq("LOCAL") 2509 self._match_text_seq("INPATH") 2510 inpath = self._parse_string() 2511 overwrite = self._match(TokenType.OVERWRITE) 2512 self._match_pair(TokenType.INTO, TokenType.TABLE) 2513 2514 return self.expression( 2515 exp.LoadData, 2516 this=self._parse_table(schema=True), 2517 local=local, 2518 overwrite=overwrite, 2519 inpath=inpath, 2520 partition=self._parse_partition(), 2521 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2522 serde=self._match_text_seq("SERDE") and self._parse_string(), 2523 ) 2524 return self._parse_as_command(self._prev) 2525 2526 def _parse_delete(self) -> exp.Delete: 2527 # This handles MySQL's "Multiple-Table Syntax" 2528 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2529 tables = None 2530 comments = self._prev_comments 2531 if not 
self._match(TokenType.FROM, advance=False): 2532 tables = self._parse_csv(self._parse_table) or None 2533 2534 returning = self._parse_returning() 2535 2536 return self.expression( 2537 exp.Delete, 2538 comments=comments, 2539 tables=tables, 2540 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2541 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2542 where=self._parse_where(), 2543 returning=returning or self._parse_returning(), 2544 limit=self._parse_limit(), 2545 ) 2546 2547 def _parse_update(self) -> exp.Update: 2548 comments = self._prev_comments 2549 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2550 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2551 returning = self._parse_returning() 2552 return self.expression( 2553 exp.Update, 2554 comments=comments, 2555 **{ # type: ignore 2556 "this": this, 2557 "expressions": expressions, 2558 "from": self._parse_from(joins=True), 2559 "where": self._parse_where(), 2560 "returning": returning or self._parse_returning(), 2561 "order": self._parse_order(), 2562 "limit": self._parse_limit(), 2563 }, 2564 ) 2565 2566 def _parse_uncache(self) -> exp.Uncache: 2567 if not self._match(TokenType.TABLE): 2568 self.raise_error("Expecting TABLE after UNCACHE") 2569 2570 return self.expression( 2571 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2572 ) 2573 2574 def _parse_cache(self) -> exp.Cache: 2575 lazy = self._match_text_seq("LAZY") 2576 self._match(TokenType.TABLE) 2577 table = self._parse_table(schema=True) 2578 2579 options = [] 2580 if self._match_text_seq("OPTIONS"): 2581 self._match_l_paren() 2582 k = self._parse_string() 2583 self._match(TokenType.EQ) 2584 v = self._parse_string() 2585 options = [k, v] 2586 self._match_r_paren() 2587 2588 self._match(TokenType.ALIAS) 2589 return self.expression( 2590 exp.Cache, 2591 this=table, 2592 lazy=lazy, 2593 options=options, 2594 
expression=self._parse_select(nested=True), 2595 ) 2596 2597 def _parse_partition(self) -> t.Optional[exp.Partition]: 2598 if not self._match(TokenType.PARTITION): 2599 return None 2600 2601 return self.expression( 2602 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2603 ) 2604 2605 def _parse_value(self) -> t.Optional[exp.Tuple]: 2606 if self._match(TokenType.L_PAREN): 2607 expressions = self._parse_csv(self._parse_expression) 2608 self._match_r_paren() 2609 return self.expression(exp.Tuple, expressions=expressions) 2610 2611 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2612 expression = self._parse_expression() 2613 if expression: 2614 return self.expression(exp.Tuple, expressions=[expression]) 2615 return None 2616 2617 def _parse_projections(self) -> t.List[exp.Expression]: 2618 return self._parse_expressions() 2619 2620 def _parse_select( 2621 self, 2622 nested: bool = False, 2623 table: bool = False, 2624 parse_subquery_alias: bool = True, 2625 parse_set_operation: bool = True, 2626 ) -> t.Optional[exp.Expression]: 2627 cte = self._parse_with() 2628 2629 if cte: 2630 this = self._parse_statement() 2631 2632 if not this: 2633 self.raise_error("Failed to parse any statement following CTE") 2634 return cte 2635 2636 if "with" in this.arg_types: 2637 this.set("with", cte) 2638 else: 2639 self.raise_error(f"{this.key} does not support CTE") 2640 this = cte 2641 2642 return this 2643 2644 # duckdb supports leading with FROM x 2645 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2646 2647 if self._match(TokenType.SELECT): 2648 comments = self._prev_comments 2649 2650 hint = self._parse_hint() 2651 all_ = self._match(TokenType.ALL) 2652 distinct = self._match_set(self.DISTINCT_TOKENS) 2653 2654 kind = ( 2655 self._match(TokenType.ALIAS) 2656 and self._match_texts(("STRUCT", "VALUE")) 2657 and self._prev.text.upper() 2658 ) 2659 2660 if distinct: 2661 distinct = 
self.expression( 2662 exp.Distinct, 2663 on=self._parse_value() if self._match(TokenType.ON) else None, 2664 ) 2665 2666 if all_ and distinct: 2667 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2668 2669 limit = self._parse_limit(top=True) 2670 projections = self._parse_projections() 2671 2672 this = self.expression( 2673 exp.Select, 2674 kind=kind, 2675 hint=hint, 2676 distinct=distinct, 2677 expressions=projections, 2678 limit=limit, 2679 ) 2680 this.comments = comments 2681 2682 into = self._parse_into() 2683 if into: 2684 this.set("into", into) 2685 2686 if not from_: 2687 from_ = self._parse_from() 2688 2689 if from_: 2690 this.set("from", from_) 2691 2692 this = self._parse_query_modifiers(this) 2693 elif (table or nested) and self._match(TokenType.L_PAREN): 2694 if self._match(TokenType.PIVOT): 2695 this = self._parse_simplified_pivot() 2696 elif self._match(TokenType.FROM): 2697 this = exp.select("*").from_( 2698 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2699 ) 2700 else: 2701 this = ( 2702 self._parse_table() 2703 if table 2704 else self._parse_select(nested=True, parse_set_operation=False) 2705 ) 2706 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2707 2708 self._match_r_paren() 2709 2710 # We return early here so that the UNION isn't attached to the subquery by the 2711 # following call to _parse_set_operations, but instead becomes the parent node 2712 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2713 elif self._match(TokenType.VALUES, advance=False): 2714 this = self._parse_derived_table_values() 2715 elif from_: 2716 this = exp.select("*").from_(from_.this, copy=False) 2717 else: 2718 this = None 2719 2720 if parse_set_operation: 2721 return self._parse_set_operations(this) 2722 return this 2723 2724 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2725 if not skip_with_token and not self._match(TokenType.WITH): 2726 return None 2727 2728 
comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray repeated WITH between CTEs is
            # also tolerated and consumed.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: alias [AS] [NOT MATERIALIZED | MATERIALIZED] (statement).

        Raises a ParseError if the CTE has no alias.
        """
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        # materialized: True / False when explicitly requested, None when unspecified.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] alias [(col, ...)]; returns None when neither alias nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parenthesized list was empty, backtrack: the "(" wasn't a
            # column list after all.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node with optional pivots and alias; None passes through."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
alias=self._parse_table_alias() if parse_alias else None, 2797 ) 2798 2799 def _implicit_unnests_to_explicit(self, this: E) -> E: 2800 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2801 2802 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2803 for i, join in enumerate(this.args.get("joins") or []): 2804 table = join.this 2805 normalized_table = table.copy() 2806 normalized_table.meta["maybe_column"] = True 2807 normalized_table = _norm(normalized_table, dialect=self.dialect) 2808 2809 if isinstance(table, exp.Table) and not join.args.get("on"): 2810 if normalized_table.parts[0].name in refs: 2811 table_as_column = table.to_column() 2812 unnest = exp.Unnest(expressions=[table_as_column]) 2813 2814 # Table.to_column creates a parent Alias node that we want to convert to 2815 # a TableAlias and attach to the Unnest, so it matches the parser's output 2816 if isinstance(table.args.get("alias"), exp.TableAlias): 2817 table_as_column.replace(table_as_column.this) 2818 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2819 2820 table.replace(unnest) 2821 2822 refs.add(normalized_table.alias_or_name) 2823 2824 return this 2825 2826 def _parse_query_modifiers( 2827 self, this: t.Optional[exp.Expression] 2828 ) -> t.Optional[exp.Expression]: 2829 if isinstance(this, (exp.Query, exp.Table)): 2830 for join in self._parse_joins(): 2831 this.append("joins", join) 2832 for lateral in iter(self._parse_lateral, None): 2833 this.append("laterals", lateral) 2834 2835 while True: 2836 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2837 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2838 key, expression = parser(self) 2839 2840 if expression: 2841 this.set(key, expression) 2842 if key == "limit": 2843 offset = expression.args.pop("offset", None) 2844 2845 if offset: 2846 offset = exp.Offset(expression=offset) 2847 this.set("offset", offset) 2848 2849 
limit_by_expressions = expression.expressions 2850 expression.set("expressions", None) 2851 offset.set("expressions", limit_by_expressions) 2852 continue 2853 break 2854 2855 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2856 this = self._implicit_unnests_to_explicit(this) 2857 2858 return this 2859 2860 def _parse_hint(self) -> t.Optional[exp.Hint]: 2861 if self._match(TokenType.HINT): 2862 hints = [] 2863 for hint in iter( 2864 lambda: self._parse_csv( 2865 lambda: self._parse_function() or self._parse_var(upper=True) 2866 ), 2867 [], 2868 ): 2869 hints.extend(hint) 2870 2871 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2872 self.raise_error("Expected */ after HINT") 2873 2874 return self.expression(exp.Hint, expressions=hints) 2875 2876 return None 2877 2878 def _parse_into(self) -> t.Optional[exp.Into]: 2879 if not self._match(TokenType.INTO): 2880 return None 2881 2882 temp = self._match(TokenType.TEMPORARY) 2883 unlogged = self._match_text_seq("UNLOGGED") 2884 self._match(TokenType.TABLE) 2885 2886 return self.expression( 2887 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2888 ) 2889 2890 def _parse_from( 2891 self, joins: bool = False, skip_from_token: bool = False 2892 ) -> t.Optional[exp.From]: 2893 if not skip_from_token and not self._match(TokenType.FROM): 2894 return None 2895 2896 return self.expression( 2897 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2898 ) 2899 2900 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2901 return self.expression( 2902 exp.MatchRecognizeMeasure, 2903 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2904 this=self._parse_expression(), 2905 ) 2906 2907 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2908 if not self._match(TokenType.MATCH_RECOGNIZE): 2909 return None 2910 2911 self._match_l_paren() 2912 2913 partition = self._parse_partition_by() 
2914 order = self._parse_order() 2915 2916 measures = ( 2917 self._parse_csv(self._parse_match_recognize_measure) 2918 if self._match_text_seq("MEASURES") 2919 else None 2920 ) 2921 2922 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2923 rows = exp.var("ONE ROW PER MATCH") 2924 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2925 text = "ALL ROWS PER MATCH" 2926 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2927 text += " SHOW EMPTY MATCHES" 2928 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2929 text += " OMIT EMPTY MATCHES" 2930 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2931 text += " WITH UNMATCHED ROWS" 2932 rows = exp.var(text) 2933 else: 2934 rows = None 2935 2936 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2937 text = "AFTER MATCH SKIP" 2938 if self._match_text_seq("PAST", "LAST", "ROW"): 2939 text += " PAST LAST ROW" 2940 elif self._match_text_seq("TO", "NEXT", "ROW"): 2941 text += " TO NEXT ROW" 2942 elif self._match_text_seq("TO", "FIRST"): 2943 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2944 elif self._match_text_seq("TO", "LAST"): 2945 text += f" TO LAST {self._advance_any().text}" # type: ignore 2946 after = exp.var(text) 2947 else: 2948 after = None 2949 2950 if self._match_text_seq("PATTERN"): 2951 self._match_l_paren() 2952 2953 if not self._curr: 2954 self.raise_error("Expecting )", self._curr) 2955 2956 paren = 1 2957 start = self._curr 2958 2959 while self._curr and paren > 0: 2960 if self._curr.token_type == TokenType.L_PAREN: 2961 paren += 1 2962 if self._curr.token_type == TokenType.R_PAREN: 2963 paren -= 1 2964 2965 end = self._prev 2966 self._advance() 2967 2968 if paren > 0: 2969 self.raise_error("Expecting )", self._curr) 2970 2971 pattern = exp.var(self._find_sql(start, end)) 2972 else: 2973 pattern = None 2974 2975 define = ( 2976 self._parse_csv(self._parse_name_as_expression) 2977 if self._match_text_seq("DEFINE") 2978 else None 2979 ) 2980 2981 
self._match_r_paren() 2982 2983 return self.expression( 2984 exp.MatchRecognize, 2985 partition_by=partition, 2986 order=order, 2987 measures=measures, 2988 rows=rows, 2989 after=after, 2990 pattern=pattern, 2991 define=define, 2992 alias=self._parse_table_alias(), 2993 ) 2994 2995 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2996 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2997 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2998 cross_apply = False 2999 3000 if cross_apply is not None: 3001 this = self._parse_select(table=True) 3002 view = None 3003 outer = None 3004 elif self._match(TokenType.LATERAL): 3005 this = self._parse_select(table=True) 3006 view = self._match(TokenType.VIEW) 3007 outer = self._match(TokenType.OUTER) 3008 else: 3009 return None 3010 3011 if not this: 3012 this = ( 3013 self._parse_unnest() 3014 or self._parse_function() 3015 or self._parse_id_var(any_token=False) 3016 ) 3017 3018 while self._match(TokenType.DOT): 3019 this = exp.Dot( 3020 this=this, 3021 expression=self._parse_function() or self._parse_id_var(any_token=False), 3022 ) 3023 3024 if view: 3025 table = self._parse_id_var(any_token=False) 3026 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3027 table_alias: t.Optional[exp.TableAlias] = self.expression( 3028 exp.TableAlias, this=table, columns=columns 3029 ) 3030 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3031 # We move the alias from the lateral's child node to the lateral itself 3032 table_alias = this.args["alias"].pop() 3033 else: 3034 table_alias = self._parse_table_alias() 3035 3036 return self.expression( 3037 exp.Lateral, 3038 this=this, 3039 view=view, 3040 outer=outer, 3041 alias=table_alias, 3042 cross_apply=cross_apply, 3043 ) 3044 3045 def _parse_join_parts( 3046 self, 3047 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3048 return ( 3049 self._match_set(self.JOIN_METHODS) 
and self._prev, 3050 self._match_set(self.JOIN_SIDES) and self._prev, 3051 self._match_set(self.JOIN_KINDS) and self._prev, 3052 ) 3053 3054 def _parse_join( 3055 self, skip_join_token: bool = False, parse_bracket: bool = False 3056 ) -> t.Optional[exp.Join]: 3057 if self._match(TokenType.COMMA): 3058 return self.expression(exp.Join, this=self._parse_table()) 3059 3060 index = self._index 3061 method, side, kind = self._parse_join_parts() 3062 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3063 join = self._match(TokenType.JOIN) 3064 3065 if not skip_join_token and not join: 3066 self._retreat(index) 3067 kind = None 3068 method = None 3069 side = None 3070 3071 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3072 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3073 3074 if not skip_join_token and not join and not outer_apply and not cross_apply: 3075 return None 3076 3077 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3078 3079 if method: 3080 kwargs["method"] = method.text 3081 if side: 3082 kwargs["side"] = side.text 3083 if kind: 3084 kwargs["kind"] = kind.text 3085 if hint: 3086 kwargs["hint"] = hint 3087 3088 if self._match(TokenType.MATCH_CONDITION): 3089 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3090 3091 if self._match(TokenType.ON): 3092 kwargs["on"] = self._parse_conjunction() 3093 elif self._match(TokenType.USING): 3094 kwargs["using"] = self._parse_wrapped_id_vars() 3095 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3096 kind and kind.token_type == TokenType.CROSS 3097 ): 3098 index = self._index 3099 joins: t.Optional[list] = list(self._parse_joins()) 3100 3101 if joins and self._match(TokenType.ON): 3102 kwargs["on"] = self._parse_conjunction() 3103 elif joins and self._match(TokenType.USING): 3104 kwargs["using"] = self._parse_wrapped_id_vars() 3105 else: 3106 joins = None 3107 self._retreat(index) 
3108 3109 kwargs["this"].set("joins", joins if joins else None) 3110 3111 comments = [c for token in (method, side, kind) if token for c in token.comments] 3112 return self.expression(exp.Join, comments=comments, **kwargs) 3113 3114 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3115 this = self._parse_conjunction() 3116 3117 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3118 return this 3119 3120 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3121 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3122 3123 return this 3124 3125 def _parse_index_params(self) -> exp.IndexParameters: 3126 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3127 3128 if self._match(TokenType.L_PAREN, advance=False): 3129 columns = self._parse_wrapped_csv(self._parse_with_operator) 3130 else: 3131 columns = None 3132 3133 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3134 partition_by = self._parse_partition_by() 3135 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3136 tablespace = ( 3137 self._parse_var(any_token=True) 3138 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3139 else None 3140 ) 3141 where = self._parse_where() 3142 3143 return self.expression( 3144 exp.IndexParameters, 3145 using=using, 3146 columns=columns, 3147 include=include, 3148 partition_by=partition_by, 3149 where=where, 3150 with_storage=with_storage, 3151 tablespace=tablespace, 3152 ) 3153 3154 def _parse_index( 3155 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3156 ) -> t.Optional[exp.Index]: 3157 if index or anonymous: 3158 unique = None 3159 primary = None 3160 amp = None 3161 3162 self._match(TokenType.ON) 3163 self._match(TokenType.TABLE) # hive 3164 table = self._parse_table_parts(schema=True) 3165 else: 3166 unique = self._match(TokenType.UNIQUE) 3167 primary = 
self._match_text_seq("PRIMARY") 3168 amp = self._match_text_seq("AMP") 3169 3170 if not self._match(TokenType.INDEX): 3171 return None 3172 3173 index = self._parse_id_var() 3174 table = None 3175 3176 params = self._parse_index_params() 3177 3178 return self.expression( 3179 exp.Index, 3180 this=index, 3181 table=table, 3182 unique=unique, 3183 primary=primary, 3184 amp=amp, 3185 params=params, 3186 ) 3187 3188 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3189 hints: t.List[exp.Expression] = [] 3190 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3191 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3192 hints.append( 3193 self.expression( 3194 exp.WithTableHint, 3195 expressions=self._parse_csv( 3196 lambda: self._parse_function() or self._parse_var(any_token=True) 3197 ), 3198 ) 3199 ) 3200 self._match_r_paren() 3201 else: 3202 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3203 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3204 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3205 3206 self._match_texts(("INDEX", "KEY")) 3207 if self._match(TokenType.FOR): 3208 hint.set("target", self._advance_any() and self._prev.text.upper()) 3209 3210 hint.set("expressions", self._parse_wrapped_id_vars()) 3211 hints.append(hint) 3212 3213 return hints or None 3214 3215 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3216 return ( 3217 (not schema and self._parse_function(optional_parens=False)) 3218 or self._parse_id_var(any_token=False) 3219 or self._parse_string_as_identifier() 3220 or self._parse_placeholder() 3221 ) 3222 3223 def _parse_table_parts( 3224 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3225 ) -> exp.Table: 3226 catalog = None 3227 db = None 3228 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3229 3230 while self._match(TokenType.DOT): 3231 if 
catalog: 3232 # This allows nesting the table in arbitrarily many dot expressions if needed 3233 table = self.expression( 3234 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3235 ) 3236 else: 3237 catalog = db 3238 db = table 3239 # "" used for tsql FROM a..b case 3240 table = self._parse_table_part(schema=schema) or "" 3241 3242 if ( 3243 wildcard 3244 and self._is_connected() 3245 and (isinstance(table, exp.Identifier) or not table) 3246 and self._match(TokenType.STAR) 3247 ): 3248 if isinstance(table, exp.Identifier): 3249 table.args["this"] += "*" 3250 else: 3251 table = exp.Identifier(this="*") 3252 3253 # We bubble up comments from the Identifier to the Table 3254 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3255 3256 if is_db_reference: 3257 catalog = db 3258 db = table 3259 table = None 3260 3261 if not table and not is_db_reference: 3262 self.raise_error(f"Expected table name but got {self._curr}") 3263 if not db and is_db_reference: 3264 self.raise_error(f"Expected database name but got {self._curr}") 3265 3266 return self.expression( 3267 exp.Table, 3268 comments=comments, 3269 this=table, 3270 db=db, 3271 catalog=catalog, 3272 pivots=self._parse_pivots(), 3273 ) 3274 3275 def _parse_table( 3276 self, 3277 schema: bool = False, 3278 joins: bool = False, 3279 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3280 parse_bracket: bool = False, 3281 is_db_reference: bool = False, 3282 parse_partition: bool = False, 3283 ) -> t.Optional[exp.Expression]: 3284 lateral = self._parse_lateral() 3285 if lateral: 3286 return lateral 3287 3288 unnest = self._parse_unnest() 3289 if unnest: 3290 return unnest 3291 3292 values = self._parse_derived_table_values() 3293 if values: 3294 return values 3295 3296 subquery = self._parse_select(table=True) 3297 if subquery: 3298 if not subquery.args.get("pivots"): 3299 subquery.set("pivots", self._parse_pivots()) 3300 return subquery 3301 3302 bracket = 
parse_bracket and self._parse_bracket(None) 3303 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3304 3305 only = self._match(TokenType.ONLY) 3306 3307 this = t.cast( 3308 exp.Expression, 3309 bracket 3310 or self._parse_bracket( 3311 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3312 ), 3313 ) 3314 3315 if only: 3316 this.set("only", only) 3317 3318 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3319 self._match_text_seq("*") 3320 3321 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3322 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3323 this.set("partition", self._parse_partition()) 3324 3325 if schema: 3326 return self._parse_schema(this=this) 3327 3328 version = self._parse_version() 3329 3330 if version: 3331 this.set("version", version) 3332 3333 if self.dialect.ALIAS_POST_TABLESAMPLE: 3334 table_sample = self._parse_table_sample() 3335 3336 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3337 if alias: 3338 this.set("alias", alias) 3339 3340 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3341 return self.expression( 3342 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3343 ) 3344 3345 this.set("hints", self._parse_table_hints()) 3346 3347 if not this.args.get("pivots"): 3348 this.set("pivots", self._parse_pivots()) 3349 3350 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3351 table_sample = self._parse_table_sample() 3352 3353 if table_sample: 3354 table_sample.set("this", this) 3355 this = table_sample 3356 3357 if joins: 3358 for join in self._parse_joins(): 3359 this.append("joins", join) 3360 3361 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3362 this.set("ordinality", True) 3363 this.set("alias", self._parse_table_alias()) 3364 3365 return this 3366 3367 def _parse_version(self) -> t.Optional[exp.Version]: 3368 if 
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3369 this = "TIMESTAMP" 3370 elif self._match(TokenType.VERSION_SNAPSHOT): 3371 this = "VERSION" 3372 else: 3373 return None 3374 3375 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3376 kind = self._prev.text.upper() 3377 start = self._parse_bitwise() 3378 self._match_texts(("TO", "AND")) 3379 end = self._parse_bitwise() 3380 expression: t.Optional[exp.Expression] = self.expression( 3381 exp.Tuple, expressions=[start, end] 3382 ) 3383 elif self._match_text_seq("CONTAINED", "IN"): 3384 kind = "CONTAINED IN" 3385 expression = self.expression( 3386 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3387 ) 3388 elif self._match(TokenType.ALL): 3389 kind = "ALL" 3390 expression = None 3391 else: 3392 self._match_text_seq("AS", "OF") 3393 kind = "AS OF" 3394 expression = self._parse_type() 3395 3396 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3397 3398 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3399 if not self._match(TokenType.UNNEST): 3400 return None 3401 3402 expressions = self._parse_wrapped_csv(self._parse_equality) 3403 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3404 3405 alias = self._parse_table_alias() if with_alias else None 3406 3407 if alias: 3408 if self.dialect.UNNEST_COLUMN_ONLY: 3409 if alias.args.get("columns"): 3410 self.raise_error("Unexpected extra column alias in unnest.") 3411 3412 alias.set("columns", [alias.this]) 3413 alias.set("this", None) 3414 3415 columns = alias.args.get("columns") or [] 3416 if offset and len(expressions) < len(columns): 3417 offset = columns.pop() 3418 3419 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3420 self._match(TokenType.ALIAS) 3421 offset = self._parse_id_var( 3422 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3423 ) or exp.to_identifier("offset") 3424 3425 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3426 3427 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3428 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3429 if not is_derived and not self._match_text_seq("VALUES"): 3430 return None 3431 3432 expressions = self._parse_csv(self._parse_value) 3433 alias = self._parse_table_alias() 3434 3435 if is_derived: 3436 self._match_r_paren() 3437 3438 return self.expression( 3439 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3440 ) 3441 3442 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3443 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3444 as_modifier and self._match_text_seq("USING", "SAMPLE") 3445 ): 3446 return None 3447 3448 bucket_numerator = None 3449 bucket_denominator = None 3450 bucket_field = None 3451 percent = None 3452 size = None 3453 seed = None 3454 3455 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3456 matched_l_paren = self._match(TokenType.L_PAREN) 3457 3458 if self.TABLESAMPLE_CSV: 3459 num = None 3460 expressions = self._parse_csv(self._parse_primary) 3461 else: 3462 expressions = None 3463 num = ( 3464 self._parse_factor() 3465 if self._match(TokenType.NUMBER, advance=False) 3466 else self._parse_primary() or self._parse_placeholder() 3467 ) 3468 3469 if self._match_text_seq("BUCKET"): 3470 bucket_numerator = self._parse_number() 3471 self._match_text_seq("OUT", "OF") 3472 bucket_denominator = bucket_denominator = self._parse_number() 3473 self._match(TokenType.ON) 3474 bucket_field = self._parse_field() 3475 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3476 percent = num 3477 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3478 size = num 3479 else: 3480 percent = num 3481 3482 if matched_l_paren: 3483 self._match_r_paren() 3484 3485 if self._match(TokenType.L_PAREN): 3486 method = self._parse_var(upper=True) 3487 seed = 
self._match(TokenType.COMMA) and self._parse_number() 3488 self._match_r_paren() 3489 elif self._match_texts(("SEED", "REPEATABLE")): 3490 seed = self._parse_wrapped(self._parse_number) 3491 3492 if not method and self.DEFAULT_SAMPLING_METHOD: 3493 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3494 3495 return self.expression( 3496 exp.TableSample, 3497 expressions=expressions, 3498 method=method, 3499 bucket_numerator=bucket_numerator, 3500 bucket_denominator=bucket_denominator, 3501 bucket_field=bucket_field, 3502 percent=percent, 3503 size=size, 3504 seed=seed, 3505 ) 3506 3507 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3508 return list(iter(self._parse_pivot, None)) or None 3509 3510 def _parse_joins(self) -> t.Iterator[exp.Join]: 3511 return iter(self._parse_join, None) 3512 3513 # https://duckdb.org/docs/sql/statements/pivot 3514 def _parse_simplified_pivot(self) -> exp.Pivot: 3515 def _parse_on() -> t.Optional[exp.Expression]: 3516 this = self._parse_bitwise() 3517 return self._parse_in(this) if self._match(TokenType.IN) else this 3518 3519 this = self._parse_table() 3520 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3521 using = self._match(TokenType.USING) and self._parse_csv( 3522 lambda: self._parse_alias(self._parse_function()) 3523 ) 3524 group = self._parse_group() 3525 return self.expression( 3526 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3527 ) 3528 3529 def _parse_pivot_in(self) -> exp.In: 3530 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3531 this = self._parse_conjunction() 3532 3533 self._match(TokenType.ALIAS) 3534 alias = self._parse_field() 3535 if alias: 3536 return self.expression(exp.PivotAlias, this=this, alias=alias) 3537 3538 return this 3539 3540 value = self._parse_column() 3541 3542 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3543 self.raise_error("Expecting IN (") 3544 3545 aliased_expressions = 
self._parse_csv(_parse_aliased_expression) 3546 3547 self._match_r_paren() 3548 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3549 3550 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3551 index = self._index 3552 include_nulls = None 3553 3554 if self._match(TokenType.PIVOT): 3555 unpivot = False 3556 elif self._match(TokenType.UNPIVOT): 3557 unpivot = True 3558 3559 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3560 if self._match_text_seq("INCLUDE", "NULLS"): 3561 include_nulls = True 3562 elif self._match_text_seq("EXCLUDE", "NULLS"): 3563 include_nulls = False 3564 else: 3565 return None 3566 3567 expressions = [] 3568 3569 if not self._match(TokenType.L_PAREN): 3570 self._retreat(index) 3571 return None 3572 3573 if unpivot: 3574 expressions = self._parse_csv(self._parse_column) 3575 else: 3576 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3577 3578 if not expressions: 3579 self.raise_error("Failed to parse PIVOT's aggregation list") 3580 3581 if not self._match(TokenType.FOR): 3582 self.raise_error("Expecting FOR") 3583 3584 field = self._parse_pivot_in() 3585 3586 self._match_r_paren() 3587 3588 pivot = self.expression( 3589 exp.Pivot, 3590 expressions=expressions, 3591 field=field, 3592 unpivot=unpivot, 3593 include_nulls=include_nulls, 3594 ) 3595 3596 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3597 pivot.set("alias", self._parse_table_alias()) 3598 3599 if not unpivot: 3600 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3601 3602 columns: t.List[exp.Expression] = [] 3603 for fld in pivot.args["field"].expressions: 3604 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3605 for name in names: 3606 if self.PREFIXED_PIVOT_COLUMNS: 3607 name = f"{name}_{field_name}" if name else field_name 3608 else: 3609 name = f"{field_name}_{name}" if name else 
field_name 3610 3611 columns.append(exp.to_identifier(name)) 3612 3613 pivot.set("columns", columns) 3614 3615 return pivot 3616 3617 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3618 return [agg.alias for agg in aggregations] 3619 3620 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3621 if not skip_where_token and not self._match(TokenType.PREWHERE): 3622 return None 3623 3624 return self.expression( 3625 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3626 ) 3627 3628 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3629 if not skip_where_token and not self._match(TokenType.WHERE): 3630 return None 3631 3632 return self.expression( 3633 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3634 ) 3635 3636 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3637 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3638 return None 3639 3640 elements: t.Dict[str, t.Any] = defaultdict(list) 3641 3642 if self._match(TokenType.ALL): 3643 elements["all"] = True 3644 elif self._match(TokenType.DISTINCT): 3645 elements["all"] = False 3646 3647 while True: 3648 expressions = self._parse_csv( 3649 lambda: None 3650 if self._match(TokenType.ROLLUP, advance=False) 3651 else self._parse_conjunction() 3652 ) 3653 if expressions: 3654 elements["expressions"].extend(expressions) 3655 3656 grouping_sets = self._parse_grouping_sets() 3657 if grouping_sets: 3658 elements["grouping_sets"].extend(grouping_sets) 3659 3660 rollup = None 3661 cube = None 3662 totals = None 3663 3664 index = self._index 3665 with_ = self._match(TokenType.WITH) 3666 if self._match(TokenType.ROLLUP): 3667 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3668 elements["rollup"].extend(ensure_list(rollup)) 3669 3670 if self._match(TokenType.CUBE): 3671 cube = with_ or 
self._parse_wrapped_csv(self._parse_column) 3672 elements["cube"].extend(ensure_list(cube)) 3673 3674 if self._match_text_seq("TOTALS"): 3675 totals = True 3676 elements["totals"] = True # type: ignore 3677 3678 if not (grouping_sets or rollup or cube or totals): 3679 if with_: 3680 self._retreat(index) 3681 break 3682 3683 return self.expression(exp.Group, **elements) # type: ignore 3684 3685 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3686 if not self._match(TokenType.GROUPING_SETS): 3687 return None 3688 3689 return self._parse_wrapped_csv(self._parse_grouping_set) 3690 3691 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3692 if self._match(TokenType.L_PAREN): 3693 grouping_set = self._parse_csv(self._parse_column) 3694 self._match_r_paren() 3695 return self.expression(exp.Tuple, expressions=grouping_set) 3696 3697 return self._parse_column() 3698 3699 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3700 if not skip_having_token and not self._match(TokenType.HAVING): 3701 return None 3702 return self.expression(exp.Having, this=self._parse_conjunction()) 3703 3704 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3705 if not self._match(TokenType.QUALIFY): 3706 return None 3707 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3708 3709 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3710 if skip_start_token: 3711 start = None 3712 elif self._match(TokenType.START_WITH): 3713 start = self._parse_conjunction() 3714 else: 3715 return None 3716 3717 self._match(TokenType.CONNECT_BY) 3718 nocycle = self._match_text_seq("NOCYCLE") 3719 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3720 exp.Prior, this=self._parse_bitwise() 3721 ) 3722 connect = self._parse_conjunction() 3723 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3724 3725 if not start and self._match(TokenType.START_WITH): 3726 start = 
self._parse_conjunction() 3727 3728 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3729 3730 def _parse_name_as_expression(self) -> exp.Alias: 3731 return self.expression( 3732 exp.Alias, 3733 alias=self._parse_id_var(any_token=True), 3734 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3735 ) 3736 3737 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3738 if self._match_text_seq("INTERPOLATE"): 3739 return self._parse_wrapped_csv(self._parse_name_as_expression) 3740 return None 3741 3742 def _parse_order( 3743 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3744 ) -> t.Optional[exp.Expression]: 3745 siblings = None 3746 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3747 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3748 return this 3749 3750 siblings = True 3751 3752 return self.expression( 3753 exp.Order, 3754 this=this, 3755 expressions=self._parse_csv(self._parse_ordered), 3756 interpolate=self._parse_interpolate(), 3757 siblings=siblings, 3758 ) 3759 3760 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3761 if not self._match(token): 3762 return None 3763 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3764 3765 def _parse_ordered( 3766 self, parse_method: t.Optional[t.Callable] = None 3767 ) -> t.Optional[exp.Ordered]: 3768 this = parse_method() if parse_method else self._parse_conjunction() 3769 if not this: 3770 return None 3771 3772 asc = self._match(TokenType.ASC) 3773 desc = self._match(TokenType.DESC) or (asc and False) 3774 3775 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3776 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3777 3778 nulls_first = is_nulls_first or False 3779 explicitly_null_ordered = is_nulls_first or is_nulls_last 3780 3781 if ( 3782 not explicitly_null_ordered 3783 and ( 3784 (not desc and self.dialect.NULL_ORDERING == 
"nulls_are_small") 3785 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3786 ) 3787 and self.dialect.NULL_ORDERING != "nulls_are_last" 3788 ): 3789 nulls_first = True 3790 3791 if self._match_text_seq("WITH", "FILL"): 3792 with_fill = self.expression( 3793 exp.WithFill, 3794 **{ # type: ignore 3795 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3796 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3797 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3798 }, 3799 ) 3800 else: 3801 with_fill = None 3802 3803 return self.expression( 3804 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3805 ) 3806 3807 def _parse_limit( 3808 self, 3809 this: t.Optional[exp.Expression] = None, 3810 top: bool = False, 3811 skip_limit_token: bool = False, 3812 ) -> t.Optional[exp.Expression]: 3813 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3814 comments = self._prev_comments 3815 if top: 3816 limit_paren = self._match(TokenType.L_PAREN) 3817 expression = self._parse_term() if limit_paren else self._parse_number() 3818 3819 if limit_paren: 3820 self._match_r_paren() 3821 else: 3822 expression = self._parse_term() 3823 3824 if self._match(TokenType.COMMA): 3825 offset = expression 3826 expression = self._parse_term() 3827 else: 3828 offset = None 3829 3830 limit_exp = self.expression( 3831 exp.Limit, 3832 this=this, 3833 expression=expression, 3834 offset=offset, 3835 comments=comments, 3836 expressions=self._parse_limit_by(), 3837 ) 3838 3839 return limit_exp 3840 3841 if self._match(TokenType.FETCH): 3842 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3843 direction = self._prev.text.upper() if direction else "FIRST" 3844 3845 count = self._parse_field(tokens=self.FETCH_TOKENS) 3846 percent = self._match(TokenType.PERCENT) 3847 3848 self._match_set((TokenType.ROW, TokenType.ROWS)) 3849 3850 only = self._match_text_seq("ONLY") 3851 with_ties = 
self._match_text_seq("WITH", "TIES") 3852 3853 if only and with_ties: 3854 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3855 3856 return self.expression( 3857 exp.Fetch, 3858 direction=direction, 3859 count=count, 3860 percent=percent, 3861 with_ties=with_ties, 3862 ) 3863 3864 return this 3865 3866 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3867 if not self._match(TokenType.OFFSET): 3868 return this 3869 3870 count = self._parse_term() 3871 self._match_set((TokenType.ROW, TokenType.ROWS)) 3872 3873 return self.expression( 3874 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3875 ) 3876 3877 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3878 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3879 3880 def _parse_locks(self) -> t.List[exp.Lock]: 3881 locks = [] 3882 while True: 3883 if self._match_text_seq("FOR", "UPDATE"): 3884 update = True 3885 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3886 "LOCK", "IN", "SHARE", "MODE" 3887 ): 3888 update = False 3889 else: 3890 break 3891 3892 expressions = None 3893 if self._match_text_seq("OF"): 3894 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3895 3896 wait: t.Optional[bool | exp.Expression] = None 3897 if self._match_text_seq("NOWAIT"): 3898 wait = True 3899 elif self._match_text_seq("WAIT"): 3900 wait = self._parse_primary() 3901 elif self._match_text_seq("SKIP", "LOCKED"): 3902 wait = False 3903 3904 locks.append( 3905 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3906 ) 3907 3908 return locks 3909 3910 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3911 while this and self._match_set(self.SET_OPERATIONS): 3912 token_type = self._prev.token_type 3913 3914 if token_type == TokenType.UNION: 3915 operation = exp.Union 3916 elif token_type == 
TokenType.EXCEPT: 3917 operation = exp.Except 3918 else: 3919 operation = exp.Intersect 3920 3921 comments = self._prev.comments 3922 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3923 by_name = self._match_text_seq("BY", "NAME") 3924 expression = self._parse_select(nested=True, parse_set_operation=False) 3925 3926 this = self.expression( 3927 operation, 3928 comments=comments, 3929 this=this, 3930 distinct=distinct, 3931 by_name=by_name, 3932 expression=expression, 3933 ) 3934 3935 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3936 expression = this.expression 3937 3938 if expression: 3939 for arg in self.UNION_MODIFIERS: 3940 expr = expression.args.get(arg) 3941 if expr: 3942 this.set(arg, expr.pop()) 3943 3944 return this 3945 3946 def _parse_expression(self) -> t.Optional[exp.Expression]: 3947 return self._parse_alias(self._parse_conjunction()) 3948 3949 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3950 this = self._parse_equality() 3951 3952 if self._match(TokenType.COLON_EQ): 3953 this = self.expression( 3954 exp.PropertyEQ, 3955 this=this, 3956 comments=self._prev_comments, 3957 expression=self._parse_conjunction(), 3958 ) 3959 3960 while self._match_set(self.CONJUNCTION): 3961 this = self.expression( 3962 self.CONJUNCTION[self._prev.token_type], 3963 this=this, 3964 comments=self._prev_comments, 3965 expression=self._parse_equality(), 3966 ) 3967 return this 3968 3969 def _parse_equality(self) -> t.Optional[exp.Expression]: 3970 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3971 3972 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3973 return self._parse_tokens(self._parse_range, self.COMPARISON) 3974 3975 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3976 this = this or self._parse_bitwise() 3977 negate = self._match(TokenType.NOT) 3978 3979 if self._match_set(self.RANGE_PARSERS): 3980 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3981 if not expression: 3982 return this 3983 3984 this = expression 3985 elif self._match(TokenType.ISNULL): 3986 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3987 3988 # Postgres supports ISNULL and NOTNULL for conditions. 3989 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3990 if self._match(TokenType.NOTNULL): 3991 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3992 this = self.expression(exp.Not, this=this) 3993 3994 if negate: 3995 this = self.expression(exp.Not, this=this) 3996 3997 if self._match(TokenType.IS): 3998 this = self._parse_is(this) 3999 4000 return this 4001 4002 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4003 index = self._index - 1 4004 negate = self._match(TokenType.NOT) 4005 4006 if self._match_text_seq("DISTINCT", "FROM"): 4007 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4008 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4009 4010 expression = self._parse_null() or self._parse_boolean() 4011 if not expression: 4012 self._retreat(index) 4013 return None 4014 4015 this = self.expression(exp.Is, this=this, expression=expression) 4016 return self.expression(exp.Not, this=this) if negate else this 4017 4018 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4019 unnest = self._parse_unnest(with_alias=False) 4020 if unnest: 4021 this = self.expression(exp.In, this=this, unnest=unnest) 4022 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4023 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4024 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4025 4026 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4027 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4028 else: 4029 this = self.expression(exp.In, this=this, 
expressions=expressions) 4030 4031 if matched_l_paren: 4032 self._match_r_paren(this) 4033 elif not self._match(TokenType.R_BRACKET, expression=this): 4034 self.raise_error("Expecting ]") 4035 else: 4036 this = self.expression(exp.In, this=this, field=self._parse_field()) 4037 4038 return this 4039 4040 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4041 low = self._parse_bitwise() 4042 self._match(TokenType.AND) 4043 high = self._parse_bitwise() 4044 return self.expression(exp.Between, this=this, low=low, high=high) 4045 4046 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4047 if not self._match(TokenType.ESCAPE): 4048 return this 4049 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4050 4051 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 4052 index = self._index 4053 4054 if not self._match(TokenType.INTERVAL) and match_interval: 4055 return None 4056 4057 if self._match(TokenType.STRING, advance=False): 4058 this = self._parse_primary() 4059 else: 4060 this = self._parse_term() 4061 4062 if not this or ( 4063 isinstance(this, exp.Column) 4064 and not this.table 4065 and not this.this.quoted 4066 and this.name.upper() == "IS" 4067 ): 4068 self._retreat(index) 4069 return None 4070 4071 unit = self._parse_function() or ( 4072 not self._match(TokenType.ALIAS, advance=False) 4073 and self._parse_var(any_token=True, upper=True) 4074 ) 4075 4076 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4077 # each INTERVAL expression into this canonical form so it's easy to transpile 4078 if this and this.is_number: 4079 this = exp.Literal.string(this.name) 4080 elif this and this.is_string: 4081 parts = this.name.split() 4082 4083 if len(parts) == 2: 4084 if unit: 4085 # This is not actually a unit, it's something else (e.g. 
a "window side") 4086 unit = None 4087 self._retreat(self._index - 1) 4088 4089 this = exp.Literal.string(parts[0]) 4090 unit = self.expression(exp.Var, this=parts[1].upper()) 4091 4092 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4093 unit = self.expression( 4094 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4095 ) 4096 4097 return self.expression(exp.Interval, this=this, unit=unit) 4098 4099 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4100 this = self._parse_term() 4101 4102 while True: 4103 if self._match_set(self.BITWISE): 4104 this = self.expression( 4105 self.BITWISE[self._prev.token_type], 4106 this=this, 4107 expression=self._parse_term(), 4108 ) 4109 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4110 this = self.expression( 4111 exp.DPipe, 4112 this=this, 4113 expression=self._parse_term(), 4114 safe=not self.dialect.STRICT_STRING_CONCAT, 4115 ) 4116 elif self._match(TokenType.DQMARK): 4117 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4118 elif self._match_pair(TokenType.LT, TokenType.LT): 4119 this = self.expression( 4120 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4121 ) 4122 elif self._match_pair(TokenType.GT, TokenType.GT): 4123 this = self.expression( 4124 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4125 ) 4126 else: 4127 break 4128 4129 return this 4130 4131 def _parse_term(self) -> t.Optional[exp.Expression]: 4132 return self._parse_tokens(self._parse_factor, self.TERM) 4133 4134 def _parse_factor(self) -> t.Optional[exp.Expression]: 4135 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4136 this = parse_method() 4137 4138 while self._match_set(self.FACTOR): 4139 this = self.expression( 4140 self.FACTOR[self._prev.token_type], 4141 this=this, 4142 comments=self._prev_comments, 4143 expression=parse_method(), 4144 ) 4145 if isinstance(this, exp.Div): 4146 
this.args["typed"] = self.dialect.TYPED_DIVISION 4147 this.args["safe"] = self.dialect.SAFE_DIVISION 4148 4149 return this 4150 4151 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4152 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4153 4154 def _parse_unary(self) -> t.Optional[exp.Expression]: 4155 if self._match_set(self.UNARY_PARSERS): 4156 return self.UNARY_PARSERS[self._prev.token_type](self) 4157 return self._parse_at_time_zone(self._parse_type()) 4158 4159 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4160 interval = parse_interval and self._parse_interval() 4161 if interval: 4162 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4163 while True: 4164 index = self._index 4165 self._match(TokenType.PLUS) 4166 4167 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4168 self._retreat(index) 4169 break 4170 4171 interval = self.expression( # type: ignore 4172 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4173 ) 4174 4175 return interval 4176 4177 index = self._index 4178 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4179 this = self._parse_column() 4180 4181 if data_type: 4182 if isinstance(this, exp.Literal): 4183 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4184 if parser: 4185 return parser(self, this, data_type) 4186 return self.expression(exp.Cast, this=this, to=data_type) 4187 if not data_type.expressions: 4188 self._retreat(index) 4189 return self._parse_column() 4190 return self._parse_column_ops(data_type) 4191 4192 return this and self._parse_column_ops(this) 4193 4194 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4195 this = self._parse_type() 4196 if not this: 4197 return None 4198 4199 if isinstance(this, exp.Column) and not this.table: 4200 this = exp.var(this.name.upper()) 4201 4202 return self.expression( 4203 exp.DataTypeParam, this=this, 
expression=self._parse_var(any_token=True) 4204 ) 4205 4206 def _parse_types( 4207 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4208 ) -> t.Optional[exp.Expression]: 4209 index = self._index 4210 4211 this: t.Optional[exp.Expression] = None 4212 prefix = self._match_text_seq("SYSUDTLIB", ".") 4213 4214 if not self._match_set(self.TYPE_TOKENS): 4215 identifier = allow_identifiers and self._parse_id_var( 4216 any_token=False, tokens=(TokenType.VAR,) 4217 ) 4218 if identifier: 4219 tokens = self.dialect.tokenize(identifier.name) 4220 4221 if len(tokens) != 1: 4222 self.raise_error("Unexpected identifier", self._prev) 4223 4224 if tokens[0].token_type in self.TYPE_TOKENS: 4225 self._prev = tokens[0] 4226 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4227 type_name = identifier.name 4228 4229 while self._match(TokenType.DOT): 4230 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4231 4232 this = exp.DataType.build(type_name, udt=True) 4233 else: 4234 self._retreat(self._index - 1) 4235 return None 4236 else: 4237 return None 4238 4239 type_token = self._prev.token_type 4240 4241 if type_token == TokenType.PSEUDO_TYPE: 4242 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4243 4244 if type_token == TokenType.OBJECT_IDENTIFIER: 4245 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4246 4247 nested = type_token in self.NESTED_TYPE_TOKENS 4248 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4249 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4250 expressions = None 4251 maybe_func = False 4252 4253 if self._match(TokenType.L_PAREN): 4254 if is_struct: 4255 expressions = self._parse_csv(self._parse_struct_types) 4256 elif nested: 4257 expressions = self._parse_csv( 4258 lambda: self._parse_types( 4259 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4260 ) 4261 ) 4262 elif type_token in self.ENUM_TYPE_TOKENS: 4263 expressions = 
self._parse_csv(self._parse_equality) 4264 elif is_aggregate: 4265 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4266 any_token=False, tokens=(TokenType.VAR,) 4267 ) 4268 if not func_or_ident or not self._match(TokenType.COMMA): 4269 return None 4270 expressions = self._parse_csv( 4271 lambda: self._parse_types( 4272 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4273 ) 4274 ) 4275 expressions.insert(0, func_or_ident) 4276 else: 4277 expressions = self._parse_csv(self._parse_type_size) 4278 4279 if not expressions or not self._match(TokenType.R_PAREN): 4280 self._retreat(index) 4281 return None 4282 4283 maybe_func = True 4284 4285 values: t.Optional[t.List[exp.Expression]] = None 4286 4287 if nested and self._match(TokenType.LT): 4288 if is_struct: 4289 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4290 else: 4291 expressions = self._parse_csv( 4292 lambda: self._parse_types( 4293 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4294 ) 4295 ) 4296 4297 if not self._match(TokenType.GT): 4298 self.raise_error("Expecting >") 4299 4300 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4301 values = self._parse_csv(self._parse_conjunction) 4302 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4303 4304 if type_token in self.TIMESTAMPS: 4305 if self._match_text_seq("WITH", "TIME", "ZONE"): 4306 maybe_func = False 4307 tz_type = ( 4308 exp.DataType.Type.TIMETZ 4309 if type_token in self.TIMES 4310 else exp.DataType.Type.TIMESTAMPTZ 4311 ) 4312 this = exp.DataType(this=tz_type, expressions=expressions) 4313 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4314 maybe_func = False 4315 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4316 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4317 maybe_func = False 4318 elif type_token == TokenType.INTERVAL: 4319 unit = 
self._parse_var(upper=True) 4320 if unit: 4321 if self._match_text_seq("TO"): 4322 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4323 4324 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4325 else: 4326 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4327 4328 if maybe_func and check_func: 4329 index2 = self._index 4330 peek = self._parse_string() 4331 4332 if not peek: 4333 self._retreat(index) 4334 return None 4335 4336 self._retreat(index2) 4337 4338 if not this: 4339 if self._match_text_seq("UNSIGNED"): 4340 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4341 if not unsigned_type_token: 4342 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4343 4344 type_token = unsigned_type_token or type_token 4345 4346 this = exp.DataType( 4347 this=exp.DataType.Type[type_token.value], 4348 expressions=expressions, 4349 nested=nested, 4350 values=values, 4351 prefix=prefix, 4352 ) 4353 elif expressions: 4354 this.set("expressions", expressions) 4355 4356 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4357 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4358 4359 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4360 converter = self.TYPE_CONVERTER.get(this.this) 4361 if converter: 4362 this = converter(t.cast(exp.DataType, this)) 4363 4364 return this 4365 4366 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4367 index = self._index 4368 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4369 self._match(TokenType.COLON) 4370 column_def = self._parse_column_def(this) 4371 4372 if type_required and ( 4373 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4374 ): 4375 self._retreat(index) 4376 return self._parse_types() 4377 4378 return column_def 4379 4380 def 
_parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4381 if not self._match_text_seq("AT", "TIME", "ZONE"): 4382 return this 4383 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4384 4385 def _parse_column(self) -> t.Optional[exp.Expression]: 4386 this = self._parse_column_reference() 4387 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4388 4389 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4390 this = self._parse_field() 4391 if ( 4392 not this 4393 and self._match(TokenType.VALUES, advance=False) 4394 and self.VALUES_FOLLOWED_BY_PAREN 4395 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4396 ): 4397 this = self._parse_id_var() 4398 4399 if isinstance(this, exp.Identifier): 4400 # We bubble up comments from the Identifier to the Column 4401 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4402 4403 return this 4404 4405 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4406 this = self._parse_bracket(this) 4407 4408 while self._match_set(self.COLUMN_OPERATORS): 4409 op_token = self._prev.token_type 4410 op = self.COLUMN_OPERATORS.get(op_token) 4411 4412 if op_token == TokenType.DCOLON: 4413 field = self._parse_types() 4414 if not field: 4415 self.raise_error("Expected type") 4416 elif op and self._curr: 4417 field = self._parse_column_reference() 4418 else: 4419 field = self._parse_field(any_token=True, anonymous_func=True) 4420 4421 if isinstance(field, exp.Func) and this: 4422 # bigquery allows function calls like x.y.count(...) 4423 # SAFE.SUBSTR(...) 
4424 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4425 this = exp.replace_tree( 4426 this, 4427 lambda n: ( 4428 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4429 if n.table 4430 else n.this 4431 ) 4432 if isinstance(n, exp.Column) 4433 else n, 4434 ) 4435 4436 if op: 4437 this = op(self, this, field) 4438 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4439 this = self.expression( 4440 exp.Column, 4441 this=field, 4442 table=this.this, 4443 db=this.args.get("table"), 4444 catalog=this.args.get("db"), 4445 ) 4446 else: 4447 this = self.expression(exp.Dot, this=this, expression=field) 4448 this = self._parse_bracket(this) 4449 return this 4450 4451 def _parse_primary(self) -> t.Optional[exp.Expression]: 4452 if self._match_set(self.PRIMARY_PARSERS): 4453 token_type = self._prev.token_type 4454 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4455 4456 if token_type == TokenType.STRING: 4457 expressions = [primary] 4458 while self._match(TokenType.STRING): 4459 expressions.append(exp.Literal.string(self._prev.text)) 4460 4461 if len(expressions) > 1: 4462 return self.expression(exp.Concat, expressions=expressions) 4463 4464 return primary 4465 4466 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4467 return exp.Literal.number(f"0.{self._prev.text}") 4468 4469 if self._match(TokenType.L_PAREN): 4470 comments = self._prev_comments 4471 query = self._parse_select() 4472 4473 if query: 4474 expressions = [query] 4475 else: 4476 expressions = self._parse_expressions() 4477 4478 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4479 4480 if not this and self._match(TokenType.R_PAREN, advance=False): 4481 this = self.expression(exp.Tuple) 4482 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4483 this = self._parse_subquery(this=this, parse_alias=False) 4484 elif isinstance(this, exp.Subquery): 4485 this = self._parse_subquery( 4486 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier-like variable.

        `anonymous_func` flips the priority so that function parsing is tried
        before primary parsing.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn <function>}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse the function call proper: no-paren functions, registered parsers,
        subquery predicates, known functions from `functions`/`self.FUNCTIONS`,
        or a fallback `exp.Anonymous` call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only no-paren functions (e.g. CURRENT_DATE) apply
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    # Some builders are dialect-sensitive and accept a `dialect` kwarg
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into exp.PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key is a bare identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (identifier plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name, with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'x'`); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> expr` or `x -> expr`);
        when no lambda arrow follows, backtrack and parse an ordinary
        (possibly DISTINCT) expression with trailing order/limit modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one field definition inside a schema list."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, computed/transform clauses,
        and any number of trailing column constraints. Returns `this` unchanged
        when neither a type nor constraints are present.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # ClickHouse-style computed column: ALIAS / MATERIALIZED <expr>
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; backtracks when REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS {IDENTITY(...) | ROW ... | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value> (Teradata-style inline length constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT constraint: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed constraints are delegated to the schema set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Parse consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one unnamed constraint whose keyword appears in `constraints`
        (defaults to CONSTRAINT_PARSERS). Quoted identifiers never start one.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [<schema>] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>, DEFERRABLE, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (e.g. DELETE/UPDATE) is taken verbatim from the next token
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is False the keyword is assumed consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action, e.g. CASCADE or RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )
reference=reference, 5005 **options, # type: ignore 5006 ) 5007 5008 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5009 return self._parse_field() 5010 5011 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5012 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5013 self._retreat(self._index - 1) 5014 return None 5015 5016 id_vars = self._parse_wrapped_id_vars() 5017 return self.expression( 5018 exp.PeriodForSystemTimeConstraint, 5019 this=seq_get(id_vars, 0), 5020 expression=seq_get(id_vars, 1), 5021 ) 5022 5023 def _parse_primary_key( 5024 self, wrapped_optional: bool = False, in_props: bool = False 5025 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5026 desc = ( 5027 self._match_set((TokenType.ASC, TokenType.DESC)) 5028 and self._prev.token_type == TokenType.DESC 5029 ) 5030 5031 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5032 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5033 5034 expressions = self._parse_wrapped_csv( 5035 self._parse_primary_key_part, optional=wrapped_optional 5036 ) 5037 options = self._parse_key_constraint_options() 5038 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5039 5040 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5041 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5042 5043 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5044 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5045 return this 5046 5047 bracket_kind = self._prev.token_type 5048 expressions = self._parse_csv( 5049 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5050 ) 5051 5052 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5053 self.raise_error("Expected ]") 5054 elif bracket_kind == TokenType.L_BRACE and not 
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:` slice suffix on `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN branches, ELSE, END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Tolerate `ELSE interval END` being mis-parsed as an interval whose unit is END
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in either function form IF(...) or statement form IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]; backtracks when absent."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        A FORMAT clause on a temporal target type is rewritten into
        StrToDate/StrToTime using the dialect's time-format mappings.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, normalizing the Postgres,
        BigQuery, and WITHIN GROUP forms into a single exp.GroupConcat.
        """
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the interior of CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL = NULL as a match, so compare with IS NULL
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a JSON key-value pair: [KEY] <key> <sep> [VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when a FORMAT JSON suffix follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        """Parse the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)."""
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value pairs (or `*`),
        NULL/ABSENT handling, UNIQUE KEYS, RETURNING type, and ENCODING.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (<json column defs>) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
5374 self._match_text_seq("COLUMNS") 5375 return self.expression( 5376 exp.JSONSchema, 5377 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5378 ) 5379 5380 def _parse_json_table(self) -> exp.JSONTable: 5381 this = self._parse_format_json(self._parse_bitwise()) 5382 path = self._match(TokenType.COMMA) and self._parse_string() 5383 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5384 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5385 schema = self._parse_json_schema() 5386 5387 return exp.JSONTable( 5388 this=this, 5389 schema=schema, 5390 path=path, 5391 error_handling=error_handling, 5392 empty_handling=empty_handling, 5393 ) 5394 5395 def _parse_match_against(self) -> exp.MatchAgainst: 5396 expressions = self._parse_csv(self._parse_column) 5397 5398 self._match_text_seq(")", "AGAINST", "(") 5399 5400 this = self._parse_string() 5401 5402 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5403 modifier = "IN NATURAL LANGUAGE MODE" 5404 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5405 modifier = f"{modifier} WITH QUERY EXPANSION" 5406 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5407 modifier = "IN BOOLEAN MODE" 5408 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5409 modifier = "WITH QUERY EXPANSION" 5410 else: 5411 modifier = None 5412 5413 return self.expression( 5414 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5415 ) 5416 5417 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5418 def _parse_open_json(self) -> exp.OpenJSON: 5419 this = self._parse_bitwise() 5420 path = self._match(TokenType.COMMA) and self._parse_string() 5421 5422 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5423 this = self._parse_field(any_token=True) 5424 kind = self._parse_types() 5425 path = self._parse_string() 5426 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5427 5428 
return self.expression( 5429 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5430 ) 5431 5432 expressions = None 5433 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5434 self._match_l_paren() 5435 expressions = self._parse_csv(_parse_open_json_column_def) 5436 5437 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5438 5439 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5440 args = self._parse_csv(self._parse_bitwise) 5441 5442 if self._match(TokenType.IN): 5443 return self.expression( 5444 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5445 ) 5446 5447 if haystack_first: 5448 haystack = seq_get(args, 0) 5449 needle = seq_get(args, 1) 5450 else: 5451 needle = seq_get(args, 0) 5452 haystack = seq_get(args, 1) 5453 5454 return self.expression( 5455 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5456 ) 5457 5458 def _parse_predict(self) -> exp.Predict: 5459 self._match_text_seq("MODEL") 5460 this = self._parse_table() 5461 5462 self._match(TokenType.COMMA) 5463 self._match_text_seq("TABLE") 5464 5465 return self.expression( 5466 exp.Predict, 5467 this=this, 5468 expression=self._parse_table(), 5469 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5470 ) 5471 5472 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5473 args = self._parse_csv(self._parse_table) 5474 return exp.JoinHint(this=func_name.upper(), expressions=args) 5475 5476 def _parse_substring(self) -> exp.Substring: 5477 # Postgres supports the form: substring(string [from int] [for int]) 5478 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5479 5480 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5481 5482 if self._match(TokenType.FROM): 5483 args.append(self._parse_bitwise()) 5484 if self._match(TokenType.FOR): 5485 if len(args) == 1: 5486 
args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] string [COLLATE c]).

        Also accepts the comma form TRIM(string, chars); FROM inverts the
        operand order relative to the comma form.
        """
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or when the dialect puts the pattern first) the first
            # operand parsed is the trim pattern, not the target string.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `WINDOW name AS (...), ...`; None when WINDOW is absent."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the keywords follow; else return it as-is."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse Teradata-style `HAVING MAX|MIN col` qualification after an aggregate."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # True for MAX (also when neither keyword matched), False for MIN.
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )
5539 5540 return this 5541 5542 def _parse_window( 5543 self, this: t.Optional[exp.Expression], alias: bool = False 5544 ) -> t.Optional[exp.Expression]: 5545 func = this 5546 comments = func.comments if isinstance(func, exp.Expression) else None 5547 5548 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5549 self._match(TokenType.WHERE) 5550 this = self.expression( 5551 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5552 ) 5553 self._match_r_paren() 5554 5555 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5556 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5557 if self._match_text_seq("WITHIN", "GROUP"): 5558 order = self._parse_wrapped(self._parse_order) 5559 this = self.expression(exp.WithinGroup, this=this, expression=order) 5560 5561 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5562 # Some dialects choose to implement and some do not. 5563 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5564 5565 # There is some code above in _parse_lambda that handles 5566 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5567 5568 # The below changes handle 5569 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5570 5571 # Oracle allows both formats 5572 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5573 # and Snowflake chose to do the same for familiarity 5574 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5575 if isinstance(this, exp.AggFunc): 5576 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5577 5578 if ignore_respect and ignore_respect is not this: 5579 ignore_respect.replace(ignore_respect.this) 5580 this = self.expression(ignore_respect.__class__, this=this) 5581 5582 this = self._parse_respect_or_ignore_nulls(this) 5583 5584 # bigquery select from window x AS (partition by ...) 
5585 if alias: 5586 over = None 5587 self._match(TokenType.ALIAS) 5588 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5589 return this 5590 else: 5591 over = self._prev.text.upper() 5592 5593 if comments and isinstance(func, exp.Expression): 5594 func.pop_comments() 5595 5596 if not self._match(TokenType.L_PAREN): 5597 return self.expression( 5598 exp.Window, 5599 comments=comments, 5600 this=this, 5601 alias=self._parse_id_var(False), 5602 over=over, 5603 ) 5604 5605 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5606 5607 first = self._match(TokenType.FIRST) 5608 if self._match_text_seq("LAST"): 5609 first = False 5610 5611 partition, order = self._parse_partition_and_order() 5612 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5613 5614 if kind: 5615 self._match(TokenType.BETWEEN) 5616 start = self._parse_window_spec() 5617 self._match(TokenType.AND) 5618 end = self._parse_window_spec() 5619 5620 spec = self.expression( 5621 exp.WindowSpec, 5622 kind=kind, 5623 start=start["value"], 5624 start_side=start["side"], 5625 end=end["value"], 5626 end_side=end["side"], 5627 ) 5628 else: 5629 spec = None 5630 5631 self._match_r_paren() 5632 5633 window = self.expression( 5634 exp.Window, 5635 comments=comments, 5636 this=this, 5637 partition_by=partition, 5638 order=order, 5639 spec=spec, 5640 alias=window_alias, 5641 over=over, 5642 first=first, 5643 ) 5644 5645 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5646 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5647 return self._parse_window(window, alias=alias) 5648 5649 return window 5650 5651 def _parse_partition_and_order( 5652 self, 5653 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5654 return self._parse_partition_by(), self._parse_order() 5655 5656 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5657 self._match(TokenType.BETWEEN) 5658 5659 return { 5660 "value": ( 5661 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5662 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5663 or self._parse_bitwise() 5664 ), 5665 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5666 } 5667 5668 def _parse_alias( 5669 self, this: t.Optional[exp.Expression], explicit: bool = False 5670 ) -> t.Optional[exp.Expression]: 5671 any_token = self._match(TokenType.ALIAS) 5672 comments = self._prev_comments or [] 5673 5674 if explicit and not any_token: 5675 return this 5676 5677 if self._match(TokenType.L_PAREN): 5678 aliases = self.expression( 5679 exp.Aliases, 5680 comments=comments, 5681 this=this, 5682 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5683 ) 5684 self._match_r_paren(aliases) 5685 return aliases 5686 5687 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5688 self.STRING_ALIASES and self._parse_string_as_identifier() 5689 ) 5690 5691 if alias: 5692 comments.extend(alias.pop_comments()) 5693 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5694 column = this.this 5695 5696 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5697 if not this.comments and column and column.comments: 5698 this.comments = column.pop_comments() 5699 5700 return this 5701 5702 def _parse_id_var( 5703 self, 5704 any_token: bool = True, 5705 tokens: t.Optional[t.Collection[TokenType]] = None, 5706 ) -> t.Optional[exp.Expression]: 5707 expression = 
self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # A non-identifier token (keyword, string, ...) may still serve as a
            # name here; mark string tokens as quoted identifiers.
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string-like token via STRING_PARSERS, else fall back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Consume a STRING token and return its text as a quoted Identifier (None if absent)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric token via NUMERIC_PARSERS, else fall back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a (quoted) IDENTIFIER token, else fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, from any non-reserved token (``any_token``),
        or from one of ``tokens``; uppercases the text when ``upper`` is set.
        Falls back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Advance past the current token unless it is reserved; return the consumed token."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        # Prefer a variable; otherwise try a string literal.
        return self._parse_var()
or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression (literal etc.), else treat any token as a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL-like token, else fall back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE or FALSE, else fall back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse `*`, else fall back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name (identifier, primary, or bare var) into exp.Parameter."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. ?, :name) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Sub-parser declined: undo the token consumed by _match_set above.
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `KEYWORD expr` or `KEYWORD (expr, ...)`; None when `keywords` don't match."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold `parse_method` results into binary nodes selected by the matched token."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse `(id, id, ...)`; parentheses may be omitted when `optional`."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized, `sep`-separated list with `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; error if `(` is missing and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else a scalar expression (possibly part of a set operation)."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes: comma-separated runs of VAR tokens
        # (e.g. ISOLATION LEVEL SERIALIZABLE, READ ONLY).
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        # NOTE(review): if _parse_field_def returned None while FIRST/AFTER matches,
        # expression.set below would raise — presumably unreachable; verify.
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop |
exp.Command]: 5927 drop = self._match(TokenType.DROP) and self._parse_drop() 5928 if drop and not isinstance(drop, exp.Command): 5929 drop.set("kind", drop.args.get("kind", "COLUMN")) 5930 return drop 5931 5932 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5933 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5934 return self.expression( 5935 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5936 ) 5937 5938 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5939 index = self._index - 1 5940 5941 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5942 return self._parse_csv( 5943 lambda: self.expression( 5944 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5945 ) 5946 ) 5947 5948 self._retreat(index) 5949 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5950 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5951 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5952 5953 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5954 if self._match_texts(self.ALTER_ALTER_PARSERS): 5955 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5956 5957 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5958 # keyword after ALTER we default to parsing this statement 5959 self._match(TokenType.COLUMN) 5960 column = self._parse_field(any_token=True) 5961 5962 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5963 return self.expression(exp.AlterColumn, this=column, drop=True) 5964 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5965 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5966 if self._match(TokenType.COMMENT): 5967 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5968 5969 self._match_text_seq("SET", "DATA") 5970 
self._match_text_seq("TYPE") 5971 return self.expression( 5972 exp.AlterColumn, 5973 this=column, 5974 dtype=self._parse_types(), 5975 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5976 using=self._match(TokenType.USING) and self._parse_conjunction(), 5977 ) 5978 5979 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5980 if self._match_texts(("ALL", "EVEN", "AUTO")): 5981 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5982 5983 self._match_text_seq("KEY", "DISTKEY") 5984 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5985 5986 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5987 if compound: 5988 self._match_text_seq("SORTKEY") 5989 5990 if self._match(TokenType.L_PAREN, advance=False): 5991 return self.expression( 5992 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5993 ) 5994 5995 self._match_texts(("AUTO", "NONE")) 5996 return self.expression( 5997 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5998 ) 5999 6000 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6001 index = self._index - 1 6002 6003 partition_exists = self._parse_exists() 6004 if self._match(TokenType.PARTITION, advance=False): 6005 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6006 6007 self._retreat(index) 6008 return self._parse_csv(self._parse_drop_column) 6009 6010 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6011 if self._match(TokenType.COLUMN): 6012 exists = self._parse_exists() 6013 old_column = self._parse_column() 6014 to = self._match_text_seq("TO") 6015 new_column = self._parse_column() 6016 6017 if old_column is None or to is None or new_column is None: 6018 return None 6019 6020 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6021 6022 self._match_text_seq("TO") 6023 return 
self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6024 6025 def _parse_alter_table_set(self) -> exp.AlterSet: 6026 alter_set = self.expression(exp.AlterSet) 6027 6028 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6029 "TABLE", "PROPERTIES" 6030 ): 6031 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6032 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6033 alter_set.set("expressions", [self._parse_conjunction()]) 6034 elif self._match_texts(("LOGGED", "UNLOGGED")): 6035 alter_set.set("option", exp.var(self._prev.text.upper())) 6036 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6037 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6038 elif self._match_text_seq("LOCATION"): 6039 alter_set.set("location", self._parse_field()) 6040 elif self._match_text_seq("ACCESS", "METHOD"): 6041 alter_set.set("access_method", self._parse_field()) 6042 elif self._match_text_seq("TABLESPACE"): 6043 alter_set.set("tablespace", self._parse_field()) 6044 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6045 alter_set.set("file_format", [self._parse_field()]) 6046 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6047 alter_set.set("file_format", self._parse_wrapped_options()) 6048 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6049 alter_set.set("copy_options", self._parse_wrapped_options()) 6050 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6051 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6052 else: 6053 if self._match_text_seq("SERDE"): 6054 alter_set.set("serde", self._parse_field()) 6055 6056 alter_set.set("expressions", [self._parse_properties()]) 6057 6058 return alter_set 6059 6060 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6061 start = self._prev 6062 6063 if not self._match(TokenType.TABLE): 6064 return self._parse_as_command(start) 6065 6066 
exists = self._parse_exists() 6067 only = self._match_text_seq("ONLY") 6068 this = self._parse_table(schema=True) 6069 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6070 6071 if self._next: 6072 self._advance() 6073 6074 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6075 if parser: 6076 actions = ensure_list(parser(self)) 6077 options = self._parse_csv(self._parse_property) 6078 6079 if not self._curr and actions: 6080 return self.expression( 6081 exp.AlterTable, 6082 this=this, 6083 exists=exists, 6084 actions=actions, 6085 only=only, 6086 options=options, 6087 cluster=cluster, 6088 ) 6089 6090 return self._parse_as_command(start) 6091 6092 def _parse_merge(self) -> exp.Merge: 6093 self._match(TokenType.INTO) 6094 target = self._parse_table() 6095 6096 if target and self._match(TokenType.ALIAS, advance=False): 6097 target.set("alias", self._parse_table_alias()) 6098 6099 self._match(TokenType.USING) 6100 using = self._parse_table() 6101 6102 self._match(TokenType.ON) 6103 on = self._parse_conjunction() 6104 6105 return self.expression( 6106 exp.Merge, 6107 this=target, 6108 using=using, 6109 on=on, 6110 expressions=self._parse_when_matched(), 6111 ) 6112 6113 def _parse_when_matched(self) -> t.List[exp.When]: 6114 whens = [] 6115 6116 while self._match(TokenType.WHEN): 6117 matched = not self._match(TokenType.NOT) 6118 self._match_text_seq("MATCHED") 6119 source = ( 6120 False 6121 if self._match_text_seq("BY", "TARGET") 6122 else self._match_text_seq("BY", "SOURCE") 6123 ) 6124 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6125 6126 self._match(TokenType.THEN) 6127 6128 if self._match(TokenType.INSERT): 6129 _this = self._parse_star() 6130 if _this: 6131 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6132 else: 6133 then = self.expression( 6134 exp.Insert, 6135 this=self._parse_value(), 6136 expression=self._match_text_seq("VALUES") and 
self._parse_value(), 6137 ) 6138 elif self._match(TokenType.UPDATE): 6139 expressions = self._parse_star() 6140 if expressions: 6141 then = self.expression(exp.Update, expressions=expressions) 6142 else: 6143 then = self.expression( 6144 exp.Update, 6145 expressions=self._match(TokenType.SET) 6146 and self._parse_csv(self._parse_equality), 6147 ) 6148 elif self._match(TokenType.DELETE): 6149 then = self.expression(exp.Var, this=self._prev.text) 6150 else: 6151 then = None 6152 6153 whens.append( 6154 self.expression( 6155 exp.When, 6156 matched=matched, 6157 source=source, 6158 condition=condition, 6159 then=then, 6160 ) 6161 ) 6162 return whens 6163 6164 def _parse_show(self) -> t.Optional[exp.Expression]: 6165 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6166 if parser: 6167 return parser(self) 6168 return self._parse_as_command(self._prev) 6169 6170 def _parse_set_item_assignment( 6171 self, kind: t.Optional[str] = None 6172 ) -> t.Optional[exp.Expression]: 6173 index = self._index 6174 6175 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6176 return self._parse_set_transaction(global_=kind == "GLOBAL") 6177 6178 left = self._parse_primary() or self._parse_column() 6179 assignment_delimiter = self._match_texts(("=", "TO")) 6180 6181 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6182 self._retreat(index) 6183 return None 6184 6185 right = self._parse_statement() or self._parse_id_var() 6186 this = self.expression(exp.EQ, this=left, expression=right) 6187 6188 return self.expression(exp.SetItem, this=this, kind=kind) 6189 6190 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6191 self._match_text_seq("TRANSACTION") 6192 characteristics = self._parse_csv( 6193 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6194 ) 6195 return self.expression( 6196 exp.SetItem, 6197 expressions=characteristics, 6198 kind="TRANSACTION", 6199 **{"global": 
global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement via SET_PARSERS, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET item, item, ...; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Unconsumed tokens remain: rewind and keep the statement verbatim.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`.

        `options` maps a leading keyword to its allowed continuation sequences.
        Returns the joined option as a Var, or None (or raises, per
        `raise_unmatched`) when no continuation matches.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched (for/else): an empty tuple in `options`
            # means the bare keyword is acceptable; anything else is an error.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text as a Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement text.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(kind(key value ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6269 self._match(TokenType.R_PAREN) 6270 6271 self._match_r_paren() 6272 6273 return self.expression( 6274 exp.DictProperty, 6275 this=this, 6276 kind=kind.this if kind else None, 6277 settings=settings, 6278 ) 6279 6280 def _parse_dict_range(self, this: str) -> exp.DictRange: 6281 self._match_l_paren() 6282 has_min = self._match_text_seq("MIN") 6283 if has_min: 6284 min = self._parse_var() or self._parse_primary() 6285 self._match_text_seq("MAX") 6286 max = self._parse_var() or self._parse_primary() 6287 else: 6288 max = self._parse_var() or self._parse_primary() 6289 min = exp.Literal.number(0) 6290 self._match_r_paren() 6291 return self.expression(exp.DictRange, this=this, min=min, max=max) 6292 6293 def _parse_comprehension( 6294 self, this: t.Optional[exp.Expression] 6295 ) -> t.Optional[exp.Comprehension]: 6296 index = self._index 6297 expression = self._parse_column() 6298 if not self._match(TokenType.IN): 6299 self._retreat(index - 1) 6300 return None 6301 iterator = self._parse_column() 6302 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6303 return self.expression( 6304 exp.Comprehension, 6305 this=this, 6306 expression=expression, 6307 iterator=iterator, 6308 condition=condition, 6309 ) 6310 6311 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6312 if self._match(TokenType.HEREDOC_STRING): 6313 return self.expression(exp.Heredoc, this=self._prev.text) 6314 6315 if not self._match_text_seq("$"): 6316 return None 6317 6318 tags = ["$"] 6319 tag_text = None 6320 6321 if self._is_connected(): 6322 self._advance() 6323 tags.append(self._prev.text.upper()) 6324 else: 6325 self.raise_error("No closing $ found") 6326 6327 if tags[-1] != "$": 6328 if self._is_connected() and self._match_text_seq("$"): 6329 tag_text = tags[-1] 6330 tags.append("$") 6331 else: 6332 self.raise_error("No closing $ found") 6333 6334 heredoc_start = self._curr 6335 6336 
while self._curr: 6337 if self._match_text_seq(*tags, advance=False): 6338 this = self._find_sql(heredoc_start, self._prev) 6339 self._advance(len(tags)) 6340 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6341 6342 self._advance() 6343 6344 self.raise_error(f"No closing {''.join(tags)} found") 6345 return None 6346 6347 def _find_parser( 6348 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6349 ) -> t.Optional[t.Callable]: 6350 if not self._curr: 6351 return None 6352 6353 index = self._index 6354 this = [] 6355 while True: 6356 # The current token might be multiple words 6357 curr = self._curr.text.upper() 6358 key = curr.split(" ") 6359 this.append(curr) 6360 6361 self._advance() 6362 result, trie = in_trie(trie, key) 6363 if result == TrieResult.FAILED: 6364 break 6365 6366 if result == TrieResult.EXISTS: 6367 subparser = parsers[" ".join(this)] 6368 return subparser 6369 6370 self._retreat(index) 6371 return None 6372 6373 def _match(self, token_type, advance=True, expression=None): 6374 if not self._curr: 6375 return None 6376 6377 if self._curr.token_type == token_type: 6378 if advance: 6379 self._advance() 6380 self._add_comments(expression) 6381 return True 6382 6383 return None 6384 6385 def _match_set(self, types, advance=True): 6386 if not self._curr: 6387 return None 6388 6389 if self._curr.token_type in types: 6390 if advance: 6391 self._advance() 6392 return True 6393 6394 return None 6395 6396 def _match_pair(self, token_type_a, token_type_b, advance=True): 6397 if not self._curr or not self._next: 6398 return None 6399 6400 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6401 if advance: 6402 self._advance(2) 6403 return True 6404 6405 return None 6406 6407 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6408 if not self._match(TokenType.L_PAREN, expression=expression): 6409 self.raise_error("Expecting (") 6410 6411 def _match_r_paren(self, expression: 
t.Optional[exp.Expression] = None) -> None: 6412 if not self._match(TokenType.R_PAREN, expression=expression): 6413 self.raise_error("Expecting )") 6414 6415 def _match_texts(self, texts, advance=True): 6416 if self._curr and self._curr.text.upper() in texts: 6417 if advance: 6418 self._advance() 6419 return True 6420 return None 6421 6422 def _match_text_seq(self, *texts, advance=True): 6423 index = self._index 6424 for text in texts: 6425 if self._curr and self._curr.text.upper() == text: 6426 self._advance() 6427 else: 6428 self._retreat(index) 6429 return None 6430 6431 if not advance: 6432 self._retreat(index) 6433 6434 return True 6435 6436 def _replace_lambda( 6437 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6438 ) -> t.Optional[exp.Expression]: 6439 if not node: 6440 return node 6441 6442 for column in node.find_all(exp.Column): 6443 if column.parts[0].name in lambda_variables: 6444 dot_or_id = column.to_dot() if column.table else column.this 6445 parent = column.parent 6446 6447 while isinstance(parent, exp.Dot): 6448 if not isinstance(parent.parent, exp.Dot): 6449 parent.replace(dot_or_id) 6450 break 6451 parent = parent.parent 6452 else: 6453 if column is node: 6454 node = dot_or_id 6455 else: 6456 column.replace(dot_or_id) 6457 return node 6458 6459 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6460 start = self._prev 6461 6462 # Not to be confused with TRUNCATE(number, decimals) function call 6463 if self._match(TokenType.L_PAREN): 6464 self._retreat(self._index - 2) 6465 return self._parse_function() 6466 6467 # Clickhouse supports TRUNCATE DATABASE as well 6468 is_database = self._match(TokenType.DATABASE) 6469 6470 self._match(TokenType.TABLE) 6471 6472 exists = self._parse_exists(not_=False) 6473 6474 expressions = self._parse_csv( 6475 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6476 ) 6477 6478 cluster = self._parse_on_property() if self._match(TokenType.ON) 
else None 6479 6480 if self._match_text_seq("RESTART", "IDENTITY"): 6481 identity = "RESTART" 6482 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6483 identity = "CONTINUE" 6484 else: 6485 identity = None 6486 6487 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6488 option = self._prev.text 6489 else: 6490 option = None 6491 6492 partition = self._parse_partition() 6493 6494 # Fallback case 6495 if self._curr: 6496 return self._parse_as_command(start) 6497 6498 return self.expression( 6499 exp.TruncateTable, 6500 expressions=expressions, 6501 is_database=is_database, 6502 exists=exists, 6503 cluster=cluster, 6504 identity=identity, 6505 option=option, 6506 partition=partition, 6507 ) 6508 6509 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6510 this = self._parse_ordered(self._parse_opclass) 6511 6512 if not self._match(TokenType.WITH): 6513 return this 6514 6515 op = self._parse_var(any_token=True) 6516 6517 return self.expression(exp.WithOperator, this=this, op=op) 6518 6519 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6520 opts = [] 6521 self._match(TokenType.EQ) 6522 self._match(TokenType.L_PAREN) 6523 while self._curr and not self._match(TokenType.R_PAREN): 6524 opts.append(self._parse_conjunction()) 6525 self._match(TokenType.COMMA) 6526 return opts 6527 6528 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6529 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6530 6531 options = [] 6532 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6533 option = self._parse_unquoted_field() 6534 value = None 6535 6536 # Some options are defined as functions with the values as params 6537 if not isinstance(option, exp.Func): 6538 prev = self._prev.text.upper() 6539 # Different dialects might separate options and values by white space, "=" and "AS" 6540 self._match(TokenType.EQ) 6541 self._match(TokenType.ALIAS) 6542 6543 if prev == "FILE_FORMAT" 
and self._match(TokenType.L_PAREN): 6544 # Snowflake FILE_FORMAT case 6545 value = self._parse_wrapped_options() 6546 else: 6547 value = self._parse_unquoted_field() 6548 6549 param = self.expression(exp.CopyParameter, this=option, expression=value) 6550 options.append(param) 6551 6552 if sep: 6553 self._match(sep) 6554 6555 return options 6556 6557 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6558 expr = self.expression(exp.Credentials) 6559 6560 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6561 expr.set("storage", self._parse_conjunction()) 6562 if self._match_text_seq("CREDENTIALS"): 6563 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6564 creds = ( 6565 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6566 ) 6567 expr.set("credentials", creds) 6568 if self._match_text_seq("ENCRYPTION"): 6569 expr.set("encryption", self._parse_wrapped_options()) 6570 if self._match_text_seq("IAM_ROLE"): 6571 expr.set("iam_role", self._parse_field()) 6572 if self._match_text_seq("REGION"): 6573 expr.set("region", self._parse_field()) 6574 6575 return expr 6576 6577 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6578 return self._parse_field() 6579 6580 def _parse_copy(self) -> exp.Copy | exp.Command: 6581 start = self._prev 6582 6583 self._match(TokenType.INTO) 6584 6585 this = ( 6586 self._parse_conjunction() 6587 if self._match(TokenType.L_PAREN, advance=False) 6588 else self._parse_table(schema=True) 6589 ) 6590 6591 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6592 6593 files = self._parse_csv(self._parse_file_location) 6594 credentials = self._parse_credentials() 6595 6596 self._match_text_seq("WITH") 6597 6598 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6599 6600 # Fallback case 6601 if self._curr: 6602 return self._parse_as_command(start) 6603 6604 return self.expression( 6605 exp.Copy, 6606 this=this, 6607 
kind=kind, 6608 credentials=credentials, 6609 files=files, 6610 params=params, 6611 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The number of characters of surrounding query text to include when displaying an error message. Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser's error-handling settings and target dialect.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown in
                error messages.
            max_errors: Maximum number of error messages included in a raised
                ParseError (relevant for ErrorLevel.RAISE).
            dialect: The SQL dialect to parse for; resolved via Dialect.get_or_raise.
        """
        # Local import — presumably to avoid a circular import at module load; TODO confirm.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1221 def parse( 1222 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1223 ) -> t.List[t.Optional[exp.Expression]]: 1224 """ 1225 Parses a list of tokens and returns a list of syntax trees, one tree 1226 per parsed SQL statement. 1227 1228 Args: 1229 raw_tokens: The list of tokens. 1230 sql: The original SQL string, used to produce helpful debug messages. 1231 1232 Returns: 1233 The list of the produced syntax trees. 1234 """ 1235 return self._parse( 1236 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1237 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1239 def parse_into( 1240 self, 1241 expression_types: exp.IntoType, 1242 raw_tokens: t.List[Token], 1243 sql: t.Optional[str] = None, 1244 ) -> t.List[t.Optional[exp.Expression]]: 1245 """ 1246 Parses a list of tokens into a given Expression type. If a collection of Expression 1247 types is given instead, this method will try to parse the token list into each one 1248 of them, stopping at the first for which the parsing succeeds. 1249 1250 Args: 1251 expression_types: The expression type(s) to try and parse the token list into. 1252 raw_tokens: The list of tokens. 1253 sql: The original SQL string, used to produce helpful debug messages. 1254 1255 Returns: 1256 The target Expression. 1257 """ 1258 errors = [] 1259 for expression_type in ensure_list(expression_types): 1260 parser = self.EXPRESSION_PARSERS.get(expression_type) 1261 if not parser: 1262 raise TypeError(f"No parser registered for {expression_type}") 1263 1264 try: 1265 return self._parse(parser, raw_tokens, sql) 1266 except ParseError as e: 1267 e.errors[0]["into_expression"] = expression_type 1268 errors.append(e) 1269 1270 raise ParseError( 1271 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1272 errors=merge_errors(errors), 1273 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1313 def check_errors(self) -> None: 1314 """Logs or raises any found errors, depending on the chosen error level setting.""" 1315 if self.error_level == ErrorLevel.WARN: 1316 for error in self.errors: 1317 logger.error(str(error)) 1318 elif self.error_level == ErrorLevel.RAISE and self.errors: 1319 raise ParseError( 1320 concat_messages(self.errors, self.max_errors), 1321 errors=merge_errors(self.errors), 1322 )
Logs or raises any found errors, depending on the chosen error level setting.
1324 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1325 """ 1326 Appends an error in the list of recorded errors or raises it, depending on the chosen 1327 error level setting. 1328 """ 1329 token = token or self._curr or self._prev or Token.string("") 1330 start = token.start 1331 end = token.end + 1 1332 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1333 highlight = self.sql[start:end] 1334 end_context = self.sql[end : end + self.error_message_context] 1335 1336 error = ParseError.new( 1337 f"{message}. Line {token.line}, Col: {token.col}.\n" 1338 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1339 description=message, 1340 line=token.line, 1341 col=token.col, 1342 start_context=start_context, 1343 highlight=highlight, 1344 end_context=end_context, 1345 ) 1346 1347 if self.error_level == ErrorLevel.IMMEDIATE: 1348 raise error 1349 1350 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1352 def expression( 1353 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1354 ) -> E: 1355 """ 1356 Creates a new, validated Expression. 1357 1358 Args: 1359 exp_class: The expression class to instantiate. 1360 comments: An optional list of comments to attach to the expression. 1361 kwargs: The arguments to set for the expression along with their respective values. 1362 1363 Returns: 1364 The target expression. 1365 """ 1366 instance = exp_class(**kwargs) 1367 instance.add_comments(comments) if comments else self._add_comments(instance) 1368 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1375 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1376 """ 1377 Validates an Expression, making sure that all its mandatory arguments are set. 1378 1379 Args: 1380 expression: The expression to validate. 1381 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1382 1383 Returns: 1384 The validated expression. 1385 """ 1386 if self.error_level != ErrorLevel.IGNORE: 1387 for error_message in expression.error_messages(args): 1388 self.raise_error(error_message) 1389 1390 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.