# sqlglot.parser — parser module (extraction header preserved as a comment)
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap (for `MAP(*)`) or a VarMap from an alternating
    key/value argument list: args[0], args[2], ... are keys and
    args[1], args[3], ... are the corresponding values."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; note the operands are swapped (args[1] is the
    subject, args[0] the pattern). A third argument becomes an ESCAPE wrapper."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that parses `this <op> <bitwise expr>` into
    `expr_type`, also consuming a trailing ESCAPE clause if present."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass for Parser: precomputes the SHOW/SET keyword tries from each
    subclass's SHOW_PARSERS / SET_PARSERS tables at class-creation time."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Multi-word keys (e.g. "GLOBAL STATUS") are split so lookups can walk
        # the trie one word at a time.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # ------------------------------------------------------------------
    # Function-name -> builder tables
    # ------------------------------------------------------------------

    # Maps SQL function names to callables that build the corresponding
    # expression node from a parsed argument list. Seeded with every function
    # registered in exp.ALL_FUNCTIONS, then overridden for special cases.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB mirrors LIKE: subject and pattern arrive swapped.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT then take the first 10 chars, i.e. the YYYY-MM-DD prefix.
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, keyed by their token.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # ------------------------------------------------------------------
    # Token classification sets
    # ------------------------------------------------------------------

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT, TokenType.BOOLEAN, TokenType.TINYINT, TokenType.UTINYINT,
        TokenType.SMALLINT, TokenType.USMALLINT, TokenType.INT, TokenType.UINT,
        TokenType.BIGINT, TokenType.UBIGINT, TokenType.INT128, TokenType.UINT128,
        TokenType.INT256, TokenType.UINT256, TokenType.MEDIUMINT, TokenType.FIXEDSTRING,
        TokenType.FLOAT, TokenType.DOUBLE, TokenType.CHAR, TokenType.NCHAR,
        TokenType.VARCHAR, TokenType.NVARCHAR, TokenType.TEXT, TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT, TokenType.MEDIUMBLOB, TokenType.LONGBLOB, TokenType.BINARY,
        TokenType.VARBINARY, TokenType.JSON, TokenType.JSONB, TokenType.INTERVAL,
        TokenType.TIME, TokenType.TIMETZ, TokenType.TIMESTAMP, TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ, TokenType.DATETIME, TokenType.DATETIME64, TokenType.DATE,
        TokenType.INT4RANGE, TokenType.INT4MULTIRANGE, TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE, TokenType.NUMRANGE, TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE, TokenType.TSMULTIRANGE, TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE, TokenType.DATERANGE, TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL, TokenType.BIGDECIMAL, TokenType.UUID, TokenType.GEOGRAPHY,
        TokenType.GEOMETRY, TokenType.HLLSKETCH, TokenType.HSTORE, TokenType.PSEUDO_TYPE,
        TokenType.SUPER, TokenType.SERIAL, TokenType.SMALLSERIAL, TokenType.BIGSERIAL,
        TokenType.XML, TokenType.YEAR, TokenType.UNIQUEIDENTIFIER, TokenType.USERDEFINED,
        TokenType.MONEY, TokenType.SMALLMONEY, TokenType.ROWVERSION, TokenType.IMAGE,
        TokenType.VARIANT, TokenType.OBJECT, TokenType.INET, TokenType.IPADDRESS,
        TokenType.IPPREFIX, TokenType.UNKNOWN, TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    # Quantified subquery predicates (e.g. `= ANY (SELECT ...)`).
    # Note SOME is an alias of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # All object kinds that can follow CREATE.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR, TokenType.ANTI, TokenType.APPLY, TokenType.ASC,
        TokenType.AUTO_INCREMENT, TokenType.BEGIN, TokenType.CACHE, TokenType.CASE,
        TokenType.COLLATE, TokenType.COMMAND, TokenType.COMMENT, TokenType.COMMIT,
        TokenType.CONSTRAINT, TokenType.DEFAULT, TokenType.DELETE, TokenType.DESC,
        TokenType.DESCRIBE, TokenType.DICTIONARY, TokenType.DIV, TokenType.END,
        TokenType.EXECUTE, TokenType.ESCAPE, TokenType.FALSE, TokenType.FIRST,
        TokenType.FILTER, TokenType.FORMAT, TokenType.FULL, TokenType.IS,
        TokenType.ISNULL, TokenType.INTERVAL, TokenType.KEEP, TokenType.LEFT,
        TokenType.LOAD, TokenType.MERGE, TokenType.NATURAL, TokenType.NEXT,
        TokenType.OFFSET, TokenType.ORDINALITY, TokenType.OVERWRITE, TokenType.PARTITION,
        TokenType.PERCENT, TokenType.PIVOT, TokenType.PRAGMA, TokenType.RANGE,
        TokenType.REFERENCES, TokenType.RIGHT, TokenType.ROW, TokenType.ROWS,
        TokenType.SEMI, TokenType.SET, TokenType.SETTINGS, TokenType.SHOW,
        TokenType.TEMPORARY, TokenType.TOP, TokenType.TRUE, TokenType.UNIQUE,
        TokenType.UNPIVOT, TokenType.UPDATE, TokenType.VOLATILE, TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END would be ambiguous inside a CASE expression, so it can't name an interval.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens usable as a table alias: identifiers minus keywords that would be
    # ambiguous right after a table expression (join sides, OFFSET, WINDOW, ...).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that can appear where a function name is expected.
    FUNC_TOKENS = {
        TokenType.COMMAND, TokenType.CURRENT_DATE, TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP, TokenType.CURRENT_TIME, TokenType.CURRENT_USER,
        TokenType.FILTER, TokenType.FIRST, TokenType.FORMAT, TokenType.GLOB,
        TokenType.IDENTIFIER, TokenType.INDEX, TokenType.ISNULL, TokenType.ILIKE,
        TokenType.INSERT, TokenType.LIKE, TokenType.MERGE, TokenType.OFFSET,
        TokenType.PRIMARY_KEY, TokenType.RANGE, TokenType.REPLACE, TokenType.RLIKE,
        TokenType.ROW, TokenType.UNNEST, TokenType.VAR, TokenType.LEFT,
        TokenType.RIGHT, TokenType.DATE, TokenType.DATETIME, TokenType.TABLE,
        TokenType.TIMESTAMP, TokenType.TIMESTAMPTZ, TokenType.WINDOW, TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # ------------------------------------------------------------------
    # Operator precedence tables (token -> expression class)
    # ------------------------------------------------------------------

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # ------------------------------------------------------------------
    # Dispatch tables: token/keyword -> parser callback
    # ------------------------------------------------------------------

    # Lambda-like constructs: `args -> body` and `name => value` kwargs.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that bind to a column: `::` casts, JSON/JSONB extraction arrows.
    # DOT is handled specially (hence None).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Used by parse_into: maps a target Expression type to the parse method
    # that produces it.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch, keyed by the statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary-expression dispatch; callbacks receive the consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # `:name` / `:1` style placeholders; yields None if no name/number follows.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/membership predicates: BETWEEN, IN, IS, LIKE-family, etc.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> parser callback (keys may be multi-word).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser callback.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # `ON UPDATE <fn>` constraint, otherwise a plain ON property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions that may be written without parentheses around their args.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with bespoke argument grammars (e.g. CAST(x AS type)).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # SELECT modifier clauses; each callback returns (arg_name, parsed_node).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    # ------------------------------------------------------------------
    # Dialect behavior flags (overridden by dialect-specific subclasses)
    # ------------------------------------------------------------------

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type we were attempting, for debugging.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the token stream on semicolons and run `parse_method` once per
        statement chunk, collecting one (possibly None) expression per chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The offending span is underlined with ANSI escape codes.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Either attach the explicit comments or fall back to any comments
        # pending from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attach (and consume) comments from the previous token, if any."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanning the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Move the cursor forward (or backward, for negative `times`) and
        refresh the _curr/_next/_prev/_prev_comments token views."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewind (or jump) the cursor to an absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fall back to an opaque Command node: keyword plus the rest as a string."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        # NOTE(review): this definition continues past the end of this chunk.
        if not
kind: 1105 return self._parse_as_command(start) 1106 1107 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1108 this = self._parse_user_defined_function(kind=kind.token_type) 1109 elif kind.token_type == TokenType.TABLE: 1110 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1111 elif kind.token_type == TokenType.COLUMN: 1112 this = self._parse_column() 1113 else: 1114 this = self._parse_id_var() 1115 1116 self._match(TokenType.IS) 1117 1118 return self.expression( 1119 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1120 ) 1121 1122 def _parse_to_table( 1123 self, 1124 ) -> exp.ToTableProperty: 1125 table = self._parse_table_parts(schema=True) 1126 return self.expression(exp.ToTableProperty, this=table) 1127 1128 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1129 def _parse_ttl(self) -> exp.Expression: 1130 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1131 this = self._parse_bitwise() 1132 1133 if self._match_text_seq("DELETE"): 1134 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1135 if self._match_text_seq("RECOMPRESS"): 1136 return self.expression( 1137 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1138 ) 1139 if self._match_text_seq("TO", "DISK"): 1140 return self.expression( 1141 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1142 ) 1143 if self._match_text_seq("TO", "VOLUME"): 1144 return self.expression( 1145 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1146 ) 1147 1148 return this 1149 1150 expressions = self._parse_csv(_parse_ttl_action) 1151 where = self._parse_where() 1152 group = self._parse_group() 1153 1154 aggregates = None 1155 if group and self._match(TokenType.SET): 1156 aggregates = self._parse_csv(self._parse_set_item) 1157 1158 return self.expression( 1159 exp.MergeTreeTTL, 1160 expressions=expressions, 1161 where=where, 
1162 group=group, 1163 aggregates=aggregates, 1164 ) 1165 1166 def _parse_statement(self) -> t.Optional[exp.Expression]: 1167 if self._curr is None: 1168 return None 1169 1170 if self._match_set(self.STATEMENT_PARSERS): 1171 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1172 1173 if self._match_set(Tokenizer.COMMANDS): 1174 return self._parse_command() 1175 1176 expression = self._parse_expression() 1177 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1178 return self._parse_query_modifiers(expression) 1179 1180 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1181 start = self._prev 1182 temporary = self._match(TokenType.TEMPORARY) 1183 materialized = self._match_text_seq("MATERIALIZED") 1184 1185 kind = self._match_set(self.CREATABLES) and self._prev.text 1186 if not kind: 1187 return self._parse_as_command(start) 1188 1189 return self.expression( 1190 exp.Drop, 1191 comments=start.comments, 1192 exists=exists or self._parse_exists(), 1193 this=self._parse_table(schema=True), 1194 kind=kind, 1195 temporary=temporary, 1196 materialized=materialized, 1197 cascade=self._match_text_seq("CASCADE"), 1198 constraints=self._match_text_seq("CONSTRAINTS"), 1199 purge=self._match_text_seq("PURGE"), 1200 ) 1201 1202 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1203 return ( 1204 self._match_text_seq("IF") 1205 and (not not_ or self._match(TokenType.NOT)) 1206 and self._match(TokenType.EXISTS) 1207 ) 1208 1209 def _parse_create(self) -> exp.Create | exp.Command: 1210 # Note: this can't be None because we've matched a statement parser 1211 start = self._prev 1212 comments = self._prev_comments 1213 1214 replace = start.text.upper() == "REPLACE" or self._match_pair( 1215 TokenType.OR, TokenType.REPLACE 1216 ) 1217 unique = self._match(TokenType.UNIQUE) 1218 1219 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1220 self._advance() 1221 1222 properties = 
None 1223 create_token = self._match_set(self.CREATABLES) and self._prev 1224 1225 if not create_token: 1226 # exp.Properties.Location.POST_CREATE 1227 properties = self._parse_properties() 1228 create_token = self._match_set(self.CREATABLES) and self._prev 1229 1230 if not properties or not create_token: 1231 return self._parse_as_command(start) 1232 1233 exists = self._parse_exists(not_=True) 1234 this = None 1235 expression: t.Optional[exp.Expression] = None 1236 indexes = None 1237 no_schema_binding = None 1238 begin = None 1239 clone = None 1240 1241 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1242 nonlocal properties 1243 if properties and temp_props: 1244 properties.expressions.extend(temp_props.expressions) 1245 elif temp_props: 1246 properties = temp_props 1247 1248 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1249 this = self._parse_user_defined_function(kind=create_token.token_type) 1250 1251 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1252 extend_props(self._parse_properties()) 1253 1254 self._match(TokenType.ALIAS) 1255 1256 if self._match(TokenType.COMMAND): 1257 expression = self._parse_as_command(self._prev) 1258 else: 1259 begin = self._match(TokenType.BEGIN) 1260 return_ = self._match_text_seq("RETURN") 1261 expression = self._parse_statement() 1262 1263 if return_: 1264 expression = self.expression(exp.Return, this=expression) 1265 elif create_token.token_type == TokenType.INDEX: 1266 this = self._parse_index(index=self._parse_id_var()) 1267 elif create_token.token_type in self.DB_CREATABLES: 1268 table_parts = self._parse_table_parts(schema=True) 1269 1270 # exp.Properties.Location.POST_NAME 1271 self._match(TokenType.COMMA) 1272 extend_props(self._parse_properties(before=True)) 1273 1274 this = self._parse_schema(this=table_parts) 1275 1276 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1277 extend_props(self._parse_properties()) 1278 1279 
self._match(TokenType.ALIAS) 1280 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1281 # exp.Properties.Location.POST_ALIAS 1282 extend_props(self._parse_properties()) 1283 1284 expression = self._parse_ddl_select() 1285 1286 if create_token.token_type == TokenType.TABLE: 1287 # exp.Properties.Location.POST_EXPRESSION 1288 extend_props(self._parse_properties()) 1289 1290 indexes = [] 1291 while True: 1292 index = self._parse_index() 1293 1294 # exp.Properties.Location.POST_INDEX 1295 extend_props(self._parse_properties()) 1296 1297 if not index: 1298 break 1299 else: 1300 self._match(TokenType.COMMA) 1301 indexes.append(index) 1302 elif create_token.token_type == TokenType.VIEW: 1303 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1304 no_schema_binding = True 1305 1306 if self._match_text_seq("CLONE"): 1307 clone = self._parse_table(schema=True) 1308 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1309 clone_kind = ( 1310 self._match(TokenType.L_PAREN) 1311 and self._match_texts(self.CLONE_KINDS) 1312 and self._prev.text.upper() 1313 ) 1314 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1315 self._match(TokenType.R_PAREN) 1316 clone = self.expression( 1317 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1318 ) 1319 1320 return self.expression( 1321 exp.Create, 1322 comments=comments, 1323 this=this, 1324 kind=create_token.text, 1325 replace=replace, 1326 unique=unique, 1327 expression=expression, 1328 exists=exists, 1329 properties=properties, 1330 indexes=indexes, 1331 no_schema_binding=no_schema_binding, 1332 begin=begin, 1333 clone=clone, 1334 ) 1335 1336 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1337 # only used for teradata currently 1338 self._match(TokenType.COMMA) 1339 1340 kwargs = { 1341 "no": self._match_text_seq("NO"), 1342 "dual": self._match_text_seq("DUAL"), 1343 "before": self._match_text_seq("BEFORE"), 1344 "default": 
self._match_text_seq("DEFAULT"), 1345 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1346 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1347 "after": self._match_text_seq("AFTER"), 1348 "minimum": self._match_texts(("MIN", "MINIMUM")), 1349 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1350 } 1351 1352 if self._match_texts(self.PROPERTY_PARSERS): 1353 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1354 try: 1355 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1356 except TypeError: 1357 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1358 1359 return None 1360 1361 def _parse_property(self) -> t.Optional[exp.Expression]: 1362 if self._match_texts(self.PROPERTY_PARSERS): 1363 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1364 1365 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1366 return self._parse_character_set(default=True) 1367 1368 if self._match_text_seq("COMPOUND", "SORTKEY"): 1369 return self._parse_sortkey(compound=True) 1370 1371 if self._match_text_seq("SQL", "SECURITY"): 1372 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1373 1374 assignment = self._match_pair( 1375 TokenType.VAR, TokenType.EQ, advance=False 1376 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1377 1378 if assignment: 1379 key = self._parse_var_or_string() 1380 self._match(TokenType.EQ) 1381 return self.expression( 1382 exp.Property, 1383 this=key, 1384 value=self._parse_column() or self._parse_var(any_token=True), 1385 ) 1386 1387 return None 1388 1389 def _parse_stored(self) -> exp.FileFormatProperty: 1390 self._match(TokenType.ALIAS) 1391 1392 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1393 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1394 1395 return self.expression( 1396 exp.FileFormatProperty, 1397 this=self.expression( 1398 
exp.InputOutputFormat, input_format=input_format, output_format=output_format 1399 ) 1400 if input_format or output_format 1401 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1402 ) 1403 1404 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1405 self._match(TokenType.EQ) 1406 self._match(TokenType.ALIAS) 1407 return self.expression(exp_class, this=self._parse_field()) 1408 1409 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1410 properties = [] 1411 while True: 1412 if before: 1413 prop = self._parse_property_before() 1414 else: 1415 prop = self._parse_property() 1416 1417 if not prop: 1418 break 1419 for p in ensure_list(prop): 1420 properties.append(p) 1421 1422 if properties: 1423 return self.expression(exp.Properties, expressions=properties) 1424 1425 return None 1426 1427 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1428 return self.expression( 1429 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1430 ) 1431 1432 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1433 if self._index >= 2: 1434 pre_volatile_token = self._tokens[self._index - 2] 1435 else: 1436 pre_volatile_token = None 1437 1438 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1439 return exp.VolatileProperty() 1440 1441 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1442 1443 def _parse_with_property( 1444 self, 1445 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1446 if self._match(TokenType.L_PAREN, advance=False): 1447 return self._parse_wrapped_csv(self._parse_property) 1448 1449 if self._match_text_seq("JOURNAL"): 1450 return self._parse_withjournaltable() 1451 1452 if self._match_text_seq("DATA"): 1453 return self._parse_withdata(no=False) 1454 elif self._match_text_seq("NO", "DATA"): 1455 return self._parse_withdata(no=True) 
1456 1457 if not self._next: 1458 return None 1459 1460 return self._parse_withisolatedloading() 1461 1462 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1463 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1464 self._match(TokenType.EQ) 1465 1466 user = self._parse_id_var() 1467 self._match(TokenType.PARAMETER) 1468 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1469 1470 if not user or not host: 1471 return None 1472 1473 return exp.DefinerProperty(this=f"{user}@{host}") 1474 1475 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1476 self._match(TokenType.TABLE) 1477 self._match(TokenType.EQ) 1478 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1479 1480 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1481 return self.expression(exp.LogProperty, no=no) 1482 1483 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1484 return self.expression(exp.JournalProperty, **kwargs) 1485 1486 def _parse_checksum(self) -> exp.ChecksumProperty: 1487 self._match(TokenType.EQ) 1488 1489 on = None 1490 if self._match(TokenType.ON): 1491 on = True 1492 elif self._match_text_seq("OFF"): 1493 on = False 1494 1495 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1496 1497 def _parse_cluster(self) -> exp.Cluster: 1498 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1499 1500 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1501 self._match_text_seq("BY") 1502 1503 self._match_l_paren() 1504 expressions = self._parse_csv(self._parse_column) 1505 self._match_r_paren() 1506 1507 if self._match_text_seq("SORTED", "BY"): 1508 self._match_l_paren() 1509 sorted_by = self._parse_csv(self._parse_ordered) 1510 self._match_r_paren() 1511 else: 1512 sorted_by = None 1513 1514 self._match(TokenType.INTO) 1515 buckets = self._parse_number() 1516 self._match_text_seq("BUCKETS") 1517 
1518 return self.expression( 1519 exp.ClusteredByProperty, 1520 expressions=expressions, 1521 sorted_by=sorted_by, 1522 buckets=buckets, 1523 ) 1524 1525 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1526 if not self._match_text_seq("GRANTS"): 1527 self._retreat(self._index - 1) 1528 return None 1529 1530 return self.expression(exp.CopyGrantsProperty) 1531 1532 def _parse_freespace(self) -> exp.FreespaceProperty: 1533 self._match(TokenType.EQ) 1534 return self.expression( 1535 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1536 ) 1537 1538 def _parse_mergeblockratio( 1539 self, no: bool = False, default: bool = False 1540 ) -> exp.MergeBlockRatioProperty: 1541 if self._match(TokenType.EQ): 1542 return self.expression( 1543 exp.MergeBlockRatioProperty, 1544 this=self._parse_number(), 1545 percent=self._match(TokenType.PERCENT), 1546 ) 1547 1548 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1549 1550 def _parse_datablocksize( 1551 self, 1552 default: t.Optional[bool] = None, 1553 minimum: t.Optional[bool] = None, 1554 maximum: t.Optional[bool] = None, 1555 ) -> exp.DataBlocksizeProperty: 1556 self._match(TokenType.EQ) 1557 size = self._parse_number() 1558 1559 units = None 1560 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1561 units = self._prev.text 1562 1563 return self.expression( 1564 exp.DataBlocksizeProperty, 1565 size=size, 1566 units=units, 1567 default=default, 1568 minimum=minimum, 1569 maximum=maximum, 1570 ) 1571 1572 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1573 self._match(TokenType.EQ) 1574 always = self._match_text_seq("ALWAYS") 1575 manual = self._match_text_seq("MANUAL") 1576 never = self._match_text_seq("NEVER") 1577 default = self._match_text_seq("DEFAULT") 1578 1579 autotemp = None 1580 if self._match_text_seq("AUTOTEMP"): 1581 autotemp = self._parse_schema() 1582 1583 return self.expression( 1584 
exp.BlockCompressionProperty, 1585 always=always, 1586 manual=manual, 1587 never=never, 1588 default=default, 1589 autotemp=autotemp, 1590 ) 1591 1592 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1593 no = self._match_text_seq("NO") 1594 concurrent = self._match_text_seq("CONCURRENT") 1595 self._match_text_seq("ISOLATED", "LOADING") 1596 for_all = self._match_text_seq("FOR", "ALL") 1597 for_insert = self._match_text_seq("FOR", "INSERT") 1598 for_none = self._match_text_seq("FOR", "NONE") 1599 return self.expression( 1600 exp.IsolatedLoadingProperty, 1601 no=no, 1602 concurrent=concurrent, 1603 for_all=for_all, 1604 for_insert=for_insert, 1605 for_none=for_none, 1606 ) 1607 1608 def _parse_locking(self) -> exp.LockingProperty: 1609 if self._match(TokenType.TABLE): 1610 kind = "TABLE" 1611 elif self._match(TokenType.VIEW): 1612 kind = "VIEW" 1613 elif self._match(TokenType.ROW): 1614 kind = "ROW" 1615 elif self._match_text_seq("DATABASE"): 1616 kind = "DATABASE" 1617 else: 1618 kind = None 1619 1620 if kind in ("DATABASE", "TABLE", "VIEW"): 1621 this = self._parse_table_parts() 1622 else: 1623 this = None 1624 1625 if self._match(TokenType.FOR): 1626 for_or_in = "FOR" 1627 elif self._match(TokenType.IN): 1628 for_or_in = "IN" 1629 else: 1630 for_or_in = None 1631 1632 if self._match_text_seq("ACCESS"): 1633 lock_type = "ACCESS" 1634 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1635 lock_type = "EXCLUSIVE" 1636 elif self._match_text_seq("SHARE"): 1637 lock_type = "SHARE" 1638 elif self._match_text_seq("READ"): 1639 lock_type = "READ" 1640 elif self._match_text_seq("WRITE"): 1641 lock_type = "WRITE" 1642 elif self._match_text_seq("CHECKSUM"): 1643 lock_type = "CHECKSUM" 1644 else: 1645 lock_type = None 1646 1647 override = self._match_text_seq("OVERRIDE") 1648 1649 return self.expression( 1650 exp.LockingProperty, 1651 this=this, 1652 kind=kind, 1653 for_or_in=for_or_in, 1654 lock_type=lock_type, 1655 override=override, 1656 ) 1657 1658 def 
_parse_partition_by(self) -> t.List[exp.Expression]: 1659 if self._match(TokenType.PARTITION_BY): 1660 return self._parse_csv(self._parse_conjunction) 1661 return [] 1662 1663 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1664 self._match(TokenType.EQ) 1665 return self.expression( 1666 exp.PartitionedByProperty, 1667 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1668 ) 1669 1670 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1671 if self._match_text_seq("AND", "STATISTICS"): 1672 statistics = True 1673 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1674 statistics = False 1675 else: 1676 statistics = None 1677 1678 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1679 1680 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1681 if self._match_text_seq("PRIMARY", "INDEX"): 1682 return exp.NoPrimaryIndexProperty() 1683 return None 1684 1685 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1686 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1687 return exp.OnCommitProperty() 1688 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1689 return exp.OnCommitProperty(delete=True) 1690 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1691 1692 def _parse_distkey(self) -> exp.DistKeyProperty: 1693 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1694 1695 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1696 table = self._parse_table(schema=True) 1697 1698 options = [] 1699 while self._match_texts(("INCLUDING", "EXCLUDING")): 1700 this = self._prev.text.upper() 1701 1702 id_var = self._parse_id_var() 1703 if not id_var: 1704 return None 1705 1706 options.append( 1707 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1708 ) 1709 1710 return self.expression(exp.LikeProperty, this=table, expressions=options) 1711 1712 def 
_parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1713 return self.expression( 1714 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1715 ) 1716 1717 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1718 self._match(TokenType.EQ) 1719 return self.expression( 1720 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1721 ) 1722 1723 def _parse_returns(self) -> exp.ReturnsProperty: 1724 value: t.Optional[exp.Expression] 1725 is_table = self._match(TokenType.TABLE) 1726 1727 if is_table: 1728 if self._match(TokenType.LT): 1729 value = self.expression( 1730 exp.Schema, 1731 this="TABLE", 1732 expressions=self._parse_csv(self._parse_struct_types), 1733 ) 1734 if not self._match(TokenType.GT): 1735 self.raise_error("Expecting >") 1736 else: 1737 value = self._parse_schema(exp.var("TABLE")) 1738 else: 1739 value = self._parse_types() 1740 1741 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1742 1743 def _parse_describe(self) -> exp.Describe: 1744 kind = self._match_set(self.CREATABLES) and self._prev.text 1745 this = self._parse_table() 1746 return self.expression(exp.Describe, this=this, kind=kind) 1747 1748 def _parse_insert(self) -> exp.Insert: 1749 comments = ensure_list(self._prev_comments) 1750 overwrite = self._match(TokenType.OVERWRITE) 1751 ignore = self._match(TokenType.IGNORE) 1752 local = self._match_text_seq("LOCAL") 1753 alternative = None 1754 1755 if self._match_text_seq("DIRECTORY"): 1756 this: t.Optional[exp.Expression] = self.expression( 1757 exp.Directory, 1758 this=self._parse_var_or_string(), 1759 local=local, 1760 row_format=self._parse_row_format(match_row=True), 1761 ) 1762 else: 1763 if self._match(TokenType.OR): 1764 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1765 1766 self._match(TokenType.INTO) 1767 comments += ensure_list(self._prev_comments) 1768 self._match(TokenType.TABLE) 1769 
this = self._parse_table(schema=True) 1770 1771 returning = self._parse_returning() 1772 1773 return self.expression( 1774 exp.Insert, 1775 comments=comments, 1776 this=this, 1777 by_name=self._match_text_seq("BY", "NAME"), 1778 exists=self._parse_exists(), 1779 partition=self._parse_partition(), 1780 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1781 and self._parse_conjunction(), 1782 expression=self._parse_ddl_select(), 1783 conflict=self._parse_on_conflict(), 1784 returning=returning or self._parse_returning(), 1785 overwrite=overwrite, 1786 alternative=alternative, 1787 ignore=ignore, 1788 ) 1789 1790 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1791 conflict = self._match_text_seq("ON", "CONFLICT") 1792 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1793 1794 if not conflict and not duplicate: 1795 return None 1796 1797 nothing = None 1798 expressions = None 1799 key = None 1800 constraint = None 1801 1802 if conflict: 1803 if self._match_text_seq("ON", "CONSTRAINT"): 1804 constraint = self._parse_id_var() 1805 else: 1806 key = self._parse_csv(self._parse_value) 1807 1808 self._match_text_seq("DO") 1809 if self._match_text_seq("NOTHING"): 1810 nothing = True 1811 else: 1812 self._match(TokenType.UPDATE) 1813 self._match(TokenType.SET) 1814 expressions = self._parse_csv(self._parse_equality) 1815 1816 return self.expression( 1817 exp.OnConflict, 1818 duplicate=duplicate, 1819 expressions=expressions, 1820 nothing=nothing, 1821 key=key, 1822 constraint=constraint, 1823 ) 1824 1825 def _parse_returning(self) -> t.Optional[exp.Returning]: 1826 if not self._match(TokenType.RETURNING): 1827 return None 1828 return self.expression( 1829 exp.Returning, 1830 expressions=self._parse_csv(self._parse_expression), 1831 into=self._match(TokenType.INTO) and self._parse_table_part(), 1832 ) 1833 1834 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1835 if not 
self._match(TokenType.FORMAT): 1836 return None 1837 return self._parse_row_format() 1838 1839 def _parse_row_format( 1840 self, match_row: bool = False 1841 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1842 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1843 return None 1844 1845 if self._match_text_seq("SERDE"): 1846 this = self._parse_string() 1847 1848 serde_properties = None 1849 if self._match(TokenType.SERDE_PROPERTIES): 1850 serde_properties = self.expression( 1851 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1852 ) 1853 1854 return self.expression( 1855 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1856 ) 1857 1858 self._match_text_seq("DELIMITED") 1859 1860 kwargs = {} 1861 1862 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1863 kwargs["fields"] = self._parse_string() 1864 if self._match_text_seq("ESCAPED", "BY"): 1865 kwargs["escaped"] = self._parse_string() 1866 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1867 kwargs["collection_items"] = self._parse_string() 1868 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1869 kwargs["map_keys"] = self._parse_string() 1870 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1871 kwargs["lines"] = self._parse_string() 1872 if self._match_text_seq("NULL", "DEFINED", "AS"): 1873 kwargs["null"] = self._parse_string() 1874 1875 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1876 1877 def _parse_load(self) -> exp.LoadData | exp.Command: 1878 if self._match_text_seq("DATA"): 1879 local = self._match_text_seq("LOCAL") 1880 self._match_text_seq("INPATH") 1881 inpath = self._parse_string() 1882 overwrite = self._match(TokenType.OVERWRITE) 1883 self._match_pair(TokenType.INTO, TokenType.TABLE) 1884 1885 return self.expression( 1886 exp.LoadData, 1887 this=self._parse_table(schema=True), 1888 local=local, 1889 overwrite=overwrite, 
1890 inpath=inpath, 1891 partition=self._parse_partition(), 1892 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1893 serde=self._match_text_seq("SERDE") and self._parse_string(), 1894 ) 1895 return self._parse_as_command(self._prev) 1896 1897 def _parse_delete(self) -> exp.Delete: 1898 # This handles MySQL's "Multiple-Table Syntax" 1899 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1900 tables = None 1901 comments = self._prev_comments 1902 if not self._match(TokenType.FROM, advance=False): 1903 tables = self._parse_csv(self._parse_table) or None 1904 1905 returning = self._parse_returning() 1906 1907 return self.expression( 1908 exp.Delete, 1909 comments=comments, 1910 tables=tables, 1911 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1912 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1913 where=self._parse_where(), 1914 returning=returning or self._parse_returning(), 1915 limit=self._parse_limit(), 1916 ) 1917 1918 def _parse_update(self) -> exp.Update: 1919 comments = self._prev_comments 1920 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1921 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1922 returning = self._parse_returning() 1923 return self.expression( 1924 exp.Update, 1925 comments=comments, 1926 **{ # type: ignore 1927 "this": this, 1928 "expressions": expressions, 1929 "from": self._parse_from(joins=True), 1930 "where": self._parse_where(), 1931 "returning": returning or self._parse_returning(), 1932 "limit": self._parse_limit(), 1933 }, 1934 ) 1935 1936 def _parse_uncache(self) -> exp.Uncache: 1937 if not self._match(TokenType.TABLE): 1938 self.raise_error("Expecting TABLE after UNCACHE") 1939 1940 return self.expression( 1941 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1942 ) 1943 1944 def _parse_cache(self) -> exp.Cache: 1945 lazy = self._match_text_seq("LAZY") 1946 
self._match(TokenType.TABLE) 1947 table = self._parse_table(schema=True) 1948 1949 options = [] 1950 if self._match_text_seq("OPTIONS"): 1951 self._match_l_paren() 1952 k = self._parse_string() 1953 self._match(TokenType.EQ) 1954 v = self._parse_string() 1955 options = [k, v] 1956 self._match_r_paren() 1957 1958 self._match(TokenType.ALIAS) 1959 return self.expression( 1960 exp.Cache, 1961 this=table, 1962 lazy=lazy, 1963 options=options, 1964 expression=self._parse_select(nested=True), 1965 ) 1966 1967 def _parse_partition(self) -> t.Optional[exp.Partition]: 1968 if not self._match(TokenType.PARTITION): 1969 return None 1970 1971 return self.expression( 1972 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1973 ) 1974 1975 def _parse_value(self) -> exp.Tuple: 1976 if self._match(TokenType.L_PAREN): 1977 expressions = self._parse_csv(self._parse_conjunction) 1978 self._match_r_paren() 1979 return self.expression(exp.Tuple, expressions=expressions) 1980 1981 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: SELECT ..., WITH ..., VALUES ..., a
        parenthesized subquery, or (DuckDB) a leading FROM.

        Args:
            nested: whether this call is parsing a nested (parenthesized) query.
            table: whether a bare table reference is acceptable inside parens.
            parse_subquery_alias: whether to consume an alias after a subquery.
        """
        cte = self._parse_with()

        if cte:
            # A WITH clause must be followed by a statement that supports it;
            # the CTE is spliced into that statement's "with" arg.
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when error_level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte  # fall back to the CTE itself when not raising

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM x) — DuckDB's FROM-first syntax inside parens
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # bare leading FROM with no SELECT -> implicit SELECT *
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
this=self._parse_wrapped(self._parse_statement), alias=alias 2114 ) 2115 2116 def _parse_table_alias( 2117 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2118 ) -> t.Optional[exp.TableAlias]: 2119 any_token = self._match(TokenType.ALIAS) 2120 alias = ( 2121 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2122 or self._parse_string_as_identifier() 2123 ) 2124 2125 index = self._index 2126 if self._match(TokenType.L_PAREN): 2127 columns = self._parse_csv(self._parse_function_parameter) 2128 self._match_r_paren() if columns else self._retreat(index) 2129 else: 2130 columns = None 2131 2132 if not alias and not columns: 2133 return None 2134 2135 return self.expression(exp.TableAlias, this=alias, columns=columns) 2136 2137 def _parse_subquery( 2138 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2139 ) -> t.Optional[exp.Subquery]: 2140 if not this: 2141 return None 2142 2143 return self.expression( 2144 exp.Subquery, 2145 this=this, 2146 pivots=self._parse_pivots(), 2147 alias=self._parse_table_alias() if parse_alias else None, 2148 ) 2149 2150 def _parse_query_modifiers( 2151 self, this: t.Optional[exp.Expression] 2152 ) -> t.Optional[exp.Expression]: 2153 if isinstance(this, self.MODIFIABLES): 2154 for join in iter(self._parse_join, None): 2155 this.append("joins", join) 2156 for lateral in iter(self._parse_lateral, None): 2157 this.append("laterals", lateral) 2158 2159 while True: 2160 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2161 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2162 key, expression = parser(self) 2163 2164 if expression: 2165 this.set(key, expression) 2166 if key == "limit": 2167 offset = expression.args.pop("offset", None) 2168 if offset: 2169 this.set("offset", exp.Offset(expression=offset)) 2170 continue 2171 break 2172 return this 2173 2174 def _parse_hint(self) -> t.Optional[exp.Hint]: 2175 if self._match(TokenType.HINT): 2176 hints = [] 
2177 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2178 hints.extend(hint) 2179 2180 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2181 self.raise_error("Expected */ after HINT") 2182 2183 return self.expression(exp.Hint, expressions=hints) 2184 2185 return None 2186 2187 def _parse_into(self) -> t.Optional[exp.Into]: 2188 if not self._match(TokenType.INTO): 2189 return None 2190 2191 temp = self._match(TokenType.TEMPORARY) 2192 unlogged = self._match_text_seq("UNLOGGED") 2193 self._match(TokenType.TABLE) 2194 2195 return self.expression( 2196 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2197 ) 2198 2199 def _parse_from( 2200 self, joins: bool = False, skip_from_token: bool = False 2201 ) -> t.Optional[exp.From]: 2202 if not skip_from_token and not self._match(TokenType.FROM): 2203 return None 2204 2205 return self.expression( 2206 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2207 ) 2208 2209 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2210 if not self._match(TokenType.MATCH_RECOGNIZE): 2211 return None 2212 2213 self._match_l_paren() 2214 2215 partition = self._parse_partition_by() 2216 order = self._parse_order() 2217 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2218 2219 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2220 rows = exp.var("ONE ROW PER MATCH") 2221 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2222 text = "ALL ROWS PER MATCH" 2223 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2224 text += f" SHOW EMPTY MATCHES" 2225 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2226 text += f" OMIT EMPTY MATCHES" 2227 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2228 text += f" WITH UNMATCHED ROWS" 2229 rows = exp.var(text) 2230 else: 2231 rows = None 2232 2233 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2234 text = "AFTER MATCH SKIP" 2235 if 
self._match_text_seq("PAST", "LAST", "ROW"): 2236 text += f" PAST LAST ROW" 2237 elif self._match_text_seq("TO", "NEXT", "ROW"): 2238 text += f" TO NEXT ROW" 2239 elif self._match_text_seq("TO", "FIRST"): 2240 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2241 elif self._match_text_seq("TO", "LAST"): 2242 text += f" TO LAST {self._advance_any().text}" # type: ignore 2243 after = exp.var(text) 2244 else: 2245 after = None 2246 2247 if self._match_text_seq("PATTERN"): 2248 self._match_l_paren() 2249 2250 if not self._curr: 2251 self.raise_error("Expecting )", self._curr) 2252 2253 paren = 1 2254 start = self._curr 2255 2256 while self._curr and paren > 0: 2257 if self._curr.token_type == TokenType.L_PAREN: 2258 paren += 1 2259 if self._curr.token_type == TokenType.R_PAREN: 2260 paren -= 1 2261 2262 end = self._prev 2263 self._advance() 2264 2265 if paren > 0: 2266 self.raise_error("Expecting )", self._curr) 2267 2268 pattern = exp.var(self._find_sql(start, end)) 2269 else: 2270 pattern = None 2271 2272 define = ( 2273 self._parse_csv( 2274 lambda: self.expression( 2275 exp.Alias, 2276 alias=self._parse_id_var(any_token=True), 2277 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2278 ) 2279 ) 2280 if self._match_text_seq("DEFINE") 2281 else None 2282 ) 2283 2284 self._match_r_paren() 2285 2286 return self.expression( 2287 exp.MatchRecognize, 2288 partition_by=partition, 2289 order=order, 2290 measures=measures, 2291 rows=rows, 2292 after=after, 2293 pattern=pattern, 2294 define=define, 2295 alias=self._parse_table_alias(), 2296 ) 2297 2298 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2299 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2300 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2301 2302 if outer_apply or cross_apply: 2303 this = self._parse_select(table=True) 2304 view = None 2305 outer = not cross_apply 2306 elif self._match(TokenType.LATERAL): 2307 this = 
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause (including comma joins and APPLY forms).

        Args:
            skip_join_token: treat the upcoming tokens as a join even when no
                JOIN keyword is present (caller already consumed/implied it).
            parse_bracket: forwarded to _parse_table for bracketed tables.

        Returns:
            An exp.Join node, or None when no join syntax is found (the token
            position is restored in that case).
        """
        # A comma between tables is an implicit cross join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens weren't actually part of a join —
            # rewind and forget them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        # Third positional arg presumably disables advancing on a partial
        # match — TODO confirm against _match_pair's signature.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join for downstream generation.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Lookahead: nested joins whose ON/USING comes after the inner
            # join list (e.g. "a JOIN b JOIN c ON ... ON ...").
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                # No trailing ON/USING — the lookahead was wrong; rewind.
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        # Preserve comments that were attached to the join keywords.
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
advance=False): 2433 columns = self._parse_wrapped_csv(self._parse_ordered) 2434 else: 2435 columns = None 2436 2437 return self.expression( 2438 exp.Index, 2439 this=index, 2440 table=table, 2441 using=using, 2442 columns=columns, 2443 unique=unique, 2444 primary=primary, 2445 amp=amp, 2446 partition_by=self._parse_partition_by(), 2447 ) 2448 2449 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2450 hints: t.List[exp.Expression] = [] 2451 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2452 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2453 hints.append( 2454 self.expression( 2455 exp.WithTableHint, 2456 expressions=self._parse_csv( 2457 lambda: self._parse_function() or self._parse_var(any_token=True) 2458 ), 2459 ) 2460 ) 2461 self._match_r_paren() 2462 else: 2463 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2464 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2465 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2466 2467 self._match_texts({"INDEX", "KEY"}) 2468 if self._match(TokenType.FOR): 2469 hint.set("target", self._advance_any() and self._prev.text.upper()) 2470 2471 hint.set("expressions", self._parse_wrapped_id_vars()) 2472 hints.append(hint) 2473 2474 return hints or None 2475 2476 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2477 return ( 2478 (not schema and self._parse_function(optional_parens=False)) 2479 or self._parse_id_var(any_token=False) 2480 or self._parse_string_as_identifier() 2481 or self._parse_placeholder() 2482 ) 2483 2484 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2485 catalog = None 2486 db = None 2487 table = self._parse_table_part(schema=schema) 2488 2489 while self._match(TokenType.DOT): 2490 if catalog: 2491 # This allows nesting the table in arbitrarily many dot expressions if needed 2492 table = self.expression( 2493 exp.Dot, this=table, 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, a subquery, or a
        plain (possibly qualified) table with alias/pivots/hints/sample/joins.

        Args:
            schema: parse a trailing column schema (wraps the table in one).
            joins: also consume trailing JOIN clauses.
            alias_tokens: token set allowed as the alias identifier.
            parse_bracket: allow a bracketed expression as the table.
        """
        # Each of these alternatives is mutually exclusive; first hit wins.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent ordering: some dialects put TABLESAMPLE before the
        # alias, others after — hence the two guarded parse sites below.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node and becomes the new root.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
_parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2565 if not self._match(TokenType.UNNEST): 2566 return None 2567 2568 expressions = self._parse_wrapped_csv(self._parse_type) 2569 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2570 2571 alias = self._parse_table_alias() if with_alias else None 2572 2573 if alias and self.UNNEST_COLUMN_ONLY: 2574 if alias.args.get("columns"): 2575 self.raise_error("Unexpected extra column alias in unnest.") 2576 2577 alias.set("columns", [alias.this]) 2578 alias.set("this", None) 2579 2580 offset = None 2581 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2582 self._match(TokenType.ALIAS) 2583 offset = self._parse_id_var() or exp.to_identifier("offset") 2584 2585 return self.expression( 2586 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2587 ) 2588 2589 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2590 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2591 if not is_derived and not self._match(TokenType.VALUES): 2592 return None 2593 2594 expressions = self._parse_csv(self._parse_value) 2595 alias = self._parse_table_alias() 2596 2597 if is_derived: 2598 self._match_r_paren() 2599 2600 return self.expression( 2601 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2602 ) 2603 2604 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2605 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2606 as_modifier and self._match_text_seq("USING", "SAMPLE") 2607 ): 2608 return None 2609 2610 bucket_numerator = None 2611 bucket_denominator = None 2612 bucket_field = None 2613 percent = None 2614 rows = None 2615 size = None 2616 seed = None 2617 2618 kind = ( 2619 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2620 ) 2621 method = self._parse_var(tokens=(TokenType.ROW,)) 2622 2623 self._match(TokenType.L_PAREN) 
2624 2625 num = self._parse_number() 2626 2627 if self._match_text_seq("BUCKET"): 2628 bucket_numerator = self._parse_number() 2629 self._match_text_seq("OUT", "OF") 2630 bucket_denominator = bucket_denominator = self._parse_number() 2631 self._match(TokenType.ON) 2632 bucket_field = self._parse_field() 2633 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2634 percent = num 2635 elif self._match(TokenType.ROWS): 2636 rows = num 2637 else: 2638 size = num 2639 2640 self._match(TokenType.R_PAREN) 2641 2642 if self._match(TokenType.L_PAREN): 2643 method = self._parse_var() 2644 seed = self._match(TokenType.COMMA) and self._parse_number() 2645 self._match_r_paren() 2646 elif self._match_texts(("SEED", "REPEATABLE")): 2647 seed = self._parse_wrapped(self._parse_number) 2648 2649 return self.expression( 2650 exp.TableSample, 2651 method=method, 2652 bucket_numerator=bucket_numerator, 2653 bucket_denominator=bucket_denominator, 2654 bucket_field=bucket_field, 2655 percent=percent, 2656 rows=rows, 2657 size=size, 2658 seed=seed, 2659 kind=kind, 2660 ) 2661 2662 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2663 return list(iter(self._parse_pivot, None)) or None 2664 2665 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2666 return list(iter(self._parse_join, None)) or None 2667 2668 # https://duckdb.org/docs/sql/statements/pivot 2669 def _parse_simplified_pivot(self) -> exp.Pivot: 2670 def _parse_on() -> t.Optional[exp.Expression]: 2671 this = self._parse_bitwise() 2672 return self._parse_in(this) if self._match(TokenType.IN) else this 2673 2674 this = self._parse_table() 2675 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2676 using = self._match(TokenType.USING) and self._parse_csv( 2677 lambda: self._parse_alias(self._parse_function()) 2678 ) 2679 group = self._parse_group() 2680 return self.expression( 2681 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2682 ) 2683 2684 def _parse_pivot(self) 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause following a table expression.

        Returns:
            An exp.Pivot node (with synthesized output column names for
            PIVOT), or None with the token position restored when the tokens
            don't form a pivot clause.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without parens — not a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregate calls, each optionally aliased.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the pivoted output column names by combining each
            # IN-list value with each aggregation's alias; dialect flags
            # control quoting (IDENTIFY_PIVOT_STRINGS) and whether the agg
            # name is a prefix or a suffix (PREFIXED_PIVOT_COLUMNS).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
self._parse_wrapped_csv(self._parse_grouping_set) 2813 2814 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2815 if self._match(TokenType.L_PAREN): 2816 grouping_set = self._parse_csv(self._parse_column) 2817 self._match_r_paren() 2818 return self.expression(exp.Tuple, expressions=grouping_set) 2819 2820 return self._parse_column() 2821 2822 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2823 if not skip_having_token and not self._match(TokenType.HAVING): 2824 return None 2825 return self.expression(exp.Having, this=self._parse_conjunction()) 2826 2827 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2828 if not self._match(TokenType.QUALIFY): 2829 return None 2830 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2831 2832 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2833 if skip_start_token: 2834 start = None 2835 elif self._match(TokenType.START_WITH): 2836 start = self._parse_conjunction() 2837 else: 2838 return None 2839 2840 self._match(TokenType.CONNECT_BY) 2841 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2842 exp.Prior, this=self._parse_bitwise() 2843 ) 2844 connect = self._parse_conjunction() 2845 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2846 return self.expression(exp.Connect, start=start, connect=connect) 2847 2848 def _parse_order( 2849 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2850 ) -> t.Optional[exp.Expression]: 2851 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2852 return this 2853 2854 return self.expression( 2855 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2856 ) 2857 2858 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2859 if not self._match(token): 2860 return None 2861 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2862 2863 def _parse_ordered(self) -> 
exp.Ordered: 2864 this = self._parse_conjunction() 2865 self._match(TokenType.ASC) 2866 2867 is_desc = self._match(TokenType.DESC) 2868 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2869 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2870 desc = is_desc or False 2871 asc = not desc 2872 nulls_first = is_nulls_first or False 2873 explicitly_null_ordered = is_nulls_first or is_nulls_last 2874 2875 if ( 2876 not explicitly_null_ordered 2877 and ( 2878 (asc and self.NULL_ORDERING == "nulls_are_small") 2879 or (desc and self.NULL_ORDERING != "nulls_are_small") 2880 ) 2881 and self.NULL_ORDERING != "nulls_are_last" 2882 ): 2883 nulls_first = True 2884 2885 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2886 2887 def _parse_limit( 2888 self, this: t.Optional[exp.Expression] = None, top: bool = False 2889 ) -> t.Optional[exp.Expression]: 2890 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2891 comments = self._prev_comments 2892 if top: 2893 limit_paren = self._match(TokenType.L_PAREN) 2894 expression = self._parse_number() 2895 2896 if limit_paren: 2897 self._match_r_paren() 2898 else: 2899 expression = self._parse_term() 2900 2901 if self._match(TokenType.COMMA): 2902 offset = expression 2903 expression = self._parse_term() 2904 else: 2905 offset = None 2906 2907 limit_exp = self.expression( 2908 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2909 ) 2910 2911 return limit_exp 2912 2913 if self._match(TokenType.FETCH): 2914 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2915 direction = self._prev.text if direction else "FIRST" 2916 2917 count = self._parse_number() 2918 percent = self._match(TokenType.PERCENT) 2919 2920 self._match_set((TokenType.ROW, TokenType.ROWS)) 2921 2922 only = self._match_text_seq("ONLY") 2923 with_ties = self._match_text_seq("WITH", "TIES") 2924 2925 if only and with_ties: 2926 self.raise_error("Cannot specify both ONLY and WITH TIES 
in FETCH clause") 2927 2928 return self.expression( 2929 exp.Fetch, 2930 direction=direction, 2931 count=count, 2932 percent=percent, 2933 with_ties=with_ties, 2934 ) 2935 2936 return this 2937 2938 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2939 if not self._match(TokenType.OFFSET): 2940 return this 2941 2942 count = self._parse_term() 2943 self._match_set((TokenType.ROW, TokenType.ROWS)) 2944 return self.expression(exp.Offset, this=this, expression=count) 2945 2946 def _parse_locks(self) -> t.List[exp.Lock]: 2947 locks = [] 2948 while True: 2949 if self._match_text_seq("FOR", "UPDATE"): 2950 update = True 2951 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2952 "LOCK", "IN", "SHARE", "MODE" 2953 ): 2954 update = False 2955 else: 2956 break 2957 2958 expressions = None 2959 if self._match_text_seq("OF"): 2960 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2961 2962 wait: t.Optional[bool | exp.Expression] = None 2963 if self._match_text_seq("NOWAIT"): 2964 wait = True 2965 elif self._match_text_seq("WAIT"): 2966 wait = self._parse_primary() 2967 elif self._match_text_seq("SKIP", "LOCKED"): 2968 wait = False 2969 2970 locks.append( 2971 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2972 ) 2973 2974 return locks 2975 2976 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2977 if not self._match_set(self.SET_OPERATIONS): 2978 return this 2979 2980 token_type = self._prev.token_type 2981 2982 if token_type == TokenType.UNION: 2983 expression = exp.Union 2984 elif token_type == TokenType.EXCEPT: 2985 expression = exp.Except 2986 else: 2987 expression = exp.Intersect 2988 2989 return self.expression( 2990 expression, 2991 this=this, 2992 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2993 by_name=self._match_text_seq("BY", "NAME"), 2994 
expression=self._parse_set_operations(self._parse_select(nested=True)), 2995 ) 2996 2997 def _parse_expression(self) -> t.Optional[exp.Expression]: 2998 return self._parse_alias(self._parse_conjunction()) 2999 3000 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3001 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3002 3003 def _parse_equality(self) -> t.Optional[exp.Expression]: 3004 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3005 3006 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3007 return self._parse_tokens(self._parse_range, self.COMPARISON) 3008 3009 def _parse_range(self) -> t.Optional[exp.Expression]: 3010 this = self._parse_bitwise() 3011 negate = self._match(TokenType.NOT) 3012 3013 if self._match_set(self.RANGE_PARSERS): 3014 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3015 if not expression: 3016 return this 3017 3018 this = expression 3019 elif self._match(TokenType.ISNULL): 3020 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3021 3022 # Postgres supports ISNULL and NOTNULL for conditions. 
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Called after IS was consumed; `index` points at the IS token so we can
        # back out if nothing parseable follows it.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality, hence NullSafeEQ when negated.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # Handles IN UNNEST(...), IN (subquery), IN (expr, ...), and IN <field>.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subqueryable becomes the `query` arg, otherwise a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        # BETWEEN <low> AND <high>; BETWEEN itself was consumed by the caller.
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix (e.g. after LIKE).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # A two-word string like '5 day' already embeds the unit.
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        # Left-associative loop over bitwise operators, `??` (DQMARK -> Coalesce),
        # and the << / >> shift token pairs.
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive level (+, - via TERM) of the precedence climb.
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative level (*, / via FACTOR) of the precedence climb.
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Unary operators, otherwise a typed expression with optional AT TIME ZONE.
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        # Either an INTERVAL, a "<type> <literal>" shorthand cast (e.g. DATE '2020-01-01'),
        # or a plain column expression. Backtracks when a bare type name was a false match.
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect hook: a literal after a type may build a custom node.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # A type parameter like 38 in DECIMAL(38), optionally followed by a variable.
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parses a (possibly nested / parameterized) data type into exp.DataType,
        # returning None and retreating when the tokens don't form a type.
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: maybe a user-defined type spelled as an identifier.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type arguments: struct fields, nested types, enum
            # values, or plain size parameters depending on the type token.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually have been a function call.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> / STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE suffixes refine the timestamp/time type.
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            # INTERVAL YEAR TO MONTH / DAY TO SECOND spans, or a single unit var.
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) followed by a string literal means it was a function
            # call (e.g. a cast-like function), not a type — bail out.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One struct field: "<name>[:] <type>" parsed as a column definition.
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional AT TIME ZONE <zone> suffix.
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        # A field, promoted to Column if it's a bare identifier, then any
        # trailing column operators (dots, casts, brackets).
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Applies postfix column operators in a loop: ::type casts, dialect
        # COLUMN_OPERATORS, dotted references, and bracket subscripts.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts the parts: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Literals (via PRIMARY_PARSERS), ".N" decimals, and parenthesized
        # expressions / subqueries / tuples.
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL-standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary literal, a function call, or an identifier.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # Parses a function call; `anonymous` forces exp.Anonymous instead of
        # a known-function builder, `functions` overrides the lookup table.
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parentheses follow: only niladic keyword functions qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Skip past the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Possibly-qualified UDF name, optionally followed by a parameter list.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # Charset introducer (e.g. _utf8'...'): Introducer when a literal follows.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # "<kind>.<name>" or bare "<name>" session parameter reference.
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Tries "(a, b) -> ..." / "a -> ..." lambdas first; otherwise falls back
        # to DISTINCT or a plain select/expression function argument.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses "(col defs / constraints, ...)"; first probes for a nested
        # SELECT, in which case there is no schema to attach.
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL].
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args.
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS with either a wrapped list of values or a single expression.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        # GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY (options) — `this`
        # is True for ALWAYS, False for BY DEFAULT.
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): the parens hold an expression instead.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # INLINE [LENGTH] <expr>.
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        # NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION; NOT was already consumed.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Optionally named column constraint: [CONSTRAINT <name>] <kind>.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Table-level constraint; unnamed constraints are delegated immediately.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # Dispatches to CONSTRAINT_PARSERS keyed by the constraint keyword.
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(columns)].
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        # Collects trailing key options (ON <event> <action>, DEFERRABLE, etc.)
        # as plain strings until no known option matches.
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [options]; `match=False` when the keyword was
        # already consumed by the caller.
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]*.
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # Column-level form (no parens) yields PrimaryKeyColumnConstraint,
        # table-level form yields PrimaryKey with a column list and options.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Subscripts and literals delimited by [ ] or { }: slices, struct
        # literals, array literals, or index access. Recurses for chained brackets.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect-specific index bases via INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ":<expr>" suffix turning an index into a slice.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [<operand>] WHEN ... THEN ... [ELSE ...] END, then optional window.
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Both IF(cond, true[, false]) and IF cond THEN ... [ELSE ...] END forms.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was already consumed.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(<part> FROM <expr>) — a comma is tolerated in place of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        # ANY_VALUE(<expr> [HAVING {MAX | MIN} <column>]).
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        # CAST(<expr> AS <type> [FORMAT ...]); `strict` picks Cast vs TryCast.
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type') variant: cast to a type given as a string.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal casts with FORMAT become StrToDate/StrToTime with the
                # format translated through the dialect's time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Coerce each arg to text and NULL-protect it so NULLs concatenate as ''.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
4000 if len(args) == 1: 4001 return args[0] 4002 4003 return self.expression( 4004 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4005 ) 4006 4007 def _parse_string_agg(self) -> exp.Expression: 4008 if self._match(TokenType.DISTINCT): 4009 args: t.List[t.Optional[exp.Expression]] = [ 4010 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4011 ] 4012 if self._match(TokenType.COMMA): 4013 args.extend(self._parse_csv(self._parse_conjunction)) 4014 else: 4015 args = self._parse_csv(self._parse_conjunction) # type: ignore 4016 4017 index = self._index 4018 if not self._match(TokenType.R_PAREN) and args: 4019 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4020 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4021 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4022 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4023 4024 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4025 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4026 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4027 if not self._match_text_seq("WITHIN", "GROUP"): 4028 self._retreat(index) 4029 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4030 4031 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4032 order = self._parse_order(this=seq_get(args, 0)) 4033 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4034 4035 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4036 this = self._parse_bitwise() 4037 4038 if self._match(TokenType.USING): 4039 to: t.Optional[exp.Expression] = self.expression( 4040 exp.CharacterSet, this=self._parse_var() 4041 ) 4042 elif self._match(TokenType.COMMA): 4043 to = self._parse_types() 4044 else: 4045 to = None 4046 4047 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4048 4049 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4050 """ 4051 There are generally two variants of the DECODE function: 4052 4053 - DECODE(bin, charset) 4054 - DECODE(expression, search, result [, search, result] ... [, default]) 4055 4056 The second variant will always be parsed into a CASE expression. Note that NULL 4057 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4058 instead of relying on pattern matching. 
4059 """ 4060 args = self._parse_csv(self._parse_conjunction) 4061 4062 if len(args) < 3: 4063 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4064 4065 expression, *expressions = args 4066 if not expression: 4067 return None 4068 4069 ifs = [] 4070 for search, result in zip(expressions[::2], expressions[1::2]): 4071 if not search or not result: 4072 return None 4073 4074 if isinstance(search, exp.Literal): 4075 ifs.append( 4076 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4077 ) 4078 elif isinstance(search, exp.Null): 4079 ifs.append( 4080 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4081 ) 4082 else: 4083 cond = exp.or_( 4084 exp.EQ(this=expression.copy(), expression=search), 4085 exp.and_( 4086 exp.Is(this=expression.copy(), expression=exp.Null()), 4087 exp.Is(this=search.copy(), expression=exp.Null()), 4088 copy=False, 4089 ), 4090 copy=False, 4091 ) 4092 ifs.append(exp.If(this=cond, true=result)) 4093 4094 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4095 4096 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4097 self._match_text_seq("KEY") 4098 key = self._parse_field() 4099 self._match(TokenType.COLON) 4100 self._match_text_seq("VALUE") 4101 value = self._parse_field() 4102 4103 if not key and not value: 4104 return None 4105 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4106 4107 def _parse_json_object(self) -> exp.JSONObject: 4108 star = self._parse_star() 4109 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4110 4111 null_handling = None 4112 if self._match_text_seq("NULL", "ON", "NULL"): 4113 null_handling = "NULL ON NULL" 4114 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4115 null_handling = "ABSENT ON NULL" 4116 4117 unique_keys = None 4118 if self._match_text_seq("WITH", "UNIQUE"): 4119 unique_keys = True 4120 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 4121 unique_keys = False 4122 4123 self._match_text_seq("KEYS") 4124 4125 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4126 format_json = self._match_text_seq("FORMAT", "JSON") 4127 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4128 4129 return self.expression( 4130 exp.JSONObject, 4131 expressions=expressions, 4132 null_handling=null_handling, 4133 unique_keys=unique_keys, 4134 return_type=return_type, 4135 format_json=format_json, 4136 encoding=encoding, 4137 ) 4138 4139 def _parse_logarithm(self) -> exp.Func: 4140 # Default argument order is base, expression 4141 args = self._parse_csv(self._parse_range) 4142 4143 if len(args) > 1: 4144 if not self.LOG_BASE_FIRST: 4145 args.reverse() 4146 return exp.Log.from_arg_list(args) 4147 4148 return self.expression( 4149 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4150 ) 4151 4152 def _parse_match_against(self) -> exp.MatchAgainst: 4153 expressions = self._parse_csv(self._parse_column) 4154 4155 self._match_text_seq(")", "AGAINST", "(") 4156 4157 this = self._parse_string() 4158 4159 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4160 modifier = "IN NATURAL LANGUAGE MODE" 4161 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4162 modifier = f"{modifier} WITH QUERY EXPANSION" 4163 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4164 modifier = "IN BOOLEAN MODE" 4165 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4166 modifier = "WITH QUERY EXPANSION" 4167 else: 4168 modifier = None 4169 4170 return self.expression( 4171 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4172 ) 4173 4174 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4175 def _parse_open_json(self) -> exp.OpenJSON: 4176 this = self._parse_bitwise() 4177 path = self._match(TokenType.COMMA) and self._parse_string() 4178 4179 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4180 this = self._parse_field(any_token=True) 4181 kind = self._parse_types() 4182 path = self._parse_string() 4183 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4184 4185 return self.expression( 4186 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4187 ) 4188 4189 expressions = None 4190 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4191 self._match_l_paren() 4192 expressions = self._parse_csv(_parse_open_json_column_def) 4193 4194 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4195 4196 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4197 args = self._parse_csv(self._parse_bitwise) 4198 4199 if self._match(TokenType.IN): 4200 return self.expression( 4201 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4202 ) 4203 4204 if haystack_first: 4205 haystack = seq_get(args, 0) 4206 needle = seq_get(args, 1) 4207 else: 4208 needle = seq_get(args, 0) 4209 haystack = seq_get(args, 1) 4210 4211 return self.expression( 4212 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4213 ) 4214 4215 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4216 args = self._parse_csv(self._parse_table) 4217 return exp.JoinHint(this=func_name.upper(), expressions=args) 4218 4219 def _parse_substring(self) -> exp.Substring: 4220 # Postgres supports the form: substring(string [from int] [for int]) 4221 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4222 4223 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4224 4225 if self._match(TokenType.FROM): 4226 args.append(self._parse_bitwise()) 4227 if self._match(TokenType.FOR): 4228 args.append(self._parse_bitwise()) 4229 4230 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4231 4232 def _parse_trim(self) -> exp.Trim: 4233 # 
https://www.w3resource.com/sql/character-functions/trim.php 4234 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4235 4236 position = None 4237 collation = None 4238 4239 if self._match_texts(self.TRIM_TYPES): 4240 position = self._prev.text.upper() 4241 4242 expression = self._parse_bitwise() 4243 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4244 this = self._parse_bitwise() 4245 else: 4246 this = expression 4247 expression = None 4248 4249 if self._match(TokenType.COLLATE): 4250 collation = self._parse_bitwise() 4251 4252 return self.expression( 4253 exp.Trim, this=this, position=position, expression=expression, collation=collation 4254 ) 4255 4256 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4257 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4258 4259 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4260 return self._parse_window(self._parse_id_var(), alias=True) 4261 4262 def _parse_respect_or_ignore_nulls( 4263 self, this: t.Optional[exp.Expression] 4264 ) -> t.Optional[exp.Expression]: 4265 if self._match_text_seq("IGNORE", "NULLS"): 4266 return self.expression(exp.IgnoreNulls, this=this) 4267 if self._match_text_seq("RESPECT", "NULLS"): 4268 return self.expression(exp.RespectNulls, this=this) 4269 return this 4270 4271 def _parse_window( 4272 self, this: t.Optional[exp.Expression], alias: bool = False 4273 ) -> t.Optional[exp.Expression]: 4274 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4275 self._match(TokenType.WHERE) 4276 this = self.expression( 4277 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4278 ) 4279 self._match_r_paren() 4280 4281 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4282 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4283 if self._match_text_seq("WITHIN", "GROUP"): 4284 order = self._parse_wrapped(self._parse_order) 4285 this = self.expression(exp.WithinGroup, this=this, expression=order) 4286 4287 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4288 # Some dialects choose to implement and some do not. 4289 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4290 4291 # There is some code above in _parse_lambda that handles 4292 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4293 4294 # The below changes handle 4295 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4296 4297 # Oracle allows both formats 4298 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4299 # and Snowflake chose to do the same for familiarity 4300 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4301 this = self._parse_respect_or_ignore_nulls(this) 4302 4303 # bigquery select from window x AS (partition by ...) 
4304 if alias: 4305 over = None 4306 self._match(TokenType.ALIAS) 4307 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4308 return this 4309 else: 4310 over = self._prev.text.upper() 4311 4312 if not self._match(TokenType.L_PAREN): 4313 return self.expression( 4314 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4315 ) 4316 4317 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4318 4319 first = self._match(TokenType.FIRST) 4320 if self._match_text_seq("LAST"): 4321 first = False 4322 4323 partition, order = self._parse_partition_and_order() 4324 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4325 4326 if kind: 4327 self._match(TokenType.BETWEEN) 4328 start = self._parse_window_spec() 4329 self._match(TokenType.AND) 4330 end = self._parse_window_spec() 4331 4332 spec = self.expression( 4333 exp.WindowSpec, 4334 kind=kind, 4335 start=start["value"], 4336 start_side=start["side"], 4337 end=end["value"], 4338 end_side=end["side"], 4339 ) 4340 else: 4341 spec = None 4342 4343 self._match_r_paren() 4344 4345 window = self.expression( 4346 exp.Window, 4347 this=this, 4348 partition_by=partition, 4349 order=order, 4350 spec=spec, 4351 alias=window_alias, 4352 over=over, 4353 first=first, 4354 ) 4355 4356 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4357 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4358 return self._parse_window(window, alias=alias) 4359 4360 return window 4361 4362 def _parse_partition_and_order( 4363 self, 4364 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4365 return self._parse_partition_by(), self._parse_order() 4366 4367 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4368 self._match(TokenType.BETWEEN) 4369 4370 return { 4371 "value": ( 4372 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4373 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4374 or self._parse_bitwise() 4375 ), 4376 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4377 } 4378 4379 def _parse_alias( 4380 self, this: t.Optional[exp.Expression], explicit: bool = False 4381 ) -> t.Optional[exp.Expression]: 4382 any_token = self._match(TokenType.ALIAS) 4383 4384 if explicit and not any_token: 4385 return this 4386 4387 if self._match(TokenType.L_PAREN): 4388 aliases = self.expression( 4389 exp.Aliases, 4390 this=this, 4391 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4392 ) 4393 self._match_r_paren(aliases) 4394 return aliases 4395 4396 alias = self._parse_id_var(any_token) 4397 4398 if alias: 4399 return self.expression(exp.Alias, this=this, alias=alias) 4400 4401 return this 4402 4403 def _parse_id_var( 4404 self, 4405 any_token: bool = True, 4406 tokens: t.Optional[t.Collection[TokenType]] = None, 4407 ) -> t.Optional[exp.Expression]: 4408 identifier = self._parse_identifier() 4409 4410 if identifier: 4411 return identifier 4412 4413 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4414 quoted = self._prev.token_type == TokenType.STRING 4415 return exp.Identifier(this=self._prev.text, quoted=quoted) 4416 4417 return None 4418 4419 def _parse_string(self) -> t.Optional[exp.Expression]: 4420 if self._match(TokenType.STRING): 4421 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4422 return self._parse_placeholder() 4423 4424 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4425 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4426 4427 def _parse_number(self) -> t.Optional[exp.Expression]: 4428 if self._match(TokenType.NUMBER): 4429 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4430 return self._parse_placeholder() 4431 4432 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4433 if self._match(TokenType.IDENTIFIER): 4434 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4435 return self._parse_placeholder() 4436 4437 def _parse_var( 4438 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4439 ) -> t.Optional[exp.Expression]: 4440 if ( 4441 (any_token and self._advance_any()) 4442 or self._match(TokenType.VAR) 4443 or (self._match_set(tokens) if tokens else False) 4444 ): 4445 return self.expression(exp.Var, this=self._prev.text) 4446 return self._parse_placeholder() 4447 4448 def _advance_any(self) -> t.Optional[Token]: 4449 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4450 self._advance() 4451 return self._prev 4452 return None 4453 4454 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4455 return self._parse_var() or self._parse_string() 4456 4457 def _parse_null(self) -> t.Optional[exp.Expression]: 4458 if self._match(TokenType.NULL): 4459 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4460 return self._parse_placeholder() 4461 4462 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4463 if self._match(TokenType.TRUE): 4464 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4465 if self._match(TokenType.FALSE): 4466 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4467 return self._parse_placeholder() 4468 4469 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4470 if self._match(TokenType.STAR): 4471 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4472 return self._parse_placeholder() 4473 4474 def _parse_parameter(self) -> exp.Parameter: 4475 wrapped = self._match(TokenType.L_BRACE) 4476 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4477 self._match(TokenType.R_BRACE) 4478 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4479 4480 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4481 if self._match_set(self.PLACEHOLDER_PARSERS): 4482 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4483 if placeholder: 4484 return placeholder 4485 self._advance(-1) 4486 return None 4487 4488 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4489 if not self._match(TokenType.EXCEPT): 4490 return None 4491 if self._match(TokenType.L_PAREN, advance=False): 4492 return self._parse_wrapped_csv(self._parse_column) 4493 return self._parse_csv(self._parse_column) 4494 4495 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4496 if not self._match(TokenType.REPLACE): 4497 return None 4498 if self._match(TokenType.L_PAREN, advance=False): 4499 return self._parse_wrapped_csv(self._parse_expression) 4500 return self._parse_expressions() 4501 4502 def _parse_csv( 4503 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4504 ) -> t.List[exp.Expression]: 4505 parse_result = parse_method() 4506 items = [parse_result] if parse_result is not None else [] 4507 4508 while self._match(sep): 4509 self._add_comments(parse_result) 4510 parse_result = parse_method() 4511 if parse_result is not None: 4512 items.append(parse_result) 4513 4514 return items 4515 4516 def _parse_tokens( 4517 self, parse_method: t.Callable, expressions: t.Dict 4518 ) -> t.Optional[exp.Expression]: 4519 this = parse_method() 4520 4521 while self._match_set(expressions): 4522 this = self.expression( 4523 
expressions[self._prev.token_type], 4524 this=this, 4525 comments=self._prev_comments, 4526 expression=parse_method(), 4527 ) 4528 4529 return this 4530 4531 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4532 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4533 4534 def _parse_wrapped_csv( 4535 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4536 ) -> t.List[exp.Expression]: 4537 return self._parse_wrapped( 4538 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4539 ) 4540 4541 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4542 wrapped = self._match(TokenType.L_PAREN) 4543 if not wrapped and not optional: 4544 self.raise_error("Expecting (") 4545 parse_result = parse_method() 4546 if wrapped: 4547 self._match_r_paren() 4548 return parse_result 4549 4550 def _parse_expressions(self) -> t.List[exp.Expression]: 4551 return self._parse_csv(self._parse_expression) 4552 4553 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4554 return self._parse_select() or self._parse_set_operations( 4555 self._parse_expression() if alias else self._parse_conjunction() 4556 ) 4557 4558 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4559 return self._parse_query_modifiers( 4560 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4561 ) 4562 4563 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4564 this = None 4565 if self._match_texts(self.TRANSACTION_KIND): 4566 this = self._prev.text 4567 4568 self._match_texts({"TRANSACTION", "WORK"}) 4569 4570 modes = [] 4571 while True: 4572 mode = [] 4573 while self._match(TokenType.VAR): 4574 mode.append(self._prev.text) 4575 4576 if mode: 4577 modes.append(" ".join(mode)) 4578 if not self._match(TokenType.COMMA): 4579 break 4580 4581 return self.expression(exp.Transaction, this=this, 
modes=modes) 4582 4583 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4584 chain = None 4585 savepoint = None 4586 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4587 4588 self._match_texts({"TRANSACTION", "WORK"}) 4589 4590 if self._match_text_seq("TO"): 4591 self._match_text_seq("SAVEPOINT") 4592 savepoint = self._parse_id_var() 4593 4594 if self._match(TokenType.AND): 4595 chain = not self._match_text_seq("NO") 4596 self._match_text_seq("CHAIN") 4597 4598 if is_rollback: 4599 return self.expression(exp.Rollback, savepoint=savepoint) 4600 4601 return self.expression(exp.Commit, chain=chain) 4602 4603 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4604 if not self._match_text_seq("ADD"): 4605 return None 4606 4607 self._match(TokenType.COLUMN) 4608 exists_column = self._parse_exists(not_=True) 4609 expression = self._parse_field_def() 4610 4611 if expression: 4612 expression.set("exists", exists_column) 4613 4614 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4615 if self._match_texts(("FIRST", "AFTER")): 4616 position = self._prev.text 4617 column_position = self.expression( 4618 exp.ColumnPosition, this=self._parse_column(), position=position 4619 ) 4620 expression.set("position", column_position) 4621 4622 return expression 4623 4624 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4625 drop = self._match(TokenType.DROP) and self._parse_drop() 4626 if drop and not isinstance(drop, exp.Command): 4627 drop.set("kind", drop.args.get("kind", "COLUMN")) 4628 return drop 4629 4630 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4631 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4632 return self.expression( 4633 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4634 ) 4635 4636 def _parse_add_constraint(self) -> exp.AddConstraint: 4637 this = None 4638 
kind = self._prev.token_type 4639 4640 if kind == TokenType.CONSTRAINT: 4641 this = self._parse_id_var() 4642 4643 if self._match_text_seq("CHECK"): 4644 expression = self._parse_wrapped(self._parse_conjunction) 4645 enforced = self._match_text_seq("ENFORCED") 4646 4647 return self.expression( 4648 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4649 ) 4650 4651 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4652 expression = self._parse_foreign_key() 4653 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4654 expression = self._parse_primary_key() 4655 else: 4656 expression = None 4657 4658 return self.expression(exp.AddConstraint, this=this, expression=expression) 4659 4660 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4661 index = self._index - 1 4662 4663 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4664 return self._parse_csv(self._parse_add_constraint) 4665 4666 self._retreat(index) 4667 return self._parse_csv(self._parse_add_column) 4668 4669 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4670 self._match(TokenType.COLUMN) 4671 column = self._parse_field(any_token=True) 4672 4673 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4674 return self.expression(exp.AlterColumn, this=column, drop=True) 4675 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4676 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4677 4678 self._match_text_seq("SET", "DATA") 4679 return self.expression( 4680 exp.AlterColumn, 4681 this=column, 4682 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4683 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4684 using=self._match(TokenType.USING) and self._parse_conjunction(), 4685 ) 4686 4687 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4688 index = self._index - 1 4689 4690 partition_exists = self._parse_exists() 4691 if self._match(TokenType.PARTITION, 
advance=False): 4692 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4693 4694 self._retreat(index) 4695 return self._parse_csv(self._parse_drop_column) 4696 4697 def _parse_alter_table_rename(self) -> exp.RenameTable: 4698 self._match_text_seq("TO") 4699 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4700 4701 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4702 start = self._prev 4703 4704 if not self._match(TokenType.TABLE): 4705 return self._parse_as_command(start) 4706 4707 exists = self._parse_exists() 4708 this = self._parse_table(schema=True) 4709 4710 if self._next: 4711 self._advance() 4712 4713 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4714 if parser: 4715 actions = ensure_list(parser(self)) 4716 4717 if not self._curr: 4718 return self.expression( 4719 exp.AlterTable, 4720 this=this, 4721 exists=exists, 4722 actions=actions, 4723 ) 4724 return self._parse_as_command(start) 4725 4726 def _parse_merge(self) -> exp.Merge: 4727 self._match(TokenType.INTO) 4728 target = self._parse_table() 4729 4730 if target and self._match(TokenType.ALIAS, advance=False): 4731 target.set("alias", self._parse_table_alias()) 4732 4733 self._match(TokenType.USING) 4734 using = self._parse_table() 4735 4736 self._match(TokenType.ON) 4737 on = self._parse_conjunction() 4738 4739 whens = [] 4740 while self._match(TokenType.WHEN): 4741 matched = not self._match(TokenType.NOT) 4742 self._match_text_seq("MATCHED") 4743 source = ( 4744 False 4745 if self._match_text_seq("BY", "TARGET") 4746 else self._match_text_seq("BY", "SOURCE") 4747 ) 4748 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4749 4750 self._match(TokenType.THEN) 4751 4752 if self._match(TokenType.INSERT): 4753 _this = self._parse_star() 4754 if _this: 4755 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4756 else: 4757 then = self.expression( 4758 
exp.Insert, 4759 this=self._parse_value(), 4760 expression=self._match(TokenType.VALUES) and self._parse_value(), 4761 ) 4762 elif self._match(TokenType.UPDATE): 4763 expressions = self._parse_star() 4764 if expressions: 4765 then = self.expression(exp.Update, expressions=expressions) 4766 else: 4767 then = self.expression( 4768 exp.Update, 4769 expressions=self._match(TokenType.SET) 4770 and self._parse_csv(self._parse_equality), 4771 ) 4772 elif self._match(TokenType.DELETE): 4773 then = self.expression(exp.Var, this=self._prev.text) 4774 else: 4775 then = None 4776 4777 whens.append( 4778 self.expression( 4779 exp.When, 4780 matched=matched, 4781 source=source, 4782 condition=condition, 4783 then=then, 4784 ) 4785 ) 4786 4787 return self.expression( 4788 exp.Merge, 4789 this=target, 4790 using=using, 4791 on=on, 4792 expressions=whens, 4793 ) 4794 4795 def _parse_show(self) -> t.Optional[exp.Expression]: 4796 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4797 if parser: 4798 return parser(self) 4799 self._advance() 4800 return self.expression(exp.Show, this=self._prev.text.upper()) 4801 4802 def _parse_set_item_assignment( 4803 self, kind: t.Optional[str] = None 4804 ) -> t.Optional[exp.Expression]: 4805 index = self._index 4806 4807 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4808 return self._parse_set_transaction(global_=kind == "GLOBAL") 4809 4810 left = self._parse_primary() or self._parse_id_var() 4811 4812 if not self._match_texts(("=", "TO")): 4813 self._retreat(index) 4814 return None 4815 4816 right = self._parse_statement() or self._parse_id_var() 4817 this = self.expression(exp.EQ, this=left, expression=right) 4818 4819 return self.expression(exp.SetItem, this=this, kind=kind) 4820 4821 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4822 self._match_text_seq("TRANSACTION") 4823 characteristics = self._parse_csv( 4824 lambda: 
            self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        # Dispatch to a specialized SET item parser when the upcoming tokens
        # match a key in SET_PARSERS; otherwise fall back to a plain assignment.
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement into an exp.Set, falling back to exp.Command.

        If any tokens remain unconsumed after parsing the comma-separated
        items, the position is rewound and the whole statement is kept as an
        opaque command instead.
        """
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: rewind and treat everything as a raw command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return an exp.Var for the first (possibly multi-word) option that
        matches the upcoming tokens, or None if none of them match."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in an exp.Command.

        The first len(start.text) characters of the recovered SQL become the
        command name; everything after them becomes its expression.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property body, e.g. for LAYOUT(...) / SOURCE(...).

        Args:
            this: The property name, e.g. "LAYOUT" or "SOURCE".
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Parse KEY VALUE sub-properties until neither a key nor a value
            # can be parsed anymore.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a range property, e.g. LIFETIME(MIN x MAX y) or LIFETIME(y).

        When MIN is omitted, the single value is taken as the max and the min
        defaults to 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE(review): `min`/`max` shadow the builtins; they are only
            # locals used as keyword argument values here.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        """Parse a comprehension tail: <this> FOR <expr> IN <iterator> [IF <condition>]."""
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser in `parsers` whose (multi-word) key matches the
        upcoming tokens, walking the given trie.

        The token position is restored if no full key is matched.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match (and by default consume) the current token if it has the given
        # type; buffered comments are attached to `expression` on a match.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match the current token if its type is in the `types` collection.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive tokens of the given types, consuming both.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis, recording a parse error otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis, recording a parse error otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match the current token if its upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of consecutive tokens by upper-cased text. The
        # position is rewound on failure, and also when advance=False so the
        # check is side-effect free.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite qualified Column nodes into Dot chains.

        A Column with a table qualifier becomes Dot(table, column name);
        an unqualified Column is reduced to its inner identifier.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns named after lambda parameters inside a lambda body.

        Args:
            node: The parsed lambda body (may be None).
            lambda_variables: The names of the lambda's parameters.

        Returns:
            The (possibly replaced) lambda body.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the chain in one go.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Loop never broke: the column was not inside a Dot chain,
                    # so replace the column itself (or the root node).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument yields an exp.StarMap; otherwise the arguments are
    consumed as alternating key/value pairs and wrapped in an exp.VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys: t.List[exp.Expression] = []
    map_values: t.List[exp.Expression] = []

    index = 0
    while index < len(args):
        map_keys.append(args[index])
        # An odd number of arguments raises IndexError here, just like the
        # pairwise indexing it replaces.
        map_values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=map_keys),
        values=exp.Array(expressions=map_values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TIME, 159 TokenType.TIMETZ, 160 TokenType.TIMESTAMP, 161 TokenType.TIMESTAMPTZ, 162 TokenType.TIMESTAMPLTZ, 163 TokenType.DATETIME, 164 TokenType.DATETIME64, 165 TokenType.DATE, 166 TokenType.INT4RANGE, 167 TokenType.INT4MULTIRANGE, 168 TokenType.INT8RANGE, 169 TokenType.INT8MULTIRANGE, 170 TokenType.NUMRANGE, 171 TokenType.NUMMULTIRANGE, 172 TokenType.TSRANGE, 173 TokenType.TSMULTIRANGE, 174 TokenType.TSTZRANGE, 175 TokenType.TSTZMULTIRANGE, 176 TokenType.DATERANGE, 177 TokenType.DATEMULTIRANGE, 178 TokenType.DECIMAL, 179 TokenType.BIGDECIMAL, 180 TokenType.UUID, 181 TokenType.GEOGRAPHY, 182 TokenType.GEOMETRY, 183 TokenType.HLLSKETCH, 184 TokenType.HSTORE, 185 TokenType.PSEUDO_TYPE, 186 TokenType.SUPER, 187 TokenType.SERIAL, 188 TokenType.SMALLSERIAL, 189 TokenType.BIGSERIAL, 190 TokenType.XML, 191 TokenType.YEAR, 192 TokenType.UNIQUEIDENTIFIER, 193 TokenType.USERDEFINED, 194 TokenType.MONEY, 195 TokenType.SMALLMONEY, 196 TokenType.ROWVERSION, 197 TokenType.IMAGE, 198 TokenType.VARIANT, 199 TokenType.OBJECT, 200 TokenType.INET, 201 TokenType.IPADDRESS, 202 TokenType.IPPREFIX, 203 TokenType.UNKNOWN, 204 TokenType.NULL, 205 *ENUM_TYPE_TOKENS, 206 *NESTED_TYPE_TOKENS, 207 } 208 209 SUBQUERY_PREDICATES = { 210 TokenType.ANY: exp.Any, 211 TokenType.ALL: exp.All, 212 TokenType.EXISTS: 
exp.Exists, 213 TokenType.SOME: exp.Any, 214 } 215 216 RESERVED_KEYWORDS = { 217 *Tokenizer.SINGLE_TOKENS.values(), 218 TokenType.SELECT, 219 } 220 221 DB_CREATABLES = { 222 TokenType.DATABASE, 223 TokenType.SCHEMA, 224 TokenType.TABLE, 225 TokenType.VIEW, 226 TokenType.DICTIONARY, 227 } 228 229 CREATABLES = { 230 TokenType.COLUMN, 231 TokenType.FUNCTION, 232 TokenType.INDEX, 233 TokenType.PROCEDURE, 234 *DB_CREATABLES, 235 } 236 237 # Tokens that can represent identifiers 238 ID_VAR_TOKENS = { 239 TokenType.VAR, 240 TokenType.ANTI, 241 TokenType.APPLY, 242 TokenType.ASC, 243 TokenType.AUTO_INCREMENT, 244 TokenType.BEGIN, 245 TokenType.CACHE, 246 TokenType.CASE, 247 TokenType.COLLATE, 248 TokenType.COMMAND, 249 TokenType.COMMENT, 250 TokenType.COMMIT, 251 TokenType.CONSTRAINT, 252 TokenType.DEFAULT, 253 TokenType.DELETE, 254 TokenType.DESC, 255 TokenType.DESCRIBE, 256 TokenType.DICTIONARY, 257 TokenType.DIV, 258 TokenType.END, 259 TokenType.EXECUTE, 260 TokenType.ESCAPE, 261 TokenType.FALSE, 262 TokenType.FIRST, 263 TokenType.FILTER, 264 TokenType.FORMAT, 265 TokenType.FULL, 266 TokenType.IS, 267 TokenType.ISNULL, 268 TokenType.INTERVAL, 269 TokenType.KEEP, 270 TokenType.LEFT, 271 TokenType.LOAD, 272 TokenType.MERGE, 273 TokenType.NATURAL, 274 TokenType.NEXT, 275 TokenType.OFFSET, 276 TokenType.ORDINALITY, 277 TokenType.OVERWRITE, 278 TokenType.PARTITION, 279 TokenType.PERCENT, 280 TokenType.PIVOT, 281 TokenType.PRAGMA, 282 TokenType.RANGE, 283 TokenType.REFERENCES, 284 TokenType.RIGHT, 285 TokenType.ROW, 286 TokenType.ROWS, 287 TokenType.SEMI, 288 TokenType.SET, 289 TokenType.SETTINGS, 290 TokenType.SHOW, 291 TokenType.TEMPORARY, 292 TokenType.TOP, 293 TokenType.TRUE, 294 TokenType.UNIQUE, 295 TokenType.UNPIVOT, 296 TokenType.UPDATE, 297 TokenType.VOLATILE, 298 TokenType.WINDOW, 299 *CREATABLES, 300 *SUBQUERY_PREDICATES, 301 *TYPE_TOKENS, 302 *NO_PAREN_FUNCTIONS, 303 } 304 305 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 306 307 TABLE_ALIAS_TOKENS = 
ID_VAR_TOKENS - { 308 TokenType.APPLY, 309 TokenType.ASOF, 310 TokenType.FULL, 311 TokenType.LEFT, 312 TokenType.LOCK, 313 TokenType.NATURAL, 314 TokenType.OFFSET, 315 TokenType.RIGHT, 316 TokenType.WINDOW, 317 } 318 319 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 320 321 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 322 323 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 324 325 FUNC_TOKENS = { 326 TokenType.COMMAND, 327 TokenType.CURRENT_DATE, 328 TokenType.CURRENT_DATETIME, 329 TokenType.CURRENT_TIMESTAMP, 330 TokenType.CURRENT_TIME, 331 TokenType.CURRENT_USER, 332 TokenType.FILTER, 333 TokenType.FIRST, 334 TokenType.FORMAT, 335 TokenType.GLOB, 336 TokenType.IDENTIFIER, 337 TokenType.INDEX, 338 TokenType.ISNULL, 339 TokenType.ILIKE, 340 TokenType.INSERT, 341 TokenType.LIKE, 342 TokenType.MERGE, 343 TokenType.OFFSET, 344 TokenType.PRIMARY_KEY, 345 TokenType.RANGE, 346 TokenType.REPLACE, 347 TokenType.RLIKE, 348 TokenType.ROW, 349 TokenType.UNNEST, 350 TokenType.VAR, 351 TokenType.LEFT, 352 TokenType.RIGHT, 353 TokenType.DATE, 354 TokenType.DATETIME, 355 TokenType.TABLE, 356 TokenType.TIMESTAMP, 357 TokenType.TIMESTAMPTZ, 358 TokenType.WINDOW, 359 TokenType.XOR, 360 *TYPE_TOKENS, 361 *SUBQUERY_PREDICATES, 362 } 363 364 CONJUNCTION = { 365 TokenType.AND: exp.And, 366 TokenType.OR: exp.Or, 367 } 368 369 EQUALITY = { 370 TokenType.EQ: exp.EQ, 371 TokenType.NEQ: exp.NEQ, 372 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 373 } 374 375 COMPARISON = { 376 TokenType.GT: exp.GT, 377 TokenType.GTE: exp.GTE, 378 TokenType.LT: exp.LT, 379 TokenType.LTE: exp.LTE, 380 } 381 382 BITWISE = { 383 TokenType.AMP: exp.BitwiseAnd, 384 TokenType.CARET: exp.BitwiseXor, 385 TokenType.PIPE: exp.BitwiseOr, 386 TokenType.DPIPE: exp.DPipe, 387 } 388 389 TERM = { 390 TokenType.DASH: exp.Sub, 391 TokenType.PLUS: exp.Add, 392 TokenType.MOD: exp.Mod, 393 TokenType.COLLATE: exp.Collate, 394 } 395 396 FACTOR = { 397 TokenType.DIV: exp.IntDiv, 398 TokenType.LR_ARROW: 
exp.Distance, 399 TokenType.SLASH: exp.Div, 400 TokenType.STAR: exp.Mul, 401 } 402 403 TIMES = { 404 TokenType.TIME, 405 TokenType.TIMETZ, 406 } 407 408 TIMESTAMPS = { 409 TokenType.TIMESTAMP, 410 TokenType.TIMESTAMPTZ, 411 TokenType.TIMESTAMPLTZ, 412 *TIMES, 413 } 414 415 SET_OPERATIONS = { 416 TokenType.UNION, 417 TokenType.INTERSECT, 418 TokenType.EXCEPT, 419 } 420 421 JOIN_METHODS = { 422 TokenType.NATURAL, 423 TokenType.ASOF, 424 } 425 426 JOIN_SIDES = { 427 TokenType.LEFT, 428 TokenType.RIGHT, 429 TokenType.FULL, 430 } 431 432 JOIN_KINDS = { 433 TokenType.INNER, 434 TokenType.OUTER, 435 TokenType.CROSS, 436 TokenType.SEMI, 437 TokenType.ANTI, 438 } 439 440 JOIN_HINTS: t.Set[str] = set() 441 442 LAMBDAS = { 443 TokenType.ARROW: lambda self, expressions: self.expression( 444 exp.Lambda, 445 this=self._replace_lambda( 446 self._parse_conjunction(), 447 {node.name for node in expressions}, 448 ), 449 expressions=expressions, 450 ), 451 TokenType.FARROW: lambda self, expressions: self.expression( 452 exp.Kwarg, 453 this=exp.var(expressions[0].name), 454 expression=self._parse_conjunction(), 455 ), 456 } 457 458 COLUMN_OPERATORS = { 459 TokenType.DOT: None, 460 TokenType.DCOLON: lambda self, this, to: self.expression( 461 exp.Cast if self.STRICT_CAST else exp.TryCast, 462 this=this, 463 to=to, 464 ), 465 TokenType.ARROW: lambda self, this, path: self.expression( 466 exp.JSONExtract, 467 this=this, 468 expression=path, 469 ), 470 TokenType.DARROW: lambda self, this, path: self.expression( 471 exp.JSONExtractScalar, 472 this=this, 473 expression=path, 474 ), 475 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 476 exp.JSONBExtract, 477 this=this, 478 expression=path, 479 ), 480 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 481 exp.JSONBExtractScalar, 482 this=this, 483 expression=path, 484 ), 485 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 486 exp.JSONBContains, 487 this=this, 488 expression=key, 489 ), 490 } 491 
492 EXPRESSION_PARSERS = { 493 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 494 exp.Column: lambda self: self._parse_column(), 495 exp.Condition: lambda self: self._parse_conjunction(), 496 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 497 exp.Expression: lambda self: self._parse_statement(), 498 exp.From: lambda self: self._parse_from(), 499 exp.Group: lambda self: self._parse_group(), 500 exp.Having: lambda self: self._parse_having(), 501 exp.Identifier: lambda self: self._parse_id_var(), 502 exp.Join: lambda self: self._parse_join(), 503 exp.Lambda: lambda self: self._parse_lambda(), 504 exp.Lateral: lambda self: self._parse_lateral(), 505 exp.Limit: lambda self: self._parse_limit(), 506 exp.Offset: lambda self: self._parse_offset(), 507 exp.Order: lambda self: self._parse_order(), 508 exp.Ordered: lambda self: self._parse_ordered(), 509 exp.Properties: lambda self: self._parse_properties(), 510 exp.Qualify: lambda self: self._parse_qualify(), 511 exp.Returning: lambda self: self._parse_returning(), 512 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 513 exp.Table: lambda self: self._parse_table_parts(), 514 exp.TableAlias: lambda self: self._parse_table_alias(), 515 exp.Where: lambda self: self._parse_where(), 516 exp.Window: lambda self: self._parse_named_window(), 517 exp.With: lambda self: self._parse_with(), 518 "JOIN_TYPE": lambda self: self._parse_join_parts(), 519 } 520 521 STATEMENT_PARSERS = { 522 TokenType.ALTER: lambda self: self._parse_alter(), 523 TokenType.BEGIN: lambda self: self._parse_transaction(), 524 TokenType.CACHE: lambda self: self._parse_cache(), 525 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 526 TokenType.COMMENT: lambda self: self._parse_comment(), 527 TokenType.CREATE: lambda self: self._parse_create(), 528 TokenType.DELETE: lambda self: self._parse_delete(), 529 TokenType.DESC: lambda self: self._parse_describe(), 530 
TokenType.DESCRIBE: lambda self: self._parse_describe(), 531 TokenType.DROP: lambda self: self._parse_drop(), 532 TokenType.INSERT: lambda self: self._parse_insert(), 533 TokenType.LOAD: lambda self: self._parse_load(), 534 TokenType.MERGE: lambda self: self._parse_merge(), 535 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 536 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 537 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 538 TokenType.SET: lambda self: self._parse_set(), 539 TokenType.UNCACHE: lambda self: self._parse_uncache(), 540 TokenType.UPDATE: lambda self: self._parse_update(), 541 TokenType.USE: lambda self: self.expression( 542 exp.Use, 543 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 544 and exp.var(self._prev.text), 545 this=self._parse_table(schema=False), 546 ), 547 } 548 549 UNARY_PARSERS = { 550 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 551 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 552 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 553 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 554 } 555 556 PRIMARY_PARSERS = { 557 TokenType.STRING: lambda self, token: self.expression( 558 exp.Literal, this=token.text, is_string=True 559 ), 560 TokenType.NUMBER: lambda self, token: self.expression( 561 exp.Literal, this=token.text, is_string=False 562 ), 563 TokenType.STAR: lambda self, _: self.expression( 564 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 565 ), 566 TokenType.NULL: lambda self, _: self.expression(exp.Null), 567 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 568 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 569 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 570 
TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 571 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 572 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 573 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 574 exp.National, this=token.text 575 ), 576 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 577 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 578 } 579 580 PLACEHOLDER_PARSERS = { 581 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 582 TokenType.PARAMETER: lambda self: self._parse_parameter(), 583 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 584 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 585 else None, 586 } 587 588 RANGE_PARSERS = { 589 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 590 TokenType.GLOB: binary_range_parser(exp.Glob), 591 TokenType.ILIKE: binary_range_parser(exp.ILike), 592 TokenType.IN: lambda self, this: self._parse_in(this), 593 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 594 TokenType.IS: lambda self, this: self._parse_is(this), 595 TokenType.LIKE: binary_range_parser(exp.Like), 596 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 597 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 598 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 599 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 600 } 601 602 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 603 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 604 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 605 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 606 "CHARACTER SET": lambda self: self._parse_character_set(), 607 "CHECKSUM": lambda self: 
self._parse_checksum(), 608 "CLUSTER BY": lambda self: self._parse_cluster(), 609 "CLUSTERED": lambda self: self._parse_clustered_by(), 610 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 611 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 612 "COPY": lambda self: self._parse_copy_property(), 613 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 614 "DEFINER": lambda self: self._parse_definer(), 615 "DETERMINISTIC": lambda self: self.expression( 616 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 617 ), 618 "DISTKEY": lambda self: self._parse_distkey(), 619 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 620 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 621 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 622 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 623 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 624 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 625 "FREESPACE": lambda self: self._parse_freespace(), 626 "HEAP": lambda self: self.expression(exp.HeapProperty), 627 "IMMUTABLE": lambda self: self.expression( 628 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 629 ), 630 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 631 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 632 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 633 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 634 "LIKE": lambda self: self._parse_create_like(), 635 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 636 "LOCK": lambda self: self._parse_locking(), 637 "LOCKING": lambda self: self._parse_locking(), 638 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 639 "MATERIALIZED": lambda self: 
self.expression(exp.MaterializedProperty), 640 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 641 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 642 "NO": lambda self: self._parse_no_property(), 643 "ON": lambda self: self._parse_on_property(), 644 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 645 "PARTITION BY": lambda self: self._parse_partitioned_by(), 646 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 647 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 648 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 649 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 650 "RETURNS": lambda self: self._parse_returns(), 651 "ROW": lambda self: self._parse_row(), 652 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 653 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 654 "SETTINGS": lambda self: self.expression( 655 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 656 ), 657 "SORTKEY": lambda self: self._parse_sortkey(), 658 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 659 "STABLE": lambda self: self.expression( 660 exp.StabilityProperty, this=exp.Literal.string("STABLE") 661 ), 662 "STORED": lambda self: self._parse_stored(), 663 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 664 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 665 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 666 "TO": lambda self: self._parse_to_table(), 667 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 668 "TTL": lambda self: self._parse_ttl(), 669 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 670 "VOLATILE": lambda self: self._parse_volatile_property(), 671 "WITH": lambda self: self._parse_with_property(), 672 } 673 674 CONSTRAINT_PARSERS = { 675 "AUTOINCREMENT": 
lambda self: self._parse_auto_increment(), 676 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 677 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 678 "CHARACTER SET": lambda self: self.expression( 679 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 680 ), 681 "CHECK": lambda self: self.expression( 682 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 683 ), 684 "COLLATE": lambda self: self.expression( 685 exp.CollateColumnConstraint, this=self._parse_var() 686 ), 687 "COMMENT": lambda self: self.expression( 688 exp.CommentColumnConstraint, this=self._parse_string() 689 ), 690 "COMPRESS": lambda self: self._parse_compress(), 691 "CLUSTERED": lambda self: self.expression( 692 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 693 ), 694 "NONCLUSTERED": lambda self: self.expression( 695 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 696 ), 697 "DEFAULT": lambda self: self.expression( 698 exp.DefaultColumnConstraint, this=self._parse_bitwise() 699 ), 700 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 701 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 702 "FORMAT": lambda self: self.expression( 703 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 704 ), 705 "GENERATED": lambda self: self._parse_generated_as_identity(), 706 "IDENTITY": lambda self: self._parse_auto_increment(), 707 "INLINE": lambda self: self._parse_inline(), 708 "LIKE": lambda self: self._parse_create_like(), 709 "NOT": lambda self: self._parse_not_constraint(), 710 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 711 "ON": lambda self: ( 712 self._match(TokenType.UPDATE) 713 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 714 ) 715 or self.expression(exp.OnProperty, this=self._parse_id_var()), 716 
"PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 717 "PRIMARY KEY": lambda self: self._parse_primary_key(), 718 "REFERENCES": lambda self: self._parse_references(match=False), 719 "TITLE": lambda self: self.expression( 720 exp.TitleColumnConstraint, this=self._parse_var_or_string() 721 ), 722 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 723 "UNIQUE": lambda self: self._parse_unique(), 724 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 725 "WITH": lambda self: self.expression( 726 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 727 ), 728 } 729 730 ALTER_PARSERS = { 731 "ADD": lambda self: self._parse_alter_table_add(), 732 "ALTER": lambda self: self._parse_alter_table_alter(), 733 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 734 "DROP": lambda self: self._parse_alter_table_drop(), 735 "RENAME": lambda self: self._parse_alter_table_rename(), 736 } 737 738 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 739 740 NO_PAREN_FUNCTION_PARSERS = { 741 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 742 "CASE": lambda self: self._parse_case(), 743 "IF": lambda self: self._parse_if(), 744 "NEXT": lambda self: self._parse_next_value_for(), 745 } 746 747 INVALID_FUNC_NAME_TOKENS = { 748 TokenType.IDENTIFIER, 749 TokenType.STRING, 750 } 751 752 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 753 754 FUNCTION_PARSERS = { 755 "ANY_VALUE": lambda self: self._parse_any_value(), 756 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 757 "CONCAT": lambda self: self._parse_concat(), 758 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 759 "DECODE": lambda self: self._parse_decode(), 760 "EXTRACT": lambda self: self._parse_extract(), 761 "JSON_OBJECT": lambda self: self._parse_json_object(), 762 "LOG": lambda self: 
self._parse_logarithm(), 763 "MATCH": lambda self: self._parse_match_against(), 764 "OPENJSON": lambda self: self._parse_open_json(), 765 "POSITION": lambda self: self._parse_position(), 766 "SAFE_CAST": lambda self: self._parse_cast(False), 767 "STRING_AGG": lambda self: self._parse_string_agg(), 768 "SUBSTRING": lambda self: self._parse_substring(), 769 "TRIM": lambda self: self._parse_trim(), 770 "TRY_CAST": lambda self: self._parse_cast(False), 771 "TRY_CONVERT": lambda self: self._parse_convert(False), 772 } 773 774 QUERY_MODIFIER_PARSERS = { 775 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 776 TokenType.WHERE: lambda self: ("where", self._parse_where()), 777 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 778 TokenType.HAVING: lambda self: ("having", self._parse_having()), 779 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 780 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 781 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 782 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 783 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 784 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 785 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 786 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 787 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 788 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 789 TokenType.CLUSTER_BY: lambda self: ( 790 "cluster", 791 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 792 ), 793 TokenType.DISTRIBUTE_BY: lambda self: ( 794 "distribute", 795 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 796 ), 797 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 798 TokenType.CONNECT_BY: lambda self: ("connect", 
self._parse_connect(skip_start_token=True)), 799 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 800 } 801 802 SET_PARSERS = { 803 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 804 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 805 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 806 "TRANSACTION": lambda self: self._parse_set_transaction(), 807 } 808 809 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 810 811 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 812 813 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 814 815 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 816 817 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 818 819 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 820 TRANSACTION_CHARACTERISTICS = { 821 "ISOLATION LEVEL REPEATABLE READ", 822 "ISOLATION LEVEL READ COMMITTED", 823 "ISOLATION LEVEL READ UNCOMMITTED", 824 "ISOLATION LEVEL SERIALIZABLE", 825 "READ WRITE", 826 "READ ONLY", 827 } 828 829 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 830 831 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 832 833 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 834 835 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 836 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 837 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 838 839 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 840 841 DISTINCT_TOKENS = {TokenType.DISTINCT} 842 843 STRICT_CAST = True 844 845 # A NULL arg in CONCAT yields NULL by default 846 CONCAT_NULL_OUTPUTS_STRING = False 847 848 PREFIXED_PIVOT_COLUMNS = False 849 IDENTIFY_PIVOT_STRINGS = False 850 851 LOG_BASE_FIRST = True 852 LOG_DEFAULTS_TO_LN = False 853 854 SUPPORTS_USER_DEFINED_TYPES = True 855 856 __slots__ = ( 857 "error_level", 858 "error_message_context", 859 
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Initialize the parser; see the class docstring for argument details."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Reset all per-parse state (SQL text, errors, and the token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split `raw_tokens` into per-statement chunks at semicolons and run
        `parse_method` over each chunk, producing one tree per statement."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1049 """ 1050 instance = exp_class(**kwargs) 1051 instance.add_comments(comments) if comments else self._add_comments(instance) 1052 return self.validate_expression(instance) 1053 1054 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1055 if expression and self._prev_comments: 1056 expression.add_comments(self._prev_comments) 1057 self._prev_comments = None 1058 1059 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1060 """ 1061 Validates an Expression, making sure that all its mandatory arguments are set. 1062 1063 Args: 1064 expression: The expression to validate. 1065 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1066 1067 Returns: 1068 The validated expression. 1069 """ 1070 if self.error_level != ErrorLevel.IGNORE: 1071 for error_message in expression.error_messages(args): 1072 self.raise_error(error_message) 1073 1074 return expression 1075 1076 def _find_sql(self, start: Token, end: Token) -> str: 1077 return self.sql[start.start : end.end + 1] 1078 1079 def _advance(self, times: int = 1) -> None: 1080 self._index += times 1081 self._curr = seq_get(self._tokens, self._index) 1082 self._next = seq_get(self._tokens, self._index + 1) 1083 1084 if self._index > 0: 1085 self._prev = self._tokens[self._index - 1] 1086 self._prev_comments = self._prev.comments 1087 else: 1088 self._prev = None 1089 self._prev_comments = None 1090 1091 def _retreat(self, index: int) -> None: 1092 if index != self._index: 1093 self._advance(index - self._index) 1094 1095 def _parse_command(self) -> exp.Command: 1096 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1097 1098 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1099 start = self._prev 1100 exists = self._parse_exists() if allow_exists else None 1101 1102 self._match(TokenType.ON) 1103 1104 kind = self._match_set(self.CREATABLES) and self._prev 1105 if not 
kind: 1106 return self._parse_as_command(start) 1107 1108 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1109 this = self._parse_user_defined_function(kind=kind.token_type) 1110 elif kind.token_type == TokenType.TABLE: 1111 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1112 elif kind.token_type == TokenType.COLUMN: 1113 this = self._parse_column() 1114 else: 1115 this = self._parse_id_var() 1116 1117 self._match(TokenType.IS) 1118 1119 return self.expression( 1120 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1121 ) 1122 1123 def _parse_to_table( 1124 self, 1125 ) -> exp.ToTableProperty: 1126 table = self._parse_table_parts(schema=True) 1127 return self.expression(exp.ToTableProperty, this=table) 1128 1129 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1130 def _parse_ttl(self) -> exp.Expression: 1131 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1132 this = self._parse_bitwise() 1133 1134 if self._match_text_seq("DELETE"): 1135 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1136 if self._match_text_seq("RECOMPRESS"): 1137 return self.expression( 1138 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1139 ) 1140 if self._match_text_seq("TO", "DISK"): 1141 return self.expression( 1142 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1143 ) 1144 if self._match_text_seq("TO", "VOLUME"): 1145 return self.expression( 1146 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1147 ) 1148 1149 return this 1150 1151 expressions = self._parse_csv(_parse_ttl_action) 1152 where = self._parse_where() 1153 group = self._parse_group() 1154 1155 aggregates = None 1156 if group and self._match(TokenType.SET): 1157 aggregates = self._parse_csv(self._parse_set_item) 1158 1159 return self.expression( 1160 exp.MergeTreeTTL, 1161 expressions=expressions, 1162 where=where, 
1163 group=group, 1164 aggregates=aggregates, 1165 ) 1166 1167 def _parse_statement(self) -> t.Optional[exp.Expression]: 1168 if self._curr is None: 1169 return None 1170 1171 if self._match_set(self.STATEMENT_PARSERS): 1172 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1173 1174 if self._match_set(Tokenizer.COMMANDS): 1175 return self._parse_command() 1176 1177 expression = self._parse_expression() 1178 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1179 return self._parse_query_modifiers(expression) 1180 1181 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1182 start = self._prev 1183 temporary = self._match(TokenType.TEMPORARY) 1184 materialized = self._match_text_seq("MATERIALIZED") 1185 1186 kind = self._match_set(self.CREATABLES) and self._prev.text 1187 if not kind: 1188 return self._parse_as_command(start) 1189 1190 return self.expression( 1191 exp.Drop, 1192 comments=start.comments, 1193 exists=exists or self._parse_exists(), 1194 this=self._parse_table(schema=True), 1195 kind=kind, 1196 temporary=temporary, 1197 materialized=materialized, 1198 cascade=self._match_text_seq("CASCADE"), 1199 constraints=self._match_text_seq("CONSTRAINTS"), 1200 purge=self._match_text_seq("PURGE"), 1201 ) 1202 1203 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1204 return ( 1205 self._match_text_seq("IF") 1206 and (not not_ or self._match(TokenType.NOT)) 1207 and self._match(TokenType.EXISTS) 1208 ) 1209 1210 def _parse_create(self) -> exp.Create | exp.Command: 1211 # Note: this can't be None because we've matched a statement parser 1212 start = self._prev 1213 comments = self._prev_comments 1214 1215 replace = start.text.upper() == "REPLACE" or self._match_pair( 1216 TokenType.OR, TokenType.REPLACE 1217 ) 1218 unique = self._match(TokenType.UNIQUE) 1219 1220 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1221 self._advance() 1222 1223 properties = 
None 1224 create_token = self._match_set(self.CREATABLES) and self._prev 1225 1226 if not create_token: 1227 # exp.Properties.Location.POST_CREATE 1228 properties = self._parse_properties() 1229 create_token = self._match_set(self.CREATABLES) and self._prev 1230 1231 if not properties or not create_token: 1232 return self._parse_as_command(start) 1233 1234 exists = self._parse_exists(not_=True) 1235 this = None 1236 expression: t.Optional[exp.Expression] = None 1237 indexes = None 1238 no_schema_binding = None 1239 begin = None 1240 clone = None 1241 1242 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1243 nonlocal properties 1244 if properties and temp_props: 1245 properties.expressions.extend(temp_props.expressions) 1246 elif temp_props: 1247 properties = temp_props 1248 1249 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1250 this = self._parse_user_defined_function(kind=create_token.token_type) 1251 1252 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1253 extend_props(self._parse_properties()) 1254 1255 self._match(TokenType.ALIAS) 1256 1257 if self._match(TokenType.COMMAND): 1258 expression = self._parse_as_command(self._prev) 1259 else: 1260 begin = self._match(TokenType.BEGIN) 1261 return_ = self._match_text_seq("RETURN") 1262 expression = self._parse_statement() 1263 1264 if return_: 1265 expression = self.expression(exp.Return, this=expression) 1266 elif create_token.token_type == TokenType.INDEX: 1267 this = self._parse_index(index=self._parse_id_var()) 1268 elif create_token.token_type in self.DB_CREATABLES: 1269 table_parts = self._parse_table_parts(schema=True) 1270 1271 # exp.Properties.Location.POST_NAME 1272 self._match(TokenType.COMMA) 1273 extend_props(self._parse_properties(before=True)) 1274 1275 this = self._parse_schema(this=table_parts) 1276 1277 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1278 extend_props(self._parse_properties()) 1279 1280 
self._match(TokenType.ALIAS) 1281 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1282 # exp.Properties.Location.POST_ALIAS 1283 extend_props(self._parse_properties()) 1284 1285 expression = self._parse_ddl_select() 1286 1287 if create_token.token_type == TokenType.TABLE: 1288 # exp.Properties.Location.POST_EXPRESSION 1289 extend_props(self._parse_properties()) 1290 1291 indexes = [] 1292 while True: 1293 index = self._parse_index() 1294 1295 # exp.Properties.Location.POST_INDEX 1296 extend_props(self._parse_properties()) 1297 1298 if not index: 1299 break 1300 else: 1301 self._match(TokenType.COMMA) 1302 indexes.append(index) 1303 elif create_token.token_type == TokenType.VIEW: 1304 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1305 no_schema_binding = True 1306 1307 if self._match_text_seq("CLONE"): 1308 clone = self._parse_table(schema=True) 1309 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1310 clone_kind = ( 1311 self._match(TokenType.L_PAREN) 1312 and self._match_texts(self.CLONE_KINDS) 1313 and self._prev.text.upper() 1314 ) 1315 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1316 self._match(TokenType.R_PAREN) 1317 clone = self.expression( 1318 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1319 ) 1320 1321 return self.expression( 1322 exp.Create, 1323 comments=comments, 1324 this=this, 1325 kind=create_token.text, 1326 replace=replace, 1327 unique=unique, 1328 expression=expression, 1329 exists=exists, 1330 properties=properties, 1331 indexes=indexes, 1332 no_schema_binding=no_schema_binding, 1333 begin=begin, 1334 clone=clone, 1335 ) 1336 1337 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1338 # only used for teradata currently 1339 self._match(TokenType.COMMA) 1340 1341 kwargs = { 1342 "no": self._match_text_seq("NO"), 1343 "dual": self._match_text_seq("DUAL"), 1344 "before": self._match_text_seq("BEFORE"), 1345 "default": 
self._match_text_seq("DEFAULT"), 1346 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1347 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1348 "after": self._match_text_seq("AFTER"), 1349 "minimum": self._match_texts(("MIN", "MINIMUM")), 1350 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1351 } 1352 1353 if self._match_texts(self.PROPERTY_PARSERS): 1354 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1355 try: 1356 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1357 except TypeError: 1358 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1359 1360 return None 1361 1362 def _parse_property(self) -> t.Optional[exp.Expression]: 1363 if self._match_texts(self.PROPERTY_PARSERS): 1364 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1365 1366 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1367 return self._parse_character_set(default=True) 1368 1369 if self._match_text_seq("COMPOUND", "SORTKEY"): 1370 return self._parse_sortkey(compound=True) 1371 1372 if self._match_text_seq("SQL", "SECURITY"): 1373 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1374 1375 assignment = self._match_pair( 1376 TokenType.VAR, TokenType.EQ, advance=False 1377 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1378 1379 if assignment: 1380 key = self._parse_var_or_string() 1381 self._match(TokenType.EQ) 1382 return self.expression( 1383 exp.Property, 1384 this=key, 1385 value=self._parse_column() or self._parse_var(any_token=True), 1386 ) 1387 1388 return None 1389 1390 def _parse_stored(self) -> exp.FileFormatProperty: 1391 self._match(TokenType.ALIAS) 1392 1393 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1394 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1395 1396 return self.expression( 1397 exp.FileFormatProperty, 1398 this=self.expression( 1399 
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parse an optional `=` / `AS`, then a field, wrapping it in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an `exp.Properties` node.

        Args:
            before: when truthy, use the pre-name property grammar
                (`_parse_property_before`) instead of `_parse_property`.

        Returns:
            An `exp.Properties` node, or None if no property was parsed.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse a FALLBACK [PROTECTION] property; `no` marks a preceding NO."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property right after CREATE/REPLACE/UNIQUE
        (see PRE_VOLATILE_TOKENS), otherwise a function stability property."""
        if self._index >= 2:
            # Look two tokens back: the token just before VOLATILE itself.
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse what follows WITH in a property list: a parenthesized csv of
        properties, or one of the JOURNAL / [NO] DATA / isolated-loading forms."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse `DEFINER = user@host`; returns None if user or host is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # Host may be an identifier, or `%` (the MOD token's text).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse `[TABLE] [=] <table>` after WITH JOURNAL."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a LOG property; `no` marks a preceding NO."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from flags collected by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse `CHECKSUM [=] ON|OFF|DEFAULT` (on is True/False/None)."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parse a csv of ordered expressions into an `exp.Cluster`."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse `CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS`."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; if GRANTS doesn't follow, back up over COPY and bail."""
        if not self._match_text_seq("GRANTS"):
            # Undo the match that routed us here so other parsers can retry.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse `FREESPACE [=] <number> [PERCENT]`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= <number> [PERCENT]` or bare with
        NO/DEFAULT flags supplied by the caller."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse `DATABLOCKSIZE [=] <number> [BYTES|KBYTES|KILOBYTES]`.

        The DEFAULT/MINIMUM/MAXIMUM qualifiers are matched by the caller and
        passed through as flags.
        """
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse `BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]`."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: target kind (TABLE/VIEW/ROW/DATABASE),
        optional target name, FOR/IN, a lock type, and an optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets carry a table reference; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def
_parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse `PARTITION BY <exprs>`; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY [=] (<schema>|<bracketed field>)`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the `AND [NO] STATISTICS` tail of WITH [NO] DATA.

        statistics is True / False / None for AND STATISTICS / AND NO
        STATISTICS / clause absent, respectively.
        """
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse `PRIMARY INDEX` after NO; returns None if it doesn't follow."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, or a generic
        ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY (<identifier>)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]...`.

        Returns None if an INCLUDING/EXCLUDING keyword isn't followed by an
        identifier, signalling a parse failure to the caller.
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def
_parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `SORTKEY (<identifiers>)`; `compound` marks COMPOUND SORTKEY."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <name>`; `default` marks a DEFAULT prefix."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either `TABLE<cols>` / `TABLE (cols)` or a
        plain data type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed struct-like schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE's optional creatable kind and target table."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and the
        `OR <alternative>` (see INSERT_ALTERNATIVES) forms."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            # Keep comments attached to the INTO token as well.
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...`; returns None when
        neither form is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <exprs> [INTO <target>]`; None if absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `FORMAT ...` after ROW; None if FORMAT doesn't follow."""
        if not
self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a row format clause: `SERDE <string> [WITH SERDEPROPERTIES (...)]`
        or the `DELIMITED [FIELDS|ESCAPED|...]` form.

        Args:
            match_row: when True, require a leading `ROW FORMAT` pair first.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and contributes one delimiter string.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        anything else after LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including the multiple-table form."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse `UPDATE <table> SET <assignments> [FROM] [WHERE] [RETURNING] [LIMIT]`."""
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`; errors if TABLE is missing."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS (...)] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        # NOTE(review): tail of _parse_cache — its header (and the `lazy`
        # local it reads) is in the preceding chunk.
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # OPTIONS('key' = 'value') — a single key/value pair is captured.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...) if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT (or CTE-prefixed statement, parenthesized query,
        VALUES clause, or leading FROM) into an expression tree.

        Args:
            nested: whether this select is nested inside another query.
            table: whether a bare table reference is acceptable in the
                parenthesized branch.
            parse_subquery_alias: whether to consume an alias after a
                parenthesized subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # raise_error may be lenient depending on error_level, so fall
                # back to returning the bare WITH clause.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...] into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one common table expression: alias [(cols)] AS (statement)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional [AS] alias [(col, ...)] for a table expression."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Roll back if the parens did not actually contain column aliases.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, consuming pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals, and clause modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style: hoist the offset into its own node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment (continues in the next chunk)."""
        if self._match(TokenType.HINT):
            hints = []
2178 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2179 hints.extend(hint) 2180 2181 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2182 self.raise_error("Expected */ after HINT") 2183 2184 return self.expression(exp.Hint, expressions=hints) 2185 2186 return None 2187 2188 def _parse_into(self) -> t.Optional[exp.Into]: 2189 if not self._match(TokenType.INTO): 2190 return None 2191 2192 temp = self._match(TokenType.TEMPORARY) 2193 unlogged = self._match_text_seq("UNLOGGED") 2194 self._match(TokenType.TABLE) 2195 2196 return self.expression( 2197 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2198 ) 2199 2200 def _parse_from( 2201 self, joins: bool = False, skip_from_token: bool = False 2202 ) -> t.Optional[exp.From]: 2203 if not skip_from_token and not self._match(TokenType.FROM): 2204 return None 2205 2206 return self.expression( 2207 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2208 ) 2209 2210 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2211 if not self._match(TokenType.MATCH_RECOGNIZE): 2212 return None 2213 2214 self._match_l_paren() 2215 2216 partition = self._parse_partition_by() 2217 order = self._parse_order() 2218 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2219 2220 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2221 rows = exp.var("ONE ROW PER MATCH") 2222 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2223 text = "ALL ROWS PER MATCH" 2224 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2225 text += f" SHOW EMPTY MATCHES" 2226 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2227 text += f" OMIT EMPTY MATCHES" 2228 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2229 text += f" WITH UNMATCHED ROWS" 2230 rows = exp.var(text) 2231 else: 2232 rows = None 2233 2234 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2235 text = "AFTER MATCH SKIP" 2236 if 
self._match_text_seq("PAST", "LAST", "ROW"): 2237 text += f" PAST LAST ROW" 2238 elif self._match_text_seq("TO", "NEXT", "ROW"): 2239 text += f" TO NEXT ROW" 2240 elif self._match_text_seq("TO", "FIRST"): 2241 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2242 elif self._match_text_seq("TO", "LAST"): 2243 text += f" TO LAST {self._advance_any().text}" # type: ignore 2244 after = exp.var(text) 2245 else: 2246 after = None 2247 2248 if self._match_text_seq("PATTERN"): 2249 self._match_l_paren() 2250 2251 if not self._curr: 2252 self.raise_error("Expecting )", self._curr) 2253 2254 paren = 1 2255 start = self._curr 2256 2257 while self._curr and paren > 0: 2258 if self._curr.token_type == TokenType.L_PAREN: 2259 paren += 1 2260 if self._curr.token_type == TokenType.R_PAREN: 2261 paren -= 1 2262 2263 end = self._prev 2264 self._advance() 2265 2266 if paren > 0: 2267 self.raise_error("Expecting )", self._curr) 2268 2269 pattern = exp.var(self._find_sql(start, end)) 2270 else: 2271 pattern = None 2272 2273 define = ( 2274 self._parse_csv( 2275 lambda: self.expression( 2276 exp.Alias, 2277 alias=self._parse_id_var(any_token=True), 2278 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2279 ) 2280 ) 2281 if self._match_text_seq("DEFINE") 2282 else None 2283 ) 2284 2285 self._match_r_paren() 2286 2287 return self.expression( 2288 exp.MatchRecognize, 2289 partition_by=partition, 2290 order=order, 2291 measures=measures, 2292 rows=rows, 2293 after=after, 2294 pattern=pattern, 2295 define=define, 2296 alias=self._parse_table_alias(), 2297 ) 2298 2299 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2300 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2301 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2302 2303 if outer_apply or cross_apply: 2304 this = self._parse_select(table=True) 2305 view = None 2306 outer = not cross_apply 2307 elif self._match(TokenType.LATERAL): 2308 this = 
self._parse_select(table=True) 2309 view = self._match(TokenType.VIEW) 2310 outer = self._match(TokenType.OUTER) 2311 else: 2312 return None 2313 2314 if not this: 2315 this = ( 2316 self._parse_unnest() 2317 or self._parse_function() 2318 or self._parse_id_var(any_token=False) 2319 ) 2320 2321 while self._match(TokenType.DOT): 2322 this = exp.Dot( 2323 this=this, 2324 expression=self._parse_function() or self._parse_id_var(any_token=False), 2325 ) 2326 2327 if view: 2328 table = self._parse_id_var(any_token=False) 2329 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2330 table_alias: t.Optional[exp.TableAlias] = self.expression( 2331 exp.TableAlias, this=table, columns=columns 2332 ) 2333 elif isinstance(this, exp.Subquery) and this.alias: 2334 # Ensures parity between the Subquery's and the Lateral's "alias" args 2335 table_alias = this.args["alias"].copy() 2336 else: 2337 table_alias = self._parse_table_alias() 2338 2339 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2340 2341 def _parse_join_parts( 2342 self, 2343 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2344 return ( 2345 self._match_set(self.JOIN_METHODS) and self._prev, 2346 self._match_set(self.JOIN_SIDES) and self._prev, 2347 self._match_set(self.JOIN_KINDS) and self._prev, 2348 ) 2349 2350 def _parse_join( 2351 self, skip_join_token: bool = False, parse_bracket: bool = False 2352 ) -> t.Optional[exp.Join]: 2353 if self._match(TokenType.COMMA): 2354 return self.expression(exp.Join, this=self._parse_table()) 2355 2356 index = self._index 2357 method, side, kind = self._parse_join_parts() 2358 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2359 join = self._match(TokenType.JOIN) 2360 2361 if not skip_join_token and not join: 2362 self._retreat(index) 2363 kind = None 2364 method = None 2365 side = None 2366 2367 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, 
                                     False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Speculatively parse nested joins so an ON/USING that follows them
            # attaches to this join; roll back if neither appears.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with `index` given, parse the ON <table> part."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse [catalog.][db.]table, allowing extra dot components via exp.Dot."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-valued expression: lateral, unnest, values,
        subquery, or a plain (possibly aliased/sampled/hinted) table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether TABLESAMPLE precedes or follows the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] x]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            # The single alias names the column, not the table.
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse VALUES ... or (VALUES ...) used as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE / USING SAMPLE — continues in the next chunk."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)
2625 2626 num = self._parse_number() 2627 2628 if self._match_text_seq("BUCKET"): 2629 bucket_numerator = self._parse_number() 2630 self._match_text_seq("OUT", "OF") 2631 bucket_denominator = bucket_denominator = self._parse_number() 2632 self._match(TokenType.ON) 2633 bucket_field = self._parse_field() 2634 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2635 percent = num 2636 elif self._match(TokenType.ROWS): 2637 rows = num 2638 else: 2639 size = num 2640 2641 self._match(TokenType.R_PAREN) 2642 2643 if self._match(TokenType.L_PAREN): 2644 method = self._parse_var() 2645 seed = self._match(TokenType.COMMA) and self._parse_number() 2646 self._match_r_paren() 2647 elif self._match_texts(("SEED", "REPEATABLE")): 2648 seed = self._parse_wrapped(self._parse_number) 2649 2650 return self.expression( 2651 exp.TableSample, 2652 method=method, 2653 bucket_numerator=bucket_numerator, 2654 bucket_denominator=bucket_denominator, 2655 bucket_field=bucket_field, 2656 percent=percent, 2657 rows=rows, 2658 size=size, 2659 seed=seed, 2660 kind=kind, 2661 ) 2662 2663 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2664 return list(iter(self._parse_pivot, None)) or None 2665 2666 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2667 return list(iter(self._parse_join, None)) or None 2668 2669 # https://duckdb.org/docs/sql/statements/pivot 2670 def _parse_simplified_pivot(self) -> exp.Pivot: 2671 def _parse_on() -> t.Optional[exp.Expression]: 2672 this = self._parse_bitwise() 2673 return self._parse_in(this) if self._match(TokenType.IN) else this 2674 2675 this = self._parse_table() 2676 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2677 using = self._match(TokenType.USING) and self._parse_csv( 2678 lambda: self._parse_alias(self._parse_function()) 2679 ) 2680 group = self._parse_group() 2681 return self.expression( 2682 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2683 ) 2684 2685 def _parse_pivot(self) 
-> t.Optional[exp.Pivot]: 2686 index = self._index 2687 include_nulls = None 2688 2689 if self._match(TokenType.PIVOT): 2690 unpivot = False 2691 elif self._match(TokenType.UNPIVOT): 2692 unpivot = True 2693 2694 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2695 if self._match_text_seq("INCLUDE", "NULLS"): 2696 include_nulls = True 2697 elif self._match_text_seq("EXCLUDE", "NULLS"): 2698 include_nulls = False 2699 else: 2700 return None 2701 2702 expressions = [] 2703 field = None 2704 2705 if not self._match(TokenType.L_PAREN): 2706 self._retreat(index) 2707 return None 2708 2709 if unpivot: 2710 expressions = self._parse_csv(self._parse_column) 2711 else: 2712 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2713 2714 if not expressions: 2715 self.raise_error("Failed to parse PIVOT's aggregation list") 2716 2717 if not self._match(TokenType.FOR): 2718 self.raise_error("Expecting FOR") 2719 2720 value = self._parse_column() 2721 2722 if not self._match(TokenType.IN): 2723 self.raise_error("Expecting IN") 2724 2725 field = self._parse_in(value, alias=True) 2726 2727 self._match_r_paren() 2728 2729 pivot = self.expression( 2730 exp.Pivot, 2731 expressions=expressions, 2732 field=field, 2733 unpivot=unpivot, 2734 include_nulls=include_nulls, 2735 ) 2736 2737 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2738 pivot.set("alias", self._parse_table_alias()) 2739 2740 if not unpivot: 2741 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2742 2743 columns: t.List[exp.Expression] = [] 2744 for fld in pivot.args["field"].expressions: 2745 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2746 for name in names: 2747 if self.PREFIXED_PIVOT_COLUMNS: 2748 name = f"{name}_{field_name}" if name else field_name 2749 else: 2750 name = f"{field_name}_{name}" if name else field_name 2751 2752 
columns.append(exp.to_identifier(name)) 2753 2754 pivot.set("columns", columns) 2755 2756 return pivot 2757 2758 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2759 return [agg.alias for agg in aggregations] 2760 2761 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2762 if not skip_where_token and not self._match(TokenType.WHERE): 2763 return None 2764 2765 return self.expression( 2766 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2767 ) 2768 2769 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2770 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2771 return None 2772 2773 elements = defaultdict(list) 2774 2775 if self._match(TokenType.ALL): 2776 return self.expression(exp.Group, all=True) 2777 2778 while True: 2779 expressions = self._parse_csv(self._parse_conjunction) 2780 if expressions: 2781 elements["expressions"].extend(expressions) 2782 2783 grouping_sets = self._parse_grouping_sets() 2784 if grouping_sets: 2785 elements["grouping_sets"].extend(grouping_sets) 2786 2787 rollup = None 2788 cube = None 2789 totals = None 2790 2791 with_ = self._match(TokenType.WITH) 2792 if self._match(TokenType.ROLLUP): 2793 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2794 elements["rollup"].extend(ensure_list(rollup)) 2795 2796 if self._match(TokenType.CUBE): 2797 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2798 elements["cube"].extend(ensure_list(cube)) 2799 2800 if self._match_text_seq("TOTALS"): 2801 totals = True 2802 elements["totals"] = True # type: ignore 2803 2804 if not (grouping_sets or rollup or cube or totals): 2805 break 2806 2807 return self.expression(exp.Group, **elements) # type: ignore 2808 2809 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2810 if not self._match(TokenType.GROUPING_SETS): 2811 return None 2812 2813 return 
self._parse_wrapped_csv(self._parse_grouping_set) 2814 2815 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2816 if self._match(TokenType.L_PAREN): 2817 grouping_set = self._parse_csv(self._parse_column) 2818 self._match_r_paren() 2819 return self.expression(exp.Tuple, expressions=grouping_set) 2820 2821 return self._parse_column() 2822 2823 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2824 if not skip_having_token and not self._match(TokenType.HAVING): 2825 return None 2826 return self.expression(exp.Having, this=self._parse_conjunction()) 2827 2828 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2829 if not self._match(TokenType.QUALIFY): 2830 return None 2831 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2832 2833 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2834 if skip_start_token: 2835 start = None 2836 elif self._match(TokenType.START_WITH): 2837 start = self._parse_conjunction() 2838 else: 2839 return None 2840 2841 self._match(TokenType.CONNECT_BY) 2842 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2843 exp.Prior, this=self._parse_bitwise() 2844 ) 2845 connect = self._parse_conjunction() 2846 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2847 return self.expression(exp.Connect, start=start, connect=connect) 2848 2849 def _parse_order( 2850 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2851 ) -> t.Optional[exp.Expression]: 2852 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2853 return this 2854 2855 return self.expression( 2856 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2857 ) 2858 2859 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2860 if not self._match(token): 2861 return None 2862 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2863 2864 def _parse_ordered(self) -> 
exp.Ordered: 2865 this = self._parse_conjunction() 2866 self._match(TokenType.ASC) 2867 2868 is_desc = self._match(TokenType.DESC) 2869 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2870 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2871 desc = is_desc or False 2872 asc = not desc 2873 nulls_first = is_nulls_first or False 2874 explicitly_null_ordered = is_nulls_first or is_nulls_last 2875 2876 if ( 2877 not explicitly_null_ordered 2878 and ( 2879 (asc and self.NULL_ORDERING == "nulls_are_small") 2880 or (desc and self.NULL_ORDERING != "nulls_are_small") 2881 ) 2882 and self.NULL_ORDERING != "nulls_are_last" 2883 ): 2884 nulls_first = True 2885 2886 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2887 2888 def _parse_limit( 2889 self, this: t.Optional[exp.Expression] = None, top: bool = False 2890 ) -> t.Optional[exp.Expression]: 2891 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2892 comments = self._prev_comments 2893 if top: 2894 limit_paren = self._match(TokenType.L_PAREN) 2895 expression = self._parse_number() 2896 2897 if limit_paren: 2898 self._match_r_paren() 2899 else: 2900 expression = self._parse_term() 2901 2902 if self._match(TokenType.COMMA): 2903 offset = expression 2904 expression = self._parse_term() 2905 else: 2906 offset = None 2907 2908 limit_exp = self.expression( 2909 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2910 ) 2911 2912 return limit_exp 2913 2914 if self._match(TokenType.FETCH): 2915 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2916 direction = self._prev.text if direction else "FIRST" 2917 2918 count = self._parse_number() 2919 percent = self._match(TokenType.PERCENT) 2920 2921 self._match_set((TokenType.ROW, TokenType.ROWS)) 2922 2923 only = self._match_text_seq("ONLY") 2924 with_ties = self._match_text_seq("WITH", "TIES") 2925 2926 if only and with_ties: 2927 self.raise_error("Cannot specify both ONLY and WITH TIES 
in FETCH clause") 2928 2929 return self.expression( 2930 exp.Fetch, 2931 direction=direction, 2932 count=count, 2933 percent=percent, 2934 with_ties=with_ties, 2935 ) 2936 2937 return this 2938 2939 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2940 if not self._match(TokenType.OFFSET): 2941 return this 2942 2943 count = self._parse_term() 2944 self._match_set((TokenType.ROW, TokenType.ROWS)) 2945 return self.expression(exp.Offset, this=this, expression=count) 2946 2947 def _parse_locks(self) -> t.List[exp.Lock]: 2948 locks = [] 2949 while True: 2950 if self._match_text_seq("FOR", "UPDATE"): 2951 update = True 2952 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2953 "LOCK", "IN", "SHARE", "MODE" 2954 ): 2955 update = False 2956 else: 2957 break 2958 2959 expressions = None 2960 if self._match_text_seq("OF"): 2961 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2962 2963 wait: t.Optional[bool | exp.Expression] = None 2964 if self._match_text_seq("NOWAIT"): 2965 wait = True 2966 elif self._match_text_seq("WAIT"): 2967 wait = self._parse_primary() 2968 elif self._match_text_seq("SKIP", "LOCKED"): 2969 wait = False 2970 2971 locks.append( 2972 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2973 ) 2974 2975 return locks 2976 2977 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2978 if not self._match_set(self.SET_OPERATIONS): 2979 return this 2980 2981 token_type = self._prev.token_type 2982 2983 if token_type == TokenType.UNION: 2984 expression = exp.Union 2985 elif token_type == TokenType.EXCEPT: 2986 expression = exp.Except 2987 else: 2988 expression = exp.Intersect 2989 2990 return self.expression( 2991 expression, 2992 this=this, 2993 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2994 by_name=self._match_text_seq("BY", "NAME"), 2995 
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse a range-style predicate (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)
        layered on top of a bitwise expression."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dialect-specific range parser declined; keep the base expression
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # NOTE(review): if NOT was consumed but no range construct followed, the
        # expression still gets wrapped in Not here — confirm callers rely on this.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
high=high) 3076 3077 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3078 if not self._match(TokenType.ESCAPE): 3079 return this 3080 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3081 3082 def _parse_interval(self) -> t.Optional[exp.Interval]: 3083 index = self._index 3084 3085 if not self._match(TokenType.INTERVAL): 3086 return None 3087 3088 if self._match(TokenType.STRING, advance=False): 3089 this = self._parse_primary() 3090 else: 3091 this = self._parse_term() 3092 3093 if not this: 3094 self._retreat(index) 3095 return None 3096 3097 unit = self._parse_function() or self._parse_var(any_token=True) 3098 3099 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3100 # each INTERVAL expression into this canonical form so it's easy to transpile 3101 if this and this.is_number: 3102 this = exp.Literal.string(this.name) 3103 elif this and this.is_string: 3104 parts = this.name.split() 3105 3106 if len(parts) == 2: 3107 if unit: 3108 # this is not actually a unit, it's something else 3109 unit = None 3110 self._retreat(self._index - 1) 3111 else: 3112 this = exp.Literal.string(parts[0]) 3113 unit = self.expression(exp.Var, this=parts[1]) 3114 3115 return self.expression(exp.Interval, this=this, unit=unit) 3116 3117 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3118 this = self._parse_term() 3119 3120 while True: 3121 if self._match_set(self.BITWISE): 3122 this = self.expression( 3123 self.BITWISE[self._prev.token_type], 3124 this=this, 3125 expression=self._parse_term(), 3126 ) 3127 elif self._match(TokenType.DQMARK): 3128 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3129 elif self._match_pair(TokenType.LT, TokenType.LT): 3130 this = self.expression( 3131 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3132 ) 3133 elif self._match_pair(TokenType.GT, TokenType.GT): 3134 this = self.expression( 3135 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning an `exp.DataType` (or related node) or None.

        Args:
            check_func: when True, reject a match that could actually be a function
                call (name followed by a parenthesized list and then a string).
            schema: propagated to nested type parsing.
            allow_identifiers: when True, a plain identifier may be re-tokenized and
                accepted as a type (or returned as-is for user-defined types).

        The cursor is rewound to its starting position on any failed match.
        """
        index = self._index

        # Teradata's SYSUDTLIB schema prefix for UDTs
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text to see if it is really a type keyword
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type parameters: STRUCT(...), DECIMAL(p, s), ENUM(...), etc.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A name followed by (...) might actually be a function call; decided below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed parameters: ARRAY<...>, MAP<...>, STRUCT<...>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            # INTERVAL YEAR TO MONTH / DAY TO SECOND span types, or INTERVAL <unit>
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a following string: NAME(...)'...' means a function call, not a type
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type into ARRAYs, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
"ZONE"): 3322 return this 3323 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3324 3325 def _parse_column(self) -> t.Optional[exp.Expression]: 3326 this = self._parse_field() 3327 if isinstance(this, exp.Identifier): 3328 this = self.expression(exp.Column, this=this) 3329 elif not this: 3330 return self._parse_bracket(this) 3331 return self._parse_column_ops(this) 3332 3333 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3334 this = self._parse_bracket(this) 3335 3336 while self._match_set(self.COLUMN_OPERATORS): 3337 op_token = self._prev.token_type 3338 op = self.COLUMN_OPERATORS.get(op_token) 3339 3340 if op_token == TokenType.DCOLON: 3341 field = self._parse_types() 3342 if not field: 3343 self.raise_error("Expected type") 3344 elif op and self._curr: 3345 self._advance() 3346 value = self._prev.text 3347 field = ( 3348 exp.Literal.number(value) 3349 if self._prev.token_type == TokenType.NUMBER 3350 else exp.Literal.string(value) 3351 ) 3352 else: 3353 field = self._parse_field(anonymous_func=True, any_token=True) 3354 3355 if isinstance(field, exp.Func): 3356 # bigquery allows function calls like x.y.count(...) 3357 # SAFE.SUBSTR(...) 
3358 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3359 this = self._replace_columns_with_dots(this) 3360 3361 if op: 3362 this = op(self, this, field) 3363 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3364 this = self.expression( 3365 exp.Column, 3366 this=field, 3367 table=this.this, 3368 db=this.args.get("table"), 3369 catalog=this.args.get("db"), 3370 ) 3371 else: 3372 this = self.expression(exp.Dot, this=this, expression=field) 3373 this = self._parse_bracket(this) 3374 return this 3375 3376 def _parse_primary(self) -> t.Optional[exp.Expression]: 3377 if self._match_set(self.PRIMARY_PARSERS): 3378 token_type = self._prev.token_type 3379 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3380 3381 if token_type == TokenType.STRING: 3382 expressions = [primary] 3383 while self._match(TokenType.STRING): 3384 expressions.append(exp.Literal.string(self._prev.text)) 3385 3386 if len(expressions) > 1: 3387 return self.expression(exp.Concat, expressions=expressions) 3388 3389 return primary 3390 3391 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3392 return exp.Literal.number(f"0.{self._prev.text}") 3393 3394 if self._match(TokenType.L_PAREN): 3395 comments = self._prev_comments 3396 query = self._parse_select() 3397 3398 if query: 3399 expressions = [query] 3400 else: 3401 expressions = self._parse_expressions() 3402 3403 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3404 3405 if isinstance(this, exp.Subqueryable): 3406 this = self._parse_set_operations( 3407 self._parse_subquery(this=this, parse_alias=False) 3408 ) 3409 elif len(expressions) > 1: 3410 this = self.expression(exp.Tuple, expressions=expressions) 3411 else: 3412 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3413 3414 if this: 3415 this.add_comments(comments) 3416 3417 self._match_r_paren(expression=this) 3418 return this 3419 3420 return None 3421 3422 def 
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping; defaults to `self.FUNCTIONS`.
            anonymous: when True, always build an `exp.Anonymous` call instead of
                using the registered builders/parsers.
            optional_parens: when True, allow functions callable without parens
                (e.g. CURRENT_DATE).

        Returns:
            The parsed function expression (possibly wrapped in a window), or None.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions that take no parenthesized argument list and need custom parsing
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No paren follows: only zero-argument no-paren functions can match
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original (non-normalized) spelling of the name
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
self._match_set(self.LAMBDAS): 3545 return self.LAMBDAS[self._prev.token_type](self, expressions) 3546 3547 self._retreat(index) 3548 3549 this: t.Optional[exp.Expression] 3550 3551 if self._match(TokenType.DISTINCT): 3552 this = self.expression( 3553 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3554 ) 3555 else: 3556 this = self._parse_select_or_expression(alias=alias) 3557 3558 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3559 3560 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3561 index = self._index 3562 3563 if not self.errors: 3564 try: 3565 if self._parse_select(nested=True): 3566 return this 3567 except ParseError: 3568 pass 3569 finally: 3570 self.errors.clear() 3571 self._retreat(index) 3572 3573 if not self._match(TokenType.L_PAREN): 3574 return this 3575 3576 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3577 3578 self._match_r_paren() 3579 return self.expression(exp.Schema, this=this, expressions=args) 3580 3581 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3582 return self._parse_column_def(self._parse_field(any_token=True)) 3583 3584 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3585 # column defs are not really columns, they're identifiers 3586 if isinstance(this, exp.Column): 3587 this = this.this 3588 3589 kind = self._parse_types(schema=True) 3590 3591 if self._match_text_seq("FOR", "ORDINALITY"): 3592 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3593 3594 constraints: t.List[exp.Expression] = [] 3595 3596 if not kind and self._match(TokenType.ALIAS): 3597 constraints.append( 3598 self.expression( 3599 exp.ComputedColumnConstraint, 3600 this=self._parse_conjunction(), 3601 persisted=self._match_text_seq("PERSISTED"), 3602 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3603 ) 3604 ) 3605 3606 while True: 3607 
constraint = self._parse_column_constraint() 3608 if not constraint: 3609 break 3610 constraints.append(constraint) 3611 3612 if not kind and not constraints: 3613 return this 3614 3615 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3616 3617 def _parse_auto_increment( 3618 self, 3619 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3620 start = None 3621 increment = None 3622 3623 if self._match(TokenType.L_PAREN, advance=False): 3624 args = self._parse_wrapped_csv(self._parse_bitwise) 3625 start = seq_get(args, 0) 3626 increment = seq_get(args, 1) 3627 elif self._match_text_seq("START"): 3628 start = self._parse_bitwise() 3629 self._match_text_seq("INCREMENT") 3630 increment = self._parse_bitwise() 3631 3632 if start and increment: 3633 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3634 3635 return exp.AutoIncrementColumnConstraint() 3636 3637 def _parse_compress(self) -> exp.CompressColumnConstraint: 3638 if self._match(TokenType.L_PAREN, advance=False): 3639 return self.expression( 3640 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3641 ) 3642 3643 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3644 3645 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3646 if self._match_text_seq("BY", "DEFAULT"): 3647 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3648 this = self.expression( 3649 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3650 ) 3651 else: 3652 self._match_text_seq("ALWAYS") 3653 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3654 3655 self._match(TokenType.ALIAS) 3656 identity = self._match_text_seq("IDENTITY") 3657 3658 if self._match(TokenType.L_PAREN): 3659 if self._match(TokenType.START_WITH): 3660 this.set("start", self._parse_bitwise()) 3661 if self._match_text_seq("INCREMENT", "BY"): 
3662 this.set("increment", self._parse_bitwise()) 3663 if self._match_text_seq("MINVALUE"): 3664 this.set("minvalue", self._parse_bitwise()) 3665 if self._match_text_seq("MAXVALUE"): 3666 this.set("maxvalue", self._parse_bitwise()) 3667 3668 if self._match_text_seq("CYCLE"): 3669 this.set("cycle", True) 3670 elif self._match_text_seq("NO", "CYCLE"): 3671 this.set("cycle", False) 3672 3673 if not identity: 3674 this.set("expression", self._parse_bitwise()) 3675 3676 self._match_r_paren() 3677 3678 return this 3679 3680 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3681 self._match_text_seq("LENGTH") 3682 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3683 3684 def _parse_not_constraint( 3685 self, 3686 ) -> t.Optional[exp.Expression]: 3687 if self._match_text_seq("NULL"): 3688 return self.expression(exp.NotNullColumnConstraint) 3689 if self._match_text_seq("CASESPECIFIC"): 3690 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3691 if self._match_text_seq("FOR", "REPLICATION"): 3692 return self.expression(exp.NotForReplicationColumnConstraint) 3693 return None 3694 3695 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3696 if self._match(TokenType.CONSTRAINT): 3697 this = self._parse_id_var() 3698 else: 3699 this = None 3700 3701 if self._match_texts(self.CONSTRAINT_PARSERS): 3702 return self.expression( 3703 exp.ColumnConstraint, 3704 this=this, 3705 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3706 ) 3707 3708 return this 3709 3710 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3711 if not self._match(TokenType.CONSTRAINT): 3712 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3713 3714 this = self._parse_id_var() 3715 expressions = [] 3716 3717 while True: 3718 constraint = self._parse_unnamed_constraint() or self._parse_function() 3719 if not constraint: 3720 break 3721 expressions.append(constraint) 3722 3723 
return self.expression(exp.Constraint, this=this, expressions=expressions) 3724 3725 def _parse_unnamed_constraint( 3726 self, constraints: t.Optional[t.Collection[str]] = None 3727 ) -> t.Optional[exp.Expression]: 3728 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3729 return None 3730 3731 constraint = self._prev.text.upper() 3732 if constraint not in self.CONSTRAINT_PARSERS: 3733 self.raise_error(f"No parser found for schema constraint {constraint}.") 3734 3735 return self.CONSTRAINT_PARSERS[constraint](self) 3736 3737 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3738 self._match_text_seq("KEY") 3739 return self.expression( 3740 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3741 ) 3742 3743 def _parse_key_constraint_options(self) -> t.List[str]: 3744 options = [] 3745 while True: 3746 if not self._curr: 3747 break 3748 3749 if self._match(TokenType.ON): 3750 action = None 3751 on = self._advance_any() and self._prev.text 3752 3753 if self._match_text_seq("NO", "ACTION"): 3754 action = "NO ACTION" 3755 elif self._match_text_seq("CASCADE"): 3756 action = "CASCADE" 3757 elif self._match_pair(TokenType.SET, TokenType.NULL): 3758 action = "SET NULL" 3759 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3760 action = "SET DEFAULT" 3761 else: 3762 self.raise_error("Invalid key constraint") 3763 3764 options.append(f"ON {on} {action}") 3765 elif self._match_text_seq("NOT", "ENFORCED"): 3766 options.append("NOT ENFORCED") 3767 elif self._match_text_seq("DEFERRABLE"): 3768 options.append("DEFERRABLE") 3769 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3770 options.append("INITIALLY DEFERRED") 3771 elif self._match_text_seq("NORELY"): 3772 options.append("NORELY") 3773 elif self._match_text_seq("MATCH", "FULL"): 3774 options.append("MATCH FULL") 3775 else: 3776 break 3777 3778 return options 3779 3780 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3781 if 
match and not self._match(TokenType.REFERENCES): 3782 return None 3783 3784 expressions = None 3785 this = self._parse_table(schema=True) 3786 options = self._parse_key_constraint_options() 3787 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3788 3789 def _parse_foreign_key(self) -> exp.ForeignKey: 3790 expressions = self._parse_wrapped_id_vars() 3791 reference = self._parse_references() 3792 options = {} 3793 3794 while self._match(TokenType.ON): 3795 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3796 self.raise_error("Expected DELETE or UPDATE") 3797 3798 kind = self._prev.text.lower() 3799 3800 if self._match_text_seq("NO", "ACTION"): 3801 action = "NO ACTION" 3802 elif self._match(TokenType.SET): 3803 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3804 action = "SET " + self._prev.text.upper() 3805 else: 3806 self._advance() 3807 action = self._prev.text.upper() 3808 3809 options[kind] = action 3810 3811 return self.expression( 3812 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3813 ) 3814 3815 def _parse_primary_key( 3816 self, wrapped_optional: bool = False, in_props: bool = False 3817 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3818 desc = ( 3819 self._match_set((TokenType.ASC, TokenType.DESC)) 3820 and self._prev.token_type == TokenType.DESC 3821 ) 3822 3823 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3824 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3825 3826 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3827 options = self._parse_key_constraint_options() 3828 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3829 3830 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3831 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3832 return this 3833 3834 bracket_kind = 
self._prev.token_type

        if self._match(TokenType.COLON):
            # A leading colon means an open-ended slice, e.g. x[:3]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Brackets can be chained, e.g. x[1][2] — recurse to consume them all
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Slice if a `:<expr>` suffix follows (e.g. x[1:2])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE <default>] END.

        Raises a parse error if the closing END is missing. The result is passed
        through _parse_window so trailing window/OVER syntax is consumed.
        """
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for the searched form
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a call IF(...) or as IF <cond> THEN <t> [ELSE <f>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all — rewind and let the caller retry
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]; NEXT was already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Undo the NEXT token so it can be re-parsed as something else
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<unit> FROM <expr>); some dialects use a comma instead of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        Args:
            strict: If True produce exp.Cast, otherwise exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: CAST(<expr>, '<type string>')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # User-defined type name
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type becomes STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of a CONCAT call."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Dialects where NULL arguments behave as empty strings: wrap each arg
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>).

        Args:
            strict: If True produce exp.Cast, otherwise exp.TryCast.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset)
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; an odd trailing arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL, not equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Unknown nullability: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one JSON_OBJECT entry: [KEY] <key> [VALUE|:] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...), including its optional clauses."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments; argument order and defaults are dialect-dependent."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH (<columns>) AGAINST (<string> [<modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<expr>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments.

        Args:
            haystack_first: If True the comma form is (haystack, needle),
                otherwise (needle, haystack). The `IN` form is always
                POSITION(<needle> IN <haystack>).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint call and tag it with `func_name`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [<chars> FROM] <expr> [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            # Single-argument form: the first expression is the trim target
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if that modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse function suffixes: FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS and OVER.

        Args:
            this: The expression (typically a function call) the suffixes apply to.
            alias: If True, parse a named window definition (<name> AS (<spec>)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows, so there is no window to parse
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> — reference to a named window instead of an inline spec
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window frame endpoint into {"value": ..., "side": ...}."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) for `this`.

        Args:
            this: The expression being aliased.
            explicit: If True, only accept an alias introduced by the AS keyword.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like keyword.

        Args:
            any_token: If True, accept any non-reserved token as an identifier.
            tokens: Token types to accept instead of the default ID_VAR_TOKENS.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved/extra token) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces, e.g. ${name}."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. ? or :name) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The registered parser declined — give the token back
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an EXCEPT (<columns>) / EXCEPT <columns> column-exclusion list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a REPLACE (<expressions>) / REPLACE <expressions> list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items using `parse_method`; None items are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a sequence of operands into binary nodes keyed by operator token."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; they are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a plain (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] TRANSACTION|WORK [<mode> [, <mode> ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode can be several VAR tokens, e.g. READ ONLY
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT ...] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER ...]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            # NOTE(review): assumes `expression` is not None when FIRST/AFTER follows — verify
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] ..., defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP [IF EXISTS] PARTITION ..."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action of an ALTER TABLE."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and parse column additions instead
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind and parse column drops instead
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unsupported forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable node if all tokens were consumed
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <condition> WHEN ... THEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None otherwise
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW_PARSERS trie."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> = <value> or <name> TO <value>.

        Args:
            kind: Optional scope qualifier (e.g. GLOBAL or SESSION).
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment — rewind so the caller can try another form
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic> [, ...]."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring a registered dialect-specific parser."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens degrade it to a raw exp.Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of `options` (each possibly multi-word) and return it as exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: <name>(<kind>[(<key> <value> ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary layout range: ([MIN <min>] MAX <max>)."""
        self._match_l_paren()
4890 has_min = self._match_text_seq("MIN") 4891 if has_min: 4892 min = self._parse_var() or self._parse_primary() 4893 self._match_text_seq("MAX") 4894 max = self._parse_var() or self._parse_primary() 4895 else: 4896 max = self._parse_var() or self._parse_primary() 4897 min = exp.Literal.number(0) 4898 self._match_r_paren() 4899 return self.expression(exp.DictRange, this=this, min=min, max=max) 4900 4901 def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: 4902 expression = self._parse_column() 4903 self._match(TokenType.IN) 4904 iterator = self._parse_column() 4905 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 4906 return self.expression( 4907 exp.Comprehension, 4908 this=this, 4909 expression=expression, 4910 iterator=iterator, 4911 condition=condition, 4912 ) 4913 4914 def _find_parser( 4915 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4916 ) -> t.Optional[t.Callable]: 4917 if not self._curr: 4918 return None 4919 4920 index = self._index 4921 this = [] 4922 while True: 4923 # The current token might be multiple words 4924 curr = self._curr.text.upper() 4925 key = curr.split(" ") 4926 this.append(curr) 4927 4928 self._advance() 4929 result, trie = in_trie(trie, key) 4930 if result == TrieResult.FAILED: 4931 break 4932 4933 if result == TrieResult.EXISTS: 4934 subparser = parsers[" ".join(this)] 4935 return subparser 4936 4937 self._retreat(index) 4938 return None 4939 4940 def _match(self, token_type, advance=True, expression=None): 4941 if not self._curr: 4942 return None 4943 4944 if self._curr.token_type == token_type: 4945 if advance: 4946 self._advance() 4947 self._add_comments(expression) 4948 return True 4949 4950 return None 4951 4952 def _match_set(self, types, advance=True): 4953 if not self._curr: 4954 return None 4955 4956 if self._curr.token_type in types: 4957 if advance: 4958 self._advance() 4959 return True 4960 4961 return None 4962 4963 def _match_pair(self, token_type_a, 
token_type_b, advance=True): 4964 if not self._curr or not self._next: 4965 return None 4966 4967 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4968 if advance: 4969 self._advance(2) 4970 return True 4971 4972 return None 4973 4974 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4975 if not self._match(TokenType.L_PAREN, expression=expression): 4976 self.raise_error("Expecting (") 4977 4978 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4979 if not self._match(TokenType.R_PAREN, expression=expression): 4980 self.raise_error("Expecting )") 4981 4982 def _match_texts(self, texts, advance=True): 4983 if self._curr and self._curr.text.upper() in texts: 4984 if advance: 4985 self._advance() 4986 return True 4987 return False 4988 4989 def _match_text_seq(self, *texts, advance=True): 4990 index = self._index 4991 for text in texts: 4992 if self._curr and self._curr.text.upper() == text: 4993 self._advance() 4994 else: 4995 self._retreat(index) 4996 return False 4997 4998 if not advance: 4999 self._retreat(index) 5000 5001 return True 5002 5003 @t.overload 5004 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5005 ... 5006 5007 @t.overload 5008 def _replace_columns_with_dots( 5009 self, this: t.Optional[exp.Expression] 5010 ) -> t.Optional[exp.Expression]: 5011 ... 
5012 5013 def _replace_columns_with_dots(self, this): 5014 if isinstance(this, exp.Dot): 5015 exp.replace_children(this, self._replace_columns_with_dots) 5016 elif isinstance(this, exp.Column): 5017 exp.replace_children(this, self._replace_columns_with_dots) 5018 table = this.args.get("table") 5019 this = ( 5020 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5021 ) 5022 5023 return this 5024 5025 def _replace_lambda( 5026 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5027 ) -> t.Optional[exp.Expression]: 5028 if not node: 5029 return node 5030 5031 for column in node.find_all(exp.Column): 5032 if column.parts[0].name in lambda_variables: 5033 dot_or_id = column.to_dot() if column.table else column.this 5034 parent = column.parent 5035 5036 while isinstance(parent, exp.Dot): 5037 if not isinstance(parent.parent, exp.Dot): 5038 parent.replace(dot_or_id) 5039 break 5040 parent = parent.parent 5041 else: 5042 if column is node: 5043 node = dot_or_id 5044 else: 5045 column.replace(dot_or_id) 5046 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
886 def __init__( 887 self, 888 error_level: t.Optional[ErrorLevel] = None, 889 error_message_context: int = 100, 890 max_errors: int = 3, 891 ): 892 self.error_level = error_level or ErrorLevel.IMMEDIATE 893 self.error_message_context = error_message_context 894 self.max_errors = max_errors 895 self._tokenizer = self.TOKENIZER_CLASS() 896 self.reset()
908 def parse( 909 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 910 ) -> t.List[t.Optional[exp.Expression]]: 911 """ 912 Parses a list of tokens and returns a list of syntax trees, one tree 913 per parsed SQL statement. 914 915 Args: 916 raw_tokens: The list of tokens. 917 sql: The original SQL string, used to produce helpful debug messages. 918 919 Returns: 920 The list of the produced syntax trees. 921 """ 922 return self._parse( 923 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 924 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
926 def parse_into( 927 self, 928 expression_types: exp.IntoType, 929 raw_tokens: t.List[Token], 930 sql: t.Optional[str] = None, 931 ) -> t.List[t.Optional[exp.Expression]]: 932 """ 933 Parses a list of tokens into a given Expression type. If a collection of Expression 934 types is given instead, this method will try to parse the token list into each one 935 of them, stopping at the first for which the parsing succeeds. 936 937 Args: 938 expression_types: The expression type(s) to try and parse the token list into. 939 raw_tokens: The list of tokens. 940 sql: The original SQL string, used to produce helpful debug messages. 941 942 Returns: 943 The target Expression. 944 """ 945 errors = [] 946 for expression_type in ensure_list(expression_types): 947 parser = self.EXPRESSION_PARSERS.get(expression_type) 948 if not parser: 949 raise TypeError(f"No parser registered for {expression_type}") 950 951 try: 952 return self._parse(parser, raw_tokens, sql) 953 except ParseError as e: 954 e.errors[0]["into_expression"] = expression_type 955 errors.append(e) 956 957 raise ParseError( 958 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 959 errors=merge_errors(errors), 960 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
997 def check_errors(self) -> None: 998 """Logs or raises any found errors, depending on the chosen error level setting.""" 999 if self.error_level == ErrorLevel.WARN: 1000 for error in self.errors: 1001 logger.error(str(error)) 1002 elif self.error_level == ErrorLevel.RAISE and self.errors: 1003 raise ParseError( 1004 concat_messages(self.errors, self.max_errors), 1005 errors=merge_errors(self.errors), 1006 )
Logs or raises any found errors, depending on the chosen error level setting.
1008 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1009 """ 1010 Appends an error in the list of recorded errors or raises it, depending on the chosen 1011 error level setting. 1012 """ 1013 token = token or self._curr or self._prev or Token.string("") 1014 start = token.start 1015 end = token.end + 1 1016 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1017 highlight = self.sql[start:end] 1018 end_context = self.sql[end : end + self.error_message_context] 1019 1020 error = ParseError.new( 1021 f"{message}. Line {token.line}, Col: {token.col}.\n" 1022 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1023 description=message, 1024 line=token.line, 1025 col=token.col, 1026 start_context=start_context, 1027 highlight=highlight, 1028 end_context=end_context, 1029 ) 1030 1031 if self.error_level == ErrorLevel.IMMEDIATE: 1032 raise error 1033 1034 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1036 def expression( 1037 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1038 ) -> E: 1039 """ 1040 Creates a new, validated Expression. 1041 1042 Args: 1043 exp_class: The expression class to instantiate. 1044 comments: An optional list of comments to attach to the expression. 1045 kwargs: The arguments to set for the expression along with their respective values. 1046 1047 Returns: 1048 The target expression. 1049 """ 1050 instance = exp_class(**kwargs) 1051 instance.add_comments(comments) if comments else self._add_comments(instance) 1052 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1059 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1060 """ 1061 Validates an Expression, making sure that all its mandatory arguments are set. 1062 1063 Args: 1064 expression: The expression to validate. 1065 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1066 1067 Returns: 1068 The validated expression. 1069 """ 1070 if self.error_level != ErrorLevel.IGNORE: 1071 for error_message in expression.error_messages(args): 1072 self.raise_error(error_message) 1073 1074 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.