sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.FIXEDSTRING, 141 TokenType.FLOAT, 142 TokenType.DOUBLE, 143 TokenType.CHAR, 144 TokenType.NCHAR, 145 TokenType.VARCHAR, 146 TokenType.NVARCHAR, 147 TokenType.TEXT, 148 TokenType.MEDIUMTEXT, 149 TokenType.LONGTEXT, 150 TokenType.MEDIUMBLOB, 151 TokenType.LONGBLOB, 152 TokenType.BINARY, 153 TokenType.VARBINARY, 154 TokenType.JSON, 155 TokenType.JSONB, 156 TokenType.INTERVAL, 157 TokenType.TIME, 158 TokenType.TIMETZ, 159 TokenType.TIMESTAMP, 160 TokenType.TIMESTAMPTZ, 161 TokenType.TIMESTAMPLTZ, 162 TokenType.DATETIME, 163 TokenType.DATETIME64, 164 TokenType.DATE, 165 TokenType.INT4RANGE, 166 TokenType.INT4MULTIRANGE, 167 TokenType.INT8RANGE, 168 TokenType.INT8MULTIRANGE, 169 TokenType.NUMRANGE, 170 TokenType.NUMMULTIRANGE, 171 TokenType.TSRANGE, 172 TokenType.TSMULTIRANGE, 173 TokenType.TSTZRANGE, 174 TokenType.TSTZMULTIRANGE, 175 TokenType.DATERANGE, 176 TokenType.DATEMULTIRANGE, 177 TokenType.DECIMAL, 178 TokenType.BIGDECIMAL, 179 TokenType.UUID, 180 TokenType.GEOGRAPHY, 181 TokenType.GEOMETRY, 182 TokenType.HLLSKETCH, 183 TokenType.HSTORE, 184 TokenType.PSEUDO_TYPE, 185 TokenType.SUPER, 186 TokenType.SERIAL, 187 TokenType.SMALLSERIAL, 188 TokenType.BIGSERIAL, 189 TokenType.XML, 190 TokenType.YEAR, 191 TokenType.UNIQUEIDENTIFIER, 192 TokenType.USERDEFINED, 193 TokenType.MONEY, 194 TokenType.SMALLMONEY, 195 TokenType.ROWVERSION, 196 TokenType.IMAGE, 197 TokenType.VARIANT, 198 TokenType.OBJECT, 199 TokenType.INET, 200 TokenType.IPADDRESS, 201 TokenType.IPPREFIX, 202 TokenType.UNKNOWN, 203 TokenType.NULL, 204 *ENUM_TYPE_TOKENS, 205 *NESTED_TYPE_TOKENS, 206 } 207 208 SUBQUERY_PREDICATES = { 209 TokenType.ANY: exp.Any, 210 TokenType.ALL: exp.All, 211 TokenType.EXISTS: exp.Exists, 212 TokenType.SOME: exp.Any, 213 } 214 215 RESERVED_KEYWORDS = { 216 *Tokenizer.SINGLE_TOKENS.values(), 217 TokenType.SELECT, 218 } 219 220 DB_CREATABLES = { 221 TokenType.DATABASE, 222 TokenType.SCHEMA, 223 TokenType.TABLE, 224 TokenType.VIEW, 225 TokenType.DICTIONARY, 226 } 227 228 CREATABLES = { 229 TokenType.COLUMN, 230 TokenType.FUNCTION, 231 TokenType.INDEX, 232 TokenType.PROCEDURE, 233 *DB_CREATABLES, 234 } 235 236 # Tokens that can represent identifiers 237 ID_VAR_TOKENS = { 238 TokenType.VAR, 239 TokenType.ANTI, 240 TokenType.APPLY, 241 TokenType.ASC, 242 TokenType.AUTO_INCREMENT, 243 TokenType.BEGIN, 244 TokenType.CACHE, 245 TokenType.CASE, 246 TokenType.COLLATE, 247 TokenType.COMMAND, 248 TokenType.COMMENT, 249 TokenType.COMMIT, 250 TokenType.CONSTRAINT, 251 TokenType.DEFAULT, 252 TokenType.DELETE, 253 TokenType.DESC, 254 TokenType.DESCRIBE, 255 TokenType.DICTIONARY, 256 TokenType.DIV, 257 TokenType.END, 258 TokenType.EXECUTE, 259 TokenType.ESCAPE, 260 TokenType.FALSE, 261 TokenType.FIRST, 262 TokenType.FILTER, 263 TokenType.FORMAT, 264 TokenType.FULL, 265 TokenType.IS, 266 TokenType.ISNULL, 267 TokenType.INTERVAL, 268 TokenType.KEEP, 269 TokenType.LEFT, 270 TokenType.LOAD, 271 TokenType.MERGE, 272 TokenType.NATURAL, 273 TokenType.NEXT, 274 TokenType.OFFSET, 275 TokenType.ORDINALITY, 276 TokenType.OVERWRITE, 277 TokenType.PARTITION, 278 TokenType.PERCENT, 279 TokenType.PIVOT, 280 TokenType.PRAGMA, 281 TokenType.RANGE, 282 TokenType.REFERENCES, 283 TokenType.RIGHT, 284 TokenType.ROW, 285 TokenType.ROWS, 286 TokenType.SEMI, 287 TokenType.SET, 288 TokenType.SETTINGS, 289 TokenType.SHOW, 290 TokenType.TEMPORARY, 291 TokenType.TOP, 292 TokenType.TRUE, 293 TokenType.UNIQUE, 294 TokenType.UNPIVOT, 295 TokenType.UPDATE, 296 TokenType.VOLATILE, 297 TokenType.WINDOW, 298 *CREATABLES, 299 *SUBQUERY_PREDICATES, 300 *TYPE_TOKENS, 301 *NO_PAREN_FUNCTIONS, 302 } 303 304 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 305 306 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 307 TokenType.APPLY, 308 TokenType.ASOF, 309 TokenType.FULL, 310 TokenType.LEFT, 311 TokenType.LOCK, 312 TokenType.NATURAL, 313 TokenType.OFFSET, 314 TokenType.RIGHT, 315 TokenType.WINDOW, 316 } 317 318 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 319 320 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 321 322 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 323 324 FUNC_TOKENS = { 325 TokenType.COMMAND, 326 TokenType.CURRENT_DATE, 327 TokenType.CURRENT_DATETIME, 328 TokenType.CURRENT_TIMESTAMP, 329 TokenType.CURRENT_TIME, 330 TokenType.CURRENT_USER, 331 TokenType.FILTER, 332 TokenType.FIRST, 333 TokenType.FORMAT, 334 TokenType.GLOB, 335 TokenType.IDENTIFIER, 336 TokenType.INDEX, 337 TokenType.ISNULL, 338 TokenType.ILIKE, 339 TokenType.INSERT, 340 TokenType.LIKE, 341 TokenType.MERGE, 342 TokenType.OFFSET, 343 TokenType.PRIMARY_KEY, 344 TokenType.RANGE, 345 TokenType.REPLACE, 346 TokenType.RLIKE, 347 TokenType.ROW, 348 TokenType.UNNEST, 349 TokenType.VAR, 350 TokenType.LEFT, 351 TokenType.RIGHT, 352 TokenType.DATE, 353 TokenType.DATETIME, 354 TokenType.TABLE, 355 TokenType.TIMESTAMP, 356 TokenType.TIMESTAMPTZ, 357 TokenType.WINDOW, 358 TokenType.XOR, 359 *TYPE_TOKENS, 360 *SUBQUERY_PREDICATES, 361 } 362 363 CONJUNCTION = { 364 TokenType.AND: exp.And, 365 TokenType.OR: exp.Or, 366 } 367 368 EQUALITY = { 369 TokenType.EQ: exp.EQ, 370 TokenType.NEQ: exp.NEQ, 371 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 372 } 373 374 COMPARISON = { 375 TokenType.GT: exp.GT, 376 TokenType.GTE: exp.GTE, 377 TokenType.LT: exp.LT, 378 TokenType.LTE: exp.LTE, 379 } 380 381 BITWISE = { 382 TokenType.AMP: exp.BitwiseAnd, 383 TokenType.CARET: exp.BitwiseXor, 384 TokenType.PIPE: exp.BitwiseOr, 385 TokenType.DPIPE: exp.DPipe, 386 } 387 388 TERM = { 389 TokenType.DASH: exp.Sub, 390 TokenType.PLUS: exp.Add, 391 TokenType.MOD: exp.Mod, 392 TokenType.COLLATE: exp.Collate, 393 } 394 395 FACTOR = { 396 TokenType.DIV: exp.IntDiv, 397 TokenType.LR_ARROW: exp.Distance, 398 TokenType.SLASH: exp.Div, 399 TokenType.STAR: exp.Mul, 400 } 401 402 TIMES = { 403 TokenType.TIME, 404 TokenType.TIMETZ, 405 } 406 407 TIMESTAMPS = { 408 TokenType.TIMESTAMP, 409 TokenType.TIMESTAMPTZ, 410 TokenType.TIMESTAMPLTZ, 411 *TIMES, 412 } 413 414 SET_OPERATIONS = { 415 TokenType.UNION, 416 TokenType.INTERSECT, 417 TokenType.EXCEPT, 418 } 419 420 JOIN_METHODS = { 421 TokenType.NATURAL, 422 TokenType.ASOF, 423 } 424 425 JOIN_SIDES = { 426 TokenType.LEFT, 427 TokenType.RIGHT, 428 TokenType.FULL, 429 } 430 431 JOIN_KINDS = { 432 TokenType.INNER, 433 TokenType.OUTER, 434 TokenType.CROSS, 435 TokenType.SEMI, 436 TokenType.ANTI, 437 } 438 439 JOIN_HINTS: t.Set[str] = set() 440 441 LAMBDAS = { 442 TokenType.ARROW: lambda self, expressions: self.expression( 443 exp.Lambda, 444 this=self._replace_lambda( 445 self._parse_conjunction(), 446 {node.name for node in expressions}, 447 ), 448 expressions=expressions, 449 ), 450 TokenType.FARROW: lambda self, expressions: self.expression( 451 exp.Kwarg, 452 this=exp.var(expressions[0].name), 453 expression=self._parse_conjunction(), 454 ), 455 } 456 457 COLUMN_OPERATORS = { 458 TokenType.DOT: None, 459 TokenType.DCOLON: lambda self, this, to: self.expression( 460 exp.Cast if self.STRICT_CAST else exp.TryCast, 461 this=this, 462 to=to, 463 ), 464 TokenType.ARROW: lambda self, this, path: self.expression( 465 exp.JSONExtract, 466 this=this, 467 expression=path, 468 ), 469 TokenType.DARROW: lambda self, this, path: self.expression( 470 exp.JSONExtractScalar, 471 this=this, 472 expression=path, 473 ), 474 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 475 exp.JSONBExtract, 476 this=this, 477 expression=path, 478 ), 479 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 480 exp.JSONBExtractScalar, 481 this=this, 482 expression=path, 483 ), 484 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 485 exp.JSONBContains, 486 this=this, 487 expression=key, 488 ), 489 } 490 491 EXPRESSION_PARSERS = { 492 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 493 exp.Column: lambda self: self._parse_column(), 494 exp.Condition: lambda self: self._parse_conjunction(), 495 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 496 exp.Expression: lambda self: self._parse_statement(), 497 exp.From: lambda self: self._parse_from(), 498 exp.Group: lambda self: self._parse_group(), 499 exp.Having: lambda self: self._parse_having(), 500 exp.Identifier: lambda self: self._parse_id_var(), 501 exp.Join: lambda self: self._parse_join(), 502 exp.Lambda: lambda self: self._parse_lambda(), 503 exp.Lateral: lambda self: self._parse_lateral(), 504 exp.Limit: lambda self: self._parse_limit(), 505 exp.Offset: lambda self: self._parse_offset(), 506 exp.Order: lambda self: self._parse_order(), 507 exp.Ordered: lambda self: self._parse_ordered(), 508 exp.Properties: lambda self: self._parse_properties(), 509 exp.Qualify: lambda self: self._parse_qualify(), 510 exp.Returning: lambda self: self._parse_returning(), 511 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 512 exp.Table: lambda self: self._parse_table_parts(), 513 exp.TableAlias: lambda self: self._parse_table_alias(), 514 exp.Where: lambda self: self._parse_where(), 515 exp.Window: lambda self: self._parse_named_window(), 516 exp.With: lambda self: self._parse_with(), 517 "JOIN_TYPE": lambda self: self._parse_join_parts(), 518 } 519 520 STATEMENT_PARSERS = { 521 TokenType.ALTER: lambda self: self._parse_alter(), 522 TokenType.BEGIN: lambda self: self._parse_transaction(), 523 TokenType.CACHE: lambda self: self._parse_cache(), 524 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 525 TokenType.COMMENT: lambda self: self._parse_comment(), 526 TokenType.CREATE: lambda self: self._parse_create(), 527 TokenType.DELETE: lambda self: self._parse_delete(), 528 TokenType.DESC: lambda self: self._parse_describe(), 529 TokenType.DESCRIBE: lambda self: self._parse_describe(), 530 TokenType.DROP: lambda self: self._parse_drop(), 531 TokenType.FROM: lambda self: exp.select("*").from_( 532 t.cast(exp.From, self._parse_from(skip_from_token=True)) 533 ), 534 TokenType.INSERT: lambda self: self._parse_insert(), 535 TokenType.LOAD: lambda self: self._parse_load(), 536 TokenType.MERGE: lambda self: self._parse_merge(), 537 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 538 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 539 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 540 TokenType.SET: lambda self: self._parse_set(), 541 TokenType.UNCACHE: lambda self: self._parse_uncache(), 542 TokenType.UPDATE: lambda self: self._parse_update(), 543 TokenType.USE: lambda self: self.expression( 544 exp.Use, 545 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 546 and exp.var(self._prev.text), 547 this=self._parse_table(schema=False), 548 ), 549 } 550 551 UNARY_PARSERS = { 552 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 553 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 554 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 555 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 556 } 557 558 PRIMARY_PARSERS = { 559 TokenType.STRING: lambda self, token: self.expression( 560 exp.Literal, this=token.text, is_string=True 561 ), 562 TokenType.NUMBER: lambda self, token: self.expression( 563 exp.Literal, this=token.text, is_string=False 564 ), 565 TokenType.STAR: lambda self, _: self.expression( 566 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 567 ), 568 TokenType.NULL: lambda self, _: self.expression(exp.Null), 569 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 570 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 571 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 572 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 573 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 574 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 575 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 576 exp.National, this=token.text 577 ), 578 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 579 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 580 } 581 582 PLACEHOLDER_PARSERS = { 583 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 584 TokenType.PARAMETER: lambda self: self._parse_parameter(), 585 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 586 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 587 else None, 588 } 589 590 RANGE_PARSERS = { 591 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 592 TokenType.GLOB: binary_range_parser(exp.Glob), 593 TokenType.ILIKE: binary_range_parser(exp.ILike), 594 TokenType.IN: lambda self, this: self._parse_in(this), 595 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 596 TokenType.IS: lambda self, this: self._parse_is(this), 597 TokenType.LIKE: binary_range_parser(exp.Like), 598 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 599 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 600 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 601 } 602 603 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 604 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 605 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 606 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 607 "CHARACTER SET": lambda self: self._parse_character_set(), 608 "CHECKSUM": lambda self: self._parse_checksum(), 609 "CLUSTER BY": lambda self: self._parse_cluster(), 610 "CLUSTERED": lambda self: self._parse_clustered_by(), 611 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 612 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 613 "COPY": lambda self: self._parse_copy_property(), 614 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 615 "DEFINER": lambda self: self._parse_definer(), 616 "DETERMINISTIC": lambda self: self.expression( 617 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 618 ), 619 "DISTKEY": lambda self: self._parse_distkey(), 620 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 621 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 622 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 623 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 624 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 625 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 626 "FREESPACE": lambda self: self._parse_freespace(), 627 "HEAP": lambda self: self.expression(exp.HeapProperty), 628 "IMMUTABLE": lambda self: self.expression( 629 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 630 ), 631 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 632 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 633 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 634 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 635 "LIKE": lambda self: self._parse_create_like(), 636 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 637 "LOCK": lambda self: self._parse_locking(), 638 "LOCKING": lambda self: self._parse_locking(), 639 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 640 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 641 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 642 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 643 "NO": lambda self: self._parse_no_property(), 644 "ON": lambda self: self._parse_on_property(), 645 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 646 "PARTITION BY": lambda self: self._parse_partitioned_by(), 647 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 648 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 649 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 650 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 651 "RETURNS": lambda self: self._parse_returns(), 652 "ROW": lambda self: self._parse_row(), 653 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 654 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 655 "SETTINGS": lambda self: self.expression( 656 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 657 ), 658 "SORTKEY": lambda self: self._parse_sortkey(), 659 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 660 "STABLE": lambda self: self.expression( 661 exp.StabilityProperty, this=exp.Literal.string("STABLE") 662 ), 663 "STORED": lambda self: self._parse_stored(), 664 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 665 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 666 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 667 "TO": lambda self: self._parse_to_table(), 668 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 669 "TTL": lambda self: self._parse_ttl(), 670 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 671 "VOLATILE": lambda self: self._parse_volatile_property(), 672 "WITH": lambda self: self._parse_with_property(), 673 } 674 675 CONSTRAINT_PARSERS = { 676 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 677 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 678 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 679 "CHARACTER SET": lambda self: self.expression( 680 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 681 ), 682 "CHECK": lambda self: self.expression( 683 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 684 ), 685 "COLLATE": lambda self: self.expression( 686 exp.CollateColumnConstraint, this=self._parse_var() 687 ), 688 "COMMENT": lambda self: self.expression( 689 exp.CommentColumnConstraint, this=self._parse_string() 690 ), 691 "COMPRESS": lambda self: self._parse_compress(), 692 "CLUSTERED": lambda self: self.expression( 693 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 694 ), 695 "NONCLUSTERED": lambda self: self.expression( 696 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 697 ), 698 "DEFAULT": lambda self: self.expression( 699 exp.DefaultColumnConstraint, this=self._parse_bitwise() 700 ), 701 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 702 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 703 "FORMAT": lambda self: self.expression( 704 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 705 ), 706 "GENERATED": lambda self: self._parse_generated_as_identity(), 707 "IDENTITY": lambda self: self._parse_auto_increment(), 708 "INLINE": lambda self: self._parse_inline(), 709 "LIKE": lambda self: self._parse_create_like(), 710 "NOT": lambda self: self._parse_not_constraint(), 711 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 712 "ON": lambda self: ( 713 self._match(TokenType.UPDATE) 714 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 715 ) 716 or self.expression(exp.OnProperty, this=self._parse_id_var()), 717 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 718 "PRIMARY KEY": lambda self: self._parse_primary_key(), 719 "REFERENCES": lambda self: self._parse_references(match=False), 720 "TITLE": lambda self: self.expression( 721 exp.TitleColumnConstraint, this=self._parse_var_or_string() 722 ), 723 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 724 "UNIQUE": lambda self: self._parse_unique(), 725 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 726 "WITH": lambda self: self.expression( 727 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 728 ), 729 } 730 731 ALTER_PARSERS = { 732 "ADD": lambda self: self._parse_alter_table_add(), 733 "ALTER": lambda self: self._parse_alter_table_alter(), 734 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 735 "DROP": lambda self: self._parse_alter_table_drop(), 736 "RENAME": lambda self: self._parse_alter_table_rename(), 737 } 738 739 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 740 741 NO_PAREN_FUNCTION_PARSERS = { 742 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 743 "CASE": lambda self: self._parse_case(), 744 "IF": lambda self: self._parse_if(), 745 "NEXT": lambda self: self._parse_next_value_for(), 746 } 747 748 INVALID_FUNC_NAME_TOKENS = { 749 TokenType.IDENTIFIER, 750 TokenType.STRING, 751 } 752 753 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 754 755 FUNCTION_PARSERS = { 756 "ANY_VALUE": lambda self: self._parse_any_value(), 757 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 758 "CONCAT": lambda self: self._parse_concat(), 759 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 760 "DECODE": lambda self: self._parse_decode(), 761 "EXTRACT": lambda self: self._parse_extract(), 762 "JSON_OBJECT": lambda self: self._parse_json_object(), 763 "LOG": lambda self: self._parse_logarithm(), 764 "MATCH": lambda self: self._parse_match_against(), 765 "OPENJSON": lambda self: self._parse_open_json(), 766 "POSITION": lambda self: self._parse_position(), 767 "SAFE_CAST": lambda self: self._parse_cast(False), 768 "STRING_AGG": lambda self: self._parse_string_agg(), 769 "SUBSTRING": lambda self: self._parse_substring(), 770 "TRIM": lambda self: self._parse_trim(), 771 "TRY_CAST": lambda self: self._parse_cast(False), 772 "TRY_CONVERT": lambda self: self._parse_convert(False), 773 } 774 775 QUERY_MODIFIER_PARSERS = { 776 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 777 TokenType.WHERE: lambda self: ("where", self._parse_where()), 778 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 779 TokenType.HAVING: lambda self: ("having", self._parse_having()), 780 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 781 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 782 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 783 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 784 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 785 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 786 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 787 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 788 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 789 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 790 TokenType.CLUSTER_BY: lambda self: ( 791 "cluster", 792 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 793 ), 794 TokenType.DISTRIBUTE_BY: lambda self: ( 795 "distribute", 796 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 797 ), 798 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 799 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 800 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 801 } 802 803 SET_PARSERS = { 804 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 805 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 806 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 807 "TRANSACTION": lambda self: self._parse_set_transaction(), 808 } 809 810 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 811 812 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 813 814 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 815 816 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 817 818 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 819 820 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 821 TRANSACTION_CHARACTERISTICS = { 822 "ISOLATION LEVEL REPEATABLE READ", 823 "ISOLATION LEVEL READ COMMITTED", 824 "ISOLATION LEVEL READ UNCOMMITTED", 825 "ISOLATION LEVEL SERIALIZABLE", 826 "READ WRITE", 827 "READ ONLY", 828 } 829 830 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 831 832 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 833 834 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 835 836 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 837 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 838 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 839 840 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 841 842 DISTINCT_TOKENS = {TokenType.DISTINCT} 843 844 STRICT_CAST = True 845 846 # A NULL arg in CONCAT yields NULL by default 847 CONCAT_NULL_OUTPUTS_STRING = False 848 849 PREFIXED_PIVOT_COLUMNS = False 850 IDENTIFY_PIVOT_STRINGS = False 851 852 LOG_BASE_FIRST = True 853 LOG_DEFAULTS_TO_LN = False 854 855 SUPPORTS_USER_DEFINED_TYPES = True 856 857 __slots__ = ( 858 "error_level", 859 "error_message_context", 860 "max_errors", 861 "sql", 862 "errors", 863 "_tokens", 864 "_index", 865 "_curr", 866 "_next", 867 "_prev", 868 "_prev_comments", 869 "_tokenizer", 870 ) 871 872 # Autofilled 873 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 874 INDEX_OFFSET: int = 0 875 UNNEST_COLUMN_ONLY: bool = False 876 ALIAS_POST_TABLESAMPLE: bool = False 877 STRICT_STRING_CONCAT = False 878 NORMALIZE_FUNCTIONS = "upper" 879 NULL_ORDERING: str = "nulls_are_small" 880 SHOW_TRIE: t.Dict = {} 881 SET_TRIE: t.Dict = {} 882 FORMAT_MAPPING: t.Dict[str, str] = {} 883 FORMAT_TRIE: t.Dict = {} 884 TIME_MAPPING: t.Dict[str, str] = {} 885 TIME_TRIE: t.Dict = {} 886 887 def __init__( 888 self, 889 error_level: t.Optional[ErrorLevel] = None, 890 error_message_context: int = 100, 891 max_errors: int = 3, 892 ): 893 self.error_level = error_level or ErrorLevel.IMMEDIATE 894 self.error_message_context = error_message_context 895 self.max_errors = max_errors 896 self._tokenizer = self.TOKENIZER_CLASS() 897 self.reset() 898 899 def reset(self): 900 self.sql = "" 901 self.errors = [] 902 self._tokens = [] 903 self._index = 0 904 self._curr = None 905 self._next = None 906 self._prev = None 907 self._prev_comments = None 908 909 def parse( 910 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 911 ) -> t.List[t.Optional[exp.Expression]]: 912 """ 913 Parses a list of tokens and returns a list of syntax trees, one tree 914 per parsed SQL statement. 915 916 Args: 917 raw_tokens: The list of tokens. 918 sql: The original SQL string, used to produce helpful debug messages. 919 920 Returns: 921 The list of the produced syntax trees. 922 """ 923 return self._parse( 924 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 925 ) 926 927 def parse_into( 928 self, 929 expression_types: exp.IntoType, 930 raw_tokens: t.List[Token], 931 sql: t.Optional[str] = None, 932 ) -> t.List[t.Optional[exp.Expression]]: 933 """ 934 Parses a list of tokens into a given Expression type. If a collection of Expression 935 types is given instead, this method will try to parse the token list into each one 936 of them, stopping at the first for which the parsing succeeds. 937 938 Args: 939 expression_types: The expression type(s) to try and parse the token list into. 940 raw_tokens: The list of tokens. 941 sql: The original SQL string, used to produce helpful debug messages. 942 943 Returns: 944 The target Expression. 945 """ 946 errors = [] 947 for expression_type in ensure_list(expression_types): 948 parser = self.EXPRESSION_PARSERS.get(expression_type) 949 if not parser: 950 raise TypeError(f"No parser registered for {expression_type}") 951 952 try: 953 return self._parse(parser, raw_tokens, sql) 954 except ParseError as e: 955 e.errors[0]["into_expression"] = expression_type 956 errors.append(e) 957 958 raise ParseError( 959 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 960 errors=merge_errors(errors), 961 ) from errors[-1] 962 963 def _parse( 964 self, 965 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 966 raw_tokens: t.List[Token], 967 sql: t.Optional[str] = None, 968 ) -> t.List[t.Optional[exp.Expression]]: 969 self.reset() 970 self.sql = sql or "" 971 972 total = len(raw_tokens) 973 chunks: t.List[t.List[Token]] = [[]] 974 975 for i, token in enumerate(raw_tokens): 976 if token.token_type == TokenType.SEMICOLON: 977 if i < total - 1: 978 chunks.append([]) 979 else: 980 chunks[-1].append(token) 981 982 expressions = [] 983 984 for tokens in chunks: 985 self._index = -1 986 self._tokens = tokens 987 self._advance() 988 989 expressions.append(parse_method(self)) 990 991 if self._index < len(self._tokens): 992 self.raise_error("Invalid expression / Unexpected token") 993 994 self.check_errors() 995 996 return expressions 997 998 def check_errors(self) -> None: 999 """Logs or raises any found errors, depending on the chosen error level setting.""" 1000 if self.error_level == ErrorLevel.WARN: 1001 for error in self.errors: 1002 logger.error(str(error)) 1003 elif self.error_level == ErrorLevel.RAISE and self.errors: 1004 raise ParseError( 1005 concat_messages(self.errors, self.max_errors), 1006 errors=merge_errors(self.errors), 1007 ) 1008 1009 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1010 """ 1011 Appends an error in the list of recorded errors or raises it, depending on the chosen 1012 error level setting. 1013 """ 1014 token = token or self._curr or self._prev or Token.string("") 1015 start = token.start 1016 end = token.end + 1 1017 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1018 highlight = self.sql[start:end] 1019 end_context = self.sql[end : end + self.error_message_context] 1020 1021 error = ParseError.new( 1022 f"{message}. Line {token.line}, Col: {token.col}.\n" 1023 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1024 description=message, 1025 line=token.line, 1026 col=token.col, 1027 start_context=start_context, 1028 highlight=highlight, 1029 end_context=end_context, 1030 ) 1031 1032 if self.error_level == ErrorLevel.IMMEDIATE: 1033 raise error 1034 1035 self.errors.append(error) 1036 1037 def expression( 1038 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1039 ) -> E: 1040 """ 1041 Creates a new, validated Expression. 1042 1043 Args: 1044 exp_class: The expression class to instantiate. 1045 comments: An optional list of comments to attach to the expression. 1046 kwargs: The arguments to set for the expression along with their respective values. 1047 1048 Returns: 1049 The target expression. 1050 """ 1051 instance = exp_class(**kwargs) 1052 instance.add_comments(comments) if comments else self._add_comments(instance) 1053 return self.validate_expression(instance) 1054 1055 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1056 if expression and self._prev_comments: 1057 expression.add_comments(self._prev_comments) 1058 self._prev_comments = None 1059 1060 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1061 """ 1062 Validates an Expression, making sure that all its mandatory arguments are set. 1063 1064 Args: 1065 expression: The expression to validate. 1066 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1067 1068 Returns: 1069 The validated expression. 1070 """ 1071 if self.error_level != ErrorLevel.IGNORE: 1072 for error_message in expression.error_messages(args): 1073 self.raise_error(error_message) 1074 1075 return expression 1076 1077 def _find_sql(self, start: Token, end: Token) -> str: 1078 return self.sql[start.start : end.end + 1] 1079 1080 def _advance(self, times: int = 1) -> None: 1081 self._index += times 1082 self._curr = seq_get(self._tokens, self._index) 1083 self._next = seq_get(self._tokens, self._index + 1) 1084 1085 if self._index > 0: 1086 self._prev = self._tokens[self._index - 1] 1087 self._prev_comments = self._prev.comments 1088 else: 1089 self._prev = None 1090 self._prev_comments = None 1091 1092 def _retreat(self, index: int) -> None: 1093 if index != self._index: 1094 self._advance(index - self._index) 1095 1096 def _parse_command(self) -> exp.Command: 1097 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1098 1099 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1100 start = self._prev 1101 exists = self._parse_exists() if allow_exists else None 1102 1103 self._match(TokenType.ON) 1104 1105 kind = self._match_set(self.CREATABLES) and self._prev 1106 if not kind: 1107 return self._parse_as_command(start) 1108 1109 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1110 this = self._parse_user_defined_function(kind=kind.token_type) 1111 elif kind.token_type == TokenType.TABLE: 1112 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1113 elif kind.token_type == TokenType.COLUMN: 1114 this = self._parse_column() 1115 else: 1116 this = self._parse_id_var() 1117 1118 self._match(TokenType.IS) 1119 1120 return self.expression( 1121 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1122 ) 1123 1124 def _parse_to_table( 1125 self, 1126 ) -> exp.ToTableProperty: 1127 table = self._parse_table_parts(schema=True) 1128 return self.expression(exp.ToTableProperty, this=table) 1129 1130 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1131 def _parse_ttl(self) -> exp.Expression: 1132 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1133 this = self._parse_bitwise() 1134 1135 if self._match_text_seq("DELETE"): 1136 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1137 if self._match_text_seq("RECOMPRESS"): 1138 return self.expression( 1139 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1140 ) 1141 if self._match_text_seq("TO", "DISK"): 1142 return self.expression( 1143 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1144 ) 1145 if self._match_text_seq("TO", "VOLUME"): 1146 return self.expression( 1147 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1148 ) 1149 1150 return this 1151 1152 expressions = self._parse_csv(_parse_ttl_action) 1153 where = self._parse_where() 1154 group = self._parse_group() 1155 1156 aggregates = None 1157 if group and self._match(TokenType.SET): 1158 aggregates = self._parse_csv(self._parse_set_item) 1159 1160 return self.expression( 1161 exp.MergeTreeTTL, 1162 expressions=expressions, 1163 where=where, 1164 group=group, 1165 aggregates=aggregates, 1166 ) 1167 1168 def _parse_statement(self) -> t.Optional[exp.Expression]: 1169 if self._curr is None: 1170 return None 1171 1172 if self._match_set(self.STATEMENT_PARSERS): 1173 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1174 1175 if self._match_set(Tokenizer.COMMANDS): 1176 return self._parse_command() 1177 1178 expression = self._parse_expression() 1179 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1180 return self._parse_query_modifiers(expression) 1181 1182 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1183 start = self._prev 1184 temporary = self._match(TokenType.TEMPORARY) 1185 materialized = self._match_text_seq("MATERIALIZED") 1186 1187 kind = self._match_set(self.CREATABLES) and self._prev.text 1188 if not kind: 1189 return self._parse_as_command(start) 1190 1191 return self.expression( 1192 exp.Drop, 1193 comments=start.comments, 1194 exists=exists or self._parse_exists(), 1195 this=self._parse_table(schema=True), 1196 kind=kind, 1197 temporary=temporary, 1198 materialized=materialized, 1199 cascade=self._match_text_seq("CASCADE"), 1200 constraints=self._match_text_seq("CONSTRAINTS"), 1201 purge=self._match_text_seq("PURGE"), 1202 ) 1203 1204 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1205 return ( 1206 self._match_text_seq("IF") 1207 and (not not_ or self._match(TokenType.NOT)) 1208 and self._match(TokenType.EXISTS) 1209 ) 1210 1211 def _parse_create(self) -> exp.Create | exp.Command: 1212 # Note: this can't be None because we've matched a statement parser 1213 start = self._prev 1214 comments = self._prev_comments 1215 1216 replace = start.text.upper() == "REPLACE" or self._match_pair( 1217 TokenType.OR, TokenType.REPLACE 1218 ) 1219 unique = self._match(TokenType.UNIQUE) 1220 1221 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1222 self._advance() 1223 1224 properties = None 1225 create_token = self._match_set(self.CREATABLES) and self._prev 1226 1227 if not create_token: 1228 # exp.Properties.Location.POST_CREATE 1229 properties = self._parse_properties() 1230 create_token = self._match_set(self.CREATABLES) and self._prev 1231 1232 if not properties or not create_token: 1233 return self._parse_as_command(start) 1234 1235 exists = self._parse_exists(not_=True) 1236 this = None 1237 expression: t.Optional[exp.Expression] = None 1238 indexes = None 1239 no_schema_binding = None 1240 begin = None 1241 clone = None 1242 1243 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1244 nonlocal properties 1245 if properties and temp_props: 1246 properties.expressions.extend(temp_props.expressions) 1247 elif temp_props: 1248 properties = temp_props 1249 1250 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1251 this = self._parse_user_defined_function(kind=create_token.token_type) 1252 1253 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1254 extend_props(self._parse_properties()) 1255 1256 self._match(TokenType.ALIAS) 1257 1258 if self._match(TokenType.COMMAND): 1259 expression = self._parse_as_command(self._prev) 1260 else: 1261 begin = self._match(TokenType.BEGIN) 1262 return_ = self._match_text_seq("RETURN") 1263 expression = self._parse_statement() 1264 1265 if return_: 1266 expression = self.expression(exp.Return, this=expression) 1267 elif create_token.token_type == TokenType.INDEX: 1268 this = self._parse_index(index=self._parse_id_var()) 1269 elif create_token.token_type in self.DB_CREATABLES: 1270 table_parts = self._parse_table_parts(schema=True) 1271 1272 # exp.Properties.Location.POST_NAME 1273 self._match(TokenType.COMMA) 1274 extend_props(self._parse_properties(before=True)) 1275 1276 this = self._parse_schema(this=table_parts) 1277 1278 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1279 extend_props(self._parse_properties()) 1280 1281 self._match(TokenType.ALIAS) 1282 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1283 # exp.Properties.Location.POST_ALIAS 1284 extend_props(self._parse_properties()) 1285 1286 expression = self._parse_ddl_select() 1287 1288 if create_token.token_type == TokenType.TABLE: 1289 # exp.Properties.Location.POST_EXPRESSION 1290 extend_props(self._parse_properties()) 1291 1292 indexes = [] 1293 while True: 1294 index = self._parse_index() 1295 1296 # exp.Properties.Location.POST_INDEX 1297 extend_props(self._parse_properties()) 1298 1299 if not index: 1300 break 1301 else: 1302 self._match(TokenType.COMMA) 1303 indexes.append(index) 1304 elif create_token.token_type == TokenType.VIEW: 1305 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1306 no_schema_binding = True 1307 1308 if self._match_text_seq("CLONE"): 1309 clone = self._parse_table(schema=True) 1310 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1311 clone_kind = ( 1312 self._match(TokenType.L_PAREN) 1313 and self._match_texts(self.CLONE_KINDS) 1314 and self._prev.text.upper() 1315 ) 1316 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1317 self._match(TokenType.R_PAREN) 1318 clone = self.expression( 1319 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1320 ) 1321 1322 return self.expression( 1323 exp.Create, 1324 comments=comments, 1325 this=this, 1326 kind=create_token.text, 1327 replace=replace, 1328 unique=unique, 1329 expression=expression, 1330 exists=exists, 1331 properties=properties, 1332 indexes=indexes, 1333 no_schema_binding=no_schema_binding, 1334 begin=begin, 1335 clone=clone, 1336 ) 1337 1338 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1339 # only used for teradata currently 1340 self._match(TokenType.COMMA) 1341 1342 kwargs = { 1343 "no": self._match_text_seq("NO"), 1344 "dual": self._match_text_seq("DUAL"), 1345 "before": self._match_text_seq("BEFORE"), 1346 "default": self._match_text_seq("DEFAULT"), 1347 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1348 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1349 "after": self._match_text_seq("AFTER"), 1350 "minimum": self._match_texts(("MIN", "MINIMUM")), 1351 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1352 } 1353 1354 if self._match_texts(self.PROPERTY_PARSERS): 1355 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1356 try: 1357 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1358 except TypeError: 1359 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1360 1361 return None 1362 1363 def _parse_property(self) -> t.Optional[exp.Expression]: 1364 if self._match_texts(self.PROPERTY_PARSERS): 1365 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1366 1367 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1368 return self._parse_character_set(default=True) 1369 1370 if self._match_text_seq("COMPOUND", "SORTKEY"): 1371 return self._parse_sortkey(compound=True) 1372 1373 if self._match_text_seq("SQL", "SECURITY"): 1374 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1375 1376 assignment = self._match_pair( 1377 TokenType.VAR, TokenType.EQ, advance=False 1378 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1379 1380 if assignment: 1381 key = self._parse_var_or_string() 1382 self._match(TokenType.EQ) 1383 return self.expression( 1384 exp.Property, 1385 this=key, 1386 value=self._parse_column() or self._parse_var(any_token=True), 1387 ) 1388 1389 return None 1390 1391 def _parse_stored(self) -> exp.FileFormatProperty: 1392 self._match(TokenType.ALIAS) 1393 1394 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1395 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1396 1397 return self.expression( 1398 exp.FileFormatProperty, 1399 this=self.expression( 1400 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1401 ) 1402 if input_format or output_format 1403 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1404 ) 1405 1406 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1407 self._match(TokenType.EQ) 1408 self._match(TokenType.ALIAS) 1409 return self.expression(exp_class, this=self._parse_field()) 1410 1411 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1412 properties = [] 1413 while True: 1414 if before: 1415 prop = self._parse_property_before() 1416 else: 1417 prop = self._parse_property() 1418 1419 if not prop: 1420 break 1421 for p in ensure_list(prop): 1422 properties.append(p) 1423 1424 if properties: 1425 return self.expression(exp.Properties, expressions=properties) 1426 1427 return None 1428 1429 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1430 return self.expression( 1431 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1432 ) 1433 1434 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1435 if self._index >= 2: 1436 pre_volatile_token = self._tokens[self._index - 2] 1437 else: 1438 pre_volatile_token = None 1439 1440 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1441 return exp.VolatileProperty() 1442 1443 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1444 1445 def _parse_with_property( 1446 self, 1447 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1448 if self._match(TokenType.L_PAREN, advance=False): 1449 return self._parse_wrapped_csv(self._parse_property) 1450 1451 if self._match_text_seq("JOURNAL"): 1452 return self._parse_withjournaltable() 1453 1454 if self._match_text_seq("DATA"): 1455 return self._parse_withdata(no=False) 1456 elif self._match_text_seq("NO", "DATA"): 1457 return self._parse_withdata(no=True) 1458 1459 if not self._next: 1460 return None 1461 1462 return self._parse_withisolatedloading() 1463 1464 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1465 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1466 self._match(TokenType.EQ) 1467 1468 user = self._parse_id_var() 1469 self._match(TokenType.PARAMETER) 1470 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1471 1472 if not user or not host: 1473 return None 1474 1475 return exp.DefinerProperty(this=f"{user}@{host}") 1476 1477 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1478 self._match(TokenType.TABLE) 1479 self._match(TokenType.EQ) 1480 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1481 1482 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1483 return self.expression(exp.LogProperty, no=no) 1484 1485 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1486 return self.expression(exp.JournalProperty, **kwargs) 1487 1488 def _parse_checksum(self) -> exp.ChecksumProperty: 1489 self._match(TokenType.EQ) 1490 1491 on = None 1492 if self._match(TokenType.ON): 1493 on = True 1494 elif self._match_text_seq("OFF"): 1495 on = False 1496 1497 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1498 1499 def _parse_cluster(self) -> exp.Cluster: 1500 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1501 1502 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1503 self._match_text_seq("BY") 1504 1505 self._match_l_paren() 1506 expressions = self._parse_csv(self._parse_column) 1507 self._match_r_paren() 1508 1509 if self._match_text_seq("SORTED", "BY"): 1510 self._match_l_paren() 1511 sorted_by = self._parse_csv(self._parse_ordered) 1512 self._match_r_paren() 1513 else: 1514 sorted_by = None 1515 1516 self._match(TokenType.INTO) 1517 buckets = self._parse_number() 1518 self._match_text_seq("BUCKETS") 1519 1520 return self.expression( 1521 exp.ClusteredByProperty, 1522 expressions=expressions, 1523 sorted_by=sorted_by, 1524 buckets=buckets, 1525 ) 1526 1527 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1528 if not self._match_text_seq("GRANTS"): 1529 self._retreat(self._index - 1) 1530 return None 1531 1532 return self.expression(exp.CopyGrantsProperty) 1533 1534 def _parse_freespace(self) -> exp.FreespaceProperty: 1535 self._match(TokenType.EQ) 1536 return self.expression( 1537 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1538 ) 1539 1540 def _parse_mergeblockratio( 1541 self, no: bool = False, default: bool = False 1542 ) -> exp.MergeBlockRatioProperty: 1543 if self._match(TokenType.EQ): 1544 return self.expression( 1545 exp.MergeBlockRatioProperty, 1546 this=self._parse_number(), 1547 percent=self._match(TokenType.PERCENT), 1548 ) 1549 1550 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1551 1552 def _parse_datablocksize( 1553 self, 1554 default: t.Optional[bool] = None, 1555 minimum: t.Optional[bool] = None, 1556 maximum: t.Optional[bool] = None, 1557 ) -> exp.DataBlocksizeProperty: 1558 self._match(TokenType.EQ) 1559 size = self._parse_number() 1560 1561 units = None 1562 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1563 units = self._prev.text 1564 1565 return self.expression( 1566 exp.DataBlocksizeProperty, 1567 size=size, 1568 units=units, 1569 default=default, 1570 minimum=minimum, 1571 maximum=maximum, 1572 ) 1573 1574 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1575 self._match(TokenType.EQ) 1576 always = self._match_text_seq("ALWAYS") 1577 manual = self._match_text_seq("MANUAL") 1578 never = self._match_text_seq("NEVER") 1579 default = self._match_text_seq("DEFAULT") 1580 1581 autotemp = None 1582 if self._match_text_seq("AUTOTEMP"): 1583 autotemp = self._parse_schema() 1584 1585 return self.expression( 1586 exp.BlockCompressionProperty, 1587 always=always, 1588 manual=manual, 1589 never=never, 1590 default=default, 1591 autotemp=autotemp, 1592 ) 1593 1594 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1595 no = self._match_text_seq("NO") 1596 concurrent = self._match_text_seq("CONCURRENT") 1597 self._match_text_seq("ISOLATED", "LOADING") 1598 for_all = self._match_text_seq("FOR", "ALL") 1599 for_insert = self._match_text_seq("FOR", "INSERT") 1600 for_none = self._match_text_seq("FOR", "NONE") 1601 return self.expression( 1602 exp.IsolatedLoadingProperty, 1603 no=no, 1604 concurrent=concurrent, 1605 for_all=for_all, 1606 for_insert=for_insert, 1607 for_none=for_none, 1608 ) 1609 1610 def _parse_locking(self) -> exp.LockingProperty: 1611 if self._match(TokenType.TABLE): 1612 kind = "TABLE" 1613 elif self._match(TokenType.VIEW): 1614 kind = "VIEW" 1615 elif self._match(TokenType.ROW): 1616 kind = "ROW" 1617 elif self._match_text_seq("DATABASE"): 1618 kind = "DATABASE" 1619 else: 1620 kind = None 1621 1622 if kind in ("DATABASE", "TABLE", "VIEW"): 1623 this = self._parse_table_parts() 1624 else: 1625 this = None 1626 1627 if self._match(TokenType.FOR): 1628 for_or_in = "FOR" 1629 elif self._match(TokenType.IN): 1630 for_or_in = "IN" 1631 else: 1632 for_or_in = None 1633 1634 if self._match_text_seq("ACCESS"): 1635 lock_type = "ACCESS" 1636 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1637 lock_type = "EXCLUSIVE" 1638 elif self._match_text_seq("SHARE"): 1639 lock_type = "SHARE" 1640 elif self._match_text_seq("READ"): 1641 lock_type = "READ" 1642 elif self._match_text_seq("WRITE"): 1643 lock_type = "WRITE" 1644 elif self._match_text_seq("CHECKSUM"): 1645 lock_type = "CHECKSUM" 1646 else: 1647 lock_type = None 1648 1649 override = self._match_text_seq("OVERRIDE") 1650 1651 return self.expression( 1652 exp.LockingProperty, 1653 this=this, 1654 kind=kind, 1655 for_or_in=for_or_in, 1656 lock_type=lock_type, 1657 override=override, 1658 ) 1659 1660 def _parse_partition_by(self) -> t.List[exp.Expression]: 1661 if self._match(TokenType.PARTITION_BY): 1662 return self._parse_csv(self._parse_conjunction) 1663 return [] 1664 1665 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1666 self._match(TokenType.EQ) 1667 return self.expression( 1668 exp.PartitionedByProperty, 1669 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1670 ) 1671 1672 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1673 if self._match_text_seq("AND", "STATISTICS"): 1674 statistics = True 1675 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1676 statistics = False 1677 else: 1678 statistics = None 1679 1680 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1681 1682 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1683 if self._match_text_seq("PRIMARY", "INDEX"): 1684 return exp.NoPrimaryIndexProperty() 1685 return None 1686 1687 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1688 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1689 return exp.OnCommitProperty() 1690 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1691 return exp.OnCommitProperty(delete=True) 1692 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1693 1694 def _parse_distkey(self) -> exp.DistKeyProperty: 1695 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1696 1697 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1698 table = self._parse_table(schema=True) 1699 1700 options = [] 1701 while self._match_texts(("INCLUDING", "EXCLUDING")): 1702 this = self._prev.text.upper() 1703 1704 id_var = self._parse_id_var() 1705 if not id_var: 1706 return None 1707 1708 options.append( 1709 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1710 ) 1711 1712 return self.expression(exp.LikeProperty, this=table, expressions=options) 1713 1714 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1715 return self.expression( 1716 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1717 ) 1718 1719 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1720 self._match(TokenType.EQ) 1721 return self.expression( 1722 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1723 ) 1724 1725 def _parse_returns(self) -> exp.ReturnsProperty: 1726 value: t.Optional[exp.Expression] 1727 is_table = self._match(TokenType.TABLE) 1728 1729 if is_table: 1730 if self._match(TokenType.LT): 1731 value = self.expression( 1732 exp.Schema, 1733 this="TABLE", 1734 expressions=self._parse_csv(self._parse_struct_types), 1735 ) 1736 if not self._match(TokenType.GT): 1737 self.raise_error("Expecting >") 1738 else: 1739 value = self._parse_schema(exp.var("TABLE")) 1740 else: 1741 value = self._parse_types() 1742 1743 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1744 1745 def _parse_describe(self) -> exp.Describe: 1746 kind = self._match_set(self.CREATABLES) and self._prev.text 1747 this = self._parse_table() 1748 return self.expression(exp.Describe, this=this, kind=kind) 1749 1750 def _parse_insert(self) -> exp.Insert: 1751 comments = ensure_list(self._prev_comments) 1752 overwrite = self._match(TokenType.OVERWRITE) 1753 ignore = self._match(TokenType.IGNORE) 1754 local = self._match_text_seq("LOCAL") 1755 alternative = None 1756 1757 if self._match_text_seq("DIRECTORY"): 1758 this: t.Optional[exp.Expression] = self.expression( 1759 exp.Directory, 1760 this=self._parse_var_or_string(), 1761 local=local, 1762 row_format=self._parse_row_format(match_row=True), 1763 ) 1764 else: 1765 if self._match(TokenType.OR): 1766 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1767 1768 self._match(TokenType.INTO) 1769 comments += ensure_list(self._prev_comments) 1770 self._match(TokenType.TABLE) 1771 this = self._parse_table(schema=True) 1772 1773 returning = self._parse_returning() 1774 1775 return self.expression( 1776 exp.Insert, 1777 comments=comments, 1778 this=this, 1779 exists=self._parse_exists(), 1780 partition=self._parse_partition(), 1781 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1782 and self._parse_conjunction(), 1783 expression=self._parse_ddl_select(), 1784 conflict=self._parse_on_conflict(), 1785 returning=returning or self._parse_returning(), 1786 overwrite=overwrite, 1787 alternative=alternative, 1788 ignore=ignore, 1789 ) 1790 1791 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1792 conflict = self._match_text_seq("ON", "CONFLICT") 1793 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1794 1795 if not conflict and not duplicate: 1796 return None 1797 1798 nothing = None 1799 expressions = None 1800 key = None 1801 constraint = None 1802 1803 if conflict: 1804 if self._match_text_seq("ON", "CONSTRAINT"): 1805 constraint = self._parse_id_var() 1806 else: 1807 key = self._parse_csv(self._parse_value) 1808 1809 self._match_text_seq("DO") 1810 if self._match_text_seq("NOTHING"): 1811 nothing = True 1812 else: 1813 self._match(TokenType.UPDATE) 1814 self._match(TokenType.SET) 1815 expressions = self._parse_csv(self._parse_equality) 1816 1817 return self.expression( 1818 exp.OnConflict, 1819 duplicate=duplicate, 1820 expressions=expressions, 1821 nothing=nothing, 1822 key=key, 1823 constraint=constraint, 1824 ) 1825 1826 def _parse_returning(self) -> t.Optional[exp.Returning]: 1827 if not self._match(TokenType.RETURNING): 1828 return None 1829 return self.expression( 1830 exp.Returning, 1831 expressions=self._parse_csv(self._parse_expression), 1832 into=self._match(TokenType.INTO) and self._parse_table_part(), 1833 ) 1834 1835 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1836 if not self._match(TokenType.FORMAT): 1837 return None 1838 return self._parse_row_format() 1839 1840 def _parse_row_format( 1841 self, match_row: bool = False 1842 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1843 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1844 return None 1845 1846 if self._match_text_seq("SERDE"): 1847 this = self._parse_string() 1848 1849 serde_properties = None 1850 if self._match(TokenType.SERDE_PROPERTIES): 1851 serde_properties = self.expression( 1852 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1853 ) 1854 1855 return self.expression( 1856 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1857 ) 1858 1859 self._match_text_seq("DELIMITED") 1860 1861 kwargs = {} 1862 1863 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1864 kwargs["fields"] = self._parse_string() 1865 if self._match_text_seq("ESCAPED", "BY"): 1866 kwargs["escaped"] = self._parse_string() 1867 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1868 kwargs["collection_items"] = self._parse_string() 1869 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1870 kwargs["map_keys"] = self._parse_string() 1871 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1872 kwargs["lines"] = self._parse_string() 1873 if self._match_text_seq("NULL", "DEFINED", "AS"): 1874 kwargs["null"] = self._parse_string() 1875 1876 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1877 1878 def _parse_load(self) -> exp.LoadData | exp.Command: 1879 if self._match_text_seq("DATA"): 1880 local = self._match_text_seq("LOCAL") 1881 self._match_text_seq("INPATH") 1882 inpath = self._parse_string() 1883 overwrite = self._match(TokenType.OVERWRITE) 1884 self._match_pair(TokenType.INTO, TokenType.TABLE) 1885 1886 return self.expression( 1887 exp.LoadData, 1888 this=self._parse_table(schema=True), 1889 local=local, 1890 overwrite=overwrite, 1891 inpath=inpath, 1892 partition=self._parse_partition(), 1893 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1894 serde=self._match_text_seq("SERDE") and self._parse_string(), 1895 ) 1896 return self._parse_as_command(self._prev) 1897 1898 def _parse_delete(self) -> exp.Delete: 1899 # This handles MySQL's "Multiple-Table Syntax" 1900 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1901 tables = None 1902 comments = self._prev_comments 1903 if not self._match(TokenType.FROM, advance=False): 1904 tables = self._parse_csv(self._parse_table) or None 1905 1906 returning = self._parse_returning() 1907 1908 return self.expression( 1909 exp.Delete, 1910 comments=comments, 1911 tables=tables, 1912 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1913 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1914 where=self._parse_where(), 1915 returning=returning or self._parse_returning(), 1916 limit=self._parse_limit(), 1917 ) 1918 1919 def _parse_update(self) -> exp.Update: 1920 comments = self._prev_comments 1921 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1922 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1923 returning = self._parse_returning() 1924 return self.expression( 1925 exp.Update, 1926 comments=comments, 1927 **{ # type: ignore 1928 "this": this, 1929 "expressions": expressions, 1930 "from": self._parse_from(joins=True), 1931 "where": self._parse_where(), 1932 "returning": returning or self._parse_returning(), 1933 "limit": self._parse_limit(), 1934 }, 1935 ) 1936 1937 def _parse_uncache(self) -> exp.Uncache: 1938 if not self._match(TokenType.TABLE): 1939 self.raise_error("Expecting TABLE after UNCACHE") 1940 1941 return self.expression( 1942 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1943 ) 1944 1945 def _parse_cache(self) -> exp.Cache: 1946 lazy = self._match_text_seq("LAZY") 1947 self._match(TokenType.TABLE) 1948 table = self._parse_table(schema=True) 1949 1950 options = [] 1951 if self._match_text_seq("OPTIONS"): 1952 self._match_l_paren() 1953 k = self._parse_string() 1954 self._match(TokenType.EQ) 1955 v = self._parse_string() 1956 options = [k, v] 1957 self._match_r_paren() 1958 1959 self._match(TokenType.ALIAS) 1960 return self.expression( 1961 exp.Cache, 1962 this=table, 1963 lazy=lazy, 1964 options=options, 1965 expression=self._parse_select(nested=True), 1966 ) 1967 1968 def _parse_partition(self) -> t.Optional[exp.Partition]: 1969 if not self._match(TokenType.PARTITION): 1970 return None 1971 1972 return self.expression( 1973 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1974 ) 1975 1976 def _parse_value(self) -> exp.Tuple: 1977 if self._match(TokenType.L_PAREN): 1978 expressions = self._parse_csv(self._parse_conjunction) 1979 self._match_r_paren() 1980 return self.expression(exp.Tuple, expressions=expressions) 1981 1982 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1983 # https://prestodb.io/docs/current/sql/values.html 1984 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1985 1986 def _parse_projections(self) -> t.List[exp.Expression]: 1987 return self._parse_expressions() 1988 1989 def _parse_select( 1990 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1991 ) -> t.Optional[exp.Expression]: 1992 cte = self._parse_with() 1993 if cte: 1994 this = self._parse_statement() 1995 1996 if not this: 1997 self.raise_error("Failed to parse any statement following CTE") 1998 return cte 1999 2000 if "with" in this.arg_types: 2001 this.set("with", cte) 2002 else: 2003 self.raise_error(f"{this.key} does not support CTE") 2004 this = cte 2005 elif self._match(TokenType.SELECT): 2006 comments = self._prev_comments 2007 2008 hint = self._parse_hint() 2009 all_ = self._match(TokenType.ALL) 2010 distinct = self._match_set(self.DISTINCT_TOKENS) 2011 2012 kind = ( 2013 self._match(TokenType.ALIAS) 2014 and self._match_texts(("STRUCT", "VALUE")) 2015 and self._prev.text 2016 ) 2017 2018 if distinct: 2019 distinct = self.expression( 2020 exp.Distinct, 2021 on=self._parse_value() if self._match(TokenType.ON) else None, 2022 ) 2023 2024 if all_ and distinct: 2025 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2026 2027 limit = self._parse_limit(top=True) 2028 projections = self._parse_projections() 2029 2030 this = self.expression( 2031 exp.Select, 2032 kind=kind, 2033 hint=hint, 2034 distinct=distinct, 2035 expressions=projections, 2036 limit=limit, 2037 ) 2038 this.comments = comments 2039 2040 into = self._parse_into() 2041 if into: 2042 this.set("into", into) 2043 2044 from_ = self._parse_from() 2045 if from_: 2046 this.set("from", from_) 2047 2048 this = self._parse_query_modifiers(this) 2049 elif (table or nested) and self._match(TokenType.L_PAREN): 2050 if self._match(TokenType.PIVOT): 2051 this = self._parse_simplified_pivot() 2052 elif self._match(TokenType.FROM): 2053 this = exp.select("*").from_( 2054 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2055 ) 2056 else: 2057 this = self._parse_table() if table else self._parse_select(nested=True) 2058 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2059 2060 self._match_r_paren() 2061 2062 # We return early here so that the UNION isn't attached to the subquery by the 2063 # following call to _parse_set_operations, but instead becomes the parent node 2064 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2065 elif self._match(TokenType.VALUES): 2066 this = self.expression( 2067 exp.Values, 2068 expressions=self._parse_csv(self._parse_value), 2069 alias=self._parse_table_alias(), 2070 ) 2071 else: 2072 this = None 2073 2074 return self._parse_set_operations(this) 2075 2076 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2077 if not skip_with_token and not self._match(TokenType.WITH): 2078 return None 2079 2080 comments = self._prev_comments 2081 recursive = self._match(TokenType.RECURSIVE) 2082 2083 expressions = [] 2084 while True: 2085 expressions.append(self._parse_cte()) 2086 2087 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2088 break 2089 else: 2090 self._match(TokenType.WITH) 2091 2092 return self.expression( 2093 exp.With, comments=comments, expressions=expressions, recursive=recursive 2094 ) 2095 2096 def _parse_cte(self) -> exp.CTE: 2097 alias = self._parse_table_alias() 2098 if not alias or not alias.this: 2099 self.raise_error("Expected CTE to have alias") 2100 2101 self._match(TokenType.ALIAS) 2102 return self.expression( 2103 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2104 ) 2105 2106 def _parse_table_alias( 2107 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2108 ) -> t.Optional[exp.TableAlias]: 2109 any_token = self._match(TokenType.ALIAS) 2110 alias = ( 2111 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2112 or self._parse_string_as_identifier() 2113 ) 2114 2115 index = self._index 2116 if self._match(TokenType.L_PAREN): 2117 columns = self._parse_csv(self._parse_function_parameter) 2118 self._match_r_paren() if columns else self._retreat(index) 2119 else: 2120 columns = None 2121 2122 if not alias and not columns: 2123 return None 2124 2125 return self.expression(exp.TableAlias, this=alias, columns=columns) 2126 2127 def _parse_subquery( 2128 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2129 ) -> t.Optional[exp.Subquery]: 2130 if not this: 2131 return None 2132 2133 return self.expression( 2134 exp.Subquery, 2135 this=this, 2136 pivots=self._parse_pivots(), 2137 alias=self._parse_table_alias() if parse_alias else None, 2138 ) 2139 2140 def _parse_query_modifiers( 2141 self, this: t.Optional[exp.Expression] 2142 ) -> t.Optional[exp.Expression]: 2143 if isinstance(this, self.MODIFIABLES): 2144 for join in iter(self._parse_join, None): 2145 this.append("joins", join) 2146 for lateral in iter(self._parse_lateral, None): 2147 this.append("laterals", lateral) 2148 2149 while True: 2150 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2151 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2152 key, expression = parser(self) 2153 2154 if expression: 2155 this.set(key, expression) 2156 if key == "limit": 2157 offset = expression.args.pop("offset", None) 2158 if offset: 2159 this.set("offset", exp.Offset(expression=offset)) 2160 continue 2161 break 2162 return this 2163 2164 def _parse_hint(self) -> t.Optional[exp.Hint]: 2165 if self._match(TokenType.HINT): 2166 hints = [] 2167 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2168 hints.extend(hint) 2169 2170 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2171 self.raise_error("Expected */ after HINT") 2172 2173 return self.expression(exp.Hint, expressions=hints) 2174 2175 return None 2176 2177 def _parse_into(self) -> t.Optional[exp.Into]: 2178 if not self._match(TokenType.INTO): 2179 return None 2180 2181 temp = self._match(TokenType.TEMPORARY) 2182 unlogged = self._match_text_seq("UNLOGGED") 2183 self._match(TokenType.TABLE) 2184 2185 return self.expression( 2186 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2187 ) 2188 2189 def _parse_from( 2190 self, joins: bool = False, skip_from_token: bool = False 2191 ) -> t.Optional[exp.From]: 2192 if not skip_from_token and not self._match(TokenType.FROM): 2193 return None 2194 2195 return self.expression( 2196 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2197 ) 2198 2199 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2200 if not self._match(TokenType.MATCH_RECOGNIZE): 2201 return None 2202 2203 self._match_l_paren() 2204 2205 partition = self._parse_partition_by() 2206 order = self._parse_order() 2207 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2208 2209 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2210 rows = exp.var("ONE ROW PER MATCH") 2211 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2212 text = "ALL ROWS PER MATCH" 2213 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2214 text += f" SHOW EMPTY MATCHES" 2215 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2216 text += f" OMIT EMPTY MATCHES" 2217 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2218 text += f" WITH UNMATCHED ROWS" 2219 rows = exp.var(text) 2220 else: 2221 rows = None 2222 2223 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2224 text = "AFTER MATCH SKIP" 2225 if self._match_text_seq("PAST", "LAST", "ROW"): 2226 text += f" PAST LAST ROW" 2227 elif self._match_text_seq("TO", "NEXT", "ROW"): 2228 text += f" TO NEXT ROW" 2229 elif self._match_text_seq("TO", "FIRST"): 2230 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2231 elif self._match_text_seq("TO", "LAST"): 2232 text += f" TO LAST {self._advance_any().text}" # type: ignore 2233 after = exp.var(text) 2234 else: 2235 after = None 2236 2237 if self._match_text_seq("PATTERN"): 2238 self._match_l_paren() 2239 2240 if not self._curr: 2241 self.raise_error("Expecting )", self._curr) 2242 2243 paren = 1 2244 start = self._curr 2245 2246 while self._curr and paren > 0: 2247 if self._curr.token_type == TokenType.L_PAREN: 2248 paren += 1 2249 if self._curr.token_type == TokenType.R_PAREN: 2250 paren -= 1 2251 2252 end = self._prev 2253 self._advance() 2254 2255 if paren > 0: 2256 self.raise_error("Expecting )", self._curr) 2257 2258 pattern = exp.var(self._find_sql(start, end)) 2259 else: 2260 pattern = None 2261 2262 define = ( 2263 self._parse_csv( 2264 lambda: self.expression( 2265 exp.Alias, 2266 alias=self._parse_id_var(any_token=True), 2267 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2268 ) 2269 ) 2270 if self._match_text_seq("DEFINE") 2271 else None 2272 ) 2273 2274 self._match_r_paren() 2275 2276 return self.expression( 2277 exp.MatchRecognize, 2278 partition_by=partition, 2279 order=order, 2280 measures=measures, 2281 rows=rows, 2282 after=after, 2283 pattern=pattern, 2284 define=define, 2285 alias=self._parse_table_alias(), 2286 ) 2287 2288 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2289 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2290 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2291 2292 if outer_apply or cross_apply: 2293 this = self._parse_select(table=True) 2294 view = None 2295 outer = not cross_apply 2296 elif self._match(TokenType.LATERAL): 2297 this = self._parse_select(table=True) 2298 view = self._match(TokenType.VIEW) 2299 outer = self._match(TokenType.OUTER) 2300 else: 2301 return None 2302 2303 if not this: 2304 this = ( 2305 self._parse_unnest() 2306 or self._parse_function() 2307 or self._parse_id_var(any_token=False) 2308 ) 2309 2310 while self._match(TokenType.DOT): 2311 this = exp.Dot( 2312 this=this, 2313 expression=self._parse_function() or self._parse_id_var(any_token=False), 2314 ) 2315 2316 if view: 2317 table = self._parse_id_var(any_token=False) 2318 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2319 table_alias: t.Optional[exp.TableAlias] = self.expression( 2320 exp.TableAlias, this=table, columns=columns 2321 ) 2322 elif isinstance(this, exp.Subquery) and this.alias: 2323 # Ensures parity between the Subquery's and the Lateral's "alias" args 2324 table_alias = this.args["alias"].copy() 2325 else: 2326 table_alias = self._parse_table_alias() 2327 2328 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2329 2330 def _parse_join_parts( 2331 self, 2332 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2333 return ( 2334 self._match_set(self.JOIN_METHODS) and self._prev, 2335 self._match_set(self.JOIN_SIDES) and self._prev, 2336 self._match_set(self.JOIN_KINDS) and self._prev, 2337 ) 2338 2339 def _parse_join( 2340 self, skip_join_token: bool = False, parse_bracket: bool = False 2341 ) -> t.Optional[exp.Join]: 2342 if self._match(TokenType.COMMA): 2343 return self.expression(exp.Join, this=self._parse_table()) 2344 2345 index = self._index 2346 method, side, kind = self._parse_join_parts() 2347 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2348 join = self._match(TokenType.JOIN) 2349 2350 if not skip_join_token and not join: 2351 self._retreat(index) 2352 kind = None 2353 method = None 2354 side = None 2355 2356 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2357 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2358 2359 if not skip_join_token and not join and not outer_apply and not cross_apply: 2360 return None 2361 2362 if outer_apply: 2363 side = Token(TokenType.LEFT, "LEFT") 2364 2365 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2366 2367 if method: 2368 kwargs["method"] = method.text 2369 if side: 2370 kwargs["side"] = side.text 2371 if kind: 2372 kwargs["kind"] = kind.text 2373 if hint: 2374 kwargs["hint"] = hint 2375 2376 if self._match(TokenType.ON): 2377 kwargs["on"] = self._parse_conjunction() 2378 elif self._match(TokenType.USING): 2379 kwargs["using"] = self._parse_wrapped_id_vars() 2380 elif not (kind and kind.token_type == TokenType.CROSS): 2381 index = self._index 2382 joins = self._parse_joins() 2383 2384 if joins and self._match(TokenType.ON): 2385 kwargs["on"] = self._parse_conjunction() 2386 elif joins and self._match(TokenType.USING): 2387 kwargs["using"] = self._parse_wrapped_id_vars() 2388 else: 2389 joins = None 2390 self._retreat(index) 2391 2392 kwargs["this"].set("joins", joins) 2393 2394 comments = [c for token in (method, side, kind) if token for c in token.comments] 2395 return self.expression(exp.Join, comments=comments, **kwargs) 2396 2397 def _parse_index( 2398 self, 2399 index: t.Optional[exp.Expression] = None, 2400 ) -> t.Optional[exp.Index]: 2401 if index: 2402 unique = None 2403 primary = None 2404 amp = None 2405 2406 self._match(TokenType.ON) 2407 self._match(TokenType.TABLE) # hive 2408 table = self._parse_table_parts(schema=True) 2409 else: 2410 unique = self._match(TokenType.UNIQUE) 2411 primary = self._match_text_seq("PRIMARY") 2412 amp = self._match_text_seq("AMP") 2413 2414 if not self._match(TokenType.INDEX): 2415 return None 2416 2417 index = self._parse_id_var() 2418 table = None 2419 2420 using = self._parse_field() if self._match(TokenType.USING) else None 2421 2422 if self._match(TokenType.L_PAREN, advance=False): 2423 columns = self._parse_wrapped_csv(self._parse_ordered) 2424 else: 2425 columns = None 2426 2427 return self.expression( 2428 exp.Index, 2429 this=index, 2430 table=table, 2431 using=using, 2432 columns=columns, 2433 unique=unique, 2434 primary=primary, 2435 amp=amp, 2436 partition_by=self._parse_partition_by(), 2437 ) 2438 2439 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2440 hints: t.List[exp.Expression] = [] 2441 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2442 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2443 hints.append( 2444 self.expression( 2445 exp.WithTableHint, 2446 expressions=self._parse_csv( 2447 lambda: self._parse_function() or self._parse_var(any_token=True) 2448 ), 2449 ) 2450 ) 2451 self._match_r_paren() 2452 else: 2453 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2454 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2455 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2456 2457 self._match_texts({"INDEX", "KEY"}) 2458 if self._match(TokenType.FOR): 2459 hint.set("target", self._advance_any() and self._prev.text.upper()) 2460 2461 hint.set("expressions", self._parse_wrapped_id_vars()) 2462 hints.append(hint) 2463 2464 return hints or None 2465 2466 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2467 return ( 2468 (not schema and self._parse_function(optional_parens=False)) 2469 or self._parse_id_var(any_token=False) 2470 or self._parse_string_as_identifier() 2471 or self._parse_placeholder() 2472 ) 2473 2474 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2475 catalog = None 2476 db = None 2477 table = self._parse_table_part(schema=schema) 2478 2479 while self._match(TokenType.DOT): 2480 if catalog: 2481 # This allows nesting the table in arbitrarily many dot expressions if needed 2482 table = self.expression( 2483 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2484 ) 2485 else: 2486 catalog = db 2487 db = table 2488 table = self._parse_table_part(schema=schema) 2489 2490 if not table: 2491 self.raise_error(f"Expected table name but got {self._curr}") 2492 2493 return self.expression( 2494 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2495 ) 2496 2497 def _parse_table( 2498 self, 2499 schema: bool = False, 2500 joins: bool = False, 2501 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2502 parse_bracket: bool = False, 2503 ) -> t.Optional[exp.Expression]: 2504 lateral = self._parse_lateral() 2505 if lateral: 2506 return lateral 2507 2508 unnest = self._parse_unnest() 2509 if unnest: 2510 return unnest 2511 2512 values = self._parse_derived_table_values() 2513 if values: 2514 return values 2515 2516 subquery = self._parse_select(table=True) 2517 if subquery: 2518 if not subquery.args.get("pivots"): 2519 subquery.set("pivots", self._parse_pivots()) 2520 return subquery 2521 2522 bracket = parse_bracket and self._parse_bracket(None) 2523 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2524 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2525 2526 if schema: 2527 return self._parse_schema(this=this) 2528 2529 if self.ALIAS_POST_TABLESAMPLE: 2530 table_sample = self._parse_table_sample() 2531 2532 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2533 if alias: 2534 this.set("alias", alias) 2535 2536 if not this.args.get("pivots"): 2537 this.set("pivots", self._parse_pivots()) 2538 2539 this.set("hints", self._parse_table_hints()) 2540 2541 if not self.ALIAS_POST_TABLESAMPLE: 2542 table_sample = self._parse_table_sample() 2543 2544 if table_sample: 2545 table_sample.set("this", this) 2546 this = table_sample 2547 2548 if joins: 2549 for join in iter(self._parse_join, None): 2550 this.append("joins", join) 2551 2552 return this 2553 2554 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2555 if not self._match(TokenType.UNNEST): 2556 return None 2557 2558 expressions = self._parse_wrapped_csv(self._parse_type) 2559 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2560 2561 alias = self._parse_table_alias() if with_alias else None 2562 2563 if alias and self.UNNEST_COLUMN_ONLY: 2564 if alias.args.get("columns"): 2565 self.raise_error("Unexpected extra column alias in unnest.") 2566 2567 alias.set("columns", [alias.this]) 2568 alias.set("this", None) 2569 2570 offset = None 2571 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2572 self._match(TokenType.ALIAS) 2573 offset = self._parse_id_var() or exp.to_identifier("offset") 2574 2575 return self.expression( 2576 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2577 ) 2578 2579 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2580 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2581 if not is_derived and not self._match(TokenType.VALUES): 2582 return None 2583 2584 expressions = self._parse_csv(self._parse_value) 2585 alias = self._parse_table_alias() 2586 2587 if is_derived: 2588 self._match_r_paren() 2589 2590 return self.expression( 2591 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2592 ) 2593 2594 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2595 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2596 as_modifier and self._match_text_seq("USING", "SAMPLE") 2597 ): 2598 return None 2599 2600 bucket_numerator = None 2601 bucket_denominator = None 2602 bucket_field = None 2603 percent = None 2604 rows = None 2605 size = None 2606 seed = None 2607 2608 kind = ( 2609 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2610 ) 2611 method = self._parse_var(tokens=(TokenType.ROW,)) 2612 2613 self._match(TokenType.L_PAREN) 2614 2615 num = self._parse_number() 2616 2617 if self._match_text_seq("BUCKET"): 2618 bucket_numerator = self._parse_number() 2619 self._match_text_seq("OUT", "OF") 2620 bucket_denominator = bucket_denominator = self._parse_number() 2621 self._match(TokenType.ON) 2622 bucket_field = self._parse_field() 2623 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2624 percent = num 2625 elif self._match(TokenType.ROWS): 2626 rows = num 2627 else: 2628 size = num 2629 2630 self._match(TokenType.R_PAREN) 2631 2632 if self._match(TokenType.L_PAREN): 2633 method = self._parse_var() 2634 seed = self._match(TokenType.COMMA) and self._parse_number() 2635 self._match_r_paren() 2636 elif self._match_texts(("SEED", "REPEATABLE")): 2637 seed = self._parse_wrapped(self._parse_number) 2638 2639 return self.expression( 2640 exp.TableSample, 2641 method=method, 2642 bucket_numerator=bucket_numerator, 2643 bucket_denominator=bucket_denominator, 2644 bucket_field=bucket_field, 2645 percent=percent, 2646 rows=rows, 2647 size=size, 2648 seed=seed, 2649 kind=kind, 2650 ) 2651 2652 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2653 return list(iter(self._parse_pivot, None)) or None 2654 2655 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2656 return list(iter(self._parse_join, None)) or None 2657 2658 # https://duckdb.org/docs/sql/statements/pivot 2659 def _parse_simplified_pivot(self) -> exp.Pivot: 2660 def _parse_on() -> t.Optional[exp.Expression]: 2661 this = self._parse_bitwise() 2662 return self._parse_in(this) if self._match(TokenType.IN) else this 2663 2664 this = self._parse_table() 2665 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2666 using = self._match(TokenType.USING) and self._parse_csv( 2667 lambda: self._parse_alias(self._parse_function()) 2668 ) 2669 group = self._parse_group() 2670 return self.expression( 2671 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2672 ) 2673 2674 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2675 index = self._index 2676 include_nulls = None 2677 2678 if self._match(TokenType.PIVOT): 2679 unpivot = False 2680 elif self._match(TokenType.UNPIVOT): 2681 unpivot = True 2682 2683 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2684 if self._match_text_seq("INCLUDE", "NULLS"): 2685 include_nulls = True 2686 elif self._match_text_seq("EXCLUDE", "NULLS"): 2687 include_nulls = False 2688 else: 2689 return None 2690 2691 expressions = [] 2692 field = None 2693 2694 if not self._match(TokenType.L_PAREN): 2695 self._retreat(index) 2696 return None 2697 2698 if unpivot: 2699 expressions = self._parse_csv(self._parse_column) 2700 else: 2701 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2702 2703 if not expressions: 2704 self.raise_error("Failed to parse PIVOT's aggregation list") 2705 2706 if not self._match(TokenType.FOR): 2707 self.raise_error("Expecting FOR") 2708 2709 value = self._parse_column() 2710 2711 if not self._match(TokenType.IN): 2712 self.raise_error("Expecting IN") 2713 2714 field = self._parse_in(value, alias=True) 2715 2716 self._match_r_paren() 2717 2718 pivot = self.expression( 2719 exp.Pivot, 2720 expressions=expressions, 2721 field=field, 2722 unpivot=unpivot, 2723 include_nulls=include_nulls, 2724 ) 2725 2726 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2727 pivot.set("alias", self._parse_table_alias()) 2728 2729 if not unpivot: 2730 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2731 2732 columns: t.List[exp.Expression] = [] 2733 for fld in pivot.args["field"].expressions: 2734 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2735 for name in names: 2736 if self.PREFIXED_PIVOT_COLUMNS: 2737 name = f"{name}_{field_name}" if name else field_name 2738 else: 2739 name = f"{field_name}_{name}" if name else field_name 2740 2741 columns.append(exp.to_identifier(name)) 2742 2743 pivot.set("columns", columns) 2744 2745 return pivot 2746 2747 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2748 return [agg.alias for agg in aggregations] 2749 2750 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2751 if not skip_where_token and not self._match(TokenType.WHERE): 2752 return None 2753 2754 return self.expression( 2755 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2756 ) 2757 2758 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2759 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2760 return None 2761 2762 elements = defaultdict(list) 2763 2764 if self._match(TokenType.ALL): 2765 return self.expression(exp.Group, all=True) 2766 2767 while True: 2768 expressions = self._parse_csv(self._parse_conjunction) 2769 if expressions: 2770 elements["expressions"].extend(expressions) 2771 2772 grouping_sets = self._parse_grouping_sets() 2773 if grouping_sets: 2774 elements["grouping_sets"].extend(grouping_sets) 2775 2776 rollup = None 2777 cube = None 2778 totals = None 2779 2780 with_ = self._match(TokenType.WITH) 2781 if self._match(TokenType.ROLLUP): 2782 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2783 elements["rollup"].extend(ensure_list(rollup)) 2784 2785 if self._match(TokenType.CUBE): 2786 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2787 elements["cube"].extend(ensure_list(cube)) 2788 2789 if self._match_text_seq("TOTALS"): 2790 totals = True 2791 elements["totals"] = True # type: ignore 2792 2793 if not (grouping_sets or rollup or cube or totals): 2794 break 2795 2796 return self.expression(exp.Group, **elements) # type: ignore 2797 2798 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2799 if not self._match(TokenType.GROUPING_SETS): 2800 return None 2801 2802 return self._parse_wrapped_csv(self._parse_grouping_set) 2803 2804 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2805 if self._match(TokenType.L_PAREN): 2806 grouping_set = self._parse_csv(self._parse_column) 2807 self._match_r_paren() 2808 return self.expression(exp.Tuple, expressions=grouping_set) 2809 2810 return self._parse_column() 2811 2812 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2813 if not skip_having_token and not self._match(TokenType.HAVING): 2814 return None 2815 return self.expression(exp.Having, this=self._parse_conjunction()) 2816 2817 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2818 if not self._match(TokenType.QUALIFY): 2819 return None 2820 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2821 2822 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2823 if skip_start_token: 2824 start = None 2825 elif self._match(TokenType.START_WITH): 2826 start = self._parse_conjunction() 2827 else: 2828 return None 2829 2830 self._match(TokenType.CONNECT_BY) 2831 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2832 exp.Prior, this=self._parse_bitwise() 2833 ) 2834 connect = self._parse_conjunction() 2835 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2836 return self.expression(exp.Connect, start=start, connect=connect) 2837 2838 def _parse_order( 2839 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2840 ) -> t.Optional[exp.Expression]: 2841 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2842 return this 2843 2844 return self.expression( 2845 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2846 ) 2847 2848 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2849 if not self._match(token): 2850 return None 2851 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2852 2853 def _parse_ordered(self) -> exp.Ordered: 2854 this = self._parse_conjunction() 2855 self._match(TokenType.ASC) 2856 2857 is_desc = self._match(TokenType.DESC) 2858 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2859 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2860 desc = is_desc or False 2861 asc = not desc 2862 nulls_first = is_nulls_first or False 2863 explicitly_null_ordered = is_nulls_first or is_nulls_last 2864 2865 if ( 2866 not explicitly_null_ordered 2867 and ( 2868 (asc and self.NULL_ORDERING == "nulls_are_small") 2869 or (desc and self.NULL_ORDERING != "nulls_are_small") 2870 ) 2871 and self.NULL_ORDERING != "nulls_are_last" 2872 ): 2873 nulls_first = True 2874 2875 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2876 2877 def _parse_limit( 2878 self, this: t.Optional[exp.Expression] = None, top: bool = False 2879 ) -> t.Optional[exp.Expression]: 2880 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2881 comments = self._prev_comments 2882 if top: 2883 limit_paren = self._match(TokenType.L_PAREN) 2884 expression = self._parse_number() 2885 2886 if limit_paren: 2887 self._match_r_paren() 2888 else: 2889 expression = self._parse_term() 2890 2891 if self._match(TokenType.COMMA): 2892 offset = expression 2893 expression = self._parse_term() 2894 else: 2895 offset = None 2896 2897 limit_exp = self.expression( 2898 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2899 ) 2900 2901 return limit_exp 2902 2903 if self._match(TokenType.FETCH): 2904 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2905 direction = self._prev.text if direction else "FIRST" 2906 2907 count = self._parse_number() 2908 percent = self._match(TokenType.PERCENT) 2909 2910 self._match_set((TokenType.ROW, TokenType.ROWS)) 2911 2912 only = self._match_text_seq("ONLY") 2913 with_ties = self._match_text_seq("WITH", "TIES") 2914 2915 if only and with_ties: 2916 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2917 2918 return self.expression( 2919 exp.Fetch, 2920 direction=direction, 2921 count=count, 2922 percent=percent, 2923 with_ties=with_ties, 2924 ) 2925 2926 return this 2927 2928 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2929 if not self._match(TokenType.OFFSET): 2930 return this 2931 2932 count = self._parse_term() 2933 self._match_set((TokenType.ROW, TokenType.ROWS)) 2934 return self.expression(exp.Offset, this=this, expression=count) 2935 2936 def _parse_locks(self) -> t.List[exp.Lock]: 2937 locks = [] 2938 while True: 2939 if self._match_text_seq("FOR", "UPDATE"): 2940 update = True 2941 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2942 "LOCK", "IN", "SHARE", "MODE" 2943 ): 2944 update = False 2945 else: 2946 break 2947 2948 expressions = None 2949 if self._match_text_seq("OF"): 2950 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2951 2952 wait: t.Optional[bool | exp.Expression] = None 2953 if self._match_text_seq("NOWAIT"): 2954 wait = True 2955 elif self._match_text_seq("WAIT"): 2956 wait = self._parse_primary() 2957 elif self._match_text_seq("SKIP", "LOCKED"): 2958 wait = False 2959 2960 locks.append( 2961 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2962 ) 2963 2964 return locks 2965 2966 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2967 if not self._match_set(self.SET_OPERATIONS): 2968 return this 2969 2970 token_type = self._prev.token_type 2971 2972 if token_type == TokenType.UNION: 2973 expression = exp.Union 2974 elif token_type == TokenType.EXCEPT: 2975 expression = exp.Except 2976 else: 2977 expression = exp.Intersect 2978 2979 return self.expression( 2980 expression, 2981 this=this, 2982 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2983 expression=self._parse_set_operations(self._parse_select(nested=True)), 2984 ) 2985 2986 def _parse_expression(self) -> t.Optional[exp.Expression]: 2987 return self._parse_alias(self._parse_conjunction()) 2988 2989 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2990 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2991 2992 def _parse_equality(self) -> t.Optional[exp.Expression]: 2993 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2994 2995 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2996 return self._parse_tokens(self._parse_range, self.COMPARISON) 2997 2998 def _parse_range(self) -> t.Optional[exp.Expression]: 2999 this = self._parse_bitwise() 3000 negate = self._match(TokenType.NOT) 3001 3002 if self._match_set(self.RANGE_PARSERS): 3003 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3004 if not expression: 3005 return this 3006 3007 this = expression 3008 elif self._match(TokenType.ISNULL): 3009 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3010 3011 # Postgres supports ISNULL and NOTNULL for conditions. 3012 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3013 if self._match(TokenType.NOTNULL): 3014 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3015 this = self.expression(exp.Not, this=this) 3016 3017 if negate: 3018 this = self.expression(exp.Not, this=this) 3019 3020 if self._match(TokenType.IS): 3021 this = self._parse_is(this) 3022 3023 return this 3024 3025 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3026 index = self._index - 1 3027 negate = self._match(TokenType.NOT) 3028 3029 if self._match_text_seq("DISTINCT", "FROM"): 3030 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3031 return self.expression(klass, this=this, expression=self._parse_expression()) 3032 3033 expression = self._parse_null() or self._parse_boolean() 3034 if not expression: 3035 self._retreat(index) 3036 return None 3037 3038 this = self.expression(exp.Is, this=this, expression=expression) 3039 return self.expression(exp.Not, this=this) if negate else this 3040 3041 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3042 unnest = self._parse_unnest(with_alias=False) 3043 if unnest: 3044 this = self.expression(exp.In, this=this, unnest=unnest) 3045 elif self._match(TokenType.L_PAREN): 3046 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3047 3048 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3049 this = self.expression(exp.In, this=this, query=expressions[0]) 3050 else: 3051 this = self.expression(exp.In, this=this, expressions=expressions) 3052 3053 self._match_r_paren(this) 3054 else: 3055 this = self.expression(exp.In, this=this, field=self._parse_field()) 3056 3057 return this 3058 3059 def _parse_between(self, this: exp.Expression) -> exp.Between: 3060 low = self._parse_bitwise() 3061 self._match(TokenType.AND) 3062 high = self._parse_bitwise() 3063 return self.expression(exp.Between, this=this, low=low, high=high) 3064 3065 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3066 if not self._match(TokenType.ESCAPE): 3067 return this 3068 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3069 3070 def _parse_interval(self) -> t.Optional[exp.Interval]: 3071 index = self._index 3072 3073 if not self._match(TokenType.INTERVAL): 3074 return None 3075 3076 if self._match(TokenType.STRING, advance=False): 3077 this = self._parse_primary() 3078 else: 3079 this = self._parse_term() 3080 3081 if not this: 3082 self._retreat(index) 3083 return None 3084 3085 unit = self._parse_function() or self._parse_var() 3086 3087 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3088 # each INTERVAL expression into this canonical form so it's easy to transpile 3089 if this and this.is_number: 3090 this = exp.Literal.string(this.name) 3091 elif this and this.is_string: 3092 parts = this.name.split() 3093 3094 if len(parts) == 2: 3095 if unit: 3096 # this is not actually a unit, it's something else 3097 unit = None 3098 self._retreat(self._index - 1) 3099 else: 3100 this = exp.Literal.string(parts[0]) 3101 unit = self.expression(exp.Var, this=parts[1]) 3102 3103 return self.expression(exp.Interval, this=this, unit=unit) 3104 3105 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3106 this = self._parse_term() 3107 3108 while True: 3109 if self._match_set(self.BITWISE): 3110 this = self.expression( 3111 self.BITWISE[self._prev.token_type], 3112 this=this, 3113 expression=self._parse_term(), 3114 ) 3115 elif self._match(TokenType.DQMARK): 3116 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3117 elif self._match_pair(TokenType.LT, TokenType.LT): 3118 this = self.expression( 3119 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3120 ) 3121 elif self._match_pair(TokenType.GT, TokenType.GT): 3122 this = self.expression( 3123 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3124 ) 3125 else: 3126 break 3127 3128 return this 3129 3130 def _parse_term(self) -> t.Optional[exp.Expression]: 3131 return self._parse_tokens(self._parse_factor, self.TERM) 3132 3133 def _parse_factor(self) -> t.Optional[exp.Expression]: 3134 return self._parse_tokens(self._parse_unary, self.FACTOR) 3135 3136 def _parse_unary(self) -> t.Optional[exp.Expression]: 3137 if self._match_set(self.UNARY_PARSERS): 3138 return self.UNARY_PARSERS[self._prev.token_type](self) 3139 return self._parse_at_time_zone(self._parse_type()) 3140 3141 def _parse_type(self) -> t.Optional[exp.Expression]: 3142 interval = self._parse_interval() 3143 if interval: 3144 return interval 3145 3146 index = self._index 3147 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3148 this = self._parse_column() 3149 3150 if data_type: 3151 if isinstance(this, exp.Literal): 3152 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3153 if parser: 3154 return parser(self, this, data_type) 3155 return self.expression(exp.Cast, this=this, to=data_type) 3156 if not data_type.expressions: 3157 self._retreat(index) 3158 return self._parse_column() 3159 return self._parse_column_ops(data_type) 3160 3161 return this 3162 3163 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3164 this = self._parse_type() 3165 if not this: 3166 return None 3167 3168 return self.expression( 3169 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3170 ) 3171 3172 def _parse_types( 3173 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3174 ) -> t.Optional[exp.Expression]: 3175 index = self._index 3176 3177 prefix = self._match_text_seq("SYSUDTLIB", ".") 3178 3179 if not self._match_set(self.TYPE_TOKENS): 3180 identifier = allow_identifiers and self._parse_id_var( 3181 any_token=False, tokens=(TokenType.VAR,) 3182 ) 3183 3184 if identifier: 3185 tokens = self._tokenizer.tokenize(identifier.name) 3186 3187 if len(tokens) != 1: 3188 self.raise_error("Unexpected identifier", self._prev) 3189 3190 if tokens[0].token_type in self.TYPE_TOKENS: 3191 self._prev = tokens[0] 3192 elif self.SUPPORTS_USER_DEFINED_TYPES: 3193 return identifier 3194 else: 3195 return None 3196 else: 3197 return None 3198 3199 type_token = self._prev.token_type 3200 3201 if type_token == TokenType.PSEUDO_TYPE: 3202 return self.expression(exp.PseudoType, this=self._prev.text) 3203 3204 nested = type_token in self.NESTED_TYPE_TOKENS 3205 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3206 expressions = None 3207 maybe_func = False 3208 3209 if self._match(TokenType.L_PAREN): 3210 if is_struct: 3211 expressions = self._parse_csv(self._parse_struct_types) 3212 elif nested: 3213 expressions = self._parse_csv( 3214 lambda: self._parse_types( 3215 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3216 ) 3217 ) 3218 elif type_token in self.ENUM_TYPE_TOKENS: 3219 expressions = self._parse_csv(self._parse_equality) 3220 else: 3221 expressions = self._parse_csv(self._parse_type_size) 3222 3223 if not expressions or not self._match(TokenType.R_PAREN): 3224 self._retreat(index) 3225 return None 3226 3227 maybe_func = True 3228 3229 this: t.Optional[exp.Expression] = None 3230 values: t.Optional[t.List[exp.Expression]] = None 3231 3232 if nested and self._match(TokenType.LT): 3233 if is_struct: 3234 expressions = self._parse_csv(self._parse_struct_types) 3235 else: 3236 expressions = self._parse_csv( 3237 lambda: self._parse_types( 3238 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3239 ) 3240 ) 3241 3242 if not self._match(TokenType.GT): 3243 self.raise_error("Expecting >") 3244 3245 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3246 values = self._parse_csv(self._parse_conjunction) 3247 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3248 3249 if type_token in self.TIMESTAMPS: 3250 if self._match_text_seq("WITH", "TIME", "ZONE"): 3251 maybe_func = False 3252 tz_type = ( 3253 exp.DataType.Type.TIMETZ 3254 if type_token in self.TIMES 3255 else exp.DataType.Type.TIMESTAMPTZ 3256 ) 3257 this = exp.DataType(this=tz_type, expressions=expressions) 3258 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3259 maybe_func = False 3260 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3261 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3262 maybe_func = False 3263 elif type_token == TokenType.INTERVAL: 3264 if self._match_text_seq("YEAR", "TO", "MONTH"): 3265 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3266 elif self._match_text_seq("DAY", "TO", "SECOND"): 3267 span = [exp.IntervalDayToSecondSpan()] 3268 else: 3269 span = None 3270 3271 unit = not span and self._parse_var() 3272 if not unit: 3273 this = self.expression( 3274 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3275 ) 3276 else: 3277 this = self.expression(exp.Interval, unit=unit) 3278 3279 if maybe_func and check_func: 3280 index2 = self._index 3281 peek = self._parse_string() 3282 3283 if not peek: 3284 self._retreat(index) 3285 return None 3286 3287 self._retreat(index2) 3288 3289 if not this: 3290 this = exp.DataType( 3291 this=exp.DataType.Type[type_token.value], 3292 expressions=expressions, 3293 nested=nested, 3294 values=values, 3295 prefix=prefix, 3296 ) 3297 3298 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3299 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3300 3301 return this 3302 3303 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3304 this = self._parse_type() or self._parse_id_var() 3305 self._match(TokenType.COLON) 3306 return self._parse_column_def(this) 3307 3308 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3309 if not self._match_text_seq("AT", "TIME", "ZONE"): 3310 return this 3311 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3312 3313 def _parse_column(self) -> t.Optional[exp.Expression]: 3314 this = self._parse_field() 3315 if isinstance(this, exp.Identifier): 3316 this = self.expression(exp.Column, this=this) 3317 elif not this: 3318 return self._parse_bracket(this) 3319 return self._parse_column_ops(this) 3320 3321 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3322 this = self._parse_bracket(this) 3323 3324 while self._match_set(self.COLUMN_OPERATORS): 3325 op_token = self._prev.token_type 3326 op = self.COLUMN_OPERATORS.get(op_token) 3327 3328 if op_token == TokenType.DCOLON: 3329 field = self._parse_types() 3330 if not field: 3331 self.raise_error("Expected type") 3332 elif op and self._curr: 3333 self._advance() 3334 value = self._prev.text 3335 field = ( 3336 exp.Literal.number(value) 3337 if self._prev.token_type == TokenType.NUMBER 3338 else exp.Literal.string(value) 3339 ) 3340 else: 3341 field = self._parse_field(anonymous_func=True, any_token=True) 3342 3343 if isinstance(field, exp.Func): 3344 # bigquery allows function calls like x.y.count(...) 3345 # SAFE.SUBSTR(...) 3346 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3347 this = self._replace_columns_with_dots(this) 3348 3349 if op: 3350 this = op(self, this, field) 3351 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3352 this = self.expression( 3353 exp.Column, 3354 this=field, 3355 table=this.this, 3356 db=this.args.get("table"), 3357 catalog=this.args.get("db"), 3358 ) 3359 else: 3360 this = self.expression(exp.Dot, this=this, expression=field) 3361 this = self._parse_bracket(this) 3362 return this 3363 3364 def _parse_primary(self) -> t.Optional[exp.Expression]: 3365 if self._match_set(self.PRIMARY_PARSERS): 3366 token_type = self._prev.token_type 3367 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3368 3369 if token_type == TokenType.STRING: 3370 expressions = [primary] 3371 while self._match(TokenType.STRING): 3372 expressions.append(exp.Literal.string(self._prev.text)) 3373 3374 if len(expressions) > 1: 3375 return self.expression(exp.Concat, expressions=expressions) 3376 3377 return primary 3378 3379 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3380 return exp.Literal.number(f"0.{self._prev.text}") 3381 3382 if self._match(TokenType.L_PAREN): 3383 comments = self._prev_comments 3384 query = self._parse_select() 3385 3386 if query: 3387 expressions = [query] 3388 else: 3389 expressions = self._parse_expressions() 3390 3391 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3392 3393 if isinstance(this, exp.Subqueryable): 3394 this = self._parse_set_operations( 3395 self._parse_subquery(this=this, parse_alias=False) 3396 ) 3397 elif len(expressions) > 1: 3398 this = self.expression(exp.Tuple, expressions=expressions) 3399 else: 3400 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3401 3402 if this: 3403 this.add_comments(comments) 3404 3405 self._match_r_paren(expression=this) 3406 return this 3407 3408 return None 3409 3410 def _parse_field( 3411 self, 3412 any_token: bool = False, 3413 tokens: t.Optional[t.Collection[TokenType]] = None, 3414 anonymous_func: bool = False, 3415 ) -> t.Optional[exp.Expression]: 3416 return ( 3417 self._parse_primary() 3418 or self._parse_function(anonymous=anonymous_func) 3419 or self._parse_id_var(any_token=any_token, tokens=tokens) 3420 ) 3421 3422 def _parse_function( 3423 self, 3424 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3425 anonymous: bool = False, 3426 optional_parens: bool = True, 3427 ) -> t.Optional[exp.Expression]: 3428 if not self._curr: 3429 return None 3430 3431 token_type = self._curr.token_type 3432 this = self._curr.text 3433 upper = this.upper() 3434 3435 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3436 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3437 self._advance() 3438 return parser(self) 3439 3440 if not self._next or self._next.token_type != TokenType.L_PAREN: 3441 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3442 self._advance() 3443 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3444 3445 return None 3446 3447 if token_type not in self.FUNC_TOKENS: 3448 return None 3449 3450 self._advance(2) 3451 3452 parser = self.FUNCTION_PARSERS.get(upper) 3453 if parser and not anonymous: 3454 this = parser(self) 3455 else: 3456 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3457 3458 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3459 this = self.expression(subquery_predicate, this=self._parse_select()) 3460 self._match_r_paren() 3461 return this 3462 3463 if functions is None: 3464 functions = self.FUNCTIONS 3465 3466 function = functions.get(upper) 3467 3468 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3469 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3470 3471 if function and not anonymous: 3472 func = self.validate_expression(function(args), args) 3473 if not self.NORMALIZE_FUNCTIONS: 3474 func.meta["name"] = this 3475 this = func 3476 else: 3477 this = self.expression(exp.Anonymous, this=this, expressions=args) 3478 3479 self._match_r_paren(this) 3480 return self._parse_window(this) 3481 3482 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3483 return self._parse_column_def(self._parse_id_var()) 3484 3485 def _parse_user_defined_function( 3486 self, kind: t.Optional[TokenType] = None 3487 ) -> t.Optional[exp.Expression]: 3488 this = self._parse_id_var() 3489 3490 while self._match(TokenType.DOT): 3491 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3492 3493 if not self._match(TokenType.L_PAREN): 3494 return this 3495 3496 expressions = self._parse_csv(self._parse_function_parameter) 3497 self._match_r_paren() 3498 return self.expression( 3499 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3500 ) 3501 3502 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3503 literal = self._parse_primary() 3504 if literal: 3505 return self.expression(exp.Introducer, this=token.text, expression=literal) 3506 3507 return self.expression(exp.Identifier, this=token.text) 3508 3509 def _parse_session_parameter(self) -> exp.SessionParameter: 3510 kind = None 3511 this = self._parse_id_var() or self._parse_primary() 3512 3513 if this and self._match(TokenType.DOT): 3514 kind = this.name 3515 this = self._parse_var() or self._parse_primary() 3516 3517 return self.expression(exp.SessionParameter, this=this, kind=kind) 3518 3519 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3520 index = self._index 3521 3522 if self._match(TokenType.L_PAREN): 3523 expressions = t.cast( 3524 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3525 ) 3526 3527 if not self._match(TokenType.R_PAREN): 3528 self._retreat(index) 3529 else: 3530 expressions = [self._parse_id_var()] 3531 3532 if self._match_set(self.LAMBDAS): 3533 return self.LAMBDAS[self._prev.token_type](self, expressions) 3534 3535 self._retreat(index) 3536 3537 this: t.Optional[exp.Expression] 3538 3539 if self._match(TokenType.DISTINCT): 3540 this = self.expression( 3541 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3542 ) 3543 else: 3544 this = self._parse_select_or_expression(alias=alias) 3545 3546 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3547 3548 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3549 index = self._index 3550 3551 if not self.errors: 3552 try: 3553 if self._parse_select(nested=True): 3554 return this 3555 except ParseError: 3556 pass 3557 finally: 3558 self.errors.clear() 3559 self._retreat(index) 3560 3561 if not self._match(TokenType.L_PAREN): 3562 return this 3563 3564 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3565 3566 self._match_r_paren() 3567 return self.expression(exp.Schema, this=this, expressions=args) 3568 3569 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3570 return self._parse_column_def(self._parse_field(any_token=True)) 3571 3572 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3573 # column defs are not really columns, they're identifiers 3574 if isinstance(this, exp.Column): 3575 this = this.this 3576 3577 kind = self._parse_types(schema=True) 3578 3579 if self._match_text_seq("FOR", "ORDINALITY"): 3580 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3581 3582 constraints: t.List[exp.Expression] = [] 3583 3584 if not kind and self._match(TokenType.ALIAS): 3585 constraints.append( 3586 self.expression( 3587 exp.ComputedColumnConstraint, 3588 this=self._parse_conjunction(), 3589 persisted=self._match_text_seq("PERSISTED"), 3590 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3591 ) 3592 ) 3593 3594 while True: 3595 constraint = self._parse_column_constraint() 3596 if not constraint: 3597 break 3598 constraints.append(constraint) 3599 3600 if not kind and not constraints: 3601 return this 3602 3603 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3604 3605 def _parse_auto_increment( 3606 self, 3607 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3608 start = None 3609 increment = None 3610 3611 if self._match(TokenType.L_PAREN, advance=False): 3612 args = self._parse_wrapped_csv(self._parse_bitwise) 3613 start = seq_get(args, 0) 3614 increment = seq_get(args, 1) 3615 elif self._match_text_seq("START"): 3616 start = self._parse_bitwise() 3617 self._match_text_seq("INCREMENT") 3618 increment = self._parse_bitwise() 3619 3620 if start and increment: 3621 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3622 3623 return exp.AutoIncrementColumnConstraint() 3624 3625 def _parse_compress(self) -> exp.CompressColumnConstraint: 3626 if self._match(TokenType.L_PAREN, advance=False): 3627 return self.expression( 3628 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3629 ) 3630 3631 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3632 3633 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3634 if self._match_text_seq("BY", "DEFAULT"): 3635 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3636 this = self.expression( 3637 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3638 ) 3639 else: 3640 self._match_text_seq("ALWAYS") 3641 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3642 3643 self._match(TokenType.ALIAS) 3644 identity = self._match_text_seq("IDENTITY") 3645 3646 if self._match(TokenType.L_PAREN): 3647 if self._match(TokenType.START_WITH): 3648 this.set("start", self._parse_bitwise()) 3649 if self._match_text_seq("INCREMENT", "BY"): 3650 this.set("increment", self._parse_bitwise()) 3651 if self._match_text_seq("MINVALUE"): 3652 this.set("minvalue", self._parse_bitwise()) 3653 if self._match_text_seq("MAXVALUE"): 3654 this.set("maxvalue", self._parse_bitwise()) 3655 3656 if self._match_text_seq("CYCLE"): 3657 this.set("cycle", True) 3658 elif self._match_text_seq("NO", "CYCLE"): 3659 this.set("cycle", False) 3660 3661 if not identity: 3662 this.set("expression", self._parse_bitwise()) 3663 3664 self._match_r_paren() 3665 3666 return this 3667 3668 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3669 self._match_text_seq("LENGTH") 3670 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3671 3672 def _parse_not_constraint( 3673 self, 3674 ) -> t.Optional[exp.Expression]: 3675 if self._match_text_seq("NULL"): 3676 return self.expression(exp.NotNullColumnConstraint) 3677 if self._match_text_seq("CASESPECIFIC"): 3678 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3679 if self._match_text_seq("FOR", "REPLICATION"): 3680 return self.expression(exp.NotForReplicationColumnConstraint) 3681 return None 3682 3683 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3684 if self._match(TokenType.CONSTRAINT): 3685 this = self._parse_id_var() 3686 else: 3687 this = None 3688 3689 if self._match_texts(self.CONSTRAINT_PARSERS): 3690 return self.expression( 3691 exp.ColumnConstraint, 3692 this=this, 3693 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3694 ) 3695 3696 return this 3697 3698 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3699 if not self._match(TokenType.CONSTRAINT): 3700 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3701 3702 this = self._parse_id_var() 3703 expressions = [] 3704 3705 while True: 3706 constraint = self._parse_unnamed_constraint() or self._parse_function() 3707 if not constraint: 3708 break 3709 expressions.append(constraint) 3710 3711 return self.expression(exp.Constraint, this=this, expressions=expressions) 3712 3713 def _parse_unnamed_constraint( 3714 self, constraints: t.Optional[t.Collection[str]] = None 3715 ) -> t.Optional[exp.Expression]: 3716 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3717 return None 3718 3719 constraint = self._prev.text.upper() 3720 if constraint not in self.CONSTRAINT_PARSERS: 3721 self.raise_error(f"No parser found for schema constraint {constraint}.") 3722 3723 return self.CONSTRAINT_PARSERS[constraint](self) 3724 3725 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3726 self._match_text_seq("KEY") 3727 return self.expression( 3728 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3729 ) 3730 3731 def _parse_key_constraint_options(self) -> t.List[str]: 3732 options = [] 3733 while True: 3734 if not self._curr: 3735 break 3736 3737 if self._match(TokenType.ON): 3738 action = None 3739 on = self._advance_any() and self._prev.text 3740 3741 if self._match_text_seq("NO", "ACTION"): 3742 action = "NO ACTION" 3743 elif self._match_text_seq("CASCADE"): 3744 action = "CASCADE" 3745 elif self._match_pair(TokenType.SET, TokenType.NULL): 3746 action = "SET NULL" 3747 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3748 action = "SET DEFAULT" 3749 else: 3750 self.raise_error("Invalid key constraint") 3751 3752 options.append(f"ON {on} {action}") 3753 elif self._match_text_seq("NOT", "ENFORCED"): 3754 options.append("NOT ENFORCED") 3755 elif self._match_text_seq("DEFERRABLE"): 3756 options.append("DEFERRABLE") 3757 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3758 options.append("INITIALLY DEFERRED") 3759 elif self._match_text_seq("NORELY"): 3760 options.append("NORELY") 3761 elif self._match_text_seq("MATCH", "FULL"): 3762 options.append("MATCH FULL") 3763 else: 3764 break 3765 3766 return options 3767 3768 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3769 if match and not self._match(TokenType.REFERENCES): 3770 return None 3771 3772 expressions = None 3773 this = self._parse_table(schema=True) 3774 options = self._parse_key_constraint_options() 3775 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3776 3777 def _parse_foreign_key(self) -> exp.ForeignKey: 3778 expressions = self._parse_wrapped_id_vars() 3779 reference = self._parse_references() 3780 options = {} 3781 3782 while self._match(TokenType.ON): 3783 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3784 self.raise_error("Expected DELETE or UPDATE") 3785 3786 kind = self._prev.text.lower() 3787 3788 if self._match_text_seq("NO", "ACTION"): 3789 action = "NO ACTION" 3790 elif self._match(TokenType.SET): 3791 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3792 action = "SET " + self._prev.text.upper() 3793 else: 3794 self._advance() 3795 action = self._prev.text.upper() 3796 3797 options[kind] = action 3798 3799 return self.expression( 3800 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3801 ) 3802 3803 def _parse_primary_key( 3804 self, wrapped_optional: bool = False, in_props: bool = False 3805 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3806 desc = ( 3807 self._match_set((TokenType.ASC, TokenType.DESC)) 3808 and self._prev.token_type == TokenType.DESC 3809 ) 3810 3811 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3812 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3813 3814 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3815 options = self._parse_key_constraint_options() 3816 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3817 3818 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3819 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3820 return this 3821 3822 bracket_kind = self._prev.token_type 3823 3824 if self._match(TokenType.COLON): 3825 expressions: t.List[exp.Expression] = [ 3826 self.expression(exp.Slice, expression=self._parse_conjunction()) 3827 ] 3828 else: 3829 expressions = self._parse_csv( 3830 lambda: self._parse_slice( 3831 self._parse_alias(self._parse_conjunction(), explicit=True) 3832 ) 3833 ) 3834 3835 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3836 if bracket_kind == TokenType.L_BRACE: 3837 this = self.expression(exp.Struct, expressions=expressions) 3838 elif not this or this.name.upper() == "ARRAY": 3839 this = self.expression(exp.Array, expressions=expressions) 3840 else: 3841 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3842 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3843 3844 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3845 self.raise_error("Expected ]") 3846 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3847 self.raise_error("Expected }") 3848 3849 self._add_comments(this) 3850 return self._parse_bracket(this) 3851 3852 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3853 if self._match(TokenType.COLON): 3854 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3855 return this 3856 3857 def _parse_case(self) -> t.Optional[exp.Expression]: 3858 ifs = [] 3859 default = None 3860 3861 comments = self._prev_comments 3862 expression = self._parse_conjunction() 3863 3864 while self._match(TokenType.WHEN): 3865 this = self._parse_conjunction() 3866 self._match(TokenType.THEN) 3867 then = self._parse_conjunction() 3868 ifs.append(self.expression(exp.If, this=this, true=then)) 3869 3870 if self._match(TokenType.ELSE): 3871 default = self._parse_conjunction() 3872 3873 if not self._match(TokenType.END): 3874 self.raise_error("Expected END after CASE", self._prev) 3875 3876 return self._parse_window( 3877 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3878 ) 3879 3880 def _parse_if(self) -> t.Optional[exp.Expression]: 3881 if self._match(TokenType.L_PAREN): 3882 args = self._parse_csv(self._parse_conjunction) 3883 this = self.validate_expression(exp.If.from_arg_list(args), args) 3884 self._match_r_paren() 3885 else: 3886 index = self._index - 1 3887 condition = self._parse_conjunction() 3888 3889 if not condition: 3890 self._retreat(index) 3891 return None 3892 3893 self._match(TokenType.THEN) 3894 true = self._parse_conjunction() 3895 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3896 self._match(TokenType.END) 3897 this = self.expression(exp.If, this=condition, true=true, false=false) 3898 3899 return self._parse_window(this) 3900 3901 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3902 if not self._match_text_seq("VALUE", "FOR"): 3903 self._retreat(self._index - 1) 3904 return None 3905 3906 return self.expression( 3907 exp.NextValueFor, 3908 this=self._parse_column(), 3909 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3910 ) 3911 3912 def _parse_extract(self) -> exp.Extract: 3913 this = self._parse_function() or self._parse_var() or self._parse_type() 3914 3915 if self._match(TokenType.FROM): 3916 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3917 3918 if not self._match(TokenType.COMMA): 3919 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3920 3921 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3922 3923 def _parse_any_value(self) -> exp.AnyValue: 3924 this = self._parse_lambda() 3925 is_max = None 3926 having = None 3927 3928 if self._match(TokenType.HAVING): 3929 self._match_texts(("MAX", "MIN")) 3930 is_max = self._prev.text == "MAX" 3931 having = self._parse_column() 3932 3933 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3934 3935 def _parse_cast(self, strict: bool) -> exp.Expression: 3936 this = self._parse_conjunction() 3937 3938 if not self._match(TokenType.ALIAS): 3939 if self._match(TokenType.COMMA): 3940 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 3941 3942 self.raise_error("Expected AS after CAST") 3943 3944 fmt = None 3945 to = self._parse_types() 3946 3947 if not to: 3948 self.raise_error("Expected TYPE after CAST") 3949 elif isinstance(to, exp.Identifier): 3950 to = exp.DataType.build(to.name, udt=True) 3951 elif to.this == exp.DataType.Type.CHAR: 3952 if self._match(TokenType.CHARACTER_SET): 3953 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3954 elif self._match(TokenType.FORMAT): 3955 fmt_string = self._parse_string() 3956 fmt = self._parse_at_time_zone(fmt_string) 3957 3958 if to.this in exp.DataType.TEMPORAL_TYPES: 3959 this = self.expression( 3960 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3961 this=this, 3962 format=exp.Literal.string( 3963 format_time( 3964 fmt_string.this if fmt_string else "", 3965 self.FORMAT_MAPPING or self.TIME_MAPPING, 3966 self.FORMAT_TRIE or self.TIME_TRIE, 3967 ) 3968 ), 3969 ) 3970 3971 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3972 this.set("zone", fmt.args["zone"]) 3973 3974 return this 3975 3976 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3977 3978 def _parse_concat(self) -> t.Optional[exp.Expression]: 3979 args = self._parse_csv(self._parse_conjunction) 3980 if self.CONCAT_NULL_OUTPUTS_STRING: 3981 args = [ 3982 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3983 for arg in args 3984 if arg 3985 ] 3986 3987 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3988 # we find such a call we replace it with its argument. 3989 if len(args) == 1: 3990 return args[0] 3991 3992 return self.expression( 3993 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3994 ) 3995 3996 def _parse_string_agg(self) -> exp.Expression: 3997 if self._match(TokenType.DISTINCT): 3998 args: t.List[t.Optional[exp.Expression]] = [ 3999 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4000 ] 4001 if self._match(TokenType.COMMA): 4002 args.extend(self._parse_csv(self._parse_conjunction)) 4003 else: 4004 args = self._parse_csv(self._parse_conjunction) # type: ignore 4005 4006 index = self._index 4007 if not self._match(TokenType.R_PAREN) and args: 4008 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4009 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4010 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4011 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4012 4013 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4014 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4015 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4016 if not self._match_text_seq("WITHIN", "GROUP"): 4017 self._retreat(index) 4018 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4019 4020 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4021 order = self._parse_order(this=seq_get(args, 0)) 4022 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4023 4024 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4025 this = self._parse_bitwise() 4026 4027 if self._match(TokenType.USING): 4028 to: t.Optional[exp.Expression] = self.expression( 4029 exp.CharacterSet, this=self._parse_var() 4030 ) 4031 elif self._match(TokenType.COMMA): 4032 to = self._parse_types() 4033 else: 4034 to = None 4035 4036 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4037 4038 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4039 """ 4040 There are generally two variants of the DECODE function: 4041 4042 - DECODE(bin, charset) 4043 - DECODE(expression, search, result [, search, result] ... [, default]) 4044 4045 The second variant will always be parsed into a CASE expression. Note that NULL 4046 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4047 instead of relying on pattern matching. 4048 """ 4049 args = self._parse_csv(self._parse_conjunction) 4050 4051 if len(args) < 3: 4052 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4053 4054 expression, *expressions = args 4055 if not expression: 4056 return None 4057 4058 ifs = [] 4059 for search, result in zip(expressions[::2], expressions[1::2]): 4060 if not search or not result: 4061 return None 4062 4063 if isinstance(search, exp.Literal): 4064 ifs.append( 4065 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4066 ) 4067 elif isinstance(search, exp.Null): 4068 ifs.append( 4069 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4070 ) 4071 else: 4072 cond = exp.or_( 4073 exp.EQ(this=expression.copy(), expression=search), 4074 exp.and_( 4075 exp.Is(this=expression.copy(), expression=exp.Null()), 4076 exp.Is(this=search.copy(), expression=exp.Null()), 4077 copy=False, 4078 ), 4079 copy=False, 4080 ) 4081 ifs.append(exp.If(this=cond, true=result)) 4082 4083 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4084 4085 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4086 self._match_text_seq("KEY") 4087 key = self._parse_field() 4088 self._match(TokenType.COLON) 4089 self._match_text_seq("VALUE") 4090 value = self._parse_field() 4091 4092 if not key and not value: 4093 return None 4094 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4095 4096 def _parse_json_object(self) -> exp.JSONObject: 4097 star = self._parse_star() 4098 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4099 4100 null_handling = None 4101 if self._match_text_seq("NULL", "ON", "NULL"): 4102 null_handling = "NULL ON NULL" 4103 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4104 null_handling = "ABSENT ON NULL" 4105 4106 unique_keys = None 4107 if self._match_text_seq("WITH", "UNIQUE"): 4108 unique_keys = True 4109 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4110 unique_keys = False 4111 4112 self._match_text_seq("KEYS") 4113 4114 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4115 format_json = self._match_text_seq("FORMAT", "JSON") 4116 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4117 4118 return self.expression( 4119 exp.JSONObject, 4120 expressions=expressions, 4121 null_handling=null_handling, 4122 unique_keys=unique_keys, 4123 return_type=return_type, 4124 format_json=format_json, 4125 encoding=encoding, 4126 ) 4127 4128 def _parse_logarithm(self) -> exp.Func: 4129 # Default argument order is base, expression 4130 args = self._parse_csv(self._parse_range) 4131 4132 if len(args) > 1: 4133 if not self.LOG_BASE_FIRST: 4134 args.reverse() 4135 return exp.Log.from_arg_list(args) 4136 4137 return self.expression( 4138 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4139 ) 4140 4141 def _parse_match_against(self) -> exp.MatchAgainst: 4142 expressions = self._parse_csv(self._parse_column) 4143 4144 self._match_text_seq(")", "AGAINST", "(") 4145 4146 this = self._parse_string() 4147 4148 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4149 modifier = "IN NATURAL LANGUAGE MODE" 4150 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4151 modifier = f"{modifier} WITH QUERY EXPANSION" 4152 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4153 modifier = "IN BOOLEAN MODE" 4154 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4155 modifier = "WITH QUERY EXPANSION" 4156 else: 4157 modifier = None 4158 4159 return self.expression( 4160 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4161 ) 4162 4163 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4164 def _parse_open_json(self) -> exp.OpenJSON: 4165 this = self._parse_bitwise() 4166 path = self._match(TokenType.COMMA) and self._parse_string() 4167 4168 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4169 this = self._parse_field(any_token=True) 4170 kind = self._parse_types() 4171 path = self._parse_string() 4172 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4173 4174 return self.expression( 4175 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4176 ) 4177 4178 expressions = None 4179 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4180 self._match_l_paren() 4181 expressions = self._parse_csv(_parse_open_json_column_def) 4182 4183 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4184 4185 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4186 args = self._parse_csv(self._parse_bitwise) 4187 4188 if self._match(TokenType.IN): 4189 return self.expression( 4190 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4191 ) 4192 4193 if haystack_first: 4194 haystack = seq_get(args, 0) 4195 needle = seq_get(args, 1) 4196 else: 4197 needle = seq_get(args, 0) 4198 haystack = seq_get(args, 1) 4199 4200 return self.expression( 4201 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4202 ) 4203 4204 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4205 args = self._parse_csv(self._parse_table) 4206 return exp.JoinHint(this=func_name.upper(), expressions=args) 4207 4208 def _parse_substring(self) -> exp.Substring: 4209 # Postgres supports the form: substring(string [from int] [for int]) 4210 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4211 4212 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4213 4214 if self._match(TokenType.FROM): 4215 args.append(self._parse_bitwise()) 4216 if self._match(TokenType.FOR): 4217 args.append(self._parse_bitwise()) 4218 4219 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4220 4221 def _parse_trim(self) -> exp.Trim: 4222 # https://www.w3resource.com/sql/character-functions/trim.php 4223 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4224 4225 position = None 4226 collation = None 4227 4228 if self._match_texts(self.TRIM_TYPES): 4229 position = self._prev.text.upper() 4230 4231 expression = self._parse_bitwise() 4232 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4233 this = self._parse_bitwise() 4234 else: 4235 this = expression 4236 expression = None 4237 4238 if self._match(TokenType.COLLATE): 4239 collation = self._parse_bitwise() 4240 4241 return self.expression( 4242 exp.Trim, this=this, position=position, expression=expression, collation=collation 4243 ) 4244 4245 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4246 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4247 4248 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4249 return self._parse_window(self._parse_id_var(), alias=True) 4250 4251 def _parse_respect_or_ignore_nulls( 4252 self, this: t.Optional[exp.Expression] 4253 ) -> t.Optional[exp.Expression]: 4254 if self._match_text_seq("IGNORE", "NULLS"): 4255 return self.expression(exp.IgnoreNulls, this=this) 4256 if self._match_text_seq("RESPECT", "NULLS"): 4257 return self.expression(exp.RespectNulls, this=this) 4258 return this 4259 4260 def _parse_window( 4261 self, this: t.Optional[exp.Expression], alias: bool = False 4262 ) -> t.Optional[exp.Expression]: 4263 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4264 self._match(TokenType.WHERE) 4265 this = self.expression( 4266 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4267 ) 4268 self._match_r_paren() 4269 4270 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4271 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4272 if self._match_text_seq("WITHIN", "GROUP"): 4273 order = self._parse_wrapped(self._parse_order) 4274 this = self.expression(exp.WithinGroup, this=this, expression=order) 4275 4276 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4277 # Some dialects choose to implement and some do not. 4278 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4279 4280 # There is some code above in _parse_lambda that handles 4281 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4282 4283 # The below changes handle 4284 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4285 4286 # Oracle allows both formats 4287 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4288 # and Snowflake chose to do the same for familiarity 4289 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4290 this = self._parse_respect_or_ignore_nulls(this) 4291 4292 # bigquery select from window x AS (partition by ...) 4293 if alias: 4294 over = None 4295 self._match(TokenType.ALIAS) 4296 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4297 return this 4298 else: 4299 over = self._prev.text.upper() 4300 4301 if not self._match(TokenType.L_PAREN): 4302 return self.expression( 4303 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4304 ) 4305 4306 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4307 4308 first = self._match(TokenType.FIRST) 4309 if self._match_text_seq("LAST"): 4310 first = False 4311 4312 partition, order = self._parse_partition_and_order() 4313 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4314 4315 if kind: 4316 self._match(TokenType.BETWEEN) 4317 start = self._parse_window_spec() 4318 self._match(TokenType.AND) 4319 end = self._parse_window_spec() 4320 4321 spec = self.expression( 4322 exp.WindowSpec, 4323 kind=kind, 4324 start=start["value"], 4325 start_side=start["side"], 4326 end=end["value"], 4327 end_side=end["side"], 4328 ) 4329 else: 4330 spec = None 4331 4332 self._match_r_paren() 4333 4334 window = self.expression( 4335 exp.Window, 4336 this=this, 4337 partition_by=partition, 4338 order=order, 4339 spec=spec, 4340 alias=window_alias, 4341 over=over, 4342 first=first, 4343 ) 4344 4345 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4346 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4347 return self._parse_window(window, alias=alias) 4348 4349 return window 4350 4351 def _parse_partition_and_order( 4352 self, 4353 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4354 return self._parse_partition_by(), self._parse_order() 4355 4356 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4357 self._match(TokenType.BETWEEN) 4358 4359 return { 4360 "value": ( 4361 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4362 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4363 or self._parse_bitwise() 4364 ), 4365 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4366 } 4367 4368 def _parse_alias( 4369 self, this: t.Optional[exp.Expression], explicit: bool = False 4370 ) -> t.Optional[exp.Expression]: 4371 any_token = self._match(TokenType.ALIAS) 4372 4373 if explicit and not any_token: 4374 return this 4375 4376 if self._match(TokenType.L_PAREN): 4377 aliases = self.expression( 4378 exp.Aliases, 4379 this=this, 4380 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4381 ) 4382 self._match_r_paren(aliases) 4383 return aliases 4384 4385 alias = self._parse_id_var(any_token) 4386 4387 if alias: 4388 return self.expression(exp.Alias, this=this, alias=alias) 4389 4390 return this 4391 4392 def _parse_id_var( 4393 self, 4394 any_token: bool = True, 4395 tokens: t.Optional[t.Collection[TokenType]] = None, 4396 ) -> t.Optional[exp.Expression]: 4397 identifier = self._parse_identifier() 4398 4399 if identifier: 4400 return identifier 4401 4402 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4403 quoted = self._prev.token_type == TokenType.STRING 4404 return exp.Identifier(this=self._prev.text, quoted=quoted) 4405 4406 return None 4407 4408 def _parse_string(self) -> t.Optional[exp.Expression]: 4409 if self._match(TokenType.STRING): 4410 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4411 return self._parse_placeholder() 4412 4413 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4414 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4415 4416 def _parse_number(self) -> t.Optional[exp.Expression]: 4417 if self._match(TokenType.NUMBER): 4418 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4419 return self._parse_placeholder() 4420 4421 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4422 if self._match(TokenType.IDENTIFIER): 4423 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4424 return self._parse_placeholder() 4425 4426 def _parse_var( 4427 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4428 ) -> t.Optional[exp.Expression]: 4429 if ( 4430 (any_token and self._advance_any()) 4431 or self._match(TokenType.VAR) 4432 or (self._match_set(tokens) if tokens else False) 4433 ): 4434 return self.expression(exp.Var, this=self._prev.text) 4435 return self._parse_placeholder() 4436 4437 def _advance_any(self) -> t.Optional[Token]: 4438 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4439 self._advance() 4440 return self._prev 4441 return None 4442 4443 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4444 return self._parse_var() or self._parse_string() 4445 4446 def _parse_null(self) -> t.Optional[exp.Expression]: 4447 if self._match(TokenType.NULL): 4448 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4449 return self._parse_placeholder() 4450 4451 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4452 if self._match(TokenType.TRUE): 4453 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4454 if self._match(TokenType.FALSE): 4455 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4456 return self._parse_placeholder() 4457 4458 def _parse_star(self) -> t.Optional[exp.Expression]: 4459 if self._match(TokenType.STAR): 4460 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4461 return self._parse_placeholder() 4462 4463 def _parse_parameter(self) -> exp.Parameter: 4464 wrapped = self._match(TokenType.L_BRACE) 4465 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4466 self._match(TokenType.R_BRACE) 4467 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4468 4469 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4470 if self._match_set(self.PLACEHOLDER_PARSERS): 4471 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4472 if placeholder: 4473 return placeholder 4474 self._advance(-1) 4475 return None 4476 4477 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4478 if not self._match(TokenType.EXCEPT): 4479 return None 4480 if self._match(TokenType.L_PAREN, advance=False): 4481 return self._parse_wrapped_csv(self._parse_column) 4482 return self._parse_csv(self._parse_column) 4483 4484 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4485 if not self._match(TokenType.REPLACE): 4486 return None 4487 if self._match(TokenType.L_PAREN, advance=False): 4488 return self._parse_wrapped_csv(self._parse_expression) 4489 return self._parse_expressions() 4490 4491 def _parse_csv( 4492 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4493 ) -> t.List[exp.Expression]: 4494 parse_result = parse_method() 4495 items = [parse_result] if parse_result is not None else [] 4496 4497 while self._match(sep): 4498 self._add_comments(parse_result) 4499 parse_result = parse_method() 4500 if parse_result is not None: 4501 items.append(parse_result) 4502 4503 return items 4504 4505 def _parse_tokens( 4506 self, parse_method: t.Callable, expressions: t.Dict 4507 ) -> t.Optional[exp.Expression]: 4508 this = parse_method() 4509 4510 while self._match_set(expressions): 4511 this = self.expression( 4512 expressions[self._prev.token_type], 4513 this=this, 4514 comments=self._prev_comments, 4515 expression=parse_method(), 4516 ) 4517 4518 return this 4519 4520 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4521 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4522 4523 def _parse_wrapped_csv( 4524 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4525 ) -> t.List[exp.Expression]: 4526 return self._parse_wrapped( 4527 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4528 ) 4529 4530 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4531 wrapped = self._match(TokenType.L_PAREN) 4532 if not wrapped and not optional: 4533 self.raise_error("Expecting (") 4534 parse_result = parse_method() 4535 if wrapped: 4536 self._match_r_paren() 4537 return parse_result 4538 4539 def _parse_expressions(self) -> t.List[exp.Expression]: 4540 return self._parse_csv(self._parse_expression) 4541 4542 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4543 return self._parse_select() or self._parse_set_operations( 4544 self._parse_expression() if alias else self._parse_conjunction() 4545 ) 4546 4547 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4548 return self._parse_query_modifiers( 4549 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4550 ) 4551 4552 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4553 this = None 4554 if self._match_texts(self.TRANSACTION_KIND): 4555 this = self._prev.text 4556 4557 self._match_texts({"TRANSACTION", "WORK"}) 4558 4559 modes = [] 4560 while True: 4561 mode = [] 4562 while self._match(TokenType.VAR): 4563 mode.append(self._prev.text) 4564 4565 if mode: 4566 modes.append(" ".join(mode)) 4567 if not self._match(TokenType.COMMA): 4568 break 4569 4570 return self.expression(exp.Transaction, this=this, modes=modes) 4571 4572 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4573 chain = None 4574 savepoint = None 4575 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4576 4577 self._match_texts({"TRANSACTION", "WORK"}) 4578 4579 if self._match_text_seq("TO"): 4580 self._match_text_seq("SAVEPOINT") 4581 savepoint = self._parse_id_var() 4582 4583 if self._match(TokenType.AND): 4584 chain = not self._match_text_seq("NO") 4585 self._match_text_seq("CHAIN") 4586 4587 if is_rollback: 4588 return self.expression(exp.Rollback, savepoint=savepoint) 4589 4590 return self.expression(exp.Commit, chain=chain) 4591 4592 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4593 if not self._match_text_seq("ADD"): 4594 return None 4595 4596 self._match(TokenType.COLUMN) 4597 exists_column = self._parse_exists(not_=True) 4598 expression = self._parse_field_def() 4599 4600 if expression: 4601 expression.set("exists", exists_column) 4602 4603 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4604 if self._match_texts(("FIRST", "AFTER")): 4605 position = self._prev.text 4606 column_position = self.expression( 4607 exp.ColumnPosition, this=self._parse_column(), position=position 4608 ) 4609 expression.set("position", column_position) 4610 4611 return expression 4612 4613 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4614 drop = self._match(TokenType.DROP) and self._parse_drop() 4615 if drop and not isinstance(drop, exp.Command): 4616 drop.set("kind", drop.args.get("kind", "COLUMN")) 4617 return drop 4618 4619 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4620 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4621 return self.expression( 4622 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4623 ) 4624 4625 def _parse_add_constraint(self) -> exp.AddConstraint: 4626 this = None 4627 kind = self._prev.token_type 4628 4629 if kind == TokenType.CONSTRAINT: 4630 this = self._parse_id_var() 4631 4632 if self._match_text_seq("CHECK"): 4633 expression = self._parse_wrapped(self._parse_conjunction) 4634 enforced = self._match_text_seq("ENFORCED") 4635 4636 return self.expression( 4637 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4638 ) 4639 4640 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4641 expression = self._parse_foreign_key() 4642 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4643 expression = self._parse_primary_key() 4644 else: 4645 expression = None 4646 4647 return self.expression(exp.AddConstraint, this=this, expression=expression) 4648 4649 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4650 index = self._index - 1 4651 4652 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4653 return self._parse_csv(self._parse_add_constraint) 4654 4655 self._retreat(index) 4656 return self._parse_csv(self._parse_add_column) 4657 4658 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4659 self._match(TokenType.COLUMN) 4660 column = self._parse_field(any_token=True) 4661 4662 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4663 return self.expression(exp.AlterColumn, this=column, drop=True) 4664 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4665 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4666 4667 self._match_text_seq("SET", "DATA") 4668 return self.expression( 4669 exp.AlterColumn, 4670 this=column, 4671 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4672 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4673 using=self._match(TokenType.USING) and self._parse_conjunction(), 4674 ) 4675 4676 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4677 index = self._index - 1 4678 4679 partition_exists = self._parse_exists() 4680 if self._match(TokenType.PARTITION, advance=False): 4681 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4682 4683 self._retreat(index) 4684 return self._parse_csv(self._parse_drop_column) 4685 4686 def _parse_alter_table_rename(self) -> exp.RenameTable: 4687 self._match_text_seq("TO") 4688 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4689 4690 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4691 start = self._prev 4692 4693 if not self._match(TokenType.TABLE): 4694 return self._parse_as_command(start) 4695 4696 exists = self._parse_exists() 4697 this = self._parse_table(schema=True) 4698 4699 if self._next: 4700 self._advance() 4701 4702 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4703 if parser: 4704 actions = ensure_list(parser(self)) 4705 4706 if not self._curr: 4707 return self.expression( 4708 exp.AlterTable, 4709 this=this, 4710 exists=exists, 4711 actions=actions, 4712 ) 4713 return self._parse_as_command(start) 4714 4715 def _parse_merge(self) -> exp.Merge: 4716 self._match(TokenType.INTO) 4717 target = self._parse_table() 4718 4719 if target and self._match(TokenType.ALIAS, advance=False): 4720 target.set("alias", self._parse_table_alias()) 4721 4722 self._match(TokenType.USING) 4723 using = self._parse_table() 4724 4725 self._match(TokenType.ON) 4726 on = self._parse_conjunction() 4727 4728 whens = [] 4729 while self._match(TokenType.WHEN): 4730 matched = not self._match(TokenType.NOT) 4731 self._match_text_seq("MATCHED") 4732 source = ( 4733 False 4734 if self._match_text_seq("BY", "TARGET") 4735 else self._match_text_seq("BY", "SOURCE") 4736 ) 4737 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4738 4739 self._match(TokenType.THEN) 4740 4741 if self._match(TokenType.INSERT): 4742 _this = self._parse_star() 4743 if _this: 4744 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4745 else: 4746 then = self.expression( 4747 exp.Insert, 4748 this=self._parse_value(), 4749 expression=self._match(TokenType.VALUES) and self._parse_value(), 4750 ) 4751 elif self._match(TokenType.UPDATE): 4752 expressions = self._parse_star() 4753 if expressions: 4754 then = self.expression(exp.Update, expressions=expressions) 4755 else: 4756 then = self.expression( 4757 exp.Update, 4758 expressions=self._match(TokenType.SET) 4759 and self._parse_csv(self._parse_equality), 4760 ) 4761 elif self._match(TokenType.DELETE): 4762 then = self.expression(exp.Var, this=self._prev.text) 4763 else: 4764 then = None 4765 4766 whens.append( 4767 self.expression( 4768 exp.When, 4769 matched=matched, 4770 source=source, 4771 condition=condition, 4772 then=then, 4773 ) 4774 ) 4775 4776 return self.expression( 4777 exp.Merge, 4778 this=target, 4779 using=using, 4780 on=on, 4781 expressions=whens, 4782 ) 4783 4784 def _parse_show(self) -> t.Optional[exp.Expression]: 4785 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4786 if parser: 4787 return parser(self) 4788 self._advance() 4789 return self.expression(exp.Show, this=self._prev.text.upper()) 4790 4791 def _parse_set_item_assignment( 4792 self, kind: t.Optional[str] = None 4793 ) -> t.Optional[exp.Expression]: 4794 index = self._index 4795 4796 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4797 return self._parse_set_transaction(global_=kind == "GLOBAL") 4798 4799 left = self._parse_primary() or self._parse_id_var() 4800 4801 if not self._match_texts(("=", "TO")): 4802 self._retreat(index) 4803 return None 4804 4805 right = self._parse_statement() or self._parse_id_var() 4806 this = self.expression(exp.EQ, this=left, expression=right) 4807 4808 return self.expression(exp.SetItem, this=this, kind=kind) 4809 4810 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4811 self._match_text_seq("TRANSACTION") 4812 characteristics = self._parse_csv( 4813 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4814 ) 4815 return self.expression( 4816 exp.SetItem, 4817 expressions=characteristics, 4818 kind="TRANSACTION", 4819 **{"global": global_}, # type: ignore 4820 ) 4821 4822 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4823 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4824 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4825 4826 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4827 index = self._index 4828 set_ = self.expression( 4829 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4830 ) 4831 4832 if self._curr: 4833 self._retreat(index) 4834 return self._parse_as_command(self._prev) 4835 4836 return set_ 4837 4838 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4839 for option in options: 4840 if self._match_text_seq(*option.split(" ")): 4841 return exp.var(option) 4842 return None 4843 4844 def _parse_as_command(self, start: Token) -> exp.Command: 4845 while self._curr: 4846 self._advance() 4847 text = self._find_sql(start, self._prev) 4848 size = len(start.text) 4849 return exp.Command(this=text[:size], expression=text[size:]) 4850 4851 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4852 settings = [] 4853 4854 self._match_l_paren() 4855 kind = self._parse_id_var() 4856 4857 if self._match(TokenType.L_PAREN): 4858 while True: 4859 key = self._parse_id_var() 4860 value = self._parse_primary() 4861 4862 if not key and value is None: 4863 break 4864 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4865 self._match(TokenType.R_PAREN) 4866 4867 self._match_r_paren() 4868 4869 return self.expression( 4870 exp.DictProperty, 4871 this=this, 4872 kind=kind.this if kind else None, 4873 settings=settings, 4874 ) 4875 4876 def _parse_dict_range(self, this: str) -> exp.DictRange: 4877 self._match_l_paren() 4878 has_min = self._match_text_seq("MIN") 4879 if has_min: 4880 min = self._parse_var() or self._parse_primary() 4881 self._match_text_seq("MAX") 4882 max = self._parse_var() or self._parse_primary() 4883 else: 4884 max = self._parse_var() or self._parse_primary() 4885 min = exp.Literal.number(0) 4886 self._match_r_paren() 4887 return self.expression(exp.DictRange, this=this, min=min, max=max) 4888 4889 def _find_parser( 4890 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4891 ) -> t.Optional[t.Callable]: 4892 if not self._curr: 4893 return None 4894 4895 index = self._index 4896 this = [] 4897 while True: 4898 # The current token might be multiple words 4899 curr = self._curr.text.upper() 4900 key = curr.split(" ") 4901 this.append(curr) 4902 4903 self._advance() 4904 result, trie = in_trie(trie, key) 4905 if result == TrieResult.FAILED: 4906 break 4907 4908 if result == TrieResult.EXISTS: 4909 subparser = parsers[" ".join(this)] 4910 return subparser 4911 4912 self._retreat(index) 4913 return None 4914 4915 def _match(self, token_type, advance=True, expression=None): 4916 if not self._curr: 4917 return None 4918 4919 if self._curr.token_type == token_type: 4920 if advance: 4921 self._advance() 4922 self._add_comments(expression) 4923 return True 4924 4925 return None 4926 4927 def _match_set(self, types, advance=True): 4928 if not self._curr: 4929 return None 4930 4931 if self._curr.token_type in types: 4932 if advance: 4933 self._advance() 4934 return True 4935 4936 return None 4937 4938 def _match_pair(self, token_type_a, token_type_b, advance=True): 4939 if not self._curr or not self._next: 4940 return None 4941 4942 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4943 if advance: 4944 self._advance(2) 4945 return True 4946 4947 return None 4948 4949 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4950 if not self._match(TokenType.L_PAREN, expression=expression): 4951 self.raise_error("Expecting (") 4952 4953 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4954 if not self._match(TokenType.R_PAREN, expression=expression): 4955 self.raise_error("Expecting )") 4956 4957 def _match_texts(self, texts, advance=True): 4958 if self._curr and self._curr.text.upper() in texts: 4959 if advance: 4960 self._advance() 4961 return True 4962 return False 4963 4964 def _match_text_seq(self, *texts, advance=True): 4965 index = self._index 4966 for text in texts: 4967 if self._curr and self._curr.text.upper() == text: 4968 self._advance() 4969 else: 4970 self._retreat(index) 4971 return False 4972 4973 if not advance: 4974 self._retreat(index) 4975 4976 return True 4977 4978 @t.overload 4979 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4980 ... 4981 4982 @t.overload 4983 def _replace_columns_with_dots( 4984 self, this: t.Optional[exp.Expression] 4985 ) -> t.Optional[exp.Expression]: 4986 ... 4987 4988 def _replace_columns_with_dots(self, this): 4989 if isinstance(this, exp.Dot): 4990 exp.replace_children(this, self._replace_columns_with_dots) 4991 elif isinstance(this, exp.Column): 4992 exp.replace_children(this, self._replace_columns_with_dots) 4993 table = this.args.get("table") 4994 this = ( 4995 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4996 ) 4997 4998 return this 4999 5000 def _replace_lambda( 5001 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5002 ) -> t.Optional[exp.Expression]: 5003 if not node: 5004 return node 5005 5006 for column in node.find_all(exp.Column): 5007 if column.parts[0].name in lambda_variables: 5008 dot_or_id = column.to_dot() if column.table else column.this 5009 parent = column.parent 5010 5011 while isinstance(parent, exp.Dot): 5012 if not isinstance(parent.parent, exp.Dot): 5013 parent.replace(dot_or_id) 5014 break 5015 parent = parent.parent 5016 else: 5017 if column is node: 5018 node = dot_or_id 5019 else: 5020 column.replace(dot_or_id) 5021 return node
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TIME, 159 TokenType.TIMETZ, 160 TokenType.TIMESTAMP, 161 TokenType.TIMESTAMPTZ, 162 TokenType.TIMESTAMPLTZ, 163 TokenType.DATETIME, 164 TokenType.DATETIME64, 165 TokenType.DATE, 166 TokenType.INT4RANGE, 167 TokenType.INT4MULTIRANGE, 168 TokenType.INT8RANGE, 169 TokenType.INT8MULTIRANGE, 170 TokenType.NUMRANGE, 171 TokenType.NUMMULTIRANGE, 172 TokenType.TSRANGE, 173 TokenType.TSMULTIRANGE, 174 TokenType.TSTZRANGE, 175 TokenType.TSTZMULTIRANGE, 176 TokenType.DATERANGE, 177 TokenType.DATEMULTIRANGE, 178 TokenType.DECIMAL, 179 TokenType.BIGDECIMAL, 180 TokenType.UUID, 181 TokenType.GEOGRAPHY, 182 TokenType.GEOMETRY, 183 TokenType.HLLSKETCH, 184 TokenType.HSTORE, 185 TokenType.PSEUDO_TYPE, 186 TokenType.SUPER, 187 TokenType.SERIAL, 188 TokenType.SMALLSERIAL, 189 TokenType.BIGSERIAL, 190 TokenType.XML, 191 TokenType.YEAR, 192 TokenType.UNIQUEIDENTIFIER, 193 TokenType.USERDEFINED, 194 TokenType.MONEY, 195 TokenType.SMALLMONEY, 196 TokenType.ROWVERSION, 197 TokenType.IMAGE, 198 TokenType.VARIANT, 199 TokenType.OBJECT, 200 TokenType.INET, 201 TokenType.IPADDRESS, 202 TokenType.IPPREFIX, 203 TokenType.UNKNOWN, 204 TokenType.NULL, 205 *ENUM_TYPE_TOKENS, 206 *NESTED_TYPE_TOKENS, 207 } 208 209 SUBQUERY_PREDICATES = { 210 TokenType.ANY: exp.Any, 211 TokenType.ALL: exp.All, 212 TokenType.EXISTS: exp.Exists, 213 TokenType.SOME: exp.Any, 214 } 215 216 RESERVED_KEYWORDS = { 217 *Tokenizer.SINGLE_TOKENS.values(), 218 TokenType.SELECT, 219 } 220 221 DB_CREATABLES = { 222 TokenType.DATABASE, 223 TokenType.SCHEMA, 224 TokenType.TABLE, 225 TokenType.VIEW, 226 TokenType.DICTIONARY, 227 } 228 229 CREATABLES = { 230 TokenType.COLUMN, 231 TokenType.FUNCTION, 232 TokenType.INDEX, 233 TokenType.PROCEDURE, 234 *DB_CREATABLES, 235 } 236 237 # Tokens that can represent identifiers 238 ID_VAR_TOKENS = { 239 TokenType.VAR, 240 TokenType.ANTI, 241 TokenType.APPLY, 242 TokenType.ASC, 243 TokenType.AUTO_INCREMENT, 244 TokenType.BEGIN, 245 TokenType.CACHE, 246 TokenType.CASE, 247 TokenType.COLLATE, 248 TokenType.COMMAND, 249 TokenType.COMMENT, 250 TokenType.COMMIT, 251 TokenType.CONSTRAINT, 252 TokenType.DEFAULT, 253 TokenType.DELETE, 254 TokenType.DESC, 255 TokenType.DESCRIBE, 256 TokenType.DICTIONARY, 257 TokenType.DIV, 258 TokenType.END, 259 TokenType.EXECUTE, 260 TokenType.ESCAPE, 261 TokenType.FALSE, 262 TokenType.FIRST, 263 TokenType.FILTER, 264 TokenType.FORMAT, 265 TokenType.FULL, 266 TokenType.IS, 267 TokenType.ISNULL, 268 TokenType.INTERVAL, 269 TokenType.KEEP, 270 TokenType.LEFT, 271 TokenType.LOAD, 272 TokenType.MERGE, 273 TokenType.NATURAL, 274 TokenType.NEXT, 275 TokenType.OFFSET, 276 TokenType.ORDINALITY, 277 TokenType.OVERWRITE, 278 TokenType.PARTITION, 279 TokenType.PERCENT, 280 TokenType.PIVOT, 281 TokenType.PRAGMA, 282 TokenType.RANGE, 283 TokenType.REFERENCES, 284 TokenType.RIGHT, 285 TokenType.ROW, 286 TokenType.ROWS, 287 TokenType.SEMI, 288 TokenType.SET, 289 TokenType.SETTINGS, 290 TokenType.SHOW, 291 TokenType.TEMPORARY, 292 TokenType.TOP, 293 TokenType.TRUE, 294 TokenType.UNIQUE, 295 TokenType.UNPIVOT, 296 TokenType.UPDATE, 297 TokenType.VOLATILE, 298 TokenType.WINDOW, 299 *CREATABLES, 300 *SUBQUERY_PREDICATES, 301 *TYPE_TOKENS, 302 *NO_PAREN_FUNCTIONS, 303 } 304 305 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 306 307 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 308 TokenType.APPLY, 309 TokenType.ASOF, 310 TokenType.FULL, 311 TokenType.LEFT, 312 TokenType.LOCK, 313 TokenType.NATURAL, 314 TokenType.OFFSET, 315 TokenType.RIGHT, 316 TokenType.WINDOW, 317 } 318 319 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 320 321 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 322 323 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 324 325 FUNC_TOKENS = { 326 TokenType.COMMAND, 327 TokenType.CURRENT_DATE, 328 TokenType.CURRENT_DATETIME, 329 TokenType.CURRENT_TIMESTAMP, 330 TokenType.CURRENT_TIME, 331 TokenType.CURRENT_USER, 332 TokenType.FILTER, 333 TokenType.FIRST, 334 TokenType.FORMAT, 335 TokenType.GLOB, 336 TokenType.IDENTIFIER, 337 TokenType.INDEX, 338 TokenType.ISNULL, 339 TokenType.ILIKE, 340 TokenType.INSERT, 341 TokenType.LIKE, 342 TokenType.MERGE, 343 TokenType.OFFSET, 344 TokenType.PRIMARY_KEY, 345 TokenType.RANGE, 346 TokenType.REPLACE, 347 TokenType.RLIKE, 348 TokenType.ROW, 349 TokenType.UNNEST, 350 TokenType.VAR, 351 TokenType.LEFT, 352 TokenType.RIGHT, 353 TokenType.DATE, 354 TokenType.DATETIME, 355 TokenType.TABLE, 356 TokenType.TIMESTAMP, 357 TokenType.TIMESTAMPTZ, 358 TokenType.WINDOW, 359 TokenType.XOR, 360 *TYPE_TOKENS, 361 *SUBQUERY_PREDICATES, 362 } 363 364 CONJUNCTION = { 365 TokenType.AND: exp.And, 366 TokenType.OR: exp.Or, 367 } 368 369 EQUALITY = { 370 TokenType.EQ: exp.EQ, 371 TokenType.NEQ: exp.NEQ, 372 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 373 } 374 375 COMPARISON = { 376 TokenType.GT: exp.GT, 377 TokenType.GTE: exp.GTE, 378 TokenType.LT: exp.LT, 379 TokenType.LTE: exp.LTE, 380 } 381 382 BITWISE = { 383 TokenType.AMP: exp.BitwiseAnd, 384 TokenType.CARET: exp.BitwiseXor, 385 TokenType.PIPE: exp.BitwiseOr, 386 TokenType.DPIPE: exp.DPipe, 387 } 388 389 TERM = { 390 TokenType.DASH: exp.Sub, 391 TokenType.PLUS: exp.Add, 392 TokenType.MOD: exp.Mod, 393 TokenType.COLLATE: exp.Collate, 394 } 395 396 FACTOR = { 397 TokenType.DIV: exp.IntDiv, 398 TokenType.LR_ARROW: exp.Distance, 399 TokenType.SLASH: exp.Div, 400 TokenType.STAR: exp.Mul, 401 } 402 403 TIMES = { 404 TokenType.TIME, 405 TokenType.TIMETZ, 406 } 407 408 TIMESTAMPS = { 409 TokenType.TIMESTAMP, 410 TokenType.TIMESTAMPTZ, 411 TokenType.TIMESTAMPLTZ, 412 *TIMES, 413 } 414 415 SET_OPERATIONS = { 416 TokenType.UNION, 417 TokenType.INTERSECT, 418 TokenType.EXCEPT, 419 } 420 421 JOIN_METHODS = { 422 TokenType.NATURAL, 423 TokenType.ASOF, 424 } 425 426 JOIN_SIDES = { 427 TokenType.LEFT, 428 TokenType.RIGHT, 429 TokenType.FULL, 430 } 431 432 JOIN_KINDS = { 433 TokenType.INNER, 434 TokenType.OUTER, 435 TokenType.CROSS, 436 TokenType.SEMI, 437 TokenType.ANTI, 438 } 439 440 JOIN_HINTS: t.Set[str] = set() 441 442 LAMBDAS = { 443 TokenType.ARROW: lambda self, expressions: self.expression( 444 exp.Lambda, 445 this=self._replace_lambda( 446 self._parse_conjunction(), 447 {node.name for node in expressions}, 448 ), 449 expressions=expressions, 450 ), 451 TokenType.FARROW: lambda self, expressions: self.expression( 452 exp.Kwarg, 453 this=exp.var(expressions[0].name), 454 expression=self._parse_conjunction(), 455 ), 456 } 457 458 COLUMN_OPERATORS = { 459 TokenType.DOT: None, 460 TokenType.DCOLON: lambda self, this, to: self.expression( 461 exp.Cast if self.STRICT_CAST else exp.TryCast, 462 this=this, 463 to=to, 464 ), 465 TokenType.ARROW: lambda self, this, path: self.expression( 466 exp.JSONExtract, 467 this=this, 468 expression=path, 469 ), 470 TokenType.DARROW: lambda self, this, path: self.expression( 471 exp.JSONExtractScalar, 472 this=this, 473 expression=path, 474 ), 475 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 476 exp.JSONBExtract, 477 this=this, 478 expression=path, 479 ), 480 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 481 exp.JSONBExtractScalar, 482 this=this, 483 expression=path, 484 ), 485 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 486 exp.JSONBContains, 487 this=this, 488 expression=key, 489 ), 490 } 491 492 EXPRESSION_PARSERS = { 493 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 494 exp.Column: lambda self: self._parse_column(), 495 exp.Condition: lambda self: self._parse_conjunction(), 496 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 497 exp.Expression: lambda self: self._parse_statement(), 498 exp.From: lambda self: self._parse_from(), 499 exp.Group: lambda self: self._parse_group(), 500 exp.Having: lambda self: self._parse_having(), 501 exp.Identifier: lambda self: self._parse_id_var(), 502 exp.Join: lambda self: self._parse_join(), 503 exp.Lambda: lambda self: self._parse_lambda(), 504 exp.Lateral: lambda self: self._parse_lateral(), 505 exp.Limit: lambda self: self._parse_limit(), 506 exp.Offset: lambda self: self._parse_offset(), 507 exp.Order: lambda self: self._parse_order(), 508 exp.Ordered: lambda self: self._parse_ordered(), 509 exp.Properties: lambda self: self._parse_properties(), 510 exp.Qualify: lambda self: self._parse_qualify(), 511 exp.Returning: lambda self: self._parse_returning(), 512 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 513 exp.Table: lambda self: self._parse_table_parts(), 514 exp.TableAlias: lambda self: self._parse_table_alias(), 515 exp.Where: lambda self: self._parse_where(), 516 exp.Window: lambda self: self._parse_named_window(), 517 exp.With: lambda self: self._parse_with(), 518 "JOIN_TYPE": lambda self: self._parse_join_parts(), 519 } 520 521 STATEMENT_PARSERS = { 522 TokenType.ALTER: lambda self: self._parse_alter(), 523 TokenType.BEGIN: lambda self: self._parse_transaction(), 524 TokenType.CACHE: lambda self: self._parse_cache(), 525 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 526 TokenType.COMMENT: lambda self: self._parse_comment(), 527 TokenType.CREATE: lambda self: self._parse_create(), 528 TokenType.DELETE: lambda self: self._parse_delete(), 529 TokenType.DESC: lambda self: self._parse_describe(), 530 TokenType.DESCRIBE: lambda self: self._parse_describe(), 531 TokenType.DROP: lambda self: self._parse_drop(), 532 TokenType.FROM: lambda self: exp.select("*").from_( 533 t.cast(exp.From, self._parse_from(skip_from_token=True)) 534 ), 535 TokenType.INSERT: lambda self: self._parse_insert(), 536 TokenType.LOAD: lambda self: self._parse_load(), 537 TokenType.MERGE: lambda self: self._parse_merge(), 538 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 539 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 540 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 541 TokenType.SET: lambda self: self._parse_set(), 542 TokenType.UNCACHE: lambda self: self._parse_uncache(), 543 TokenType.UPDATE: lambda self: self._parse_update(), 544 TokenType.USE: lambda self: self.expression( 545 exp.Use, 546 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 547 and exp.var(self._prev.text), 548 this=self._parse_table(schema=False), 549 ), 550 } 551 552 UNARY_PARSERS = { 553 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 554 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 555 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 556 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 557 } 558 559 PRIMARY_PARSERS = { 560 TokenType.STRING: lambda self, token: self.expression( 561 exp.Literal, this=token.text, is_string=True 562 ), 563 TokenType.NUMBER: lambda self, token: self.expression( 564 exp.Literal, this=token.text, is_string=False 565 ), 566 TokenType.STAR: lambda self, _: self.expression( 567 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 568 ), 569 TokenType.NULL: lambda self, _: self.expression(exp.Null), 570 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 571 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 572 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 573 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 574 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 575 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 576 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 577 exp.National, this=token.text 578 ), 579 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 580 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 581 } 582 583 PLACEHOLDER_PARSERS = { 584 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 585 TokenType.PARAMETER: lambda self: self._parse_parameter(), 586 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 587 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 588 else None, 589 } 590 591 RANGE_PARSERS = { 592 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 593 TokenType.GLOB: binary_range_parser(exp.Glob), 594 TokenType.ILIKE: binary_range_parser(exp.ILike), 595 TokenType.IN: lambda self, this: self._parse_in(this), 596 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 597 TokenType.IS: lambda self, this: self._parse_is(this), 598 TokenType.LIKE: binary_range_parser(exp.Like), 599 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 600 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 601 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 602 } 603 604 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 605 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 606 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 607 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 608 "CHARACTER SET": lambda self: self._parse_character_set(), 609 "CHECKSUM": lambda self: self._parse_checksum(), 610 "CLUSTER BY": lambda self: self._parse_cluster(), 611 "CLUSTERED": lambda self: self._parse_clustered_by(), 612 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 613 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 614 "COPY": lambda self: self._parse_copy_property(), 615 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 616 "DEFINER": lambda self: self._parse_definer(), 617 "DETERMINISTIC": lambda self: self.expression( 618 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 619 ), 620 "DISTKEY": lambda self: self._parse_distkey(), 621 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 622 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 623 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 624 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 625 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 626 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 627 "FREESPACE": lambda self: self._parse_freespace(), 628 "HEAP": lambda self: self.expression(exp.HeapProperty), 629 "IMMUTABLE": lambda self: self.expression( 630 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 631 ), 632 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 633 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 634 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 635 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 636 "LIKE": lambda self: self._parse_create_like(), 637 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 638 "LOCK": lambda self: self._parse_locking(), 639 "LOCKING": lambda self: self._parse_locking(), 640 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 641 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 642 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 643 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 644 "NO": lambda self: self._parse_no_property(), 645 "ON": lambda self: self._parse_on_property(), 646 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 647 "PARTITION BY": lambda self: self._parse_partitioned_by(), 648 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 649 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 650 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 651 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 652 "RETURNS": lambda self: self._parse_returns(), 653 "ROW": lambda self: self._parse_row(), 654 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 655 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 656 "SETTINGS": lambda self: self.expression( 657 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 658 ), 659 "SORTKEY": lambda self: self._parse_sortkey(), 660 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 661 "STABLE": lambda self: self.expression( 662 exp.StabilityProperty, this=exp.Literal.string("STABLE") 663 ), 664 "STORED": lambda self: self._parse_stored(), 665 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 666 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 667 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 668 "TO": lambda self: self._parse_to_table(), 669 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 670 "TTL": lambda self: self._parse_ttl(), 671 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 672 "VOLATILE": lambda self: self._parse_volatile_property(), 673 "WITH": lambda self: self._parse_with_property(), 674 } 675 676 CONSTRAINT_PARSERS = { 677 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 678 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 679 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 680 "CHARACTER SET": lambda self: self.expression( 681 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 682 ), 683 "CHECK": lambda self: self.expression( 684 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 685 ), 686 "COLLATE": lambda self: self.expression( 687 exp.CollateColumnConstraint, this=self._parse_var() 688 ), 689 "COMMENT": lambda self: self.expression( 690 exp.CommentColumnConstraint, this=self._parse_string() 691 ), 692 "COMPRESS": lambda self: self._parse_compress(), 693 "CLUSTERED": lambda self: self.expression( 694 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 695 ), 696 "NONCLUSTERED": lambda self: self.expression( 697 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 698 ), 699 "DEFAULT": lambda self: self.expression( 700 exp.DefaultColumnConstraint, this=self._parse_bitwise() 701 ), 702 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 703 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 704 "FORMAT": lambda self: self.expression( 705 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 706 ), 707 "GENERATED": lambda self: self._parse_generated_as_identity(), 708 "IDENTITY": lambda self: self._parse_auto_increment(), 709 "INLINE": lambda self: self._parse_inline(), 710 "LIKE": lambda self: self._parse_create_like(), 711 "NOT": lambda self: self._parse_not_constraint(), 712 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 713 "ON": lambda self: ( 714 self._match(TokenType.UPDATE) 715 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 716 ) 717 or self.expression(exp.OnProperty, this=self._parse_id_var()), 718 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 719 "PRIMARY KEY": lambda self: self._parse_primary_key(), 720 "REFERENCES": lambda self: self._parse_references(match=False), 721 "TITLE": lambda self: self.expression( 722 exp.TitleColumnConstraint, this=self._parse_var_or_string() 723 ), 724 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 725 "UNIQUE": lambda self: self._parse_unique(), 726 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 727 "WITH": lambda self: self.expression( 728 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 729 ), 730 } 731 732 ALTER_PARSERS = { 733 "ADD": lambda self: self._parse_alter_table_add(), 734 "ALTER": lambda self: self._parse_alter_table_alter(), 735 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 736 "DROP": lambda self: self._parse_alter_table_drop(), 737 "RENAME": lambda self: self._parse_alter_table_rename(), 738 } 739 740 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 741 742 NO_PAREN_FUNCTION_PARSERS = { 743 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 744 "CASE": lambda self: self._parse_case(), 745 "IF": lambda self: self._parse_if(), 746 "NEXT": lambda self: self._parse_next_value_for(), 747 } 748 749 INVALID_FUNC_NAME_TOKENS = { 750 TokenType.IDENTIFIER, 751 TokenType.STRING, 752 } 753 754 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 755 756 FUNCTION_PARSERS = { 757 "ANY_VALUE": lambda self: self._parse_any_value(), 758 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 759 "CONCAT": lambda self: self._parse_concat(), 760 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 761 "DECODE": lambda self: self._parse_decode(), 762 "EXTRACT": lambda self: self._parse_extract(), 763 "JSON_OBJECT": lambda self: self._parse_json_object(), 764 "LOG": lambda self: self._parse_logarithm(), 765 "MATCH": lambda self: self._parse_match_against(), 766 "OPENJSON": lambda self: self._parse_open_json(), 767 "POSITION": lambda self: self._parse_position(), 768 "SAFE_CAST": lambda self: self._parse_cast(False), 769 "STRING_AGG": lambda self: self._parse_string_agg(), 770 "SUBSTRING": lambda self: self._parse_substring(), 771 "TRIM": lambda self: self._parse_trim(), 772 "TRY_CAST": lambda self: self._parse_cast(False), 773 "TRY_CONVERT": lambda self: self._parse_convert(False), 774 } 775 776 QUERY_MODIFIER_PARSERS = { 777 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 778 TokenType.WHERE: lambda self: ("where", self._parse_where()), 779 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 780 TokenType.HAVING: lambda self: ("having", self._parse_having()), 781 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 782 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 783 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 784 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 785 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 786 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 787 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 788 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 789 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 790 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 791 TokenType.CLUSTER_BY: lambda self: ( 792 "cluster", 793 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 ), 795 TokenType.DISTRIBUTE_BY: lambda self: ( 796 "distribute", 797 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 798 ), 799 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 800 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 801 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 802 } 803 804 SET_PARSERS = { 805 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 806 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 807 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 808 "TRANSACTION": lambda self: self._parse_set_transaction(), 809 } 810 811 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 812 813 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 814 815 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 816 817 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 818 819 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 820 821 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 822 TRANSACTION_CHARACTERISTICS = { 823 "ISOLATION LEVEL REPEATABLE READ", 824 "ISOLATION LEVEL READ COMMITTED", 825 "ISOLATION LEVEL READ UNCOMMITTED", 826 "ISOLATION LEVEL SERIALIZABLE", 827 "READ WRITE", 828 "READ ONLY", 829 } 830 831 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 832 833 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 834 835 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 836 837 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 838 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 839 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 840 841 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 842 843 DISTINCT_TOKENS = {TokenType.DISTINCT} 844 845 STRICT_CAST = True 846 847 # A NULL arg in CONCAT yields NULL by default 848 CONCAT_NULL_OUTPUTS_STRING = False 849 850 PREFIXED_PIVOT_COLUMNS = False 851 IDENTIFY_PIVOT_STRINGS = False 852 853 LOG_BASE_FIRST = True 854 LOG_DEFAULTS_TO_LN = False 855 856 SUPPORTS_USER_DEFINED_TYPES = True 857 858 __slots__ = ( 859 "error_level", 860 "error_message_context", 861 "max_errors", 862 "sql", 863 "errors", 864 "_tokens", 865 "_index", 866 "_curr", 867 "_next", 868 "_prev", 869 "_prev_comments", 870 "_tokenizer", 871 ) 872 873 # Autofilled 874 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 875 INDEX_OFFSET: int = 0 876 UNNEST_COLUMN_ONLY: bool = False 877 ALIAS_POST_TABLESAMPLE: bool = False 878 STRICT_STRING_CONCAT = False 879 NORMALIZE_FUNCTIONS = "upper" 880 NULL_ORDERING: str = "nulls_are_small" 881 SHOW_TRIE: t.Dict = {} 882 SET_TRIE: t.Dict = {} 883 FORMAT_MAPPING: t.Dict[str, str] = {} 884 FORMAT_TRIE: t.Dict = {} 885 TIME_MAPPING: t.Dict[str, str] = {} 886 TIME_TRIE: t.Dict = {} 887 888 def __init__( 889 self, 890 error_level: t.Optional[ErrorLevel] = None, 891 error_message_context: int = 100, 892 max_errors: int = 3, 893 ): 894 self.error_level = error_level or ErrorLevel.IMMEDIATE 895 self.error_message_context = error_message_context 896 self.max_errors = max_errors 897 self._tokenizer = self.TOKENIZER_CLASS() 898 self.reset() 899 900 def reset(self): 901 self.sql = "" 902 self.errors = [] 903 self._tokens = [] 904 self._index = 0 905 self._curr = None 906 self._next = None 907 self._prev = None 908 self._prev_comments = None 909 910 def parse( 911 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 912 ) -> t.List[t.Optional[exp.Expression]]: 913 """ 914 Parses a list of tokens and returns a list of syntax trees, one tree 915 per parsed SQL statement. 916 917 Args: 918 raw_tokens: The list of tokens. 919 sql: The original SQL string, used to produce helpful debug messages. 920 921 Returns: 922 The list of the produced syntax trees. 923 """ 924 return self._parse( 925 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 926 ) 927 928 def parse_into( 929 self, 930 expression_types: exp.IntoType, 931 raw_tokens: t.List[Token], 932 sql: t.Optional[str] = None, 933 ) -> t.List[t.Optional[exp.Expression]]: 934 """ 935 Parses a list of tokens into a given Expression type. If a collection of Expression 936 types is given instead, this method will try to parse the token list into each one 937 of them, stopping at the first for which the parsing succeeds. 938 939 Args: 940 expression_types: The expression type(s) to try and parse the token list into. 941 raw_tokens: The list of tokens. 942 sql: The original SQL string, used to produce helpful debug messages. 943 944 Returns: 945 The target Expression. 946 """ 947 errors = [] 948 for expression_type in ensure_list(expression_types): 949 parser = self.EXPRESSION_PARSERS.get(expression_type) 950 if not parser: 951 raise TypeError(f"No parser registered for {expression_type}") 952 953 try: 954 return self._parse(parser, raw_tokens, sql) 955 except ParseError as e: 956 e.errors[0]["into_expression"] = expression_type 957 errors.append(e) 958 959 raise ParseError( 960 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 961 errors=merge_errors(errors), 962 ) from errors[-1] 963 964 def _parse( 965 self, 966 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 967 raw_tokens: t.List[Token], 968 sql: t.Optional[str] = None, 969 ) -> t.List[t.Optional[exp.Expression]]: 970 self.reset() 971 self.sql = sql or "" 972 973 total = len(raw_tokens) 974 chunks: t.List[t.List[Token]] = [[]] 975 976 for i, token in enumerate(raw_tokens): 977 if token.token_type == TokenType.SEMICOLON: 978 if i < total - 1: 979 chunks.append([]) 980 else: 981 chunks[-1].append(token) 982 983 expressions = [] 984 985 for tokens in chunks: 986 self._index = -1 987 self._tokens = tokens 988 self._advance() 989 990 expressions.append(parse_method(self)) 991 992 if self._index < len(self._tokens): 993 self.raise_error("Invalid expression / Unexpected token") 994 995 self.check_errors() 996 997 return expressions 998 999 def check_errors(self) -> None: 1000 """Logs or raises any found errors, depending on the chosen error level setting.""" 1001 if self.error_level == ErrorLevel.WARN: 1002 for error in self.errors: 1003 logger.error(str(error)) 1004 elif self.error_level == ErrorLevel.RAISE and self.errors: 1005 raise ParseError( 1006 concat_messages(self.errors, self.max_errors), 1007 errors=merge_errors(self.errors), 1008 ) 1009 1010 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1011 """ 1012 Appends an error in the list of recorded errors or raises it, depending on the chosen 1013 error level setting. 1014 """ 1015 token = token or self._curr or self._prev or Token.string("") 1016 start = token.start 1017 end = token.end + 1 1018 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1019 highlight = self.sql[start:end] 1020 end_context = self.sql[end : end + self.error_message_context] 1021 1022 error = ParseError.new( 1023 f"{message}. Line {token.line}, Col: {token.col}.\n" 1024 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1025 description=message, 1026 line=token.line, 1027 col=token.col, 1028 start_context=start_context, 1029 highlight=highlight, 1030 end_context=end_context, 1031 ) 1032 1033 if self.error_level == ErrorLevel.IMMEDIATE: 1034 raise error 1035 1036 self.errors.append(error) 1037 1038 def expression( 1039 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1040 ) -> E: 1041 """ 1042 Creates a new, validated Expression. 1043 1044 Args: 1045 exp_class: The expression class to instantiate. 1046 comments: An optional list of comments to attach to the expression. 1047 kwargs: The arguments to set for the expression along with their respective values. 1048 1049 Returns: 1050 The target expression. 1051 """ 1052 instance = exp_class(**kwargs) 1053 instance.add_comments(comments) if comments else self._add_comments(instance) 1054 return self.validate_expression(instance) 1055 1056 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1057 if expression and self._prev_comments: 1058 expression.add_comments(self._prev_comments) 1059 self._prev_comments = None 1060 1061 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1062 """ 1063 Validates an Expression, making sure that all its mandatory arguments are set. 1064 1065 Args: 1066 expression: The expression to validate. 1067 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1068 1069 Returns: 1070 The validated expression. 1071 """ 1072 if self.error_level != ErrorLevel.IGNORE: 1073 for error_message in expression.error_messages(args): 1074 self.raise_error(error_message) 1075 1076 return expression 1077 1078 def _find_sql(self, start: Token, end: Token) -> str: 1079 return self.sql[start.start : end.end + 1] 1080 1081 def _advance(self, times: int = 1) -> None: 1082 self._index += times 1083 self._curr = seq_get(self._tokens, self._index) 1084 self._next = seq_get(self._tokens, self._index + 1) 1085 1086 if self._index > 0: 1087 self._prev = self._tokens[self._index - 1] 1088 self._prev_comments = self._prev.comments 1089 else: 1090 self._prev = None 1091 self._prev_comments = None 1092 1093 def _retreat(self, index: int) -> None: 1094 if index != self._index: 1095 self._advance(index - self._index) 1096 1097 def _parse_command(self) -> exp.Command: 1098 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1099 1100 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1101 start = self._prev 1102 exists = self._parse_exists() if allow_exists else None 1103 1104 self._match(TokenType.ON) 1105 1106 kind = self._match_set(self.CREATABLES) and self._prev 1107 if not kind: 1108 return self._parse_as_command(start) 1109 1110 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1111 this = self._parse_user_defined_function(kind=kind.token_type) 1112 elif kind.token_type == TokenType.TABLE: 1113 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1114 elif kind.token_type == TokenType.COLUMN: 1115 this = self._parse_column() 1116 else: 1117 this = self._parse_id_var() 1118 1119 self._match(TokenType.IS) 1120 1121 return self.expression( 1122 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1123 ) 1124 1125 def _parse_to_table( 1126 self, 1127 ) -> exp.ToTableProperty: 1128 table = self._parse_table_parts(schema=True) 1129 return self.expression(exp.ToTableProperty, this=table) 1130 1131 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1132 def _parse_ttl(self) -> exp.Expression: 1133 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1134 this = self._parse_bitwise() 1135 1136 if self._match_text_seq("DELETE"): 1137 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1138 if self._match_text_seq("RECOMPRESS"): 1139 return self.expression( 1140 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1141 ) 1142 if self._match_text_seq("TO", "DISK"): 1143 return self.expression( 1144 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1145 ) 1146 if self._match_text_seq("TO", "VOLUME"): 1147 return self.expression( 1148 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1149 ) 1150 1151 return this 1152 1153 expressions = self._parse_csv(_parse_ttl_action) 1154 where = self._parse_where() 1155 group = self._parse_group() 1156 1157 aggregates = None 1158 if group and self._match(TokenType.SET): 1159 aggregates = self._parse_csv(self._parse_set_item) 1160 1161 return self.expression( 1162 exp.MergeTreeTTL, 1163 expressions=expressions, 1164 where=where, 1165 group=group, 1166 aggregates=aggregates, 1167 ) 1168 1169 def _parse_statement(self) -> t.Optional[exp.Expression]: 1170 if self._curr is None: 1171 return None 1172 1173 if self._match_set(self.STATEMENT_PARSERS): 1174 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1175 1176 if self._match_set(Tokenizer.COMMANDS): 1177 return self._parse_command() 1178 1179 expression = self._parse_expression() 1180 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1181 return self._parse_query_modifiers(expression) 1182 1183 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1184 start = self._prev 1185 temporary = self._match(TokenType.TEMPORARY) 1186 materialized = self._match_text_seq("MATERIALIZED") 1187 1188 kind = self._match_set(self.CREATABLES) and self._prev.text 1189 if not kind: 1190 return self._parse_as_command(start) 1191 1192 return self.expression( 1193 exp.Drop, 1194 comments=start.comments, 1195 exists=exists or self._parse_exists(), 1196 this=self._parse_table(schema=True), 1197 kind=kind, 1198 temporary=temporary, 1199 materialized=materialized, 1200 cascade=self._match_text_seq("CASCADE"), 1201 constraints=self._match_text_seq("CONSTRAINTS"), 1202 purge=self._match_text_seq("PURGE"), 1203 ) 1204 1205 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1206 return ( 1207 self._match_text_seq("IF") 1208 and (not not_ or self._match(TokenType.NOT)) 1209 and self._match(TokenType.EXISTS) 1210 ) 1211 1212 def _parse_create(self) -> exp.Create | exp.Command: 1213 # Note: this can't be None because we've matched a statement parser 1214 start = self._prev 1215 comments = self._prev_comments 1216 1217 replace = start.text.upper() == "REPLACE" or self._match_pair( 1218 TokenType.OR, TokenType.REPLACE 1219 ) 1220 unique = self._match(TokenType.UNIQUE) 1221 1222 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1223 self._advance() 1224 1225 properties = None 1226 create_token = self._match_set(self.CREATABLES) and self._prev 1227 1228 if not create_token: 1229 # exp.Properties.Location.POST_CREATE 1230 properties = self._parse_properties() 1231 create_token = self._match_set(self.CREATABLES) and self._prev 1232 1233 if not properties or not create_token: 1234 return self._parse_as_command(start) 1235 1236 exists = self._parse_exists(not_=True) 1237 this = None 1238 expression: t.Optional[exp.Expression] = None 1239 indexes = None 1240 no_schema_binding = None 1241 begin = None 1242 clone = None 1243 1244 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1245 nonlocal properties 1246 if properties and temp_props: 1247 properties.expressions.extend(temp_props.expressions) 1248 elif temp_props: 1249 properties = temp_props 1250 1251 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1252 this = self._parse_user_defined_function(kind=create_token.token_type) 1253 1254 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1255 extend_props(self._parse_properties()) 1256 1257 self._match(TokenType.ALIAS) 1258 1259 if self._match(TokenType.COMMAND): 1260 expression = self._parse_as_command(self._prev) 1261 else: 1262 begin = self._match(TokenType.BEGIN) 1263 return_ = self._match_text_seq("RETURN") 1264 expression = self._parse_statement() 1265 1266 if return_: 1267 expression = self.expression(exp.Return, this=expression) 1268 elif create_token.token_type == TokenType.INDEX: 1269 this = self._parse_index(index=self._parse_id_var()) 1270 elif create_token.token_type in self.DB_CREATABLES: 1271 table_parts = self._parse_table_parts(schema=True) 1272 1273 # exp.Properties.Location.POST_NAME 1274 self._match(TokenType.COMMA) 1275 extend_props(self._parse_properties(before=True)) 1276 1277 this = self._parse_schema(this=table_parts) 1278 1279 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1280 extend_props(self._parse_properties()) 1281 1282 self._match(TokenType.ALIAS) 1283 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1284 # exp.Properties.Location.POST_ALIAS 1285 extend_props(self._parse_properties()) 1286 1287 expression = self._parse_ddl_select() 1288 1289 if create_token.token_type == TokenType.TABLE: 1290 # exp.Properties.Location.POST_EXPRESSION 1291 extend_props(self._parse_properties()) 1292 1293 indexes = [] 1294 while True: 1295 index = self._parse_index() 1296 1297 # exp.Properties.Location.POST_INDEX 1298 extend_props(self._parse_properties()) 1299 1300 if not index: 1301 break 1302 else: 1303 self._match(TokenType.COMMA) 1304 indexes.append(index) 1305 elif create_token.token_type == TokenType.VIEW: 1306 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1307 no_schema_binding = True 1308 1309 if self._match_text_seq("CLONE"): 1310 clone = self._parse_table(schema=True) 1311 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1312 clone_kind = ( 1313 self._match(TokenType.L_PAREN) 1314 and self._match_texts(self.CLONE_KINDS) 1315 and self._prev.text.upper() 1316 ) 1317 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1318 self._match(TokenType.R_PAREN) 1319 clone = self.expression( 1320 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1321 ) 1322 1323 return self.expression( 1324 exp.Create, 1325 comments=comments, 1326 this=this, 1327 kind=create_token.text, 1328 replace=replace, 1329 unique=unique, 1330 expression=expression, 1331 exists=exists, 1332 properties=properties, 1333 indexes=indexes, 1334 no_schema_binding=no_schema_binding, 1335 begin=begin, 1336 clone=clone, 1337 ) 1338 1339 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1340 # only used for teradata currently 1341 self._match(TokenType.COMMA) 1342 1343 kwargs = { 1344 "no": self._match_text_seq("NO"), 1345 "dual": self._match_text_seq("DUAL"), 1346 "before": self._match_text_seq("BEFORE"), 1347 "default": self._match_text_seq("DEFAULT"), 1348 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1349 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1350 "after": self._match_text_seq("AFTER"), 1351 "minimum": self._match_texts(("MIN", "MINIMUM")), 1352 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1353 } 1354 1355 if self._match_texts(self.PROPERTY_PARSERS): 1356 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1357 try: 1358 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1359 except TypeError: 1360 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1361 1362 return None 1363 1364 def _parse_property(self) -> t.Optional[exp.Expression]: 1365 if self._match_texts(self.PROPERTY_PARSERS): 1366 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1367 1368 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1369 return self._parse_character_set(default=True) 1370 1371 if self._match_text_seq("COMPOUND", "SORTKEY"): 1372 return self._parse_sortkey(compound=True) 1373 1374 if self._match_text_seq("SQL", "SECURITY"): 1375 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1376 1377 assignment = self._match_pair( 1378 TokenType.VAR, TokenType.EQ, advance=False 1379 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1380 1381 if assignment: 1382 key = self._parse_var_or_string() 1383 self._match(TokenType.EQ) 1384 return self.expression( 1385 exp.Property, 1386 this=key, 1387 value=self._parse_column() or self._parse_var(any_token=True), 1388 ) 1389 1390 return None 1391 1392 def _parse_stored(self) -> exp.FileFormatProperty: 1393 self._match(TokenType.ALIAS) 1394 1395 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1396 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1397 1398 return self.expression( 1399 exp.FileFormatProperty, 1400 this=self.expression( 1401 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1402 ) 1403 if input_format or output_format 1404 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1405 ) 1406 1407 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1408 self._match(TokenType.EQ) 1409 self._match(TokenType.ALIAS) 1410 return self.expression(exp_class, this=self._parse_field()) 1411 1412 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1413 properties = [] 1414 while True: 1415 if before: 1416 prop = self._parse_property_before() 1417 else: 1418 prop = self._parse_property() 1419 1420 if not prop: 1421 break 1422 for p in ensure_list(prop): 1423 properties.append(p) 1424 1425 if properties: 1426 return self.expression(exp.Properties, expressions=properties) 1427 1428 return None 1429 1430 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1431 return self.expression( 1432 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1433 ) 1434 1435 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1436 if self._index >= 2: 1437 pre_volatile_token = self._tokens[self._index - 2] 1438 else: 1439 pre_volatile_token = None 1440 1441 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1442 return exp.VolatileProperty() 1443 1444 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1445 1446 def _parse_with_property( 1447 self, 1448 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1449 if self._match(TokenType.L_PAREN, advance=False): 1450 return self._parse_wrapped_csv(self._parse_property) 1451 1452 if self._match_text_seq("JOURNAL"): 1453 return self._parse_withjournaltable() 1454 1455 if self._match_text_seq("DATA"): 1456 return self._parse_withdata(no=False) 1457 elif self._match_text_seq("NO", "DATA"): 1458 return self._parse_withdata(no=True) 1459 1460 if not self._next: 1461 return None 1462 1463 return self._parse_withisolatedloading() 1464 1465 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1466 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1467 self._match(TokenType.EQ) 1468 1469 user = self._parse_id_var() 1470 self._match(TokenType.PARAMETER) 1471 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1472 1473 if not user or not host: 1474 return None 1475 1476 return exp.DefinerProperty(this=f"{user}@{host}") 1477 1478 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1479 self._match(TokenType.TABLE) 1480 self._match(TokenType.EQ) 1481 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1482 1483 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1484 return self.expression(exp.LogProperty, no=no) 1485 1486 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1487 return self.expression(exp.JournalProperty, **kwargs) 1488 1489 def _parse_checksum(self) -> exp.ChecksumProperty: 1490 self._match(TokenType.EQ) 1491 1492 on = None 1493 if self._match(TokenType.ON): 1494 on = True 1495 elif self._match_text_seq("OFF"): 1496 on = False 1497 1498 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1499 1500 def _parse_cluster(self) -> exp.Cluster: 1501 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1502 1503 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1504 self._match_text_seq("BY") 1505 1506 self._match_l_paren() 1507 expressions = self._parse_csv(self._parse_column) 1508 self._match_r_paren() 1509 1510 if self._match_text_seq("SORTED", "BY"): 1511 self._match_l_paren() 1512 sorted_by = self._parse_csv(self._parse_ordered) 1513 self._match_r_paren() 1514 else: 1515 sorted_by = None 1516 1517 self._match(TokenType.INTO) 1518 buckets = self._parse_number() 1519 self._match_text_seq("BUCKETS") 1520 1521 return self.expression( 1522 exp.ClusteredByProperty, 1523 expressions=expressions, 1524 sorted_by=sorted_by, 1525 buckets=buckets, 1526 ) 1527 1528 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1529 if not self._match_text_seq("GRANTS"): 1530 self._retreat(self._index - 1) 1531 return None 1532 1533 return self.expression(exp.CopyGrantsProperty) 1534 1535 def _parse_freespace(self) -> exp.FreespaceProperty: 1536 self._match(TokenType.EQ) 1537 return self.expression( 1538 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1539 ) 1540 1541 def _parse_mergeblockratio( 1542 self, no: bool = False, default: bool = False 1543 ) -> exp.MergeBlockRatioProperty: 1544 if self._match(TokenType.EQ): 1545 return self.expression( 1546 exp.MergeBlockRatioProperty, 1547 this=self._parse_number(), 1548 percent=self._match(TokenType.PERCENT), 1549 ) 1550 1551 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1552 1553 def _parse_datablocksize( 1554 self, 1555 default: t.Optional[bool] = None, 1556 minimum: t.Optional[bool] = None, 1557 maximum: t.Optional[bool] = None, 1558 ) -> exp.DataBlocksizeProperty: 1559 self._match(TokenType.EQ) 1560 size = self._parse_number() 1561 1562 units = None 1563 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1564 units = self._prev.text 1565 1566 return self.expression( 1567 exp.DataBlocksizeProperty, 1568 size=size, 1569 units=units, 1570 default=default, 1571 minimum=minimum, 1572 maximum=maximum, 1573 ) 1574 1575 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1576 self._match(TokenType.EQ) 1577 always = self._match_text_seq("ALWAYS") 1578 manual = self._match_text_seq("MANUAL") 1579 never = self._match_text_seq("NEVER") 1580 default = self._match_text_seq("DEFAULT") 1581 1582 autotemp = None 1583 if self._match_text_seq("AUTOTEMP"): 1584 autotemp = self._parse_schema() 1585 1586 return self.expression( 1587 exp.BlockCompressionProperty, 1588 always=always, 1589 manual=manual, 1590 never=never, 1591 default=default, 1592 autotemp=autotemp, 1593 ) 1594 1595 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1596 no = self._match_text_seq("NO") 1597 concurrent = self._match_text_seq("CONCURRENT") 1598 self._match_text_seq("ISOLATED", "LOADING") 1599 for_all = self._match_text_seq("FOR", "ALL") 1600 for_insert = self._match_text_seq("FOR", "INSERT") 1601 for_none = self._match_text_seq("FOR", "NONE") 1602 return self.expression( 1603 exp.IsolatedLoadingProperty, 1604 no=no, 1605 concurrent=concurrent, 1606 for_all=for_all, 1607 for_insert=for_insert, 1608 for_none=for_none, 1609 ) 1610 1611 def _parse_locking(self) -> exp.LockingProperty: 1612 if self._match(TokenType.TABLE): 1613 kind = "TABLE" 1614 elif self._match(TokenType.VIEW): 1615 kind = "VIEW" 1616 elif self._match(TokenType.ROW): 1617 kind = "ROW" 1618 elif self._match_text_seq("DATABASE"): 1619 kind = "DATABASE" 1620 else: 1621 kind = None 1622 1623 if kind in ("DATABASE", "TABLE", "VIEW"): 1624 this = self._parse_table_parts() 1625 else: 1626 this = None 1627 1628 if self._match(TokenType.FOR): 1629 for_or_in = "FOR" 1630 elif self._match(TokenType.IN): 1631 for_or_in = "IN" 1632 else: 1633 for_or_in = None 1634 1635 if self._match_text_seq("ACCESS"): 1636 lock_type = "ACCESS" 1637 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1638 lock_type = "EXCLUSIVE" 1639 elif self._match_text_seq("SHARE"): 1640 lock_type = "SHARE" 1641 elif self._match_text_seq("READ"): 1642 lock_type = "READ" 1643 elif self._match_text_seq("WRITE"): 1644 lock_type = "WRITE" 1645 elif self._match_text_seq("CHECKSUM"): 1646 lock_type = "CHECKSUM" 1647 else: 1648 lock_type = None 1649 1650 override = self._match_text_seq("OVERRIDE") 1651 1652 return self.expression( 1653 exp.LockingProperty, 1654 this=this, 1655 kind=kind, 1656 for_or_in=for_or_in, 1657 lock_type=lock_type, 1658 override=override, 1659 ) 1660 1661 def _parse_partition_by(self) -> t.List[exp.Expression]: 1662 if self._match(TokenType.PARTITION_BY): 1663 return self._parse_csv(self._parse_conjunction) 1664 return [] 1665 1666 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1667 self._match(TokenType.EQ) 1668 return self.expression( 1669 exp.PartitionedByProperty, 1670 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1671 ) 1672 1673 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1674 if self._match_text_seq("AND", "STATISTICS"): 1675 statistics = True 1676 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1677 statistics = False 1678 else: 1679 statistics = None 1680 1681 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1682 1683 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1684 if self._match_text_seq("PRIMARY", "INDEX"): 1685 return exp.NoPrimaryIndexProperty() 1686 return None 1687 1688 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1689 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1690 return exp.OnCommitProperty() 1691 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1692 return exp.OnCommitProperty(delete=True) 1693 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1694 1695 def _parse_distkey(self) -> exp.DistKeyProperty: 1696 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1697 1698 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1699 table = self._parse_table(schema=True) 1700 1701 options = [] 1702 while self._match_texts(("INCLUDING", "EXCLUDING")): 1703 this = self._prev.text.upper() 1704 1705 id_var = self._parse_id_var() 1706 if not id_var: 1707 return None 1708 1709 options.append( 1710 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1711 ) 1712 1713 return self.expression(exp.LikeProperty, this=table, expressions=options) 1714 1715 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1716 return self.expression( 1717 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1718 ) 1719 1720 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1721 self._match(TokenType.EQ) 1722 return self.expression( 1723 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1724 ) 1725 1726 def _parse_returns(self) -> exp.ReturnsProperty: 1727 value: t.Optional[exp.Expression] 1728 is_table = self._match(TokenType.TABLE) 1729 1730 if is_table: 1731 if self._match(TokenType.LT): 1732 value = self.expression( 1733 exp.Schema, 1734 this="TABLE", 1735 expressions=self._parse_csv(self._parse_struct_types), 1736 ) 1737 if not self._match(TokenType.GT): 1738 self.raise_error("Expecting >") 1739 else: 1740 value = self._parse_schema(exp.var("TABLE")) 1741 else: 1742 value = self._parse_types() 1743 1744 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1745 1746 def _parse_describe(self) -> exp.Describe: 1747 kind = self._match_set(self.CREATABLES) and self._prev.text 1748 this = self._parse_table() 1749 return self.expression(exp.Describe, this=this, kind=kind) 1750 1751 def _parse_insert(self) -> exp.Insert: 1752 comments = ensure_list(self._prev_comments) 1753 overwrite = self._match(TokenType.OVERWRITE) 1754 ignore = self._match(TokenType.IGNORE) 1755 local = self._match_text_seq("LOCAL") 1756 alternative = None 1757 1758 if self._match_text_seq("DIRECTORY"): 1759 this: t.Optional[exp.Expression] = self.expression( 1760 exp.Directory, 1761 this=self._parse_var_or_string(), 1762 local=local, 1763 row_format=self._parse_row_format(match_row=True), 1764 ) 1765 else: 1766 if self._match(TokenType.OR): 1767 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1768 1769 self._match(TokenType.INTO) 1770 comments += ensure_list(self._prev_comments) 1771 self._match(TokenType.TABLE) 1772 this = self._parse_table(schema=True) 1773 1774 returning = self._parse_returning() 1775 1776 return self.expression( 1777 exp.Insert, 1778 comments=comments, 1779 this=this, 1780 exists=self._parse_exists(), 1781 partition=self._parse_partition(), 1782 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1783 and self._parse_conjunction(), 1784 expression=self._parse_ddl_select(), 1785 conflict=self._parse_on_conflict(), 1786 returning=returning or self._parse_returning(), 1787 overwrite=overwrite, 1788 alternative=alternative, 1789 ignore=ignore, 1790 ) 1791 1792 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1793 conflict = self._match_text_seq("ON", "CONFLICT") 1794 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1795 1796 if not conflict and not duplicate: 1797 return None 1798 1799 nothing = None 1800 expressions = None 1801 key = None 1802 constraint = None 1803 1804 if conflict: 1805 if self._match_text_seq("ON", "CONSTRAINT"): 1806 constraint = self._parse_id_var() 1807 else: 1808 key = self._parse_csv(self._parse_value) 1809 1810 self._match_text_seq("DO") 1811 if self._match_text_seq("NOTHING"): 1812 nothing = True 1813 else: 1814 self._match(TokenType.UPDATE) 1815 self._match(TokenType.SET) 1816 expressions = self._parse_csv(self._parse_equality) 1817 1818 return self.expression( 1819 exp.OnConflict, 1820 duplicate=duplicate, 1821 expressions=expressions, 1822 nothing=nothing, 1823 key=key, 1824 constraint=constraint, 1825 ) 1826 1827 def _parse_returning(self) -> t.Optional[exp.Returning]: 1828 if not self._match(TokenType.RETURNING): 1829 return None 1830 return self.expression( 1831 exp.Returning, 1832 expressions=self._parse_csv(self._parse_expression), 1833 into=self._match(TokenType.INTO) and self._parse_table_part(), 1834 ) 1835 1836 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1837 if not self._match(TokenType.FORMAT): 1838 return None 1839 return self._parse_row_format() 1840 1841 def _parse_row_format( 1842 self, match_row: bool = False 1843 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1844 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1845 return None 1846 1847 if self._match_text_seq("SERDE"): 1848 this = self._parse_string() 1849 1850 serde_properties = None 1851 if self._match(TokenType.SERDE_PROPERTIES): 1852 serde_properties = self.expression( 1853 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1854 ) 1855 1856 return self.expression( 1857 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1858 ) 1859 1860 self._match_text_seq("DELIMITED") 1861 1862 kwargs = {} 1863 1864 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1865 kwargs["fields"] = self._parse_string() 1866 if self._match_text_seq("ESCAPED", "BY"): 1867 kwargs["escaped"] = self._parse_string() 1868 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1869 kwargs["collection_items"] = self._parse_string() 1870 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1871 kwargs["map_keys"] = self._parse_string() 1872 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1873 kwargs["lines"] = self._parse_string() 1874 if self._match_text_seq("NULL", "DEFINED", "AS"): 1875 kwargs["null"] = self._parse_string() 1876 1877 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1878 1879 def _parse_load(self) -> exp.LoadData | exp.Command: 1880 if self._match_text_seq("DATA"): 1881 local = self._match_text_seq("LOCAL") 1882 self._match_text_seq("INPATH") 1883 inpath = self._parse_string() 1884 overwrite = self._match(TokenType.OVERWRITE) 1885 self._match_pair(TokenType.INTO, TokenType.TABLE) 1886 1887 return self.expression( 1888 exp.LoadData, 1889 this=self._parse_table(schema=True), 1890 local=local, 1891 overwrite=overwrite, 1892 inpath=inpath, 1893 partition=self._parse_partition(), 1894 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1895 serde=self._match_text_seq("SERDE") and self._parse_string(), 1896 ) 1897 return self._parse_as_command(self._prev) 1898 1899 def _parse_delete(self) -> exp.Delete: 1900 # This handles MySQL's "Multiple-Table Syntax" 1901 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1902 tables = None 1903 comments = self._prev_comments 1904 if not self._match(TokenType.FROM, advance=False): 1905 tables = self._parse_csv(self._parse_table) or None 1906 1907 returning = self._parse_returning() 1908 1909 return self.expression( 1910 exp.Delete, 1911 comments=comments, 1912 tables=tables, 1913 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1914 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1915 where=self._parse_where(), 1916 returning=returning or self._parse_returning(), 1917 limit=self._parse_limit(), 1918 ) 1919 1920 def _parse_update(self) -> exp.Update: 1921 comments = self._prev_comments 1922 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1923 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1924 returning = self._parse_returning() 1925 return self.expression( 1926 exp.Update, 1927 comments=comments, 1928 **{ # type: ignore 1929 "this": this, 1930 "expressions": expressions, 1931 "from": self._parse_from(joins=True), 1932 "where": self._parse_where(), 1933 "returning": returning or self._parse_returning(), 1934 "limit": self._parse_limit(), 1935 }, 1936 ) 1937 1938 def _parse_uncache(self) -> exp.Uncache: 1939 if not self._match(TokenType.TABLE): 1940 self.raise_error("Expecting TABLE after UNCACHE") 1941 1942 return self.expression( 1943 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1944 ) 1945 1946 def _parse_cache(self) -> exp.Cache: 1947 lazy = self._match_text_seq("LAZY") 1948 self._match(TokenType.TABLE) 1949 table = self._parse_table(schema=True) 1950 1951 options = [] 1952 if self._match_text_seq("OPTIONS"): 1953 self._match_l_paren() 1954 k = self._parse_string() 1955 self._match(TokenType.EQ) 1956 v = self._parse_string() 1957 options = [k, v] 1958 self._match_r_paren() 1959 1960 self._match(TokenType.ALIAS) 1961 return self.expression( 1962 exp.Cache, 1963 this=table, 1964 lazy=lazy, 1965 options=options, 1966 expression=self._parse_select(nested=True), 1967 ) 1968 1969 def _parse_partition(self) -> t.Optional[exp.Partition]: 1970 if not self._match(TokenType.PARTITION): 1971 return None 1972 1973 return self.expression( 1974 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1975 ) 1976 1977 def _parse_value(self) -> exp.Tuple: 1978 if self._match(TokenType.L_PAREN): 1979 expressions = self._parse_csv(self._parse_conjunction) 1980 self._match_r_paren() 1981 return self.expression(exp.Tuple, expressions=expressions) 1982 1983 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1984 # https://prestodb.io/docs/current/sql/values.html 1985 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1986 1987 def _parse_projections(self) -> t.List[exp.Expression]: 1988 return self._parse_expressions() 1989 1990 def _parse_select( 1991 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1992 ) -> t.Optional[exp.Expression]: 1993 cte = self._parse_with() 1994 if cte: 1995 this = self._parse_statement() 1996 1997 if not this: 1998 self.raise_error("Failed to parse any statement following CTE") 1999 return cte 2000 2001 if "with" in this.arg_types: 2002 this.set("with", cte) 2003 else: 2004 self.raise_error(f"{this.key} does not support CTE") 2005 this = cte 2006 elif self._match(TokenType.SELECT): 2007 comments = self._prev_comments 2008 2009 hint = self._parse_hint() 2010 all_ = self._match(TokenType.ALL) 2011 distinct = self._match_set(self.DISTINCT_TOKENS) 2012 2013 kind = ( 2014 self._match(TokenType.ALIAS) 2015 and self._match_texts(("STRUCT", "VALUE")) 2016 and self._prev.text 2017 ) 2018 2019 if distinct: 2020 distinct = self.expression( 2021 exp.Distinct, 2022 on=self._parse_value() if self._match(TokenType.ON) else None, 2023 ) 2024 2025 if all_ and distinct: 2026 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2027 2028 limit = self._parse_limit(top=True) 2029 projections = self._parse_projections() 2030 2031 this = self.expression( 2032 exp.Select, 2033 kind=kind, 2034 hint=hint, 2035 distinct=distinct, 2036 expressions=projections, 2037 limit=limit, 2038 ) 2039 this.comments = comments 2040 2041 into = self._parse_into() 2042 if into: 2043 this.set("into", into) 2044 2045 from_ = self._parse_from() 2046 if from_: 2047 this.set("from", from_) 2048 2049 this = self._parse_query_modifiers(this) 2050 elif (table or nested) and self._match(TokenType.L_PAREN): 2051 if self._match(TokenType.PIVOT): 2052 this = self._parse_simplified_pivot() 2053 elif self._match(TokenType.FROM): 2054 this = exp.select("*").from_( 2055 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2056 ) 2057 else: 2058 this = self._parse_table() if table else self._parse_select(nested=True) 2059 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2060 2061 self._match_r_paren() 2062 2063 # We return early here so that the UNION isn't attached to the subquery by the 2064 # following call to _parse_set_operations, but instead becomes the parent node 2065 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2066 elif self._match(TokenType.VALUES): 2067 this = self.expression( 2068 exp.Values, 2069 expressions=self._parse_csv(self._parse_value), 2070 alias=self._parse_table_alias(), 2071 ) 2072 else: 2073 this = None 2074 2075 return self._parse_set_operations(this) 2076 2077 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2078 if not skip_with_token and not self._match(TokenType.WITH): 2079 return None 2080 2081 comments = self._prev_comments 2082 recursive = self._match(TokenType.RECURSIVE) 2083 2084 expressions = [] 2085 while True: 2086 expressions.append(self._parse_cte()) 2087 2088 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2089 break 2090 else: 2091 self._match(TokenType.WITH) 2092 2093 return self.expression( 2094 exp.With, comments=comments, expressions=expressions, recursive=recursive 2095 ) 2096 2097 def _parse_cte(self) -> exp.CTE: 2098 alias = self._parse_table_alias() 2099 if not alias or not alias.this: 2100 self.raise_error("Expected CTE to have alias") 2101 2102 self._match(TokenType.ALIAS) 2103 return self.expression( 2104 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2105 ) 2106 2107 def _parse_table_alias( 2108 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2109 ) -> t.Optional[exp.TableAlias]: 2110 any_token = self._match(TokenType.ALIAS) 2111 alias = ( 2112 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2113 or self._parse_string_as_identifier() 2114 ) 2115 2116 index = self._index 2117 if self._match(TokenType.L_PAREN): 2118 columns = self._parse_csv(self._parse_function_parameter) 2119 self._match_r_paren() if columns else self._retreat(index) 2120 else: 2121 columns = None 2122 2123 if not alias and not columns: 2124 return None 2125 2126 return self.expression(exp.TableAlias, this=alias, columns=columns) 2127 2128 def _parse_subquery( 2129 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2130 ) -> t.Optional[exp.Subquery]: 2131 if not this: 2132 return None 2133 2134 return self.expression( 2135 exp.Subquery, 2136 this=this, 2137 pivots=self._parse_pivots(), 2138 alias=self._parse_table_alias() if parse_alias else None, 2139 ) 2140 2141 def _parse_query_modifiers( 2142 self, this: t.Optional[exp.Expression] 2143 ) -> t.Optional[exp.Expression]: 2144 if isinstance(this, self.MODIFIABLES): 2145 for join in iter(self._parse_join, None): 2146 this.append("joins", join) 2147 for lateral in iter(self._parse_lateral, None): 2148 this.append("laterals", lateral) 2149 2150 while True: 2151 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2152 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2153 key, expression = parser(self) 2154 2155 if expression: 2156 this.set(key, expression) 2157 if key == "limit": 2158 offset = expression.args.pop("offset", None) 2159 if offset: 2160 this.set("offset", exp.Offset(expression=offset)) 2161 continue 2162 break 2163 return this 2164 2165 def _parse_hint(self) -> t.Optional[exp.Hint]: 2166 if self._match(TokenType.HINT): 2167 hints = [] 2168 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2169 hints.extend(hint) 2170 2171 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2172 self.raise_error("Expected */ after HINT") 2173 2174 return self.expression(exp.Hint, expressions=hints) 2175 2176 return None 2177 2178 def _parse_into(self) -> t.Optional[exp.Into]: 2179 if not self._match(TokenType.INTO): 2180 return None 2181 2182 temp = self._match(TokenType.TEMPORARY) 2183 unlogged = self._match_text_seq("UNLOGGED") 2184 self._match(TokenType.TABLE) 2185 2186 return self.expression( 2187 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2188 ) 2189 2190 def _parse_from( 2191 self, joins: bool = False, skip_from_token: bool = False 2192 ) -> t.Optional[exp.From]: 2193 if not skip_from_token and not self._match(TokenType.FROM): 2194 return None 2195 2196 return self.expression( 2197 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2198 ) 2199 2200 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2201 if not self._match(TokenType.MATCH_RECOGNIZE): 2202 return None 2203 2204 self._match_l_paren() 2205 2206 partition = self._parse_partition_by() 2207 order = self._parse_order() 2208 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2209 2210 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2211 rows = exp.var("ONE ROW PER MATCH") 2212 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2213 text = "ALL ROWS PER MATCH" 2214 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2215 text += f" SHOW EMPTY MATCHES" 2216 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2217 text += f" OMIT EMPTY MATCHES" 2218 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2219 text += f" WITH UNMATCHED ROWS" 2220 rows = exp.var(text) 2221 else: 2222 rows = None 2223 2224 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2225 text = "AFTER MATCH SKIP" 2226 if self._match_text_seq("PAST", "LAST", "ROW"): 2227 text += f" PAST LAST ROW" 2228 elif self._match_text_seq("TO", "NEXT", "ROW"): 2229 text += f" TO NEXT ROW" 2230 elif self._match_text_seq("TO", "FIRST"): 2231 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2232 elif self._match_text_seq("TO", "LAST"): 2233 text += f" TO LAST {self._advance_any().text}" # type: ignore 2234 after = exp.var(text) 2235 else: 2236 after = None 2237 2238 if self._match_text_seq("PATTERN"): 2239 self._match_l_paren() 2240 2241 if not self._curr: 2242 self.raise_error("Expecting )", self._curr) 2243 2244 paren = 1 2245 start = self._curr 2246 2247 while self._curr and paren > 0: 2248 if self._curr.token_type == TokenType.L_PAREN: 2249 paren += 1 2250 if self._curr.token_type == TokenType.R_PAREN: 2251 paren -= 1 2252 2253 end = self._prev 2254 self._advance() 2255 2256 if paren > 0: 2257 self.raise_error("Expecting )", self._curr) 2258 2259 pattern = exp.var(self._find_sql(start, end)) 2260 else: 2261 pattern = None 2262 2263 define = ( 2264 self._parse_csv( 2265 lambda: self.expression( 2266 exp.Alias, 2267 alias=self._parse_id_var(any_token=True), 2268 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2269 ) 2270 ) 2271 if self._match_text_seq("DEFINE") 2272 else None 2273 ) 2274 2275 self._match_r_paren() 2276 2277 return self.expression( 2278 exp.MatchRecognize, 2279 partition_by=partition, 2280 order=order, 2281 measures=measures, 2282 rows=rows, 2283 after=after, 2284 pattern=pattern, 2285 define=define, 2286 alias=self._parse_table_alias(), 2287 ) 2288 2289 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2290 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2291 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2292 2293 if outer_apply or cross_apply: 2294 this = self._parse_select(table=True) 2295 view = None 2296 outer = not cross_apply 2297 elif self._match(TokenType.LATERAL): 2298 this = self._parse_select(table=True) 2299 view = self._match(TokenType.VIEW) 2300 outer = self._match(TokenType.OUTER) 2301 else: 2302 return None 2303 2304 if not this: 2305 this = ( 2306 self._parse_unnest() 2307 or self._parse_function() 2308 or self._parse_id_var(any_token=False) 2309 ) 2310 2311 while self._match(TokenType.DOT): 2312 this = exp.Dot( 2313 this=this, 2314 expression=self._parse_function() or self._parse_id_var(any_token=False), 2315 ) 2316 2317 if view: 2318 table = self._parse_id_var(any_token=False) 2319 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2320 table_alias: t.Optional[exp.TableAlias] = self.expression( 2321 exp.TableAlias, this=table, columns=columns 2322 ) 2323 elif isinstance(this, exp.Subquery) and this.alias: 2324 # Ensures parity between the Subquery's and the Lateral's "alias" args 2325 table_alias = this.args["alias"].copy() 2326 else: 2327 table_alias = self._parse_table_alias() 2328 2329 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2330 2331 def _parse_join_parts( 2332 self, 2333 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2334 return ( 2335 self._match_set(self.JOIN_METHODS) and self._prev, 2336 self._match_set(self.JOIN_SIDES) and self._prev, 2337 self._match_set(self.JOIN_KINDS) and self._prev, 2338 ) 2339 2340 def _parse_join( 2341 self, skip_join_token: bool = False, parse_bracket: bool = False 2342 ) -> t.Optional[exp.Join]: 2343 if self._match(TokenType.COMMA): 2344 return self.expression(exp.Join, this=self._parse_table()) 2345 2346 index = self._index 2347 method, side, kind = self._parse_join_parts() 2348 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2349 join = self._match(TokenType.JOIN) 2350 2351 if not skip_join_token and not join: 2352 self._retreat(index) 2353 kind = None 2354 method = None 2355 side = None 2356 2357 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2358 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2359 2360 if not skip_join_token and not join and not outer_apply and not cross_apply: 2361 return None 2362 2363 if outer_apply: 2364 side = Token(TokenType.LEFT, "LEFT") 2365 2366 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2367 2368 if method: 2369 kwargs["method"] = method.text 2370 if side: 2371 kwargs["side"] = side.text 2372 if kind: 2373 kwargs["kind"] = kind.text 2374 if hint: 2375 kwargs["hint"] = hint 2376 2377 if self._match(TokenType.ON): 2378 kwargs["on"] = self._parse_conjunction() 2379 elif self._match(TokenType.USING): 2380 kwargs["using"] = self._parse_wrapped_id_vars() 2381 elif not (kind and kind.token_type == TokenType.CROSS): 2382 index = self._index 2383 joins = self._parse_joins() 2384 2385 if joins and self._match(TokenType.ON): 2386 kwargs["on"] = self._parse_conjunction() 2387 elif joins and self._match(TokenType.USING): 2388 kwargs["using"] = self._parse_wrapped_id_vars() 2389 else: 2390 joins = None 2391 self._retreat(index) 2392 2393 kwargs["this"].set("joins", joins) 2394 2395 comments = [c for token in (method, side, kind) if token for c in token.comments] 2396 return self.expression(exp.Join, comments=comments, **kwargs) 2397 2398 def _parse_index( 2399 self, 2400 index: t.Optional[exp.Expression] = None, 2401 ) -> t.Optional[exp.Index]: 2402 if index: 2403 unique = None 2404 primary = None 2405 amp = None 2406 2407 self._match(TokenType.ON) 2408 self._match(TokenType.TABLE) # hive 2409 table = self._parse_table_parts(schema=True) 2410 else: 2411 unique = self._match(TokenType.UNIQUE) 2412 primary = self._match_text_seq("PRIMARY") 2413 amp = self._match_text_seq("AMP") 2414 2415 if not self._match(TokenType.INDEX): 2416 return None 2417 2418 index = self._parse_id_var() 2419 table = None 2420 2421 using = self._parse_field() if self._match(TokenType.USING) else None 2422 2423 if self._match(TokenType.L_PAREN, advance=False): 2424 columns = self._parse_wrapped_csv(self._parse_ordered) 2425 else: 2426 columns = None 2427 2428 return self.expression( 2429 exp.Index, 2430 this=index, 2431 table=table, 2432 using=using, 2433 columns=columns, 2434 unique=unique, 2435 primary=primary, 2436 amp=amp, 2437 partition_by=self._parse_partition_by(), 2438 ) 2439 2440 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2441 hints: t.List[exp.Expression] = [] 2442 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2443 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2444 hints.append( 2445 self.expression( 2446 exp.WithTableHint, 2447 expressions=self._parse_csv( 2448 lambda: self._parse_function() or self._parse_var(any_token=True) 2449 ), 2450 ) 2451 ) 2452 self._match_r_paren() 2453 else: 2454 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2455 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2456 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2457 2458 self._match_texts({"INDEX", "KEY"}) 2459 if self._match(TokenType.FOR): 2460 hint.set("target", self._advance_any() and self._prev.text.upper()) 2461 2462 hint.set("expressions", self._parse_wrapped_id_vars()) 2463 hints.append(hint) 2464 2465 return hints or None 2466 2467 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2468 return ( 2469 (not schema and self._parse_function(optional_parens=False)) 2470 or self._parse_id_var(any_token=False) 2471 or self._parse_string_as_identifier() 2472 or self._parse_placeholder() 2473 ) 2474 2475 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2476 catalog = None 2477 db = None 2478 table = self._parse_table_part(schema=schema) 2479 2480 while self._match(TokenType.DOT): 2481 if catalog: 2482 # This allows nesting the table in arbitrarily many dot expressions if needed 2483 table = self.expression( 2484 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2485 ) 2486 else: 2487 catalog = db 2488 db = table 2489 table = self._parse_table_part(schema=schema) 2490 2491 if not table: 2492 self.raise_error(f"Expected table name but got {self._curr}") 2493 2494 return self.expression( 2495 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2496 ) 2497 2498 def _parse_table( 2499 self, 2500 schema: bool = False, 2501 joins: bool = False, 2502 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2503 parse_bracket: bool = False, 2504 ) -> t.Optional[exp.Expression]: 2505 lateral = self._parse_lateral() 2506 if lateral: 2507 return lateral 2508 2509 unnest = self._parse_unnest() 2510 if unnest: 2511 return unnest 2512 2513 values = self._parse_derived_table_values() 2514 if values: 2515 return values 2516 2517 subquery = self._parse_select(table=True) 2518 if subquery: 2519 if not subquery.args.get("pivots"): 2520 subquery.set("pivots", self._parse_pivots()) 2521 return subquery 2522 2523 bracket = parse_bracket and self._parse_bracket(None) 2524 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2525 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2526 2527 if schema: 2528 return self._parse_schema(this=this) 2529 2530 if self.ALIAS_POST_TABLESAMPLE: 2531 table_sample = self._parse_table_sample() 2532 2533 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2534 if alias: 2535 this.set("alias", alias) 2536 2537 if not this.args.get("pivots"): 2538 this.set("pivots", self._parse_pivots()) 2539 2540 this.set("hints", self._parse_table_hints()) 2541 2542 if not self.ALIAS_POST_TABLESAMPLE: 2543 table_sample = self._parse_table_sample() 2544 2545 if table_sample: 2546 table_sample.set("this", this) 2547 this = table_sample 2548 2549 if joins: 2550 for join in iter(self._parse_join, None): 2551 this.append("joins", join) 2552 2553 return this 2554 2555 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2556 if not self._match(TokenType.UNNEST): 2557 return None 2558 2559 expressions = self._parse_wrapped_csv(self._parse_type) 2560 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2561 2562 alias = self._parse_table_alias() if with_alias else None 2563 2564 if alias and self.UNNEST_COLUMN_ONLY: 2565 if alias.args.get("columns"): 2566 self.raise_error("Unexpected extra column alias in unnest.") 2567 2568 alias.set("columns", [alias.this]) 2569 alias.set("this", None) 2570 2571 offset = None 2572 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2573 self._match(TokenType.ALIAS) 2574 offset = self._parse_id_var() or exp.to_identifier("offset") 2575 2576 return self.expression( 2577 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2578 ) 2579 2580 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2581 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2582 if not is_derived and not self._match(TokenType.VALUES): 2583 return None 2584 2585 expressions = self._parse_csv(self._parse_value) 2586 alias = self._parse_table_alias() 2587 2588 if is_derived: 2589 self._match_r_paren() 2590 2591 return self.expression( 2592 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2593 ) 2594 2595 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2596 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2597 as_modifier and self._match_text_seq("USING", "SAMPLE") 2598 ): 2599 return None 2600 2601 bucket_numerator = None 2602 bucket_denominator = None 2603 bucket_field = None 2604 percent = None 2605 rows = None 2606 size = None 2607 seed = None 2608 2609 kind = ( 2610 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2611 ) 2612 method = self._parse_var(tokens=(TokenType.ROW,)) 2613 2614 self._match(TokenType.L_PAREN) 2615 2616 num = self._parse_number() 2617 2618 if self._match_text_seq("BUCKET"): 2619 bucket_numerator = self._parse_number() 2620 self._match_text_seq("OUT", "OF") 2621 bucket_denominator = bucket_denominator = self._parse_number() 2622 self._match(TokenType.ON) 2623 bucket_field = self._parse_field() 2624 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2625 percent = num 2626 elif self._match(TokenType.ROWS): 2627 rows = num 2628 else: 2629 size = num 2630 2631 self._match(TokenType.R_PAREN) 2632 2633 if self._match(TokenType.L_PAREN): 2634 method = self._parse_var() 2635 seed = self._match(TokenType.COMMA) and self._parse_number() 2636 self._match_r_paren() 2637 elif self._match_texts(("SEED", "REPEATABLE")): 2638 seed = self._parse_wrapped(self._parse_number) 2639 2640 return self.expression( 2641 exp.TableSample, 2642 method=method, 2643 bucket_numerator=bucket_numerator, 2644 bucket_denominator=bucket_denominator, 2645 bucket_field=bucket_field, 2646 percent=percent, 2647 rows=rows, 2648 size=size, 2649 seed=seed, 2650 kind=kind, 2651 ) 2652 2653 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2654 return list(iter(self._parse_pivot, None)) or None 2655 2656 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2657 return list(iter(self._parse_join, None)) or None 2658 2659 # https://duckdb.org/docs/sql/statements/pivot 2660 def _parse_simplified_pivot(self) -> exp.Pivot: 2661 def _parse_on() -> t.Optional[exp.Expression]: 2662 this = self._parse_bitwise() 2663 return self._parse_in(this) if self._match(TokenType.IN) else this 2664 2665 this = self._parse_table() 2666 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2667 using = self._match(TokenType.USING) and self._parse_csv( 2668 lambda: self._parse_alias(self._parse_function()) 2669 ) 2670 group = self._parse_group() 2671 return self.expression( 2672 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2673 ) 2674 2675 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2676 index = self._index 2677 include_nulls = None 2678 2679 if self._match(TokenType.PIVOT): 2680 unpivot = False 2681 elif self._match(TokenType.UNPIVOT): 2682 unpivot = True 2683 2684 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2685 if self._match_text_seq("INCLUDE", "NULLS"): 2686 include_nulls = True 2687 elif self._match_text_seq("EXCLUDE", "NULLS"): 2688 include_nulls = False 2689 else: 2690 return None 2691 2692 expressions = [] 2693 field = None 2694 2695 if not self._match(TokenType.L_PAREN): 2696 self._retreat(index) 2697 return None 2698 2699 if unpivot: 2700 expressions = self._parse_csv(self._parse_column) 2701 else: 2702 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2703 2704 if not expressions: 2705 self.raise_error("Failed to parse PIVOT's aggregation list") 2706 2707 if not self._match(TokenType.FOR): 2708 self.raise_error("Expecting FOR") 2709 2710 value = self._parse_column() 2711 2712 if not self._match(TokenType.IN): 2713 self.raise_error("Expecting IN") 2714 2715 field = self._parse_in(value, alias=True) 2716 2717 self._match_r_paren() 2718 2719 pivot = self.expression( 2720 exp.Pivot, 2721 expressions=expressions, 2722 field=field, 2723 unpivot=unpivot, 2724 include_nulls=include_nulls, 2725 ) 2726 2727 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2728 pivot.set("alias", self._parse_table_alias()) 2729 2730 if not unpivot: 2731 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2732 2733 columns: t.List[exp.Expression] = [] 2734 for fld in pivot.args["field"].expressions: 2735 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2736 for name in names: 2737 if self.PREFIXED_PIVOT_COLUMNS: 2738 name = f"{name}_{field_name}" if name else field_name 2739 else: 2740 name = f"{field_name}_{name}" if name else field_name 2741 2742 columns.append(exp.to_identifier(name)) 2743 2744 pivot.set("columns", columns) 2745 2746 return pivot 2747 2748 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2749 return [agg.alias for agg in aggregations] 2750 2751 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2752 if not skip_where_token and not self._match(TokenType.WHERE): 2753 return None 2754 2755 return self.expression( 2756 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2757 ) 2758 2759 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2760 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2761 return None 2762 2763 elements = defaultdict(list) 2764 2765 if self._match(TokenType.ALL): 2766 return self.expression(exp.Group, all=True) 2767 2768 while True: 2769 expressions = self._parse_csv(self._parse_conjunction) 2770 if expressions: 2771 elements["expressions"].extend(expressions) 2772 2773 grouping_sets = self._parse_grouping_sets() 2774 if grouping_sets: 2775 elements["grouping_sets"].extend(grouping_sets) 2776 2777 rollup = None 2778 cube = None 2779 totals = None 2780 2781 with_ = self._match(TokenType.WITH) 2782 if self._match(TokenType.ROLLUP): 2783 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2784 elements["rollup"].extend(ensure_list(rollup)) 2785 2786 if self._match(TokenType.CUBE): 2787 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2788 elements["cube"].extend(ensure_list(cube)) 2789 2790 if self._match_text_seq("TOTALS"): 2791 totals = True 2792 elements["totals"] = True # type: ignore 2793 2794 if not (grouping_sets or rollup or cube or totals): 2795 break 2796 2797 return self.expression(exp.Group, **elements) # type: ignore 2798 2799 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2800 if not self._match(TokenType.GROUPING_SETS): 2801 return None 2802 2803 return self._parse_wrapped_csv(self._parse_grouping_set) 2804 2805 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2806 if self._match(TokenType.L_PAREN): 2807 grouping_set = self._parse_csv(self._parse_column) 2808 self._match_r_paren() 2809 return self.expression(exp.Tuple, expressions=grouping_set) 2810 2811 return self._parse_column() 2812 2813 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2814 if not skip_having_token and not self._match(TokenType.HAVING): 2815 return None 2816 return self.expression(exp.Having, this=self._parse_conjunction()) 2817 2818 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2819 if not self._match(TokenType.QUALIFY): 2820 return None 2821 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2822 2823 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2824 if skip_start_token: 2825 start = None 2826 elif self._match(TokenType.START_WITH): 2827 start = self._parse_conjunction() 2828 else: 2829 return None 2830 2831 self._match(TokenType.CONNECT_BY) 2832 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2833 exp.Prior, this=self._parse_bitwise() 2834 ) 2835 connect = self._parse_conjunction() 2836 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2837 return self.expression(exp.Connect, start=start, connect=connect) 2838 2839 def _parse_order( 2840 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2841 ) -> t.Optional[exp.Expression]: 2842 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2843 return this 2844 2845 return self.expression( 2846 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2847 ) 2848 2849 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2850 if not self._match(token): 2851 return None 2852 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2853 2854 def _parse_ordered(self) -> exp.Ordered: 2855 this = self._parse_conjunction() 2856 self._match(TokenType.ASC) 2857 2858 is_desc = self._match(TokenType.DESC) 2859 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2860 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2861 desc = is_desc or False 2862 asc = not desc 2863 nulls_first = is_nulls_first or False 2864 explicitly_null_ordered = is_nulls_first or is_nulls_last 2865 2866 if ( 2867 not explicitly_null_ordered 2868 and ( 2869 (asc and self.NULL_ORDERING == "nulls_are_small") 2870 or (desc and self.NULL_ORDERING != "nulls_are_small") 2871 ) 2872 and self.NULL_ORDERING != "nulls_are_last" 2873 ): 2874 nulls_first = True 2875 2876 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2877 2878 def _parse_limit( 2879 self, this: t.Optional[exp.Expression] = None, top: bool = False 2880 ) -> t.Optional[exp.Expression]: 2881 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2882 comments = self._prev_comments 2883 if top: 2884 limit_paren = self._match(TokenType.L_PAREN) 2885 expression = self._parse_number() 2886 2887 if limit_paren: 2888 self._match_r_paren() 2889 else: 2890 expression = self._parse_term() 2891 2892 if self._match(TokenType.COMMA): 2893 offset = expression 2894 expression = self._parse_term() 2895 else: 2896 offset = None 2897 2898 limit_exp = self.expression( 2899 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2900 ) 2901 2902 return limit_exp 2903 2904 if self._match(TokenType.FETCH): 2905 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2906 direction = self._prev.text if direction else "FIRST" 2907 2908 count = self._parse_number() 2909 percent = self._match(TokenType.PERCENT) 2910 2911 self._match_set((TokenType.ROW, TokenType.ROWS)) 2912 2913 only = self._match_text_seq("ONLY") 2914 with_ties = self._match_text_seq("WITH", "TIES") 2915 2916 if only and with_ties: 2917 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2918 2919 return self.expression( 2920 exp.Fetch, 2921 direction=direction, 2922 count=count, 2923 percent=percent, 2924 with_ties=with_ties, 2925 ) 2926 2927 return this 2928 2929 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2930 if not self._match(TokenType.OFFSET): 2931 return this 2932 2933 count = self._parse_term() 2934 self._match_set((TokenType.ROW, TokenType.ROWS)) 2935 return self.expression(exp.Offset, this=this, expression=count) 2936 2937 def _parse_locks(self) -> t.List[exp.Lock]: 2938 locks = [] 2939 while True: 2940 if self._match_text_seq("FOR", "UPDATE"): 2941 update = True 2942 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2943 "LOCK", "IN", "SHARE", "MODE" 2944 ): 2945 update = False 2946 else: 2947 break 2948 2949 expressions = None 2950 if self._match_text_seq("OF"): 2951 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2952 2953 wait: t.Optional[bool | exp.Expression] = None 2954 if self._match_text_seq("NOWAIT"): 2955 wait = True 2956 elif self._match_text_seq("WAIT"): 2957 wait = self._parse_primary() 2958 elif self._match_text_seq("SKIP", "LOCKED"): 2959 wait = False 2960 2961 locks.append( 2962 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2963 ) 2964 2965 return locks 2966 2967 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2968 if not self._match_set(self.SET_OPERATIONS): 2969 return this 2970 2971 token_type = self._prev.token_type 2972 2973 if token_type == TokenType.UNION: 2974 expression = exp.Union 2975 elif token_type == TokenType.EXCEPT: 2976 expression = exp.Except 2977 else: 2978 expression = exp.Intersect 2979 2980 return self.expression( 2981 expression, 2982 this=this, 2983 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2984 expression=self._parse_set_operations(self._parse_select(nested=True)), 2985 ) 2986 2987 def _parse_expression(self) -> t.Optional[exp.Expression]: 2988 return self._parse_alias(self._parse_conjunction()) 2989 2990 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2991 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2992 2993 def _parse_equality(self) -> t.Optional[exp.Expression]: 2994 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2995 2996 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2997 return self._parse_tokens(self._parse_range, self.COMPARISON) 2998 2999 def _parse_range(self) -> t.Optional[exp.Expression]: 3000 this = self._parse_bitwise() 3001 negate = self._match(TokenType.NOT) 3002 3003 if self._match_set(self.RANGE_PARSERS): 3004 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3005 if not expression: 3006 return this 3007 3008 this = expression 3009 elif self._match(TokenType.ISNULL): 3010 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3011 3012 # Postgres supports ISNULL and NOTNULL for conditions. 3013 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3014 if self._match(TokenType.NOTNULL): 3015 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3016 this = self.expression(exp.Not, this=this) 3017 3018 if negate: 3019 this = self.expression(exp.Not, this=this) 3020 3021 if self._match(TokenType.IS): 3022 this = self._parse_is(this) 3023 3024 return this 3025 3026 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3027 index = self._index - 1 3028 negate = self._match(TokenType.NOT) 3029 3030 if self._match_text_seq("DISTINCT", "FROM"): 3031 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3032 return self.expression(klass, this=this, expression=self._parse_expression()) 3033 3034 expression = self._parse_null() or self._parse_boolean() 3035 if not expression: 3036 self._retreat(index) 3037 return None 3038 3039 this = self.expression(exp.Is, this=this, expression=expression) 3040 return self.expression(exp.Not, this=this) if negate else this 3041 3042 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3043 unnest = self._parse_unnest(with_alias=False) 3044 if unnest: 3045 this = self.expression(exp.In, this=this, unnest=unnest) 3046 elif self._match(TokenType.L_PAREN): 3047 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3048 3049 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3050 this = self.expression(exp.In, this=this, query=expressions[0]) 3051 else: 3052 this = self.expression(exp.In, this=this, expressions=expressions) 3053 3054 self._match_r_paren(this) 3055 else: 3056 this = self.expression(exp.In, this=this, field=self._parse_field()) 3057 3058 return this 3059 3060 def _parse_between(self, this: exp.Expression) -> exp.Between: 3061 low = self._parse_bitwise() 3062 self._match(TokenType.AND) 3063 high = self._parse_bitwise() 3064 return self.expression(exp.Between, this=this, low=low, high=high) 3065 3066 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3067 if not self._match(TokenType.ESCAPE): 3068 return this 3069 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3070 3071 def _parse_interval(self) -> t.Optional[exp.Interval]: 3072 index = self._index 3073 3074 if not self._match(TokenType.INTERVAL): 3075 return None 3076 3077 if self._match(TokenType.STRING, advance=False): 3078 this = self._parse_primary() 3079 else: 3080 this = self._parse_term() 3081 3082 if not this: 3083 self._retreat(index) 3084 return None 3085 3086 unit = self._parse_function() or self._parse_var() 3087 3088 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3089 # each INTERVAL expression into this canonical form so it's easy to transpile 3090 if this and this.is_number: 3091 this = exp.Literal.string(this.name) 3092 elif this and this.is_string: 3093 parts = this.name.split() 3094 3095 if len(parts) == 2: 3096 if unit: 3097 # this is not actually a unit, it's something else 3098 unit = None 3099 self._retreat(self._index - 1) 3100 else: 3101 this = exp.Literal.string(parts[0]) 3102 unit = self.expression(exp.Var, this=parts[1]) 3103 3104 return self.expression(exp.Interval, this=this, unit=unit) 3105 3106 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3107 this = self._parse_term() 3108 3109 while True: 3110 if self._match_set(self.BITWISE): 3111 this = self.expression( 3112 self.BITWISE[self._prev.token_type], 3113 this=this, 3114 expression=self._parse_term(), 3115 ) 3116 elif self._match(TokenType.DQMARK): 3117 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3118 elif self._match_pair(TokenType.LT, TokenType.LT): 3119 this = self.expression( 3120 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3121 ) 3122 elif self._match_pair(TokenType.GT, TokenType.GT): 3123 this = self.expression( 3124 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3125 ) 3126 else: 3127 break 3128 3129 return this 3130 3131 def _parse_term(self) -> t.Optional[exp.Expression]: 3132 return self._parse_tokens(self._parse_factor, self.TERM) 3133 3134 def _parse_factor(self) -> t.Optional[exp.Expression]: 3135 return self._parse_tokens(self._parse_unary, self.FACTOR) 3136 3137 def _parse_unary(self) -> t.Optional[exp.Expression]: 3138 if self._match_set(self.UNARY_PARSERS): 3139 return self.UNARY_PARSERS[self._prev.token_type](self) 3140 return self._parse_at_time_zone(self._parse_type()) 3141 3142 def _parse_type(self) -> t.Optional[exp.Expression]: 3143 interval = self._parse_interval() 3144 if interval: 3145 return interval 3146 3147 index = self._index 3148 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3149 this = self._parse_column() 3150 3151 if data_type: 3152 if isinstance(this, exp.Literal): 3153 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3154 if parser: 3155 return parser(self, this, data_type) 3156 return self.expression(exp.Cast, this=this, to=data_type) 3157 if not data_type.expressions: 3158 self._retreat(index) 3159 return self._parse_column() 3160 return self._parse_column_ops(data_type) 3161 3162 return this 3163 3164 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3165 this = self._parse_type() 3166 if not this: 3167 return None 3168 3169 return self.expression( 3170 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3171 ) 3172 3173 def _parse_types( 3174 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3175 ) -> t.Optional[exp.Expression]: 3176 index = self._index 3177 3178 prefix = self._match_text_seq("SYSUDTLIB", ".") 3179 3180 if not self._match_set(self.TYPE_TOKENS): 3181 identifier = allow_identifiers and self._parse_id_var( 3182 any_token=False, tokens=(TokenType.VAR,) 3183 ) 3184 3185 if identifier: 3186 tokens = self._tokenizer.tokenize(identifier.name) 3187 3188 if len(tokens) != 1: 3189 self.raise_error("Unexpected identifier", self._prev) 3190 3191 if tokens[0].token_type in self.TYPE_TOKENS: 3192 self._prev = tokens[0] 3193 elif self.SUPPORTS_USER_DEFINED_TYPES: 3194 return identifier 3195 else: 3196 return None 3197 else: 3198 return None 3199 3200 type_token = self._prev.token_type 3201 3202 if type_token == TokenType.PSEUDO_TYPE: 3203 return self.expression(exp.PseudoType, this=self._prev.text) 3204 3205 nested = type_token in self.NESTED_TYPE_TOKENS 3206 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3207 expressions = None 3208 maybe_func = False 3209 3210 if self._match(TokenType.L_PAREN): 3211 if is_struct: 3212 expressions = self._parse_csv(self._parse_struct_types) 3213 elif nested: 3214 expressions = self._parse_csv( 3215 lambda: self._parse_types( 3216 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3217 ) 3218 ) 3219 elif type_token in self.ENUM_TYPE_TOKENS: 3220 expressions = self._parse_csv(self._parse_equality) 3221 else: 3222 expressions = self._parse_csv(self._parse_type_size) 3223 3224 if not expressions or not self._match(TokenType.R_PAREN): 3225 self._retreat(index) 3226 return None 3227 3228 maybe_func = True 3229 3230 this: t.Optional[exp.Expression] = None 3231 values: t.Optional[t.List[exp.Expression]] = None 3232 3233 if nested and self._match(TokenType.LT): 3234 if is_struct: 3235 expressions = self._parse_csv(self._parse_struct_types) 3236 else: 3237 expressions = self._parse_csv( 3238 lambda: self._parse_types( 3239 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3240 ) 3241 ) 3242 3243 if not self._match(TokenType.GT): 3244 self.raise_error("Expecting >") 3245 3246 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3247 values = self._parse_csv(self._parse_conjunction) 3248 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3249 3250 if type_token in self.TIMESTAMPS: 3251 if self._match_text_seq("WITH", "TIME", "ZONE"): 3252 maybe_func = False 3253 tz_type = ( 3254 exp.DataType.Type.TIMETZ 3255 if type_token in self.TIMES 3256 else exp.DataType.Type.TIMESTAMPTZ 3257 ) 3258 this = exp.DataType(this=tz_type, expressions=expressions) 3259 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3260 maybe_func = False 3261 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3262 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3263 maybe_func = False 3264 elif type_token == TokenType.INTERVAL: 3265 if self._match_text_seq("YEAR", "TO", "MONTH"): 3266 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3267 elif self._match_text_seq("DAY", "TO", "SECOND"): 3268 span = [exp.IntervalDayToSecondSpan()] 3269 else: 3270 span = None 3271 3272 unit = not span and self._parse_var() 3273 if not unit: 3274 this = self.expression( 3275 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3276 ) 3277 else: 3278 this = self.expression(exp.Interval, unit=unit) 3279 3280 if maybe_func and check_func: 3281 index2 = self._index 3282 peek = self._parse_string() 3283 3284 if not peek: 3285 self._retreat(index) 3286 return None 3287 3288 self._retreat(index2) 3289 3290 if not this: 3291 this = exp.DataType( 3292 this=exp.DataType.Type[type_token.value], 3293 expressions=expressions, 3294 nested=nested, 3295 values=values, 3296 prefix=prefix, 3297 ) 3298 3299 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3300 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3301 3302 return this 3303 3304 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3305 this = self._parse_type() or self._parse_id_var() 3306 self._match(TokenType.COLON) 3307 return self._parse_column_def(this) 3308 3309 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3310 if not self._match_text_seq("AT", "TIME", "ZONE"): 3311 return this 3312 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3313 3314 def _parse_column(self) -> t.Optional[exp.Expression]: 3315 this = self._parse_field() 3316 if isinstance(this, exp.Identifier): 3317 this = self.expression(exp.Column, this=this) 3318 elif not this: 3319 return self._parse_bracket(this) 3320 return self._parse_column_ops(this) 3321 3322 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3323 this = self._parse_bracket(this) 3324 3325 while self._match_set(self.COLUMN_OPERATORS): 3326 op_token = self._prev.token_type 3327 op = self.COLUMN_OPERATORS.get(op_token) 3328 3329 if op_token == TokenType.DCOLON: 3330 field = self._parse_types() 3331 if not field: 3332 self.raise_error("Expected type") 3333 elif op and self._curr: 3334 self._advance() 3335 value = self._prev.text 3336 field = ( 3337 exp.Literal.number(value) 3338 if self._prev.token_type == TokenType.NUMBER 3339 else exp.Literal.string(value) 3340 ) 3341 else: 3342 field = self._parse_field(anonymous_func=True, any_token=True) 3343 3344 if isinstance(field, exp.Func): 3345 # bigquery allows function calls like x.y.count(...) 3346 # SAFE.SUBSTR(...) 3347 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3348 this = self._replace_columns_with_dots(this) 3349 3350 if op: 3351 this = op(self, this, field) 3352 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3353 this = self.expression( 3354 exp.Column, 3355 this=field, 3356 table=this.this, 3357 db=this.args.get("table"), 3358 catalog=this.args.get("db"), 3359 ) 3360 else: 3361 this = self.expression(exp.Dot, this=this, expression=field) 3362 this = self._parse_bracket(this) 3363 return this 3364 3365 def _parse_primary(self) -> t.Optional[exp.Expression]: 3366 if self._match_set(self.PRIMARY_PARSERS): 3367 token_type = self._prev.token_type 3368 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3369 3370 if token_type == TokenType.STRING: 3371 expressions = [primary] 3372 while self._match(TokenType.STRING): 3373 expressions.append(exp.Literal.string(self._prev.text)) 3374 3375 if len(expressions) > 1: 3376 return self.expression(exp.Concat, expressions=expressions) 3377 3378 return primary 3379 3380 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3381 return exp.Literal.number(f"0.{self._prev.text}") 3382 3383 if self._match(TokenType.L_PAREN): 3384 comments = self._prev_comments 3385 query = self._parse_select() 3386 3387 if query: 3388 expressions = [query] 3389 else: 3390 expressions = self._parse_expressions() 3391 3392 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3393 3394 if isinstance(this, exp.Subqueryable): 3395 this = self._parse_set_operations( 3396 self._parse_subquery(this=this, parse_alias=False) 3397 ) 3398 elif len(expressions) > 1: 3399 this = self.expression(exp.Tuple, expressions=expressions) 3400 else: 3401 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3402 3403 if this: 3404 this.add_comments(comments) 3405 3406 self._match_r_paren(expression=this) 3407 return this 3408 3409 return None 3410 3411 def _parse_field( 3412 self, 3413 any_token: bool = False, 3414 tokens: t.Optional[t.Collection[TokenType]] = None, 3415 anonymous_func: bool = False, 3416 ) -> t.Optional[exp.Expression]: 3417 return ( 3418 self._parse_primary() 3419 or self._parse_function(anonymous=anonymous_func) 3420 or self._parse_id_var(any_token=any_token, tokens=tokens) 3421 ) 3422 3423 def _parse_function( 3424 self, 3425 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3426 anonymous: bool = False, 3427 optional_parens: bool = True, 3428 ) -> t.Optional[exp.Expression]: 3429 if not self._curr: 3430 return None 3431 3432 token_type = self._curr.token_type 3433 this = self._curr.text 3434 upper = this.upper() 3435 3436 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3437 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3438 self._advance() 3439 return parser(self) 3440 3441 if not self._next or self._next.token_type != TokenType.L_PAREN: 3442 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3443 self._advance() 3444 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3445 3446 return None 3447 3448 if token_type not in self.FUNC_TOKENS: 3449 return None 3450 3451 self._advance(2) 3452 3453 parser = self.FUNCTION_PARSERS.get(upper) 3454 if parser and not anonymous: 3455 this = parser(self) 3456 else: 3457 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3458 3459 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3460 this = self.expression(subquery_predicate, this=self._parse_select()) 3461 self._match_r_paren() 3462 return this 3463 3464 if functions is None: 3465 functions = self.FUNCTIONS 3466 3467 function = functions.get(upper) 3468 3469 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3470 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3471 3472 if function and not anonymous: 3473 func = self.validate_expression(function(args), args) 3474 if not self.NORMALIZE_FUNCTIONS: 3475 func.meta["name"] = this 3476 this = func 3477 else: 3478 this = self.expression(exp.Anonymous, this=this, expressions=args) 3479 3480 self._match_r_paren(this) 3481 return self._parse_window(this) 3482 3483 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3484 return self._parse_column_def(self._parse_id_var()) 3485 3486 def _parse_user_defined_function( 3487 self, kind: t.Optional[TokenType] = None 3488 ) -> t.Optional[exp.Expression]: 3489 this = self._parse_id_var() 3490 3491 while self._match(TokenType.DOT): 3492 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3493 3494 if not self._match(TokenType.L_PAREN): 3495 return this 3496 3497 expressions = self._parse_csv(self._parse_function_parameter) 3498 self._match_r_paren() 3499 return self.expression( 3500 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3501 ) 3502 3503 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3504 literal = self._parse_primary() 3505 if literal: 3506 return self.expression(exp.Introducer, this=token.text, expression=literal) 3507 3508 return self.expression(exp.Identifier, this=token.text) 3509 3510 def _parse_session_parameter(self) -> exp.SessionParameter: 3511 kind = None 3512 this = self._parse_id_var() or self._parse_primary() 3513 3514 if this and self._match(TokenType.DOT): 3515 kind = this.name 3516 this = self._parse_var() or self._parse_primary() 3517 3518 return self.expression(exp.SessionParameter, this=this, kind=kind) 3519 3520 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3521 index = self._index 3522 3523 if self._match(TokenType.L_PAREN): 3524 expressions = t.cast( 3525 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3526 ) 3527 3528 if not self._match(TokenType.R_PAREN): 3529 self._retreat(index) 3530 else: 3531 expressions = [self._parse_id_var()] 3532 3533 if self._match_set(self.LAMBDAS): 3534 return self.LAMBDAS[self._prev.token_type](self, expressions) 3535 3536 self._retreat(index) 3537 3538 this: t.Optional[exp.Expression] 3539 3540 if self._match(TokenType.DISTINCT): 3541 this = self.expression( 3542 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3543 ) 3544 else: 3545 this = self._parse_select_or_expression(alias=alias) 3546 3547 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3548 3549 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3550 index = self._index 3551 3552 if not self.errors: 3553 try: 3554 if self._parse_select(nested=True): 3555 return this 3556 except ParseError: 3557 pass 3558 finally: 3559 self.errors.clear() 3560 self._retreat(index) 3561 3562 if not self._match(TokenType.L_PAREN): 3563 return this 3564 3565 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3566 3567 self._match_r_paren() 3568 return self.expression(exp.Schema, this=this, expressions=args) 3569 3570 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3571 return self._parse_column_def(self._parse_field(any_token=True)) 3572 3573 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3574 # column defs are not really columns, they're identifiers 3575 if isinstance(this, exp.Column): 3576 this = this.this 3577 3578 kind = self._parse_types(schema=True) 3579 3580 if self._match_text_seq("FOR", "ORDINALITY"): 3581 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3582 3583 constraints: t.List[exp.Expression] = [] 3584 3585 if not kind and self._match(TokenType.ALIAS): 3586 constraints.append( 3587 self.expression( 3588 exp.ComputedColumnConstraint, 3589 this=self._parse_conjunction(), 3590 persisted=self._match_text_seq("PERSISTED"), 3591 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3592 ) 3593 ) 3594 3595 while True: 3596 constraint = self._parse_column_constraint() 3597 if not constraint: 3598 break 3599 constraints.append(constraint) 3600 3601 if not kind and not constraints: 3602 return this 3603 3604 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3605 3606 def _parse_auto_increment( 3607 self, 3608 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3609 start = None 3610 increment = None 3611 3612 if self._match(TokenType.L_PAREN, advance=False): 3613 args = self._parse_wrapped_csv(self._parse_bitwise) 3614 start = seq_get(args, 0) 3615 increment = seq_get(args, 1) 3616 elif self._match_text_seq("START"): 3617 start = self._parse_bitwise() 3618 self._match_text_seq("INCREMENT") 3619 increment = self._parse_bitwise() 3620 3621 if start and increment: 3622 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3623 3624 return exp.AutoIncrementColumnConstraint() 3625 3626 def _parse_compress(self) -> exp.CompressColumnConstraint: 3627 if self._match(TokenType.L_PAREN, advance=False): 3628 return self.expression( 3629 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3630 ) 3631 3632 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3633 3634 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3635 if self._match_text_seq("BY", "DEFAULT"): 3636 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3637 this = self.expression( 3638 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3639 ) 3640 else: 3641 self._match_text_seq("ALWAYS") 3642 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3643 3644 self._match(TokenType.ALIAS) 3645 identity = self._match_text_seq("IDENTITY") 3646 3647 if self._match(TokenType.L_PAREN): 3648 if self._match(TokenType.START_WITH): 3649 this.set("start", self._parse_bitwise()) 3650 if self._match_text_seq("INCREMENT", "BY"): 3651 this.set("increment", self._parse_bitwise()) 3652 if self._match_text_seq("MINVALUE"): 3653 this.set("minvalue", self._parse_bitwise()) 3654 if self._match_text_seq("MAXVALUE"): 3655 this.set("maxvalue", self._parse_bitwise()) 3656 3657 if self._match_text_seq("CYCLE"): 3658 this.set("cycle", True) 3659 elif self._match_text_seq("NO", "CYCLE"): 3660 this.set("cycle", False) 3661 3662 if not identity: 3663 this.set("expression", self._parse_bitwise()) 3664 3665 self._match_r_paren() 3666 3667 return this 3668 3669 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3670 self._match_text_seq("LENGTH") 3671 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3672 3673 def _parse_not_constraint( 3674 self, 3675 ) -> t.Optional[exp.Expression]: 3676 if self._match_text_seq("NULL"): 3677 return self.expression(exp.NotNullColumnConstraint) 3678 if self._match_text_seq("CASESPECIFIC"): 3679 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3680 if self._match_text_seq("FOR", "REPLICATION"): 3681 return self.expression(exp.NotForReplicationColumnConstraint) 3682 return None 3683 3684 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3685 if self._match(TokenType.CONSTRAINT): 3686 this = self._parse_id_var() 3687 else: 3688 this = None 3689 3690 if self._match_texts(self.CONSTRAINT_PARSERS): 3691 return self.expression( 3692 exp.ColumnConstraint, 3693 this=this, 3694 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3695 ) 3696 3697 return this 3698 3699 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3700 if not self._match(TokenType.CONSTRAINT): 3701 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3702 3703 this = self._parse_id_var() 3704 expressions = [] 3705 3706 while True: 3707 constraint = self._parse_unnamed_constraint() or self._parse_function() 3708 if not constraint: 3709 break 3710 expressions.append(constraint) 3711 3712 return self.expression(exp.Constraint, this=this, expressions=expressions) 3713 3714 def _parse_unnamed_constraint( 3715 self, constraints: t.Optional[t.Collection[str]] = None 3716 ) -> t.Optional[exp.Expression]: 3717 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3718 return None 3719 3720 constraint = self._prev.text.upper() 3721 if constraint not in self.CONSTRAINT_PARSERS: 3722 self.raise_error(f"No parser found for schema constraint {constraint}.") 3723 3724 return self.CONSTRAINT_PARSERS[constraint](self) 3725 3726 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3727 self._match_text_seq("KEY") 3728 return self.expression( 3729 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3730 ) 3731 3732 def _parse_key_constraint_options(self) -> t.List[str]: 3733 options = [] 3734 while True: 3735 if not self._curr: 3736 break 3737 3738 if self._match(TokenType.ON): 3739 action = None 3740 on = self._advance_any() and self._prev.text 3741 3742 if self._match_text_seq("NO", "ACTION"): 3743 action = "NO ACTION" 3744 elif self._match_text_seq("CASCADE"): 3745 action = "CASCADE" 3746 elif self._match_pair(TokenType.SET, TokenType.NULL): 3747 action = "SET NULL" 3748 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3749 action = "SET DEFAULT" 3750 else: 3751 self.raise_error("Invalid key constraint") 3752 3753 options.append(f"ON {on} {action}") 3754 elif self._match_text_seq("NOT", "ENFORCED"): 3755 options.append("NOT ENFORCED") 3756 elif self._match_text_seq("DEFERRABLE"): 3757 options.append("DEFERRABLE") 3758 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3759 options.append("INITIALLY DEFERRED") 3760 elif self._match_text_seq("NORELY"): 3761 options.append("NORELY") 3762 elif self._match_text_seq("MATCH", "FULL"): 3763 options.append("MATCH FULL") 3764 else: 3765 break 3766 3767 return options 3768 3769 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3770 if match and not self._match(TokenType.REFERENCES): 3771 return None 3772 3773 expressions = None 3774 this = self._parse_table(schema=True) 3775 options = self._parse_key_constraint_options() 3776 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3777 3778 def _parse_foreign_key(self) -> exp.ForeignKey: 3779 expressions = self._parse_wrapped_id_vars() 3780 reference = self._parse_references() 3781 options = {} 3782 3783 while self._match(TokenType.ON): 3784 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3785 self.raise_error("Expected DELETE or UPDATE") 3786 3787 kind = self._prev.text.lower() 3788 3789 if self._match_text_seq("NO", "ACTION"): 3790 action = "NO ACTION" 3791 elif self._match(TokenType.SET): 3792 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3793 action = "SET " + self._prev.text.upper() 3794 else: 3795 self._advance() 3796 action = self._prev.text.upper() 3797 3798 options[kind] = action 3799 3800 return self.expression( 3801 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3802 ) 3803 3804 def _parse_primary_key( 3805 self, wrapped_optional: bool = False, in_props: bool = False 3806 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3807 desc = ( 3808 self._match_set((TokenType.ASC, TokenType.DESC)) 3809 and self._prev.token_type == TokenType.DESC 3810 ) 3811 3812 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3813 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3814 3815 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3816 options = self._parse_key_constraint_options() 3817 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3818 3819 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3820 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3821 return this 3822 3823 bracket_kind = self._prev.token_type 3824 3825 if self._match(TokenType.COLON): 3826 expressions: t.List[exp.Expression] = [ 3827 self.expression(exp.Slice, expression=self._parse_conjunction()) 3828 ] 3829 else: 3830 expressions = self._parse_csv( 3831 lambda: self._parse_slice( 3832 self._parse_alias(self._parse_conjunction(), explicit=True) 3833 ) 3834 ) 3835 3836 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3837 if bracket_kind == TokenType.L_BRACE: 3838 this = self.expression(exp.Struct, expressions=expressions) 3839 elif not this or this.name.upper() == "ARRAY": 3840 this = self.expression(exp.Array, expressions=expressions) 3841 else: 3842 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3843 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3844 3845 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3846 self.raise_error("Expected ]") 3847 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3848 self.raise_error("Expected }") 3849 3850 self._add_comments(this) 3851 return self._parse_bracket(this) 3852 3853 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3854 if self._match(TokenType.COLON): 3855 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3856 return this 3857 3858 def _parse_case(self) -> t.Optional[exp.Expression]: 3859 ifs = [] 3860 default = None 3861 3862 comments = self._prev_comments 3863 expression = self._parse_conjunction() 3864 3865 while self._match(TokenType.WHEN): 3866 this = self._parse_conjunction() 3867 self._match(TokenType.THEN) 3868 then = self._parse_conjunction() 3869 ifs.append(self.expression(exp.If, this=this, true=then)) 3870 3871 if self._match(TokenType.ELSE): 3872 default = self._parse_conjunction() 3873 3874 if not self._match(TokenType.END): 3875 self.raise_error("Expected END after CASE", self._prev) 3876 3877 return self._parse_window( 3878 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3879 ) 3880 3881 def _parse_if(self) -> t.Optional[exp.Expression]: 3882 if self._match(TokenType.L_PAREN): 3883 args = self._parse_csv(self._parse_conjunction) 3884 this = self.validate_expression(exp.If.from_arg_list(args), args) 3885 self._match_r_paren() 3886 else: 3887 index = self._index - 1 3888 condition = self._parse_conjunction() 3889 3890 if not condition: 3891 self._retreat(index) 3892 return None 3893 3894 self._match(TokenType.THEN) 3895 true = self._parse_conjunction() 3896 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3897 self._match(TokenType.END) 3898 this = self.expression(exp.If, this=condition, true=true, false=false) 3899 3900 return self._parse_window(this) 3901 3902 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3903 if not self._match_text_seq("VALUE", "FOR"): 3904 self._retreat(self._index - 1) 3905 return None 3906 3907 return self.expression( 3908 exp.NextValueFor, 3909 this=self._parse_column(), 3910 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3911 ) 3912 3913 def _parse_extract(self) -> exp.Extract: 3914 this = self._parse_function() or self._parse_var() or self._parse_type() 3915 3916 if self._match(TokenType.FROM): 3917 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3918 3919 if not self._match(TokenType.COMMA): 3920 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3921 3922 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3923 3924 def _parse_any_value(self) -> exp.AnyValue: 3925 this = self._parse_lambda() 3926 is_max = None 3927 having = None 3928 3929 if self._match(TokenType.HAVING): 3930 self._match_texts(("MAX", "MIN")) 3931 is_max = self._prev.text == "MAX" 3932 having = self._parse_column() 3933 3934 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3935 3936 def _parse_cast(self, strict: bool) -> exp.Expression: 3937 this = self._parse_conjunction() 3938 3939 if not self._match(TokenType.ALIAS): 3940 if self._match(TokenType.COMMA): 3941 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 3942 3943 self.raise_error("Expected AS after CAST") 3944 3945 fmt = None 3946 to = self._parse_types() 3947 3948 if not to: 3949 self.raise_error("Expected TYPE after CAST") 3950 elif isinstance(to, exp.Identifier): 3951 to = exp.DataType.build(to.name, udt=True) 3952 elif to.this == exp.DataType.Type.CHAR: 3953 if self._match(TokenType.CHARACTER_SET): 3954 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3955 elif self._match(TokenType.FORMAT): 3956 fmt_string = self._parse_string() 3957 fmt = self._parse_at_time_zone(fmt_string) 3958 3959 if to.this in exp.DataType.TEMPORAL_TYPES: 3960 this = self.expression( 3961 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3962 this=this, 3963 format=exp.Literal.string( 3964 format_time( 3965 fmt_string.this if fmt_string else "", 3966 self.FORMAT_MAPPING or self.TIME_MAPPING, 3967 self.FORMAT_TRIE or self.TIME_TRIE, 3968 ) 3969 ), 3970 ) 3971 3972 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3973 this.set("zone", fmt.args["zone"]) 3974 3975 return this 3976 3977 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3978 3979 def _parse_concat(self) -> t.Optional[exp.Expression]: 3980 args = self._parse_csv(self._parse_conjunction) 3981 if self.CONCAT_NULL_OUTPUTS_STRING: 3982 args = [ 3983 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3984 for arg in args 3985 if arg 3986 ] 3987 3988 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3989 # we find such a call we replace it with its argument. 3990 if len(args) == 1: 3991 return args[0] 3992 3993 return self.expression( 3994 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3995 ) 3996 3997 def _parse_string_agg(self) -> exp.Expression: 3998 if self._match(TokenType.DISTINCT): 3999 args: t.List[t.Optional[exp.Expression]] = [ 4000 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4001 ] 4002 if self._match(TokenType.COMMA): 4003 args.extend(self._parse_csv(self._parse_conjunction)) 4004 else: 4005 args = self._parse_csv(self._parse_conjunction) # type: ignore 4006 4007 index = self._index 4008 if not self._match(TokenType.R_PAREN) and args: 4009 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4010 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4011 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4012 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4013 4014 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4015 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4016 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4017 if not self._match_text_seq("WITHIN", "GROUP"): 4018 self._retreat(index) 4019 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4020 4021 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4022 order = self._parse_order(this=seq_get(args, 0)) 4023 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4024 4025 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4026 this = self._parse_bitwise() 4027 4028 if self._match(TokenType.USING): 4029 to: t.Optional[exp.Expression] = self.expression( 4030 exp.CharacterSet, this=self._parse_var() 4031 ) 4032 elif self._match(TokenType.COMMA): 4033 to = self._parse_types() 4034 else: 4035 to = None 4036 4037 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4038 4039 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4040 """ 4041 There are generally two variants of the DECODE function: 4042 4043 - DECODE(bin, charset) 4044 - DECODE(expression, search, result [, search, result] ... [, default]) 4045 4046 The second variant will always be parsed into a CASE expression. Note that NULL 4047 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4048 instead of relying on pattern matching. 4049 """ 4050 args = self._parse_csv(self._parse_conjunction) 4051 4052 if len(args) < 3: 4053 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4054 4055 expression, *expressions = args 4056 if not expression: 4057 return None 4058 4059 ifs = [] 4060 for search, result in zip(expressions[::2], expressions[1::2]): 4061 if not search or not result: 4062 return None 4063 4064 if isinstance(search, exp.Literal): 4065 ifs.append( 4066 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4067 ) 4068 elif isinstance(search, exp.Null): 4069 ifs.append( 4070 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4071 ) 4072 else: 4073 cond = exp.or_( 4074 exp.EQ(this=expression.copy(), expression=search), 4075 exp.and_( 4076 exp.Is(this=expression.copy(), expression=exp.Null()), 4077 exp.Is(this=search.copy(), expression=exp.Null()), 4078 copy=False, 4079 ), 4080 copy=False, 4081 ) 4082 ifs.append(exp.If(this=cond, true=result)) 4083 4084 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4085 4086 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4087 self._match_text_seq("KEY") 4088 key = self._parse_field() 4089 self._match(TokenType.COLON) 4090 self._match_text_seq("VALUE") 4091 value = self._parse_field() 4092 4093 if not key and not value: 4094 return None 4095 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4096 4097 def _parse_json_object(self) -> exp.JSONObject: 4098 star = self._parse_star() 4099 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4100 4101 null_handling = None 4102 if self._match_text_seq("NULL", "ON", "NULL"): 4103 null_handling = "NULL ON NULL" 4104 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4105 null_handling = "ABSENT ON NULL" 4106 4107 unique_keys = None 4108 if self._match_text_seq("WITH", "UNIQUE"): 4109 unique_keys = True 4110 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4111 unique_keys = False 4112 4113 self._match_text_seq("KEYS") 4114 4115 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4116 format_json = self._match_text_seq("FORMAT", "JSON") 4117 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4118 4119 return self.expression( 4120 exp.JSONObject, 4121 expressions=expressions, 4122 null_handling=null_handling, 4123 unique_keys=unique_keys, 4124 return_type=return_type, 4125 format_json=format_json, 4126 encoding=encoding, 4127 ) 4128 4129 def _parse_logarithm(self) -> exp.Func: 4130 # Default argument order is base, expression 4131 args = self._parse_csv(self._parse_range) 4132 4133 if len(args) > 1: 4134 if not self.LOG_BASE_FIRST: 4135 args.reverse() 4136 return exp.Log.from_arg_list(args) 4137 4138 return self.expression( 4139 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4140 ) 4141 4142 def _parse_match_against(self) -> exp.MatchAgainst: 4143 expressions = self._parse_csv(self._parse_column) 4144 4145 self._match_text_seq(")", "AGAINST", "(") 4146 4147 this = self._parse_string() 4148 4149 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4150 modifier = "IN NATURAL LANGUAGE MODE" 4151 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4152 modifier = f"{modifier} WITH QUERY EXPANSION" 4153 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4154 modifier = "IN BOOLEAN MODE" 4155 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4156 modifier = "WITH QUERY EXPANSION" 4157 else: 4158 modifier = None 4159 4160 return self.expression( 4161 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4162 ) 4163 4164 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4165 def _parse_open_json(self) -> exp.OpenJSON: 4166 this = self._parse_bitwise() 4167 path = self._match(TokenType.COMMA) and self._parse_string() 4168 4169 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4170 this = self._parse_field(any_token=True) 4171 kind = self._parse_types() 4172 path = self._parse_string() 4173 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4174 4175 return self.expression( 4176 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4177 ) 4178 4179 expressions = None 4180 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4181 self._match_l_paren() 4182 expressions = self._parse_csv(_parse_open_json_column_def) 4183 4184 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4185 4186 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4187 args = self._parse_csv(self._parse_bitwise) 4188 4189 if self._match(TokenType.IN): 4190 return self.expression( 4191 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4192 ) 4193 4194 if haystack_first: 4195 haystack = seq_get(args, 0) 4196 needle = seq_get(args, 1) 4197 else: 4198 needle = seq_get(args, 0) 4199 haystack = seq_get(args, 1) 4200 4201 return self.expression( 4202 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4203 ) 4204 4205 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4206 args = self._parse_csv(self._parse_table) 4207 return exp.JoinHint(this=func_name.upper(), expressions=args) 4208 4209 def _parse_substring(self) -> exp.Substring: 4210 # Postgres supports the form: substring(string [from int] [for int]) 4211 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4212 4213 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4214 4215 if self._match(TokenType.FROM): 4216 args.append(self._parse_bitwise()) 4217 if self._match(TokenType.FOR): 4218 args.append(self._parse_bitwise()) 4219 4220 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4221 4222 def _parse_trim(self) -> exp.Trim: 4223 # https://www.w3resource.com/sql/character-functions/trim.php 4224 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4225 4226 position = None 4227 collation = None 4228 4229 if self._match_texts(self.TRIM_TYPES): 4230 position = self._prev.text.upper() 4231 4232 expression = self._parse_bitwise() 4233 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4234 this = self._parse_bitwise() 4235 else: 4236 this = expression 4237 expression = None 4238 4239 if self._match(TokenType.COLLATE): 4240 collation = self._parse_bitwise() 4241 4242 return self.expression( 4243 exp.Trim, this=this, position=position, expression=expression, collation=collation 4244 ) 4245 4246 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4247 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4248 4249 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4250 return self._parse_window(self._parse_id_var(), alias=True) 4251 4252 def _parse_respect_or_ignore_nulls( 4253 self, this: t.Optional[exp.Expression] 4254 ) -> t.Optional[exp.Expression]: 4255 if self._match_text_seq("IGNORE", "NULLS"): 4256 return self.expression(exp.IgnoreNulls, this=this) 4257 if self._match_text_seq("RESPECT", "NULLS"): 4258 return self.expression(exp.RespectNulls, this=this) 4259 return this 4260 4261 def _parse_window( 4262 self, this: t.Optional[exp.Expression], alias: bool = False 4263 ) -> t.Optional[exp.Expression]: 4264 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4265 self._match(TokenType.WHERE) 4266 this = self.expression( 4267 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4268 ) 4269 self._match_r_paren() 4270 4271 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4272 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4273 if self._match_text_seq("WITHIN", "GROUP"): 4274 order = self._parse_wrapped(self._parse_order) 4275 this = self.expression(exp.WithinGroup, this=this, expression=order) 4276 4277 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4278 # Some dialects choose to implement and some do not. 4279 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4280 4281 # There is some code above in _parse_lambda that handles 4282 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4283 4284 # The below changes handle 4285 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4286 4287 # Oracle allows both formats 4288 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4289 # and Snowflake chose to do the same for familiarity 4290 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4291 this = self._parse_respect_or_ignore_nulls(this) 4292 4293 # bigquery select from window x AS (partition by ...) 4294 if alias: 4295 over = None 4296 self._match(TokenType.ALIAS) 4297 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4298 return this 4299 else: 4300 over = self._prev.text.upper() 4301 4302 if not self._match(TokenType.L_PAREN): 4303 return self.expression( 4304 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4305 ) 4306 4307 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4308 4309 first = self._match(TokenType.FIRST) 4310 if self._match_text_seq("LAST"): 4311 first = False 4312 4313 partition, order = self._parse_partition_and_order() 4314 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4315 4316 if kind: 4317 self._match(TokenType.BETWEEN) 4318 start = self._parse_window_spec() 4319 self._match(TokenType.AND) 4320 end = self._parse_window_spec() 4321 4322 spec = self.expression( 4323 exp.WindowSpec, 4324 kind=kind, 4325 start=start["value"], 4326 start_side=start["side"], 4327 end=end["value"], 4328 end_side=end["side"], 4329 ) 4330 else: 4331 spec = None 4332 4333 self._match_r_paren() 4334 4335 window = self.expression( 4336 exp.Window, 4337 this=this, 4338 partition_by=partition, 4339 order=order, 4340 spec=spec, 4341 alias=window_alias, 4342 over=over, 4343 first=first, 4344 ) 4345 4346 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4347 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4348 return self._parse_window(window, alias=alias) 4349 4350 return window 4351 4352 def _parse_partition_and_order( 4353 self, 4354 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4355 return self._parse_partition_by(), self._parse_order() 4356 4357 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4358 self._match(TokenType.BETWEEN) 4359 4360 return { 4361 "value": ( 4362 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4363 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4364 or self._parse_bitwise() 4365 ), 4366 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4367 } 4368 4369 def _parse_alias( 4370 self, this: t.Optional[exp.Expression], explicit: bool = False 4371 ) -> t.Optional[exp.Expression]: 4372 any_token = self._match(TokenType.ALIAS) 4373 4374 if explicit and not any_token: 4375 return this 4376 4377 if self._match(TokenType.L_PAREN): 4378 aliases = self.expression( 4379 exp.Aliases, 4380 this=this, 4381 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4382 ) 4383 self._match_r_paren(aliases) 4384 return aliases 4385 4386 alias = self._parse_id_var(any_token) 4387 4388 if alias: 4389 return self.expression(exp.Alias, this=this, alias=alias) 4390 4391 return this 4392 4393 def _parse_id_var( 4394 self, 4395 any_token: bool = True, 4396 tokens: t.Optional[t.Collection[TokenType]] = None, 4397 ) -> t.Optional[exp.Expression]: 4398 identifier = self._parse_identifier() 4399 4400 if identifier: 4401 return identifier 4402 4403 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4404 quoted = self._prev.token_type == TokenType.STRING 4405 return exp.Identifier(this=self._prev.text, quoted=quoted) 4406 4407 return None 4408 4409 def _parse_string(self) -> t.Optional[exp.Expression]: 4410 if self._match(TokenType.STRING): 4411 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4412 return self._parse_placeholder() 4413 4414 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4415 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4416 4417 def _parse_number(self) -> t.Optional[exp.Expression]: 4418 if self._match(TokenType.NUMBER): 4419 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4420 return self._parse_placeholder() 4421 4422 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4423 if self._match(TokenType.IDENTIFIER): 4424 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4425 return self._parse_placeholder() 4426 4427 def _parse_var( 4428 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4429 ) -> t.Optional[exp.Expression]: 4430 if ( 4431 (any_token and self._advance_any()) 4432 or self._match(TokenType.VAR) 4433 or (self._match_set(tokens) if tokens else False) 4434 ): 4435 return self.expression(exp.Var, this=self._prev.text) 4436 return self._parse_placeholder() 4437 4438 def _advance_any(self) -> t.Optional[Token]: 4439 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4440 self._advance() 4441 return self._prev 4442 return None 4443 4444 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4445 return self._parse_var() or self._parse_string() 4446 4447 def _parse_null(self) -> t.Optional[exp.Expression]: 4448 if self._match(TokenType.NULL): 4449 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4450 return self._parse_placeholder() 4451 4452 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4453 if self._match(TokenType.TRUE): 4454 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4455 if self._match(TokenType.FALSE): 4456 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4457 return self._parse_placeholder() 4458 4459 def _parse_star(self) -> t.Optional[exp.Expression]: 4460 if self._match(TokenType.STAR): 4461 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4462 return self._parse_placeholder() 4463 4464 def _parse_parameter(self) -> exp.Parameter: 4465 wrapped = self._match(TokenType.L_BRACE) 4466 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4467 self._match(TokenType.R_BRACE) 4468 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4469 4470 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4471 if self._match_set(self.PLACEHOLDER_PARSERS): 4472 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4473 if placeholder: 4474 return placeholder 4475 self._advance(-1) 4476 return None 4477 4478 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4479 if not self._match(TokenType.EXCEPT): 4480 return None 4481 if self._match(TokenType.L_PAREN, advance=False): 4482 return self._parse_wrapped_csv(self._parse_column) 4483 return self._parse_csv(self._parse_column) 4484 4485 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4486 if not self._match(TokenType.REPLACE): 4487 return None 4488 if self._match(TokenType.L_PAREN, advance=False): 4489 return self._parse_wrapped_csv(self._parse_expression) 4490 return self._parse_expressions() 4491 4492 def _parse_csv( 4493 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4494 ) -> t.List[exp.Expression]: 4495 parse_result = parse_method() 4496 items = [parse_result] if parse_result is not None else [] 4497 4498 while self._match(sep): 4499 self._add_comments(parse_result) 4500 parse_result = parse_method() 4501 if parse_result is not None: 4502 items.append(parse_result) 4503 4504 return items 4505 4506 def _parse_tokens( 4507 self, parse_method: t.Callable, expressions: t.Dict 4508 ) -> t.Optional[exp.Expression]: 4509 this = parse_method() 4510 4511 while self._match_set(expressions): 4512 this = self.expression( 4513 expressions[self._prev.token_type], 4514 this=this, 4515 comments=self._prev_comments, 4516 expression=parse_method(), 4517 ) 4518 4519 return this 4520 4521 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4522 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4523 4524 def _parse_wrapped_csv( 4525 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4526 ) -> t.List[exp.Expression]: 4527 return self._parse_wrapped( 4528 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4529 ) 4530 4531 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4532 wrapped = self._match(TokenType.L_PAREN) 4533 if not wrapped and not optional: 4534 self.raise_error("Expecting (") 4535 parse_result = parse_method() 4536 if wrapped: 4537 self._match_r_paren() 4538 return parse_result 4539 4540 def _parse_expressions(self) -> t.List[exp.Expression]: 4541 return self._parse_csv(self._parse_expression) 4542 4543 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4544 return self._parse_select() or self._parse_set_operations( 4545 self._parse_expression() if alias else self._parse_conjunction() 4546 ) 4547 4548 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4549 return self._parse_query_modifiers( 4550 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4551 ) 4552 4553 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4554 this = None 4555 if self._match_texts(self.TRANSACTION_KIND): 4556 this = self._prev.text 4557 4558 self._match_texts({"TRANSACTION", "WORK"}) 4559 4560 modes = [] 4561 while True: 4562 mode = [] 4563 while self._match(TokenType.VAR): 4564 mode.append(self._prev.text) 4565 4566 if mode: 4567 modes.append(" ".join(mode)) 4568 if not self._match(TokenType.COMMA): 4569 break 4570 4571 return self.expression(exp.Transaction, this=this, modes=modes) 4572 4573 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4574 chain = None 4575 savepoint = None 4576 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4577 4578 self._match_texts({"TRANSACTION", "WORK"}) 4579 4580 if self._match_text_seq("TO"): 4581 self._match_text_seq("SAVEPOINT") 4582 savepoint = self._parse_id_var() 4583 4584 if self._match(TokenType.AND): 4585 chain = not self._match_text_seq("NO") 4586 self._match_text_seq("CHAIN") 4587 4588 if is_rollback: 4589 return self.expression(exp.Rollback, savepoint=savepoint) 4590 4591 return self.expression(exp.Commit, chain=chain) 4592 4593 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4594 if not self._match_text_seq("ADD"): 4595 return None 4596 4597 self._match(TokenType.COLUMN) 4598 exists_column = self._parse_exists(not_=True) 4599 expression = self._parse_field_def() 4600 4601 if expression: 4602 expression.set("exists", exists_column) 4603 4604 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4605 if self._match_texts(("FIRST", "AFTER")): 4606 position = self._prev.text 4607 column_position = self.expression( 4608 exp.ColumnPosition, this=self._parse_column(), position=position 4609 ) 4610 expression.set("position", column_position) 4611 4612 return expression 4613 4614 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4615 drop = self._match(TokenType.DROP) and self._parse_drop() 4616 if drop and not isinstance(drop, exp.Command): 4617 drop.set("kind", drop.args.get("kind", "COLUMN")) 4618 return drop 4619 4620 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4621 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4622 return self.expression( 4623 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4624 ) 4625 4626 def _parse_add_constraint(self) -> exp.AddConstraint: 4627 this = None 4628 kind = self._prev.token_type 4629 4630 if kind == TokenType.CONSTRAINT: 4631 this = self._parse_id_var() 4632 4633 if self._match_text_seq("CHECK"): 4634 expression = self._parse_wrapped(self._parse_conjunction) 4635 enforced = self._match_text_seq("ENFORCED") 4636 4637 return self.expression( 4638 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4639 ) 4640 4641 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4642 expression = self._parse_foreign_key() 4643 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4644 expression = self._parse_primary_key() 4645 else: 4646 expression = None 4647 4648 return self.expression(exp.AddConstraint, this=this, expression=expression) 4649 4650 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4651 index = self._index - 1 4652 4653 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4654 return self._parse_csv(self._parse_add_constraint) 4655 4656 self._retreat(index) 4657 return self._parse_csv(self._parse_add_column) 4658 4659 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4660 self._match(TokenType.COLUMN) 4661 column = self._parse_field(any_token=True) 4662 4663 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4664 return self.expression(exp.AlterColumn, this=column, drop=True) 4665 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4666 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4667 4668 self._match_text_seq("SET", "DATA") 4669 return self.expression( 4670 exp.AlterColumn, 4671 this=column, 4672 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4673 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4674 using=self._match(TokenType.USING) and self._parse_conjunction(), 4675 ) 4676 4677 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4678 index = self._index - 1 4679 4680 partition_exists = self._parse_exists() 4681 if self._match(TokenType.PARTITION, advance=False): 4682 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4683 4684 self._retreat(index) 4685 return self._parse_csv(self._parse_drop_column) 4686 4687 def _parse_alter_table_rename(self) -> exp.RenameTable: 4688 self._match_text_seq("TO") 4689 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4690 4691 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4692 start = self._prev 4693 4694 if not self._match(TokenType.TABLE): 4695 return self._parse_as_command(start) 4696 4697 exists = self._parse_exists() 4698 this = self._parse_table(schema=True) 4699 4700 if self._next: 4701 self._advance() 4702 4703 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4704 if parser: 4705 actions = ensure_list(parser(self)) 4706 4707 if not self._curr: 4708 return self.expression( 4709 exp.AlterTable, 4710 this=this, 4711 exists=exists, 4712 actions=actions, 4713 ) 4714 return self._parse_as_command(start) 4715 4716 def _parse_merge(self) -> exp.Merge: 4717 self._match(TokenType.INTO) 4718 target = self._parse_table() 4719 4720 if target and self._match(TokenType.ALIAS, advance=False): 4721 target.set("alias", self._parse_table_alias()) 4722 4723 self._match(TokenType.USING) 4724 using = self._parse_table() 4725 4726 self._match(TokenType.ON) 4727 on = self._parse_conjunction() 4728 4729 whens = [] 4730 while self._match(TokenType.WHEN): 4731 matched = not self._match(TokenType.NOT) 4732 self._match_text_seq("MATCHED") 4733 source = ( 4734 False 4735 if self._match_text_seq("BY", "TARGET") 4736 else self._match_text_seq("BY", "SOURCE") 4737 ) 4738 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4739 4740 self._match(TokenType.THEN) 4741 4742 if self._match(TokenType.INSERT): 4743 _this = self._parse_star() 4744 if _this: 4745 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4746 else: 4747 then = self.expression( 4748 exp.Insert, 4749 this=self._parse_value(), 4750 expression=self._match(TokenType.VALUES) and self._parse_value(), 4751 ) 4752 elif self._match(TokenType.UPDATE): 4753 expressions = self._parse_star() 4754 if expressions: 4755 then = self.expression(exp.Update, expressions=expressions) 4756 else: 4757 then = self.expression( 4758 exp.Update, 4759 expressions=self._match(TokenType.SET) 4760 and self._parse_csv(self._parse_equality), 4761 ) 4762 elif self._match(TokenType.DELETE): 4763 then = self.expression(exp.Var, this=self._prev.text) 4764 else: 4765 then = None 4766 4767 whens.append( 4768 self.expression( 4769 exp.When, 4770 matched=matched, 4771 source=source, 4772 condition=condition, 4773 then=then, 4774 ) 4775 ) 4776 4777 return self.expression( 4778 exp.Merge, 4779 this=target, 4780 using=using, 4781 on=on, 4782 expressions=whens, 4783 ) 4784 4785 def _parse_show(self) -> t.Optional[exp.Expression]: 4786 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4787 if parser: 4788 return parser(self) 4789 self._advance() 4790 return self.expression(exp.Show, this=self._prev.text.upper()) 4791 4792 def _parse_set_item_assignment( 4793 self, kind: t.Optional[str] = None 4794 ) -> t.Optional[exp.Expression]: 4795 index = self._index 4796 4797 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4798 return self._parse_set_transaction(global_=kind == "GLOBAL") 4799 4800 left = self._parse_primary() or self._parse_id_var() 4801 4802 if not self._match_texts(("=", "TO")): 4803 self._retreat(index) 4804 return None 4805 4806 right = self._parse_statement() or self._parse_id_var() 4807 this = self.expression(exp.EQ, this=left, expression=right) 4808 4809 return self.expression(exp.SetItem, this=this, kind=kind) 4810 4811 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4812 self._match_text_seq("TRANSACTION") 4813 characteristics = self._parse_csv( 4814 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4815 ) 4816 return self.expression( 4817 exp.SetItem, 4818 expressions=characteristics, 4819 kind="TRANSACTION", 4820 **{"global": global_}, # type: ignore 4821 ) 4822 4823 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4824 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4825 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4826 4827 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4828 index = self._index 4829 set_ = self.expression( 4830 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4831 ) 4832 4833 if self._curr: 4834 self._retreat(index) 4835 return self._parse_as_command(self._prev) 4836 4837 return set_ 4838 4839 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4840 for option in options: 4841 if self._match_text_seq(*option.split(" ")): 4842 return exp.var(option) 4843 return None 4844 4845 def _parse_as_command(self, start: Token) -> exp.Command: 4846 while self._curr: 4847 self._advance() 4848 text = self._find_sql(start, self._prev) 4849 size = len(start.text) 4850 return exp.Command(this=text[:size], expression=text[size:]) 4851 4852 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4853 settings = [] 4854 4855 self._match_l_paren() 4856 kind = self._parse_id_var() 4857 4858 if self._match(TokenType.L_PAREN): 4859 while True: 4860 key = self._parse_id_var() 4861 value = self._parse_primary() 4862 4863 if not key and value is None: 4864 break 4865 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4866 self._match(TokenType.R_PAREN) 4867 4868 self._match_r_paren() 4869 4870 return self.expression( 4871 exp.DictProperty, 4872 this=this, 4873 kind=kind.this if kind else None, 4874 settings=settings, 4875 ) 4876 4877 def _parse_dict_range(self, this: str) -> exp.DictRange: 4878 self._match_l_paren() 4879 has_min = self._match_text_seq("MIN") 4880 if has_min: 4881 min = self._parse_var() or self._parse_primary() 4882 self._match_text_seq("MAX") 4883 max = self._parse_var() or self._parse_primary() 4884 else: 4885 max = self._parse_var() or self._parse_primary() 4886 min = exp.Literal.number(0) 4887 self._match_r_paren() 4888 return self.expression(exp.DictRange, this=this, min=min, max=max) 4889 4890 def _find_parser( 4891 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4892 ) -> t.Optional[t.Callable]: 4893 if not self._curr: 4894 return None 4895 4896 index = self._index 4897 this = [] 4898 while True: 4899 # The current token might be multiple words 4900 curr = self._curr.text.upper() 4901 key = curr.split(" ") 4902 this.append(curr) 4903 4904 self._advance() 4905 result, trie = in_trie(trie, key) 4906 if result == TrieResult.FAILED: 4907 break 4908 4909 if result == TrieResult.EXISTS: 4910 subparser = parsers[" ".join(this)] 4911 return subparser 4912 4913 self._retreat(index) 4914 return None 4915 4916 def _match(self, token_type, advance=True, expression=None): 4917 if not self._curr: 4918 return None 4919 4920 if self._curr.token_type == token_type: 4921 if advance: 4922 self._advance() 4923 self._add_comments(expression) 4924 return True 4925 4926 return None 4927 4928 def _match_set(self, types, advance=True): 4929 if not self._curr: 4930 return None 4931 4932 if self._curr.token_type in types: 4933 if advance: 4934 self._advance() 4935 return True 4936 4937 return None 4938 4939 def _match_pair(self, token_type_a, token_type_b, advance=True): 4940 if not self._curr or not self._next: 4941 return None 4942 4943 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4944 if advance: 4945 self._advance(2) 4946 return True 4947 4948 return None 4949 4950 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4951 if not self._match(TokenType.L_PAREN, expression=expression): 4952 self.raise_error("Expecting (") 4953 4954 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4955 if not self._match(TokenType.R_PAREN, expression=expression): 4956 self.raise_error("Expecting )") 4957 4958 def _match_texts(self, texts, advance=True): 4959 if self._curr and self._curr.text.upper() in texts: 4960 if advance: 4961 self._advance() 4962 return True 4963 return False 4964 4965 def _match_text_seq(self, *texts, advance=True): 4966 index = self._index 4967 for text in texts: 4968 if self._curr and self._curr.text.upper() == text: 4969 self._advance() 4970 else: 4971 self._retreat(index) 4972 return False 4973 4974 if not advance: 4975 self._retreat(index) 4976 4977 return True 4978 4979 @t.overload 4980 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4981 ... 4982 4983 @t.overload 4984 def _replace_columns_with_dots( 4985 self, this: t.Optional[exp.Expression] 4986 ) -> t.Optional[exp.Expression]: 4987 ... 4988 4989 def _replace_columns_with_dots(self, this): 4990 if isinstance(this, exp.Dot): 4991 exp.replace_children(this, self._replace_columns_with_dots) 4992 elif isinstance(this, exp.Column): 4993 exp.replace_children(this, self._replace_columns_with_dots) 4994 table = this.args.get("table") 4995 this = ( 4996 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4997 ) 4998 4999 return this 5000 5001 def _replace_lambda( 5002 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5003 ) -> t.Optional[exp.Expression]: 5004 if not node: 5005 return node 5006 5007 for column in node.find_all(exp.Column): 5008 if column.parts[0].name in lambda_variables: 5009 dot_or_id = column.to_dot() if column.table else column.this 5010 parent = column.parent 5011 5012 while isinstance(parent, exp.Dot): 5013 if not isinstance(parent.parent, exp.Dot): 5014 parent.replace(dot_or_id) 5015 break 5016 parent = parent.parent 5017 else: 5018 if column is node: 5019 node = dot_or_id 5020 else: 5021 column.replace(dot_or_id) 5022 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
888 def __init__( 889 self, 890 error_level: t.Optional[ErrorLevel] = None, 891 error_message_context: int = 100, 892 max_errors: int = 3, 893 ): 894 self.error_level = error_level or ErrorLevel.IMMEDIATE 895 self.error_message_context = error_message_context 896 self.max_errors = max_errors 897 self._tokenizer = self.TOKENIZER_CLASS() 898 self.reset()
910 def parse( 911 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 912 ) -> t.List[t.Optional[exp.Expression]]: 913 """ 914 Parses a list of tokens and returns a list of syntax trees, one tree 915 per parsed SQL statement. 916 917 Args: 918 raw_tokens: The list of tokens. 919 sql: The original SQL string, used to produce helpful debug messages. 920 921 Returns: 922 The list of the produced syntax trees. 923 """ 924 return self._parse( 925 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 926 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
928 def parse_into( 929 self, 930 expression_types: exp.IntoType, 931 raw_tokens: t.List[Token], 932 sql: t.Optional[str] = None, 933 ) -> t.List[t.Optional[exp.Expression]]: 934 """ 935 Parses a list of tokens into a given Expression type. If a collection of Expression 936 types is given instead, this method will try to parse the token list into each one 937 of them, stopping at the first for which the parsing succeeds. 938 939 Args: 940 expression_types: The expression type(s) to try and parse the token list into. 941 raw_tokens: The list of tokens. 942 sql: The original SQL string, used to produce helpful debug messages. 943 944 Returns: 945 The target Expression. 946 """ 947 errors = [] 948 for expression_type in ensure_list(expression_types): 949 parser = self.EXPRESSION_PARSERS.get(expression_type) 950 if not parser: 951 raise TypeError(f"No parser registered for {expression_type}") 952 953 try: 954 return self._parse(parser, raw_tokens, sql) 955 except ParseError as e: 956 e.errors[0]["into_expression"] = expression_type 957 errors.append(e) 958 959 raise ParseError( 960 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 961 errors=merge_errors(errors), 962 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
999 def check_errors(self) -> None: 1000 """Logs or raises any found errors, depending on the chosen error level setting.""" 1001 if self.error_level == ErrorLevel.WARN: 1002 for error in self.errors: 1003 logger.error(str(error)) 1004 elif self.error_level == ErrorLevel.RAISE and self.errors: 1005 raise ParseError( 1006 concat_messages(self.errors, self.max_errors), 1007 errors=merge_errors(self.errors), 1008 )
Logs or raises any found errors, depending on the chosen error level setting.
1010 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1011 """ 1012 Appends an error in the list of recorded errors or raises it, depending on the chosen 1013 error level setting. 1014 """ 1015 token = token or self._curr or self._prev or Token.string("") 1016 start = token.start 1017 end = token.end + 1 1018 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1019 highlight = self.sql[start:end] 1020 end_context = self.sql[end : end + self.error_message_context] 1021 1022 error = ParseError.new( 1023 f"{message}. Line {token.line}, Col: {token.col}.\n" 1024 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1025 description=message, 1026 line=token.line, 1027 col=token.col, 1028 start_context=start_context, 1029 highlight=highlight, 1030 end_context=end_context, 1031 ) 1032 1033 if self.error_level == ErrorLevel.IMMEDIATE: 1034 raise error 1035 1036 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1038 def expression( 1039 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1040 ) -> E: 1041 """ 1042 Creates a new, validated Expression. 1043 1044 Args: 1045 exp_class: The expression class to instantiate. 1046 comments: An optional list of comments to attach to the expression. 1047 kwargs: The arguments to set for the expression along with their respective values. 1048 1049 Returns: 1050 The target expression. 1051 """ 1052 instance = exp_class(**kwargs) 1053 instance.add_comments(comments) if comments else self._add_comments(instance) 1054 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1061 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1062 """ 1063 Validates an Expression, making sure that all its mandatory arguments are set. 1064 1065 Args: 1066 expression: The expression to validate. 1067 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1068 1069 Returns: 1070 The validated expression. 1071 """ 1072 if self.error_level != ErrorLevel.IGNORE: 1073 for error_message in expression.error_messages(args): 1074 self.raise_error(error_message) 1075 1076 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.