sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if is_parse_json(this) and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
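A quick usage sketch (not part of the module source): the dialect defined above can be exercised through sqlglot's top-level transpile API. The sample queries and the outputs shown in the comments are illustrative assumptions that follow from the rules in this file.

import sqlglot

# Casts to VARCHAR(n)/CHAR(n) are parsed as casts to STRING (see Hive.Parser._parse_types),
# so the length is dropped when the query is regenerated.
print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive"))
# expected: ['SELECT CAST(1234 AS STRING)']

# DATE_SUB is parsed as a TsOrDsAdd with a negated increment, which the generator
# renders back through the exp.TsOrDsAdd transform.
print(sqlglot.transpile("SELECT DATE_SUB('2023-01-01', 7)", read="hive", write="hive"))
# expected: ["SELECT DATE_ADD('2023-01-01', 7 * -1)"]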
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
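To show how these lookup tables are used by _add_date_sql (a minimal sketch; the column name dt and the expected strings are assumptions, not values taken from the module):

from sqlglot import exp

# WEEK maps to ("DATE_ADD", 7), so a 2-week increment is folded into a 14-day DATE_ADD.
week_add = exp.DateAdd(this=exp.column("dt"), expression=exp.Literal.number(2), unit=exp.var("WEEK"))
print(week_add.sql(dialect="hive"))  # expected: DATE_ADD(dt, 14)

# DateSub flips the sign of the multiplier, so one quarter back becomes ADD_MONTHS(dt, -3).
quarter_sub = exp.DateSub(this=exp.column("dt"), expression=exp.Literal.number(1), unit=exp.var("QUARTER"))
print(quarter_sub.sql(dialect="hive"))  # expected: ADD_MONTHS(dt, -3)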
class Hive(Dialect):
TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
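TIME_MAPPING and INVERSE_TIME_MAPPING translate between Hive/Spark format tokens (Java SimpleDateFormat style) and the strftime-style tokens sqlglot uses internally. A small sketch using sqlglot.time.format_time, which applies such a mapping greedily; the input format string is an arbitrary example:

from sqlglot.dialects.hive import Hive
from sqlglot.time import format_time

# Convert a Hive format string into strftime notation via the dialect's TIME_MAPPING.
print(format_time("yyyy-MM-dd HH:mm:ss", Hive.TIME_MAPPING))
# expected: %Y-%m-%d %H:%M:%S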
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
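A brief sketch of these options in practice (the query and settings below are illustrative assumptions): sqlglot.parse_one forwards extra keyword arguments to the active dialect's Parser, so Hive SQL can be parsed with a custom error policy.

import sqlglot
from sqlglot.errors import ErrorLevel

# Parse a Hive query into a syntax tree, raising on parse errors and keeping
# at most 5 error messages in the raised ParseError.
tree = sqlglot.parse_one(
    "SELECT GET_JSON_OBJECT(payload, '$.id') FROM events",
    read="hive",
    error_level=ErrorLevel.RAISE,
    max_errors=5,
)
print(type(tree).__name__)  # expected: Select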
'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
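The mapping above is what the Hive parser consults to turn a function name into a sqlglot expression node. A minimal sketch of the round trip through the standard sqlglot entry points (the printed outputs are illustrative expectations, not verified results):

import sqlglot
from sqlglot import exp

# COLLECT_LIST is registered above to build exp.ArrayAgg
e = sqlglot.parse_one("SELECT COLLECT_LIST(x) FROM t", read="hive")
print(e.find(exp.ArrayAgg) is not None)  # True
# regenerating with the Hive dialect should render COLLECT_LIST again
print(e.sql(dialect="hive"))             # e.g. SELECT COLLECT_LIST(x) FROM t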
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
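The Hive-specific TRANSFORM entry above handles Hive's script-transform syntax, which is not an ordinary parenthesized function call. A hedged sketch, assuming the exp.QueryTransform node is what that parser produces:

import sqlglot
from sqlglot import exp

e = sqlglot.parse_one(
    "SELECT TRANSFORM(a, b) USING 'cat' AS (x, y) FROM t", read="hive"
)
# illustrative check only: the statement is expected to contain a QueryTransform node
print(e.find(exp.QueryTransform) is not None)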
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
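The WITH SERDEPROPERTIES entry above is the Hive-specific addition to the shared property parsers. A hedged sketch of parsing and regenerating a DDL statement that uses it (exact output formatting may differ):

import sqlglot

ddl = """
CREATE TABLE t (a INT)
ROW FORMAT SERDE 'org.example.MySerDe'
WITH SERDEPROPERTIES ('field.delim'=',')
TBLPROPERTIES ('created_by'='docs')
"""
# WITH SERDEPROPERTIES and TBLPROPERTIES are recognized via the parsers above;
# regenerating with the Hive dialect should keep both clauses
print(sqlglot.transpile(ddl, read="hive", write="hive")[0])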
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
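TIME_MAPPING above translates Hive/Spark format tokens into strftime-style tokens when transpiling to dialects that use the latter. A hedged example (the target-dialect output shown is what the mapping suggests, not a verified result):

import sqlglot

print(sqlglot.transpile("SELECT DATE_FORMAT(ds, 'yyyy-MM-dd')", read="hive", write="duckdb")[0])
# per the mapping, 'yyyy-MM-dd' should become '%Y-%m-%d',
# e.g. something like: SELECT STRFTIME(ds, '%Y-%m-%d')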
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
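The pretty and identify options are simplest to exercise through sqlglot.transpile, which constructs the dialect's Generator internally and forwards these settings to it. A small sketch:

import sqlglot

# pretty-print with the Hive generator
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="hive", pretty=True)[0])
# identify=True should quote every identifier, using Hive's backtick quoting
print(sqlglot.transpile("select a from t", write="hive", identify=True)[0])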
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
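The effect of TYPE_MAPPING is easiest to see on a CAST; a hedged example (outputs shown are what the table implies):

import sqlglot

# TEXT and TIMESTAMPTZ have no direct Hive equivalents, so they are rewritten per TYPE_MAPPING
print(sqlglot.transpile("CAST(x AS TEXT)", write="hive")[0])         # expected: CAST(x AS STRING)
print(sqlglot.transpile("CAST(x AS TIMESTAMPTZ)", write="hive")[0])  # expected: CAST(x AS TIMESTAMP)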
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: 
<function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
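A couple of these transforms in action, with outputs given as hedged expectations rather than verified results:

import sqlglot

# exp.TryCast -> no_trycast_sql: Hive has no TRY_CAST, so a plain CAST is emitted
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", write="hive")[0])
# expected: SELECT CAST(x AS INT) FROM t

# exp.RegexpLike is rendered as the RLIKE binary operator
print(sqlglot.transpile("SELECT REGEXP_LIKE(x, 'a+')", write="hive")[0])
# expected: SELECT x RLIKE 'a+'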
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
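Since PartitionedByProperty is pinned to POST_SCHEMA above, the PARTITIONED BY clause is emitted after the column list in generated DDL. A hedged round-trip sketch (no exact output asserted):

import sqlglot

ddl = "CREATE TABLE t (a INT) PARTITIONED BY (ds STRING)"
# the regenerated statement should still carry PARTITIONED BY (ds STRING) after the schema
print(sqlglot.transpile(ddl, read="hive", write="hive")[0])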
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
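Based on the branches above, a hedged illustration of the parameter-less VARCHAR rewrite and the width-dependent FLOAT handling (outputs shown as expectations):

import sqlglot

# VARCHAR without a length is rebuilt as TEXT and then mapped to STRING
print(sqlglot.transpile("CAST(x AS VARCHAR)", write="hive")[0])    # expected: CAST(x AS STRING)
# a FLOAT wider than 32 is promoted to DOUBLE
print(sqlglot.transpile("CAST(x AS FLOAT(54))", write="hive")[0])  # expected: CAST(x AS DOUBLE)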
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or
True
: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
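A short sketch of calling the classmethod directly on the Hive generator (the 'safe' result depends on the dialect's identifier-normalization rules, so it is not asserted here):

from sqlglot.dialects.hive import Hive

print(Hive.Generator.can_identify("foo", "always"))  # True: 'always'/True short-circuits
print(Hive.Generator.can_identify("foo", "safe"))    # defers to Hive's case-sensitivity rules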
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql