|
101 | 101 | import static java.util.Collections.singletonList;
|
102 | 102 | import static java.util.Collections.singletonMap;
|
103 | 103 | import static java.util.concurrent.TimeUnit.MILLISECONDS;
|
| 104 | +import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType; |
104 | 105 | import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
|
105 | 106 | import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
|
106 | 107 | import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
|
@@ -276,6 +277,43 @@ public void testShortDecimalWithNoScale()
|
276 | 277 | .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required int32 ShortDecimalColumnWithNoScale\": [min: 100, max: 10, num_nulls: 0] [testFile]");
|
277 | 278 | }
|
278 | 279 |
|
| 280 | + @Test |
| 281 | + public void testShortDecimalWithLongDecimalAnnotation() |
| 282 | + throws Exception |
| 283 | + { |
| 284 | + ColumnDescriptor columnDescriptor = createColumnDescriptor(FIXED_LEN_BYTE_ARRAY, decimalType(2, 38), "ShortDecimalColumnWithDecimalAnnotation"); |
| 285 | + BigInteger maximum = new BigInteger("12345"); |
| 286 | + |
| 287 | + Type type = createDecimalType(5, 2); |
| 288 | + assertThat(getDomain(columnDescriptor, type, 0, null, ID, UTC)).isEqualTo(all(type)); |
| 289 | + assertThat(getDomain(columnDescriptor, type, 10, binaryColumnStats(maximum, maximum), ID, UTC)).isEqualTo(singleValue(type, 12345L)); |
| 290 | + |
| 291 | + assertThat(getDomain(columnDescriptor, type, 10, binaryColumnStats(0L, 100L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, 0L, true, 100L, true)), false)); |
| 292 | + assertThat(getDomain(columnDescriptor, type, 10, intColumnStats(0, 100), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, 0L, true, 100L, true)), false)); |
| 293 | + |
| 294 | + type = createDecimalType(15, 2); |
| 295 | + assertThat(getDomain(columnDescriptor, type, 0, null, ID, UTC)).isEqualTo(all(type)); |
| 296 | + assertThat(getDomain(columnDescriptor, type, 10, binaryColumnStats(maximum, maximum), ID, UTC)).isEqualTo(singleValue(type, 12345L)); |
| 297 | + |
| 298 | + assertThat(getDomain(columnDescriptor, type, 10, binaryColumnStats(0L, 100L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, 0L, true, 100L, true)), false)); |
| 299 | + assertThat(getDomain(columnDescriptor, type, 10, intColumnStats(0, 100), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, 0L, true, 100L, true)), false)); |
| 300 | + |
| 301 | + Type typeWithDifferentScale = createDecimalType(5, 1); |
| 302 | + assertThat(getDomain(columnDescriptor, typeWithDifferentScale, 0, null, ID, UTC)).isEqualTo(all(typeWithDifferentScale)); |
| 303 | + |
| 304 | + assertThat(getDomain(columnDescriptor, typeWithDifferentScale, 10, longColumnStats(10012L, 10012L), ID, UTC)).isEqualTo(singleValue(typeWithDifferentScale, 1001L)); |
| 305 | + |
| 306 | + // Test that statistics overflowing the size of the type are not used |
| 307 | + assertThat(getDomain(columnDescriptor, typeWithDifferentScale, 10, longColumnStats(100012L, 100012L), ID, UTC)).isEqualTo(singleValue(typeWithDifferentScale, 10001L)); |
| 308 | + |
| 309 | + assertThat(getDomain(columnDescriptor, typeWithDifferentScale, 10, longColumnStats(0L, 100L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(typeWithDifferentScale, 0L, true, 10L, true)), false)); |
| 310 | + |
| 311 | + // fail on higher precision values |
| 312 | + assertThatExceptionOfType(ParquetCorruptionException.class) |
| 313 | + .isThrownBy(() -> getDomain(columnDescriptor, createDecimalType(4, 2), 10, binaryColumnStats(maximum, maximum), ID, UTC)) |
| 314 | + .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required fixed_len_byte_array(0) ShortDecimalColumnWithDecimalAnnotation (DECIMAL(38,2))\": [min: 0x00000000000000000000000000003039, max: 0x00000000000000000000000000003039, num_nulls: 0] [testFile]"); |
| 315 | + } |
| 316 | + |
279 | 317 | @Test
|
280 | 318 | public void testLongDecimal()
|
281 | 319 | throws Exception
|
@@ -319,6 +357,49 @@ public void testLongDecimalWithNoScale()
|
319 | 357 | .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required fixed_len_byte_array(0) LongDecimalColumnWithNoScale\": [min: 0x00000000000000000000000000000064, max: 0x0000000000000000000000000000000A, num_nulls: 0] [testFile]");
|
320 | 358 | }
|
321 | 359 |
|
| 360 | + @Test |
| 361 | + public void testLongDecimalWithShortDecimalAnnotation() |
| 362 | + throws Exception |
| 363 | + { |
| 364 | + ColumnDescriptor columnDescriptor = createColumnDescriptor(INT32, decimalType(2, 5), "ShortDecimalColumn"); |
| 365 | + DecimalType type = createDecimalType(20, 2); |
| 366 | + |
| 367 | + assertThat(getDomain(columnDescriptor, type, 0, null, ID, UTC)).isEqualTo(all(type)); |
| 368 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(10012L, 10012L), ID, UTC)).isEqualTo(singleValue(type, Int128.valueOf(10012L))); |
| 369 | + |
| 370 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(0L, 10012L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, Int128.valueOf(0L), true, Int128.valueOf(10012L), true)), false)); |
| 371 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(0, 100L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, Int128.valueOf(0L), true, Int128.valueOf(100L), true)), false)); |
| 372 | + |
| 373 | + // fail on corrupted statistics |
| 374 | + assertThatExceptionOfType(ParquetCorruptionException.class) |
| 375 | + .isThrownBy(() -> getDomain(columnDescriptor, type, 10, longColumnStats(100L, 10L), ID, UTC)) |
| 376 | + .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required int32 ShortDecimalColumn (DECIMAL(5,2))\": [min: 100, max: 10, num_nulls: 0] [testFile]"); |
| 377 | + } |
| 378 | + |
| 379 | + @Test |
| 380 | + public void testLongDecimalWithInt64DecimalAnnotation() |
| 381 | + throws Exception |
| 382 | + { |
| 383 | + ColumnDescriptor columnDescriptor = createColumnDescriptor(INT64, decimalType(2, 5), "ShortDecimalColumn"); |
| 384 | + DecimalType type = createDecimalType(20, 2); |
| 385 | + BigInteger maximum = new BigInteger("12345"); |
| 386 | + |
| 387 | + Int128 zero = Int128.ZERO; |
| 388 | + Int128 hundred = Int128.valueOf(100L); |
| 389 | + Int128 max = Int128.valueOf(maximum); |
| 390 | + |
| 391 | + assertThat(getDomain(columnDescriptor, type, 0, null, ID, UTC)).isEqualTo(all(type)); |
| 392 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(maximum.longValue(), maximum.longValue()), ID, UTC)).isEqualTo(singleValue(type, max)); |
| 393 | + |
| 394 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(0L, 100L), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, zero, true, hundred, true)), false)); |
| 395 | + assertThat(getDomain(columnDescriptor, type, 10, longColumnStats(0, 100), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(type, zero, true, hundred, true)), false)); |
| 396 | + |
| 397 | + // fail on corrupted statistics |
| 398 | + assertThatExceptionOfType(ParquetCorruptionException.class) |
| 399 | + .isThrownBy(() -> getDomain(columnDescriptor, type, 10, longColumnStats(100L, 10L), ID, UTC)) |
| 400 | + .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required int64 ShortDecimalColumn (DECIMAL(5,2))\": [min: 100, max: 10, num_nulls: 0] [testFile]"); |
| 401 | + } |
| 402 | + |
322 | 403 | @Test
|
323 | 404 | public void testDouble()
|
324 | 405 | throws Exception
|
@@ -744,6 +825,11 @@ private ColumnDescriptor createColumnDescriptor(PrimitiveTypeName typeName, Stri
|
744 | 825 | return new ColumnDescriptor(new String[] {}, new PrimitiveType(REQUIRED, typeName, columnName), 0, 0);
|
745 | 826 | }
|
746 | 827 |
|
| 828 | + private ColumnDescriptor createColumnDescriptor(PrimitiveTypeName typeName, LogicalTypeAnnotation typeAnnotation, String columnName) |
| 829 | + { |
| 830 | + return new ColumnDescriptor(new String[] {}, new PrimitiveType(REQUIRED, typeName, columnName).withLogicalTypeAnnotation(typeAnnotation), 0, 0); |
| 831 | + } |
| 832 | + |
747 | 833 | private TupleDomain<ColumnDescriptor> getEffectivePredicate(ColumnDescriptor column, VarcharType type, Slice value)
|
748 | 834 | {
|
749 | 835 | ColumnDescriptor predicateColumn = new ColumnDescriptor(column.getPath(), column.getPrimitiveType(), 0, 0);
|
|
0 commit comments