I'm encountering the same issue. Parquet files on S3 that import correctly with h2o version 3.28.1.1 do not parse correctly with 3.28.1.2. In the code below I import the same parquet file with both versions and include the DEBUG-level logs for each. The logs show that 3.28.1.2 attempts to parse the parquet file as a CSV.
library(data.table)
library(h2o, lib.loc = file.path(.libPaths()[1], "h2o-3.28.1.1"))
h2o.init()
# 1. Downloaded a copy of the parquet file from:
#    https://s3.amazonaws.com/h2o-public-test-data/e2e-testing/dataset/file-format/parquet_file.parquet
# 2. Moved it to Scality/S3
# 3. Import works on h2o <= 3.28.1.1
x <- h2o.importFile(path = "s3://path/to/file/parquet_file.parquet")
#> x
# X customer_id is_churn city age gender registration_method registration_date plan_list_price avg_amount_paid auto_renew times_under_paid num_transactions num_payment_methods common_payment_method age2
# 1 4 m5uYkcpwZnNbCVoeT2OBWQGoCbWoQJfnpT6zxZMKjlY= 0 13 26 female 3 1.383091e+12 143.2692 149.0000 YES 0 26 1 40 3.258097
# 2 7 8+E0KjVTEOWqQ+WMpoz/Zx1j3zNlaRGh0MQKbyP/NeQ= 0 22 32 male 4 1.456358e+12 154.8182 154.8182 YES 0 11 2 40 3.465736
# 3 8 gcXPAp5mnOJYYAXx0DUGPH5WiFUynJgFpAqCkEHMF6w= 0 1 30 female 9 1.439510e+12 141.1579 141.1579 NO 0 19 2 38 3.401197
# 4 9 B+MnfvGmATwy6wDMXNR43lCKPathx6RaFNNc0fxe9L0= 0 8 32 male 3 1.447546e+12 246.5000 246.5000 YES 0 4 3 36 3.465736
# 5 10 awHJPaDlKLZL1rQtCLAbv/JHEBienVYVULrYmDgzdpo= 0 5 22 male 7 1.456531e+12 149.0000 149.0000 YES 0 13 1 41 3.091042
# 6 11 FzfNguMdUBc79Iiwd9qWWg0GAH3leUVwQHa2RCwdyoA= 0 15 23 male 7 1.396483e+12 144.7857 150.1071 YES 0 28 2 41 3.135494
#
# [14976 rows x 16 columns]
# From h2o.flow
# DEBUG view
# 01-11 08:31:39.314
10.194.118.110:24947 #9803 #33492-34 INFO: GET /3/ImportFiles, parms: {pattern=, path=s3://path/to/file/parquet_file.parquet}
# 01-11 08:31:39.315
10.194.118.110:24947 #9803 #33492-34 INFO: ImportS3 processing (s3://path/to/file/parquet_file.parquet)
# 01-11 08:31:39.586
10.194.118.110:24947 #9803 #33492-34 DEBUG: S3 endpoint specified: https://obs/path/to/endpoint.com
# 01-11 08:31:39.586
10.194.118.110:24947 #9803 #33492-34 DEBUG: S3 path style access enabled
# 01-11 08:31:39.894
10.194.118.110:24947 #9803 #33492-34 DEBUG: write-lock s3://path/to/file/parquet_file.parquet by job null
# 01-11 08:31:39.910
10.194.118.110:24947 #9803 #33492-34 DEBUG: update write-locked s3://path/to/file/parquet_file.parquet by job null
# 01-11 08:31:39.911
10.194.118.110:24947 #9803 #33492-34 DEBUG: unlock s3://path/to/file/parquet_file.parquet by job null
# 01-11 08:31:39.940
10.194.118.110:24947 #9803 #33492-34 INFO: POST /3/ParseSetup, parms: {skipped_columns=[], source_frames=["s3://path/to/file/parquet_file.parquet"], check_header=0}
# 01-11 08:31:40.588
10.194.118.110:24947 #9803 #e Thread DEBUG: GC CALLBACK: 1641911500588, USED:38.2 MB, CRIT: false
# 01-11 08:31:40.588
10.194.118.110:24947 #9803 #e Thread DEBUG: MemGood: GC CALLBACK, (K/V: 126 B + POJO:38.2 MB + FREE:8.85 GB == MEM_MAX:8.89 GB), desiredKV=5.88 GB NO-OOM
# 01-11 08:31:40.951
10.194.118.110:24947 #9803 #33492-34 INFO: ParseSetup heuristic: cloudSize: 1, cores: 192, numCols: 16, maxLineLength: 16383, totalSize: 991319, localParseSize: 991319, chunkSize: 163830, numChunks: 6, numChunks * cols: 96
# 01-11 08:31:41.006
10.194.118.110:24947 #9803 #33492-34 INFO: POST /3/Parse, parms: {number_columns=16, source_frames=["s3://path/to/file/parquet_file.parquet"], column_types=["Numeric","String","Numeric","Numeric","Numeric","Enum","Numeric","Time","Numeric","Numeric","Enum","Numeric","Numeric","Numeric","Numeric","Numeric"], single_quotes=TRUE, parse_type=PARQUET, destination_frame=parquet_file_parquet.hex_sid_8a5a_1, column_names=["X","customer_id","is_churn","city","age","gender","registration_method","registration_date","plan_list_price","avg_amount_paid","auto_renew","times_under_paid","num_transactions","num_payment_methods","common_payment_method","age2"], delete_on_done=TRUE, check_header=1, separator=124, blocking=FALSE, skipped_columns=[], na_strings=[], chunk_size=163830, decrypt_tool=NULL}
# 01-11 08:31:41.018
10.194.118.110:24947 #9803 #33492-34 INFO: Total file size: 968.1 KB
# 01-11 08:31:41.033
10.194.118.110:24947 #9803 #33492-34 INFO: Parse chunk size 163830
# 01-11 08:31:41.038
10.194.118.110:24947 #9803 #33492-34 DEBUG: write-lock parquet_file_parquet.hex_sid_8a5a_1 by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:41.038
10.194.118.110:24947 #9803 #33492-34 DEBUG: shared-read-lock s3://path/to/file/parquet_file.parquet by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:41.288
10.194.118.110:24947 #9803 FJ-2-15 DEBUG: Key s3://path/to/file/parquet_file.parquet will be parsed using method DistributedParse.
# 01-11 08:31:42.005
10.194.118.110:24947 #9803 FJ-3-29 INFO: Processing 1 blocks of chunk #2
# 01-11 08:31:43.054
10.194.118.110:24947 #9803 FJ-3-29 DEBUG: lock-then-delete s3://path/to/file/parquet_file.parquet by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:43.070
10.194.118.110:24947 #9803 FJ-1-15 DEBUG: update write-locked parquet_file_parquet.hex_sid_8a5a_1 by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:43.101
10.194.118.110:24947 #9803 FJ-1-15 INFO: Parse result for parquet_file_parquet.hex_sid_8a5a_1 (14976 rows, 16 columns):
# 01-11 08:31:43.151
10.194.118.110:24947 #9803 FJ-1-15 INFO: ColV2 type min max mean sigma NAs constant cardinality
# 01-11 08:31:43.152
10.194.118.110:24947 #9803 FJ-1-15 INFO: X: numeric 4.00000 39993.0 20133.0 11603.7
# 01-11 08:31:43.152
10.194.118.110:24947 #9803 FJ-1-15 INFO: customer_id: string
# 01-11 08:31:43.153
10.194.118.110:24947 #9803 FJ-1-15 INFO: is_churn: numeric 0.00000 1.00000 0.0922810 0.289432
# 01-11 08:31:43.153
10.194.118.110:24947 #9803 FJ-1-15 INFO: city: numeric 1.00000 22.0000 10.9635 5.90846
# 01-11 08:31:43.153
10.194.118.110:24947 #9803 FJ-1-15 INFO: age: numeric -43.0000 45.0000 27.2676 5.98002
# 01-11 08:31:43.153
10.194.118.110:24947 #9803 FJ-1-15 INFO: gender: factor female male 360 2
# 01-11 08:31:43.153
10.194.118.110:24947 #9803 FJ-1-15 INFO: registration_method: numeric 3.00000 13.0000 6.82365 2.54578
# 01-11 08:31:43.154
10.194.118.110:24947 #9803 FJ-1-15 INFO: registration_date: time 2004-03-25 1 2017-02-24 1
# 01-11 08:31:43.154
10.194.118.110:24947 #9803 FJ-1-15 INFO: plan_list_price: numeric 0.00000 1788.00 176.994 190.111
# 01-11 08:31:43.154
10.194.118.110:24947 #9803 FJ-1-15 INFO: avg_amount_paid: numeric 0.00000 1788.00 183.403 189.063
# 01-11 08:31:43.154
10.194.118.110:24947 #9803 FJ-1-15 INFO: auto_renew: factor NO YES 2
# 01-11 08:31:43.154
10.194.118.110:24947 #9803 FJ-1-15 INFO: times_under_paid: numeric 0.00000 10.0000 0.245059 1.14378
# 01-11 08:31:43.155
10.194.118.110:24947 #9803 FJ-1-15 INFO: num_transactions: numeric 1.00000 61.0000 17.2105 8.30895
# 01-11 08:31:43.155
10.194.118.110:24947 #9803 FJ-1-15 INFO: num_payment_methods: numeric 1.00000 6.00000 1.36311 0.650844
# 01-11 08:31:43.155
10.194.118.110:24947 #9803 FJ-1-15 INFO: common_payment_method: numeric 3.00000 41.0000 37.3642 4.08064
# 01-11 08:31:43.155
10.194.118.110:24947 #9803 FJ-1-15 INFO: age2: numeric 1.60944 3.80666 3.28246 0.218947 3
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: Chunk compression summary:
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: Chunk Type Chunk Name Count Count Percentage Size Size Percentage
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C0D Constant double 32 66.667 % 2.5 KB 0.201 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: CBS Binary 1 2.083 % 1.9 KB 0.153 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: CXI Sparse Integers 2 4.167 % 7.5 KB 0.607 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C1 1-Byte Integers 1 2.083 % 14.7 KB 1.183 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C1N 1-Byte Integers (w/o NAs) 5 10.417 % 73.5 KB 5.916 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C1S 1-Byte Fractions 1 2.083 % 14.7 KB 1.184 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C2S 2-Byte Fractions 1 2.083 % 29.3 KB 2.362 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C8 8-byte Integers 1 2.083 % 117.1 KB 9.428 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: CStr Strings 1 2.083 % 716.7 KB 57.722 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: CUD Unique Reals 1 2.083 % 29.6 KB 2.386 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: C8D 64-bit Reals 2 4.167 % 234.1 KB 18.857 %
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: Frame distribution summary:
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: Size Number of Rows Number of Chunks per Column Number of Chunks
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO:
10.194.118.110:24947 1.2 MB 14976 3 48
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: mean 1.2 MB 14976.000000 3.000000 48.000000
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: min 1.2 MB 14976.000000 3.000000 48.000000
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: max 1.2 MB 14976.000000 3.000000 48.000000
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: stddev 0 B 0.000000 0.000000 0.000000
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 INFO: total 1.2 MB 14976 3 48
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 DEBUG: update write-locked parquet_file_parquet.hex_sid_8a5a_1 by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:43.168
10.194.118.110:24947 #9803 FJ-1-15 DEBUG: unlock parquet_file_parquet.hex_sid_8a5a_1 by job $03010ac2766e7461ffffffff$_ab94c2db132cd017b853b2ab63d24435
# 01-11 08:31:44.161
10.194.118.110:24947 #9803 #33492-34 INFO: GET /3/Frames/parquet_file_parquet.hex_sid_8a5a_1, parms: {row_count=10}
# 01-11 08:31:47.164
10.194.118.110:24947 #9803 #mCleaner DEBUG: MemGood: preclean, (K/V: 126 B + POJO:38.2 MB + FREE:8.85 GB == MEM_MAX:8.89 GB), desiredKV=5.89 GB NO-OOM
# 01-11 08:31:47.164
10.194.118.110:24947 #9803 #mCleaner DEBUG: H(cached:1M, eldest:1641911501038L < +4ms <...{47ms}...< +6016ms < +6126) DESIRED=6031M dirtysince=7261 force=false clean2age=5000
# 01-11 08:31:47.165
10.194.118.110:24947 #9803 #mCleaner DEBUG: MemGood: postclean, (K/V: 126 B + POJO:38.2 MB + FREE:8.85 GB == MEM_MAX:8.89 GB), desiredKV=5.89 GB NO-OOM
# 01-11 08:31:47.165
10.194.118.110:24947 #9803 #mCleaner DEBUG: Cleaner pass took: 0.001 sec, spilled Zero in 0 usecH(cached:1M, eldest:1641911501042L < +0ms <...{47ms}...< +6016ms < +6123) diski_o=Zero , freed=0M, DESIRED=6031M
# 01-11 08:31:52.166
10.194.118.110:24947 #9803 #mCleaner DEBUG: MemGood: preclean, (K/V: 126 B + POJO:38.2 MB + FREE:8.85 GB == MEM_MAX:8.89 GB), desiredKV=5.90 GB NO-OOM
# 01-11 08:31:52.166
10.194.118.110:24947 #9803 #mCleaner DEBUG: H(cached:1M, eldest:1641911501042L < +0ms <...{86ms}...< +11008ms < +11124) DESIRED=6039M dirtysince=9492 force=false clean2age=5000
# 01-11 08:31:52.166
10.194.118.110:24947 #9803 #mCleaner DEBUG: MemGood: postclean, (K/V: 126 B + POJO:38.2 MB + FREE:8.85 GB == MEM_MAX:8.89 GB), desiredKV=5.90 GB NO-OOM
# 01-11 08:31:52.166
10.194.118.110:24947 #9803 #mCleaner DEBUG: Cleaner pass took: 0.001 sec, spilled Zero in 0 usecH(cached:1M, eldest:1641911501042L < +0ms <...{86ms}...< +11008ms < +11124) diski_o=Zero , freed=0M, DESIRED=6039M
library(data.table)
library(h2o, lib.loc = file.path(.libPaths()[1], "h2o-3.28.1.2")) # incrementing forward one version from 3.28.1.1
h2o.init()
x <- h2o.importFile(path = "s3://path/to/file/parquet_file.parquet")
# > x
# C1 C2
# 1 PAR1Q\004Q<0x80D0>\016Q<0xC8B0>\aLQ<0x80EA> Q\004!
# 2 \a\004 <NA>
# 3 \b <NA>
# 4 \b <NA>
# 5 \b <NA>
# 6 \b <NA>
#
# [14230 rows x 2 columns]
# From h2o.flow
# DEBUG view **** Note: the parms of POST /3/Parse below specify parse_type=CSV (with 3.28.1.1 they specified parse_type=PARQUET)
# 01-11 09:15:14.449
10.194.118.110:28483 #0344 #33492-29 INFO: GET /3/ImportFiles, parms: {pattern=, path=s3://path/to/file/parquet_file.parquet}
# 01-11 09:15:14.451
10.194.118.110:28483 #0344 #33492-29 INFO: ImportS3 processing (s3://path/to/file/parquet_file.parquet)
# 01-11 09:15:14.743
10.194.118.110:28483 #0344 #33492-29 DEBUG: S3 endpoint specified: https://obs/path/to/endpoint.com
# 01-11 09:15:14.743
10.194.118.110:28483 #0344 #33492-29 DEBUG: S3 path style access enabled
# 01-11 09:15:15.045
10.194.118.110:28483 #0344 #33492-29 DEBUG: write-lock s3://path/to/file/parquet_file.parquet by job null
# 01-11 09:15:15.062
10.194.118.110:28483 #0344 #33492-29 DEBUG: update write-locked s3://path/to/file/parquet_file.parquet by job null
# 01-11 09:15:15.063
10.194.118.110:28483 #0344 #33492-29 DEBUG: unlock s3://path/to/file/parquet_file.parquet by job null
# 01-11 09:15:15.099
10.194.118.110:28483 #0344 #33492-29 INFO: POST /3/ParseSetup, parms: {skipped_columns=[], source_frames=["s3://path/to/file/parquet_file.parquet"], check_header=0}
# 01-11 09:15:15.853
10.194.118.110:28483 #0344 #33492-29 INFO: ParseSetup heuristic: cloudSize: 1, cores: 192, numCols: 2, maxLineLength: 16383, totalSize: 991319, localParseSize: 991319, chunkSize: 163830, numChunks: 6, numChunks * cols: 12
# 01-11 09:15:15.910
10.194.118.110:28483 #0344 #33492-29 INFO: POST /3/Parse, parms: {number_columns=2, source_frames=["s3://path/to/file/parquet_file.parquet"], column_types=["Enum","Enum"], single_quotes=FALSE, parse_type=CSV, destination_frame=parquet_file_parquet.hex_sid_8014_1, column_names=[""], delete_on_done=TRUE, check_header=-1, separator=1, blocking=FALSE, skipped_columns=[], na_strings=[], chunk_size=163830, decrypt_tool=NULL}
# 01-11 09:15:16.048
10.194.118.110:28483 #0344 #e Thread DEBUG: GC CALLBACK: 1641914116048, USED:30.7 MB, CRIT: false
# 01-11 09:15:16.049
10.194.118.110:28483 #0344 #e Thread DEBUG: MemGood: GC CALLBACK, (K/V: 126 B + POJO:30.7 MB + FREE:8.86 GB == MEM_MAX:8.89 GB), desiredKV=5.90 GB NO-OOM
# 01-11 09:15:16.052
10.194.118.110:28483 #0344 #33492-29 INFO: Total file size: 968.1 KB
# 01-11 09:15:16.073
10.194.118.110:28483 #0344 #33492-29 INFO: Parse chunk size 163830
# 01-11 09:15:16.079
10.194.118.110:28483 #0344 #33492-29 DEBUG: write-lock parquet_file_parquet.hex_sid_8014_1 by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:16.079
10.194.118.110:28483 #0344 #33492-29 DEBUG: shared-read-lock s3://path/to/file/parquet_file.parquet by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:16.332
10.194.118.110:28483 #0344 FJ-2-15 INFO: Key s3://path/to/file/parquet_file.parquet will be parsed using method DistributedParse.
# 01-11 09:15:16.773
10.194.118.110:28483 #0344 FJ-3-99 DEBUG: lock-then-delete s3://path/to/file/parquet_file.parquet by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:17.157
10.194.118.110:28483 #0344 FJ-1-15 INFO: Found categoricals with non-UTF-8 characters or NULL character in the 1st column. Converting unrecognized characters into hex: <0xAD>84
# h, @ p <0xB890>, UA!<0xC0>9<0xA4>, 'w&a<0xD3>( @, <0xA8>m<0xAA>!<0xB2>* !EhUv # 5<0x96>, <0xB0>, <0xB2>
# , <0xA8>NJpIgpOqJ62SFYzprBFfnCLtrp4+xprioky0cboz5sI `<0xA8>q8afzBt+7oqmpK7/fpznxYKBYpFwJWbvgQV0WpQ10m0 0<0xA0>GVHGNSIPc+U+g6VuYgvy37XnG9CRKR2Wtukwwj4RN¡<0xB0>ñ<0xA8>SUf7FhWzPUuHwP+EDUvnICiMPrgv4IyDPga85gBTEWc `<0xA8>j23m1mbbbGlfHZZQNAd+iRFRcA4JUiI6p86IxhUXH8E 0<0xA0>PFxBO8i1WN3rI3JgZRHbm5cg5u0mTb/qpp0/ct6T2¡<0xC0>-<0xA8>AsVn82nJW85A+baRkMgJG+ASgavX8dJTQ3UiW+gIoUU `<0xA0>fKl/cWvQlU/IAAnH3k302fr96HJ8SsPhkTtKOqF9Di0<0xA8>9qqfaUUbZFGAFJFjwcIFkB+8SUlQEPaWpLDJaBcjpdI `<0xA0>0nsJ2bIhg5nw5qdiJsnufSy0locv+EtOsragknp0O¡<0x80>^<0xA8>MI5abbyRlW7BRRHOzXFklSRBHQ1FS+j+doLz8/GPTC8 `<0xA0>KYVUFFy0q5xp9goCWgmg26yxRxrOwupdaUXUyF/4S¡0x<0xA0>K5pQIecSrsN3DMJA17ZkaLU2bOuL5dM7hi1Y46o0H¡pG<0xA8>377OnBOaML06gSTrejJ9IyoT4zG585qrdpRNH7U+Kgg <0x90A0>24+ikkIhsBgupUsEPOdTbyK01p3OjjHGGBEjr0+BH<0xADA0A8>Ga/yKzJ80Bck5nbLQd01RVeB2Y7TrTWjpLMvnikl1xU `<0xA8>CrVHQiJfE+/WbiKTkZV2V9b1qNQFYHtmPkokimffnvc 0<0xA8>4EeVCYYQriV17UdqHKtkPmrh7sTEXiKE3MFplh3uhMM 0<0xA8>bGlrbF9TMYjZZPcvJdmv5tMjY30J3wDjVT7sW60IoLk 0dYJK6rnm8KYJky+Str1+yGq78iB <0xC7>±0GNQIHf35FV7aI 0<0xA8>kLYm/Uev1wxs5B+w4ZAU0TrOJ5a0o83O3nJf+W1NFV4 0<0xA0>s9dFzA5pGgisO1cOZ9c6My721O9O4DPCY8vAgac5W¡<0x8082A0>avyzIcU8nJYuAphjY3IrhDvYjddYdJATZrPzUjclDá<0xD0> <0xA0>fF5UhqC6LKOBHSDqcPjhI5N8eyJmtAM87XvOs6AAya a<0xA8>saqSgR8Yzk6Ys86hJcXwzEj8gWhrGdrOqkJT1e7VtGM <0xC0A8>yXgF5CnWY8P8+47G2zQZjh96ONGmcX0AoezlFWLRbyc 0<0xA0>ObqwxDHjuAifDuZ25hee0MViBceuH4Tuwu/4SGsSj¡`á<0xA8>9/OxmiKIceqgdxX1VOFqIuu256St/oZdUx41qIieNI4 `<0xA0>StlGl0CKDgIA6fZMpGgkhj7FF3yp+hzCPTkV5S6Bg¡<0xB0>a<0xA8>Qiga0i6KHCF1cwsR5X7j5iN7ACowzU+/lofLQfSFekY `<0xA0>n2706L9g53tPP0oClx+c93qWfIXbT0XlWWGgb7Txk¡P, M<0x88><0xC4>¡8Q, - à W <0xA3>-<0xA0> dÑ , ...
# 01-11 09:15:17.214
10.194.118.110:28483 #0344 FJ-1-15 INFO: Found categoricals with non-UTF-8 characters or NULL character in the 2nd column. Converting unrecognized characters into hex: (La,<0xC3C3>@<0xD0>p(, <0xE4A1>Kq<0xD8> 0 <0xF0>¡A2, * E )P <0xCE> &<0x96>+<0xC1BA>*l+ <0xC9>uH!^aK 8!<0xB2A1>{, <0xC4>7U<0x84C1>X} ar
# <0xB6>) 5<0xEAA5>@Qb<0xA9A2>Q<0xAE> à "<0xA2>1 <0x8CCD> F E MvE<0xD8>5 %<0xDC>U<0xD8>!<0x96> Q8<0xE5>à Qb<0xC9>J1<0xCEA1>Y9<0x88>%&u<0x9CC5> 54!<0x8A>‘U>e<0xF0>5<0xB2> <0xC4>.8, <0x9C>@ <0x80>E<0x8F> , <0xA5> <0xA8>, 6D3<0x8982> A$X !, _*<0x9C>&<0xA1>u‘<0xFCE1>‘<0xB9>x b.T&<0xCA>á!<0xEA>= <0xA1>Y<0xB9A2>a mr F}:E<0xAE>Q<0xD2>%<0xEA>Q*!& <0x95>|<0xA9>a1<0x88>a<0xE6> <0xED>b <0xC4>&
# 01-11 09:15:17.214
10.194.118.110:28483 #0344 FJ-1-15 INFO: !<0x96BDF6>E<0xBC>Qb!> <0xF1E0>%<0xAF>Q<0xEE> F<0xF9>à C*<0xA6> !<0xC0> u,<0x89>¡M<0xE6> pQ <0xA1>a9 AL]Z<0x85DE>Q<0xE0>e<0x80>Uh!P "<0xC6>
# 01-11 09:15:17.214
10.194.118.110:28483 #0344 FJ-1-15 INFO: EZQ<0xE0>!<0x86>¡q *<A Tu<0x80898A>áx CQ&> an -<0xF8E9>p<0x95B4E1F9>Q<0xC4>!l&H AAa<0xBB>I0 b&T*!&, <0x81>H<D <0xD0>C @, <0x9B80>, ...
# 01-11 09:15:17.238
10.194.118.110:28483 #0344 FJ-1-15 DEBUG: update write-locked parquet_file_parquet.hex_sid_8014_1 by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:17.280
10.194.118.110:28483 #0344 FJ-1-15 INFO: Parse result for parquet_file_parquet.hex_sid_8014_1 (14230 rows, 2 columns):
# 01-11 09:15:17.295
10.194.118.110:28483 #0344 FJ-1-15 INFO: ColV2 type min max mean sigma NAs constant cardinality
# 01-11 09:15:17.296
10.194.118.110:28483 #0344 FJ-1-15 INFO: C1: factor 񃋳 @X& P AHa 346 7120
# 01-11 09:15:17.296
10.194.118.110:28483 #0344 FJ-1-15 INFO: C2: factor Q Q<0x8 ̡ ‘4 12535 1613
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: Chunk compression summary:
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: Chunk Type Chunk Name Count Count Percentage Size Size Percentage
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: CXI Sparse Integers 4 28.571 % 532 B 1.245 %
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: C2 2-Byte Integers 10 71.429 % 41.2 KB 98.755 %
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: Frame distribution summary:
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: Size Number of Rows Number of Chunks per Column Number of Chunks
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO:
10.194.118.110:28483 41.7 KB 14230 7 14
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: mean 41.7 KB 14230.000000 7.000000 14.000000
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: min 41.7 KB 14230.000000 7.000000 14.000000
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: max 41.7 KB 14230.000000 7.000000 14.000000
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: stddev 0 B 0.000000 0.000000 0.000000
# 01-11 09:15:17.315
10.194.118.110:28483 #0344 FJ-1-15 INFO: total 41.7 KB 14230 7 14
# 01-11 09:15:17.316
10.194.118.110:28483 #0344 FJ-1-15 DEBUG: update write-locked parquet_file_parquet.hex_sid_8014_1 by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:17.316
10.194.118.110:28483 #0344 FJ-1-15 DEBUG: unlock parquet_file_parquet.hex_sid_8014_1 by job $03010ac2766e446fffffffff$_a2686455dbd382667f61804300a94f1b
# 01-11 09:15:18.223
10.194.118.110:28483 #0344 #33492-29 INFO: GET /3/Frames/parquet_file_parquet.hex_sid_8014_1, parms: {row_count=10}
# 01-11 09:15:23.870
10.194.118.110:28483 #0344 #mCleaner DEBUG: MemGood: preclean, (K/V: 126 B + POJO:30.7 MB + FREE:8.86 GB == MEM_MAX:8.89 GB), desiredKV=5.90 GB NO-OOM
# 01-11 09:15:23.871
10.194.118.110:28483 #0344 #mCleaner DEBUG: H(cached:2M, eldest:1641914116085L < +0ms <...{60ms}...< +7680ms < +7786) DESIRED=6044M dirtysince=8814 force=false clean2age=5000
# 01-11 09:15:23.872
10.194.118.110:28483 #0344 #mCleaner DEBUG: MemGood: postclean, (K/V: 126 B + POJO:30.7 MB + FREE:8.86 GB == MEM_MAX:8.89 GB), desiredKV=5.90 GB NO-OOM
# 01-11 09:15:23.872
10.194.118.110:28483 #0344 #mCleaner DEBUG: Cleaner pass took: 0.001 sec, spilled Zero in 0 usecH(cached:2M, eldest:1641914116085L < +0ms <...{60ms}...< +7680ms < +7787) diski_o=Zero , freed=0M, DESIRED=6044M