#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Job properties
job.name=SFTP_example_job
job.group=SFTPExampleJobs
job.description=An example config file for SFTP jobs

# Source properties
source.class=gobblin.source.extractor.extract.sftp.SftpSource
# The SftpSource class looks for data on the SFTP server under this directory.
source.filebased.data.directory=/home/user1/airline
# The SftpSource class performs an "ls {source.filebased.data.directory}/*{source.entity}*"
# to determine which files to download.
source.entity=200
# SftpSource assumes that all data read is in this schema.
# The schema must stay on a single line (or use trailing-backslash continuations);
# a raw line break inside the JSON truncates the value.
# Minimal three-column sample schema, kept for reference:
#source.schema=[{"columnName":"col1","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"col2","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"col3","comment":"","isNullable":"true","dataType":{"type":"string"}}]
source.schema=[{"columnName":"month","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"dayofmonth","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"dayofweek","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"deptime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"crsdeptime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"arrtime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"crsarrtime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"uniquecarrier","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"flightnum","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"tailnum","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"actualelapsedtime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"crselapsedtime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"airtime","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"arrdelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"depdelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"origin","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"dest","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"distance","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"taxiin","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"taxiout","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"cancelled","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"cancellationcode","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"diverted","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"carrierdelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"weatherdelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"nasdelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"securitydelay","comment":"","isNullable":"true","dataType":{"type":"string"}},{"columnName":"lateaircraftdelay","comment":"","isNullable":"true","dataType":{"type":"string"}}]

# Extract properties
extract.table.name=Example
extract.namespace=com.linkedin.example.sftp
extract.table.type=append_only

# Converter properties
# By specifying these converters, Gobblin will take each record returned by SftpSource,
# split it on the delimiter configured below, and then convert it to an Avro record.
converter.classes=gobblin.converter.csv.CsvToJsonConverter,gobblin.converter.avro.JsonIntermediateToAvroConverter
converter.csv.to.json.delimiter=,

# Source connection properties
# The current implementation of SftpSource only allows Gobblin to authenticate to the
# SFTP server via public-key authentication. This requires a private key file and a
# known hosts file.
#source.conn.private.key=/user2/ssh_key/id_rsa_2048_a
source.conn.private.key=hdfs://localhost:8020/tmp/sftp_staging/user2/id_rsa_2048_a
source.conn.known.hosts=/tmp/sftp_staging/hosts
# The host and port to connect to, along with the username to connect with.
# NOTE(review): this URI has an empty host and uses port 21, which is the conventional
# FTP port (SSH/SFTP is conventionally 22) - confirm the intended value for your server.
source.filebased.fs.uri=sftp:///:21
source.conn.username=user1
# NOTE(review): the comment above says only public-key authentication is supported, yet a
# password is set here - confirm whether it is actually used. This is a placeholder value;
# never commit real credentials.
source.conn.password=password
#source.conn.username=user2
source.conn.host=vm