I'm unable to stream parsed nginx ingress logs into Google BigQuery.
**** Fluent Bit configuration file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: logging
  labels:
    k8s-app: fluent-bit
data:
  # Configuration files: server, input, filters, and output
  # =======================================================
  fluent-bit.conf: |
    [SERVICE]
        Flush         1
        Log_Level     info
        Daemon        off
        Parsers_File  parsers.conf
        HTTP_Server   On
        HTTP_Listen   0.0.0.0
        HTTP_Port     2020
        # Plugins_File /fluent-bit/etc/plugins.conf

    @INCLUDE input-kubernetes.conf
    @INCLUDE filter-kubernetes.conf
    @INCLUDE output-bigquery.conf

  input-kubernetes.conf: |
    [INPUT]
        Name              tail
        Path              /var/log/containers/nginx-ingress*.log
        Tag               kube.*
        Parser            nginx
        DB                /var/log/flb_kube.db
        Mem_Buf_Limit     50MB
        Skip_Long_Lines   On
        Refresh_Interval  10
        # Control the log line length
        Buffer_Chunk_Size 256k
        Buffer_Max_Size   10240k
        # Use Docker mode to handle multi-line messages emitted by Docker
        Docker_Mode       On

  replace_info.lua: |
    function replace_sensitive_info(tag, timestamp, record)
      -- mask social security numbers
      record["log"] = string.gsub(record["log"], "%d%d%d%-*%d%d%-*%d%d%d%d", "xxx-xx-xxxx")
      -- mask credit card numbers
      record["log"] = string.gsub(record["log"], "%d%d%d%d *%d%d%d%d *%d%d%d%d *%d%d%d%d", "xxxx xxxx xxxx xxxx")
      -- mask email addresses
      record["log"] = string.gsub(record["log"], "[%w+%.%-_]+@[%w+%.%-_]+%.%a%a+", "us...@email.tld")
      return 1, timestamp, record
    end

  filter-kubernetes.conf: |
    [FILTER]
        Name                kubernetes
        Match               kube.*
        Kube_Tag_Prefix     kube.var.log.containers.
        # Try to merge the log messages
        Merge_Log           On
        Merge_Log_Key       log_processed
        K8S-Logging.Parser  On
        #K8S-Logging.Exclude On

    # ### sample log scrubbing filters
    #[FILTER]
    #    Name   lua
    #    Match  kube.*
    #    # Lua script to redact sensitive data in log messages
    #    script replace_info.lua
    #    call   replace_sensitive_info
    # ### end sample log scrubbing

  output-bigquery.conf: |
    [OUTPUT]
        # Write the log records that still carry the 'kube.*' tag.
        Name                       bigquery
        Match                      kube.*
        # The following fields are required; they allow filtering
        # based on resource types. Change them to match your setup.
        google_service_credentials /etc/bigquery-volume/bigquery.json
        project_id                 a8platformdev
        dataset_id                 fluentbit
        table_id                   nginx-ingress
        fetch_schema               true

  parsers.conf: |
    [PARSER]
        Name        k8s-nginx-ingress
        Format      regex
        Regex       ^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z

    [PARSER]
        Name        nginx
        Format      regex
        Regex       ^(?<httpRequest>(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")? (?<request_time>[^ ]*) (?<upstream_time>[^ ]*) (?<pipe>[^ ]*))$
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z

    [PARSER]
        Name        json
        Format      json
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z

    [PARSER]
        Name        docker
        Format      json
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L
        Time_Keep   On
        Decode_Field_As escaped log

    [PARSER]
        Name        syslog
        Format      regex
        Regex       ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
        Time_Key    time
        Time_Format %b %d %H:%M:%S
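For reference, one way to sanity-check the nginx parser outside the cluster is a local Fluent Bit dry run against a copy of an ingress log file (just a sketch; the sample file path is a placeholder and parsers.conf is the file from the ConfigMap above):

    fluent-bit -R parsers.conf \
      -i tail -p path=/tmp/nginx-ingress-sample.log -p parser=nginx \
      -o stdout -p format=json_lines

If the regex does not match, the records come through with only a raw "log" field instead of the parsed keys.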
**** Nginx log format (nginx.conf).
log_format json_combined escape=json '{ "httpRequest": {'
    '"requestMethod": "$request_method", '
    '"requestUrl": "$request_uri", '
    '"responseSize": "$bytes_sent", '
    '"status": "$status", '
    '"userAgent": "$http_user_agent", '
    '"remoteIp": "$remote_addr", '
    '"referer": "$http_referer", '
    '"host": "$host", '
    '"requestTime": "$request_time", '
    '"upstreamResponseTime": "$upstream_response_time" }, '
    '"time": "$time_local" }';

access_log /var/log/nginx/access.log json_combined;
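With escape=json, each access-log entry comes out as one JSON object per line, roughly like the following (all values are made up, only to show the shape the Fluent Bit parsers have to deal with):

    { "httpRequest": {"requestMethod": "GET", "requestUrl": "/healthz", "responseSize": "312", "status": "200", "userAgent": "curl/7.68.0", "remoteIp": "10.0.0.1", "referer": "", "host": "example.com", "requestTime": "0.002", "upstreamResponseTime": "0.001" }, "time": "10/Feb/2021:10:15:32 +0000" }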
**** BigQuery table schema.