Below are the logs for a problematic instance (upgraded on the 30th of June, restarted on the 7th of July):
level=warn ts=2020-07-07T16:21:32.123Z caller=main.go:291 deprecation_notice="'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead."
level=info ts=2020-07-07T16:21:32.124Z caller=main.go:337 msg="Starting Prometheus" version="(version=2.19.2, branch=HEAD, revision=c448ada63d83002e9c1d2c9f84e09f55a61f0ff7)"
level=info ts=2020-07-07T16:21:32.124Z caller=main.go:338 build_context="(go=go1.14.4, user=root@dd72efe1549d, date=20200626-09:02:20)"
level=info ts=2020-07-07T16:21:32.124Z caller=main.go:339 host_details="(Linux 4.15.0-45-generic #48-Ubuntu SMP Tue Jan 29 16:28:13 UTC 2019 x86_64 xqpao-prometheus-0 (none))"
level=info ts=2020-07-07T16:21:32.124Z caller=main.go:340 fd_limits="(soft=1048576, hard=1048576)"
level=info ts=2020-07-07T16:21:32.124Z caller=main.go:341 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2020-07-07T16:21:32.156Z caller=web.go:524 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2020-07-07T16:21:32.156Z caller=main.go:678 msg="Starting TSDB ..."
level=info ts=2020-07-07T16:21:32.242Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593432000000 maxt=1593439200000 ulid=01EC09G6FSY1306PKGCFCE3JFS
level=info ts=2020-07-07T16:21:32.250Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593439200000 maxt=1593446400000 ulid=01EC0GHPQBGQXD5WEYKXBDRDHR
level=info ts=2020-07-07T16:21:32.258Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593446400000 maxt=1593453600000 ulid=01EC0Q7MS6A1VFBJDSMTNE2NMG
level=info ts=2020-07-07T16:21:32.270Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593453600000 maxt=1593460800000 ulid=01EC0Y37H73PT3MXAY7PKH321K
level=info ts=2020-07-07T16:21:32.274Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593460800000 maxt=1593468000000 ulid=01EC14YZ1CBPN7CCC883Z0XAK2
level=info ts=2020-07-07T16:21:32.284Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593468000000 maxt=1593475200000 ulid=01EC1BTP183KP294JWSF0NTZVS
level=info ts=2020-07-07T16:21:32.288Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593475200000 maxt=1593482400000 ulid=01EC1JPDB1YPPSM5SR8EAHV33F
level=info ts=2020-07-07T16:21:32.291Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593482400000 maxt=1593489600000 ulid=01EC1SJ4Q8RFVXN9Q7C6A7E9KX
level=info ts=2020-07-07T16:21:32.294Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593489600000 maxt=1593496800000 ulid=01EC20DW1HD73GFRT26XWW5ZXC
level=info ts=2020-07-07T16:21:32.299Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593496800000 maxt=1593504000000 ulid=01EC279K6PYTEZJM7HT9Q7N7A9
level=info ts=2020-07-07T16:21:32.306Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593504000000 maxt=1593511200000 ulid=01EC2E5AN5541YBED244H0DTFX
level=info ts=2020-07-07T16:21:32.309Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593511200000 maxt=1593518400000 ulid=01EC2N11R7PPCQ4KW2TMD6AMA5
level=info ts=2020-07-07T16:21:32.312Z caller=repair.go:59 component=tsdb msg="Found healthy block" mint=1593518400000 maxt=1593525600000 ulid=01EC2VWRXDYE08Z1RWBGAZFZW2
level=info ts=2020-07-07T16:21:33.140Z caller=head.go:645 component=tsdb msg="Replaying WAL and on-disk memory mappable chunks if any, this may take a while"
level=info ts=2020-07-07T16:21:41.134Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=0 maxSegment=5
level=info ts=2020-07-07T16:21:47.454Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=1 maxSegment=5
level=info ts=2020-07-07T16:21:49.657Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=2 maxSegment=5
level=info ts=2020-07-07T16:21:49.667Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=3 maxSegment=5
level=info ts=2020-07-07T16:21:51.903Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=4 maxSegment=5
level=info ts=2020-07-07T16:21:51.916Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=5 maxSegment=5
level=info ts=2020-07-07T16:21:51.917Z caller=head.go:709 component=tsdb msg="WAL replay completed" duration=18.776595339s
level=info ts=2020-07-07T16:21:52.033Z caller=main.go:694 fs_type=65735546
level=info ts=2020-07-07T16:21:52.033Z caller=main.go:695 msg="TSDB started"
level=info ts=2020-07-07T16:21:52.033Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-07T16:21:52.040Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-07T16:21:52.040Z caller=main.go:646 msg="Server is ready to receive web requests."
level=info ts=2020-07-07T16:21:52.040Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-07T16:21:52.054Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-08T08:28:37.860Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-08T08:28:37.879Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-08T08:28:38.201Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-08T08:28:38.211Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T08:39:45.299Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T08:39:45.310Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T08:39:52.055Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T08:39:52.125Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T09:42:14.002Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T09:42:14.096Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T09:42:52.055Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-09T09:42:52.089Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T11:00:37.902Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T11:00:37.919Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T11:00:52.059Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T11:00:52.084Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T12:00:02.900Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T12:00:02.916Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T12:00:52.055Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-22T12:00:52.071Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T13:06:03.004Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T13:06:03.023Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T13:06:52.054Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T13:06:52.068Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T14:06:27.900Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T14:06:27.911Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T14:06:52.059Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-24T14:06:52.074Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-25T08:14:57.000Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-25T08:14:57.040Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-25T08:15:52.055Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-25T08:15:52.070Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-29T09:08:34.400Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-29T09:08:34.411Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-29T09:09:52.054Z caller=main.go:799 msg="Loading configuration file" filename=/etc/prometheus-shared/prometheus.yaml
level=info ts=2020-07-29T09:09:52.071Z caller=main.go:827 msg="Completed loading of configuration file" filename=/etc/prometheus-shared/prometheus.yaml
The configuration reloads are triggered by an in-house config management tool we wrote to add and remove scrape targets. It works fine and has never caused us any issues in the past.
If I exec into the Prometheus container, the data directory currently looks like this:
/data $ ls -la
total 97
drwxrwsr-x 20 root 2027 4096 Jul 29 14:50 .
drwxr-xr-x 1 root root 4096 Jul 7 16:21 ..
drwxr-sr-x 3 nobody 2027 4096 Jun 29 15:00 01EC09G6FSY1306PKGCFCE3JFS
drwxr-sr-x 3 nobody 2027 4096 Jun 29 16:59 01EC0GHPQBGQXD5WEYKXBDRDHR
drwxr-sr-x 3 nobody 2027 4096 Jun 29 19:00 01EC0Q7MS6A1VFBJDSMTNE2NMG
drwxr-sr-x 3 nobody 2027 4096 Jun 29 20:56 01EC0Y37H73PT3MXAY7PKH321K
drwxr-sr-x 3 nobody 2027 4096 Jun 29 23:00 01EC14YZ1CBPN7CCC883Z0XAK2
drwxr-sr-x 3 nobody 2027 4096 Jun 30 01:00 01EC1BTP183KP294JWSF0NTZVS
drwxr-sr-x 3 nobody 2027 4096 Jun 30 03:00 01EC1JPDB1YPPSM5SR8EAHV33F
drwxr-sr-x 3 nobody 2027 4096 Jun 30 05:00 01EC1SJ4Q8RFVXN9Q7C6A7E9KX
drwxr-sr-x 3 nobody 2027 4096 Jun 30 07:00 01EC20DW1HD73GFRT26XWW5ZXC
drwxr-sr-x 3 nobody 2027 4096 Jun 30 09:00 01EC279K6PYTEZJM7HT9Q7N7A9
drwxr-sr-x 3 nobody 2027 4096 Jun 30 11:00 01EC2E5AN5541YBED244H0DTFX
drwxr-sr-x 3 nobody 2027 4096 Jun 30 13:00 01EC2N11R7PPCQ4KW2TMD6AMA5
drwxr-sr-x 3 nobody 2027 4096 Jun 30 15:00 01EC2VWRXDYE08Z1RWBGAZFZW2
drwxr-sr-x 3 nobody 2027 4096 Jul 7 16:22 01ECN1B8E64NGD2APJE303YHAM.tmp
drwxr-sr-x 2 nobody 2027 4096 Jul 7 16:21 chunks_head
-rw-r--r-- 1 nobody 2027 0 Apr 16 14:17 lock
-rw-r--r-- 1 nobody 2027 20001 Jul 29 14:15 queries.active
drwxr-sr-x 3 root 2027 4096 Apr 16 17:27 thanos
-rw-r--r-- 1 root 2027 452 Jul 29 14:50 thanos.shipper.json
drwxr-sr-x 2 nobody 2027 4096 Jul 23 14:05 wal
Here are the contents of the .tmp block directory:
/data/01ECN1B8E64NGD2APJE303YHAM.tmp $ ls -la
total 175
drwxr-sr-x 3 nobody 2027 4096 Jul 7 16:22 .
drwxrwsr-x 20 root 2027 4096 Jul 29 14:55 ..
drwxr-sr-x 2 nobody 2027 4096 Jul 7 16:22 chunks
-rw-r--r-- 1 nobody 2027 166448 Jul 7 16:22 index
-rw-r--r-- 1 nobody 2027 0 Jul 7 16:22 index_tmp_p
-rw-r--r-- 1 nobody 2027 0 Jul 7 16:22 index_tmp_po
/data/01ECN1B8E64NGD2APJE303YHAM.tmp $ ls -la chunks/
total 158
drwxr-sr-x 2 nobody 2027 4096 Jul 7 16:22 .
drwxr-sr-x 3 nobody 2027 4096 Jul 7 16:22 ..
-rw-r--r-- 1 nobody 2027 152909 Jul 7 16:22 000001
Here are the contents of the chunks_head directory:
/data $ ls -la chunks_head/
total 100953
drwxr-sr-x 2 nobody 2027 4096 Jul 7 16:21 .
drwxrwsr-x 20 root 2027 4096 Jul 29 14:58 ..
-rw-r--r-- 1 nobody 2027 237641 Jun 30 16:46 000001
-rw-r--r-- 1 nobody 2027 23437912 Jul 7 16:20 000002
-rw-r--r-- 1 nobody 2027 79690827 Jul 28 22:00 000003
Here are the contents of the WAL directory:
/data/wal $ ls -la
total 549732
drwxr-sr-x 2 nobody 2027 4096 Jul 23 14:05 .
drwxrwsr-x 20 root 2027 4096 Jul 29 14:54 ..
-rw-r--r-- 1 nobody 2027 134217728 Jun 8 16:09 00000000
-rw-r--r-- 1 nobody 2027 134217728 Jun 24 16:27 00000001
-rw-r--r-- 1 nobody 2027 50593792 Jun 30 16:45 00000002
-rw-r--r-- 1 nobody 2027 0 Jun 30 16:46 00000003
-rw-r--r-- 1 nobody 2027 55246848 Jul 7 16:20 00000004
-rw-r--r-- 1 nobody 2027 134217728 Jul 23 14:05 00000005
-rw-r--r-- 1 nobody 2027 54423530 Jul 29 14:54 00000006