StatefulSet pods stuck in 1/2 state

199 views
Skip to first unread message

Eren Cankurtaran

unread,
May 27, 2021, 5:25:08 AM (5/27/21)
to K8ssandra Users
Hi, I've deployed K8ssandra, but I've run into the following situation:
the k8ssandra-dc1-default-sts-* pods hang in the 1/2 (READY) state.

[root@node1 ~]# kubectl get pods
NAME                                                  READY   STATUS     RESTARTS   AGE
k8ssandra-cass-operator-766b945f65-ntb9s              1/1     Running    0          24m
k8ssandra-dc1-default-sts-0                           1/2     Running    0          24m
k8ssandra-dc1-default-sts-1                           1/2     Running    0          24m
k8ssandra-dc1-default-sts-2                           1/2     Running    0          24m
k8ssandra-dc1-default-sts-3                           1/2     Running    1          24m
k8ssandra-dc1-stargate-7d79856946-qjjl7               0/1     Init:0/1   0          24m
k8ssandra-grafana-dfdb5cc5c-4zq4n                     2/2     Running    0          24m
k8ssandra-kube-prometheus-operator-7dcccdcc86-tv7qc   1/1     Running    0          24m
k8ssandra-reaper-operator-566cdc787-nz5mf             1/1     Running    0          24m
prometheus-k8ssandra-kube-prometheus-prometheus-0     2/2     Running    1          24m


Helm values:
[root@node1 ~]# helm get values k8ssandra
USER-SUPPLIED VALUES:
cassandra:
  allowMultipleNodesPerWorker: false
  cassandraLibDirVolume:
    size: 5Gi
    storageClass: rook-ceph-block
  datacenters:
  - name: dc1
    racks:
    - name: default
    size: 4
  enabled: true
  heap:
    newGenSize: 24G
    size: 24G
  resources:
    limits:
      cpu: 3000m
      memory: 24Gi
    requests:
      cpu: 3000m
      memory: 24Gi
  version: 3.11.10
kube-prometheus-stack:
  grafana:
    adminPassword: admin123
    adminUser: admin
stargate:
  cpuLimMillicores: 1000
  cpuReqMillicores: 200
  enabled: true
  heapMB: 1024
  replicas: 1

[root@node1 ~]# kubectl logs k8ssandra-dc1-default-sts-0 -c cassandra

INFO  [nioEventLoopGroup-2-2] 2021-05-27 09:21:52,340 Cli.java:617 - address=/10.233.96.0:34922 url=/api/v0/probes/readiness status=500 Internal Server Error
INFO  [nioEventLoopGroup-2-1] 2021-05-27 09:22:01,047 Cli.java:617 - address=/10.233.96.0:34954 url=/api/v0/probes/liveness status=200 OK
INFO  [epollEventLoopGroup-170-1] 2021-05-27 09:22:02,337 Clock.java:47 - Using native clock for microsecond precision
WARN  [epollEventLoopGroup-170-2] 2021-05-27 09:22:02,338 AbstractBootstrap.java:452 - Unknown channel option 'TCP_NODELAY' for channel '[id: 0xa37e9fbc]'
WARN  [epollEventLoopGroup-170-2] 2021-05-27 09:22:02,339 Loggers.java:39 - [s165] Error connecting to Node(endPoint=/tmp/cassandra.sock, hostId=null, hashCode=762c7772), trying next node (FileNotFoundException: null)

[root@node1 ~]# kubectl get pvc
NAME                                      STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS      AGE
server-data-k8ssandra-dc1-default-sts-0   Bound    pvc-3e796c50-1dc0-4b10-a02c-94e83def42dd   5Gi        RWO            rook-ceph-block   32m
server-data-k8ssandra-dc1-default-sts-1   Bound    pvc-27ecb64f-97f4-401b-944d-161650784be0   5Gi        RWO            rook-ceph-block   32m
server-data-k8ssandra-dc1-default-sts-2   Bound    pvc-174c6237-c386-401e-8551-a1d39e266838   5Gi        RWO            rook-ceph-block   32m
server-data-k8ssandra-dc1-default-sts-3   Bound    pvc-5d0fa6fd-e7c9-459c-91c9-8226d363536e   5Gi        RWO            rook-ceph-block   32m

[root@node1 ~]# kubectl describe pod k8ssandra-dc1-default-sts-0
Name:         k8ssandra-dc1-default-sts-0
Namespace:    k8ssandra
Priority:     0
Node:         node7/172.16.11.183
Start Time:   Thu, 27 May 2021 11:51:35 +0300
              controller-revision-hash=k8ssandra-dc1-default-sts-865d88bd4
Annotations:  <none>
Status:       Running
IP:           10.233.96.6
IPs:
  IP:           10.233.96.6
Controlled By:  StatefulSet/k8ssandra-dc1-default-sts
Init Containers:
  base-config-init:
    Container ID:  docker://752e5e85c3cdde14d850998552809d3e98a85c2dfa647cb608034b6a180b1e83
    Image:         k8ssandra/cass-management-api:3.11.10-v0.1.25
    Image ID:      docker-pullable://k8ssandra/cass-management-api@sha256:ef5e007d37b57d905c706c1221c96228c4387abb8a96f994af8aae3423dc9f2a
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/sh
    Args:
      -c
      cp -r /etc/cassandra/* /cassandra-base-config/
    State:          Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Thu, 27 May 2021 11:53:52 +0300
      Finished:     Thu, 27 May 2021 11:53:52 +0300
    Ready:          True
    Restart Count:  0
    Environment:    <none>
    Mounts:
      /cassandra-base-config/ from cassandra-config (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
  server-config-init:
    Container ID:   docker://683835e66c8a9b4fd42900e0cc7f7b6930254bb042aab3e326b8b047f3665b63
    Image ID:       docker-pullable://datastax/cass-config-builder@sha256:0cfa1f1270f1c211ae4ac8eb690dd9e909cf690126e5ed5ddb08bba78902d1a1
    Port:           <none>
    Host Port:      <none>
    State:          Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Thu, 27 May 2021 11:53:59 +0300
      Finished:     Thu, 27 May 2021 11:54:01 +0300
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     1
      memory:  256M
    Requests:
      cpu:     1
      memory:  256M
    Environment:
      POD_IP:                      (v1:status.podIP)
      HOST_IP:                     (v1:status.hostIP)
      USE_HOST_IP_FOR_BROADCAST:  false
      RACK_NAME:                  default
      PRODUCT_VERSION:            3.11.10
      PRODUCT_NAME:               cassandra
      DSE_VERSION:                3.11.10
      CONFIG_FILE_DATA:           {"cassandra-yaml":{"authenticator":"PasswordAuthenticator","authorizer":"CassandraAuthorizer","credentials_update_interval_in_ms":3600000,"credentials_validity_in_ms":3600000,"num_tokens":256,"permissions_update_interval_in_ms":3600000,"permissions_validity_in_ms":3600000,"role_manager":"CassandraRoleManager","roles_update_interval_in_ms":3600000,"roles_validity_in_ms":3600000},"cluster-info":{"name":"k8ssandra","seeds":"k8ssandra-seed-service"},"datacenter-info":{"graph-enabled":0,"name":"dc1","solr-enabled":0,"spark-enabled":0},"jvm-options":{"additional-jvm-opts":["-Dcassandra.system_distributed_replication_dc_names=dc1","-Dcassandra.system_distributed_replication_per_dc=4"],"heap_size_young_generation":"24G","initial_heap_size":"24G","max_heap_size":"24G"}}
    Mounts:
      /config from server-config (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
  jmx-credentials:
    Container ID:  docker://d152b98f82f2d628069b567622e7a32168169195ef9ab22b59591af37138d5cb
    Image:         busybox
    Image ID:      docker-pullable://busybox@sha256:b5fc1d7b2e4ea86a06b0cf88de915a2c43a99a00b6b3c0af731e5f4c07ae8eff
    Port:          <none>
    Host Port:     <none>
    Args:
      /bin/sh
      -c
      echo "$REAPER_JMX_USERNAME $REAPER_JMX_PASSWORD" > /config/jmxremote.password && echo "$SUPERUSER_JMX_USERNAME $SUPERUSER_JMX_PASSWORD" >> /config/jmxremote.password
    State:          Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Thu, 27 May 2021 11:54:02 +0300
      Finished:     Thu, 27 May 2021 11:54:02 +0300
    Ready:          True
    Restart Count:  0
    Environment:
      REAPER_JMX_USERNAME:     <set to the key 'username' in secret 'k8ssandra-reaper-jmx'>  Optional: false
      REAPER_JMX_PASSWORD:     <set to the key 'password' in secret 'k8ssandra-reaper-jmx'>  Optional: false
      SUPERUSER_JMX_USERNAME:  <set to the key 'username' in secret 'k8ssandra-superuser'>   Optional: false
      SUPERUSER_JMX_PASSWORD:  <set to the key 'password' in secret 'k8ssandra-superuser'>   Optional: false
    Mounts:
      /config from server-config (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
Containers:
  cassandra:
    Container ID:   docker://217d76c7eb3153e77000da0043bfa31b4b45f1500002f9c8aac8a8e8ab94731d
    Image:          k8ssandra/cass-management-api:3.11.10-v0.1.25
    Image ID:       docker-pullable://k8ssandra/cass-management-api@sha256:ef5e007d37b57d905c706c1221c96228c4387abb8a96f994af8aae3423dc9f2a
    Ports:          9042/TCP, 9142/TCP, 7000/TCP, 7001/TCP, 7199/TCP, 8080/TCP, 9103/TCP, 9160/TCP
    Host Ports:     0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP
    State:          Running
      Started:      Thu, 27 May 2021 11:54:03 +0300
    Ready:          False
    Restart Count:  0
    Limits:
      cpu:     3
      memory:  24Gi
    Requests:
      cpu:      3
      memory:   24Gi
    Liveness:   http-get http://:8080/api/v0/probes/liveness delay=15s timeout=1s period=15s #success=1 #failure=3
    Readiness:  http-get http://:8080/api/v0/probes/readiness delay=20s timeout=1s period=10s #success=1 #failure=3
    Environment:
      LOCAL_JMX:                no
      DS_LICENSE:               accept
      DSE_AUTO_CONF_OFF:        all
      USE_MGMT_API:             true
      MGMT_API_EXPLICIT_START:  true
      DSE_MGMT_EXPLICIT_START:  true
    Mounts:
      /config from server-config (rw)
      /etc/encryption/ from encryption-cred-storage (rw)
      /var/lib/cassandra from server-data (rw)
      /var/log/cassandra from server-logs (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
  server-system-logger:
    Container ID:   docker://23b2501e7c93b5923c43d5f596ea9d9ce268f835ba0181b25364a01cc37c8c0a
    Image:          k8ssandra/system-logger:9c4c3692
    Image ID:       docker-pullable://k8ssandra/system-logger@sha256:6208a1e3d710d022c9e922c8466fe7d76ca206f97bf92902ff5327114696f8b1
    Port:           <none>
    Host Port:      <none>
    State:          Running
      Started:      Thu, 27 May 2021 11:54:07 +0300
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     100m
      memory:  64M
    Requests:
      cpu:        100m
      memory:     64M
    Environment:  <none>
    Mounts:
      /var/log/cassandra from server-logs (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
Conditions:
  Type              Status
  Initialized       True 
  Ready             False 
  ContainersReady   False 
  PodScheduled      True 
Volumes:
  server-data:
    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
    ClaimName:  server-data-k8ssandra-dc1-default-sts-0
    ReadOnly:   false
  cassandra-config:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     
    SizeLimit:  <unset>
  server-config:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     
    SizeLimit:  <unset>
  server-logs:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     
    SizeLimit:  <unset>
  encryption-cred-storage:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  dc1-keystore
    Optional:    false
  default-token-mtdjk:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-mtdjk
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason                  Age                    From                     Message
  ----     ------                  ----                   ----                     -------
  Normal   Scheduled               32m                    default-scheduler        Successfully assigned k8ssandra/k8ssandra-dc1-default-sts-0 to node7
  Normal   SuccessfulAttachVolume  32m                    attachdetach-controller  AttachVolume.Attach succeeded for volume "pvc-3e796c50-1dc0-4b10-a02c-94e83def42dd"
  Normal   Pulling                 32m                    kubelet                  Pulling image "k8ssandra/cass-management-api:3.11.10-v0.1.25"
  Normal   Pulled                  30m                    kubelet                  Successfully pulled image "k8ssandra/cass-management-api:3.11.10-v0.1.25" in 1m59.009421126s
  Normal   Started                 30m                    kubelet                  Started container base-config-init
  Normal   Created                 30m                    kubelet                  Created container base-config-init
  Normal   Pulling                 30m                    kubelet                  Pulling image "docker.io/datastax/cass-config-builder:1.0.4"
  Normal   Pulled                  30m                    kubelet                  Successfully pulled image "docker.io/datastax/cass-config-builder:1.0.4" in 6.631303128s
  Normal   Created                 30m                    kubelet                  Created container server-config-init
  Normal   Started                 30m                    kubelet                  Started container server-config-init
  Normal   Started                 30m                    kubelet                  Started container jmx-credentials
  Normal   Created                 30m                    kubelet                  Created container jmx-credentials
  Normal   Pulled                  30m                    kubelet                  Container image "busybox" already present on machine
  Normal   Pulled                  30m                    kubelet                  Container image "k8ssandra/cass-management-api:3.11.10-v0.1.25" already present on machine
  Normal   Created                 30m                    kubelet                  Created container cassandra
  Normal   Started                 30m                    kubelet                  Started container cassandra
  Normal   Pulling                 30m                    kubelet                  Pulling image "k8ssandra/system-logger:9c4c3692"
  Normal   Pulled                  30m                    kubelet                  Successfully pulled image "k8ssandra/system-logger:9c4c3692" in 3.859718237s
  Normal   Created                 30m                    kubelet                  Created container server-system-logger
  Normal   Started                 30m                    kubelet                  Started container server-system-logger
  Warning  Unhealthy               2m31s (x166 over 30m)  kubelet                  Readiness probe failed: HTTP probe failed with statuscode: 500


Reply all
Reply to author
Forward
0 new messages