Policy Definition

/other/get-debug-information/get-debug-information.yaml

 1apiVersion: kyverno.io/v1
 2kind: ClusterPolicy
 3metadata:
 4  name: get-debug-data-policy
 5  annotations:
 6    policies.kyverno.io/title: Collect Debug Information for Pods in CrashLoopBackOff
 7    policies.kyverno.io/category: Other
 8    policies.kyverno.io/severity: medium
 9    policies.kyverno.io/subject: Pod
10    kyverno.io/kyverno-version: 1.11.5
11    kyverno.io/kubernetes-version: "1.27"
12    policies.kyverno.io/description: >-
13      This policy generates a job which gathers troubleshooting data (including logs, kubectl describe output and events from the namespace) from pods that are in CrashLoopBackOff and have 3 restarts. This data can further be used to automatically create a Jira issue using some kind of automation or another Kyverno policy. For more information on the image used in this policy in addition to the necessary RBAC resources required in order for this policy to operate, see the documentation at https://github.com/nirmata/SRE-Operational-Usecases/tree/main/get-troubleshooting-data/get-debug-data.       
14spec:
15  rules:
16  - name: get-debug-data-policy-rule
17    match:
18      any:
19      - resources:
20          kinds:
21          - v1/Pod.status
22    context:
23    - name: pdcount
24      apiCall:
25        urlPath: "/api/v1/namespaces/{{request.namespace}}/pods?labelSelector=requestpdname=pod-{{request.object.metadata.name}}"
26        jmesPath: "items | length(@)"
27    preconditions:
28      all:
29      - key: "{{ sum(request.object.status.containerStatuses[*].restartCount || `0`) }}"
30        operator: Equals
31        value: 3
32      - key: "{{ request.object.metadata.labels.deleteme || 'empty' }}"
33        operator: Equals
34        value: "empty"
35      - key: "{{ pdcount }}"
36        operator: Equals
37        value: 0
38    generate:
39      apiVersion: batch/v1
40      kind: Job
41      name: get-debug-data-{{request.object.metadata.name}}-{{ random('[0-9a-z]{8}') }}
42      namespace: "{{request.namespace}}"
43      synchronize: false
44      data:
45        metadata:
46          labels:
47            deleteme: allow
48        spec:
49          template:
50            metadata:
51              labels:
52                app: my-app
53                deleteme: allow
54                requestpdname: "pod-{{request.object.metadata.name}}"
55            spec:
56              restartPolicy: OnFailure
57              containers:
58              - name: my-container
59                image: sagarkundral/my-python-app:v52
60                ports:
61                - containerPort: 8080
62                volumeMounts:
63                - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
64                  name: token
65                  readOnly: true
66                args:
67                - "/app/get-debug-jira-v2.sh"
68                - "{{request.namespace}}"
69                - "{{request.object.metadata.name}}"
70              serviceAccount: default # This serviceaccount needs the necessary RBAC in order for the policy to operate. 
71              volumes:
72              - name: token
73                projected:
74                  defaultMode: 420
75                  sources:
76                  - serviceAccountToken:
77                      expirationSeconds: 3607
78                      path: token
79                  - configMap:
80                      items:
81                      - key: ca.crt
82                        path: ca.crt
83                      name: kube-root-ca.crt