Overview

Diagnosis and resolution of Kubernetes security-related failures — RBAC denials, webhook rejections, PSA/PSS violations, secret access errors, and security audit findings.

Forbidden 403 — RBAC Denial

# "Error from server (Forbidden): pods is forbidden:
#  User "alice" cannot list resource "pods" in API group ""
#  in the namespace "production""

# Step 1: Check what permissions the user has
kubectl auth can-i --list --namespace production --as alice
# Test the exact verb from the error ("list"); "get" can be allowed while
# "list" is still denied, so checking "get" can give a misleading "yes".
kubectl auth can-i list pods --namespace production --as alice
# Same check while also impersonating the user's group membership
kubectl auth can-i list pods --namespace production --as alice --as-group dev-team

# Step 2: Find all RoleBindings for the user
# any() prints each binding once even when several of its subjects match, and
# the kind check avoids matching e.g. a ServiceAccount that is also named "alice".
kubectl get rolebindings,clusterrolebindings -A -o json | \
  jq -r '.items[] | select(any(.subjects[]?;
      (.kind == "User" and .name == "alice") or
      (.kind == "Group" and .name == "dev-team")
    )) | .kind + "/" + .metadata.name + " in " + (.metadata.namespace // "cluster-scope")'

# Step 3: Inspect the rules granted by the bound Role / ClusterRole
kubectl describe role <role-name> --namespace <ns>
kubectl describe clusterrole <clusterrole-name>

# Step 4: Fix — make sure the Role exists, then bind it
# A RoleBinding that references a missing Role is accepted by the API server
# but grants nothing, so create the Role first (this fails harmlessly with
# AlreadyExists if it is already there).
kubectl create role pod-reader \
  --verb=get,list,watch \
  --resource=pods \
  -n production
kubectl create rolebinding alice-pod-reader \
  --role=pod-reader \
  --user=alice \
  -n production
# Confirm the original denial is gone
kubectl auth can-i list pods --namespace production --as alice

# ServiceAccount getting 403: impersonate it using its full
# system:serviceaccount:<namespace>:<name> username
kubectl auth can-i get secrets --namespace production \
  --as=system:serviceaccount:production:payments-api
# Fix: create Role + RoleBinding for the SA

Webhook Admission Rejection

# "Error from server: admission webhook "validate.kyverno.svc" denied the request:
#  resource requests not set on container payments-api"

# Step 1: Identify which policy is blocking
# A pod denied at admission is never created, so there is usually no pod to
# describe; the denial is recorded as a FailedCreate event on the owning
# controller (ReplicaSet / StatefulSet / Job).
kubectl get events -n <ns> --field-selector reason=FailedCreate
kubectl describe replicaset <replicaset> -n <ns>
# OR from the kubectl error message — webhook name is in the error

# Step 2: Locate the Kyverno policy named in the denial
kubectl get clusterpolicy
kubectl get policy --namespace <ns>
kubectl describe clusterpolicy require-resource-requests

# Step 3: Read the full rule definitions to see exactly what is required
kubectl get clusterpolicy require-resource-requests --output yaml

# Step 4: Fix the workload to comply
# Add to container spec — this is a YAML fragment that belongs under each entry
# of spec.containers[] (spec.template.spec.containers[] for a Deployment),
# not a standalone manifest:
resources:
  requests:
    cpu: 100m
    memory: 128Mi
  limits:
    memory: 256Mi

# Step 5: Emergency bypass (staging only)
# There is no generic annotation that switches a validate rule off; an
# annotation/label opt-out only works if the policy itself excludes on it.
# The supported mechanism is a Kyverno PolicyException:
#   - exceptions must be enabled on the Kyverno admission controller
#     (and may be restricted to specific namespaces) — verify before relying on it
#   - kyverno.io/v2 is served by current Kyverno releases; older releases
#     serve v2beta1 instead (check with `kubectl api-resources | grep -i exception`)
#   - pod names carry generated suffixes, so match with a wildcard
#   - list the autogen- rule as well so the Deployment itself is exempted
# The heredoc delimiter is quoted so the shell never expands anything in the manifest.
kubectl apply -f - <<'EOF'
apiVersion: kyverno.io/v2
kind: PolicyException
metadata:
  name: payments-api-exception
  namespace: staging
spec:
  exceptions:
  - policyName: require-resource-requests
    ruleNames:
    - check-resources
    - autogen-check-resources
  match:
    any:
    - resources:
        kinds: [Pod, Deployment]
        namespaces: [staging]
        names: ["payments-api*"]
EOF

PodSecurity Admission (PSA) Violations

# Namespace has pod-security.kubernetes.io/enforce=restricted label
# Pod gets rejected for running as root, using hostNetwork, etc.

# Show the PodSecurity labels currently set on the namespace
kubectl describe ns production | grep pod-security

# Dump every namespace label as JSON to read the required PSS level
kubectl get namespace production --output=jsonpath='{.metadata.labels}' | jq .

# Diagnose: what does my pod violate?
kubectl label --dry-run=server namespace production \
  pod-security.kubernetes.io/enforce=restricted
# OR: apply pod in dry-run mode and see error

# Common violations and fixes:
# Each snippet is a fragment to merge into the pod manifest, not a standalone
# manifest. "Pod or container level" means spec.securityContext or
# spec.containers[].securityContext respectively.

# "runAsNonRoot is required"
# Pod or container level. The image must then actually run as a non-zero UID.
securityContext:
  runAsNonRoot: true
  runAsUser: 1000

# "allowPrivilegeEscalation must be false"
# Container level only — this field does not exist on the pod-level securityContext.
securityContext:
  allowPrivilegeEscalation: false

# "capabilities must be drop: ALL"
# Container level only.
securityContext:
  capabilities:
    drop: [ALL]

# "hostNetwork/hostPID/hostIPC not allowed"
# Remove these fields from pod spec (or use baseline PSS level)
# NOTE(review): baseline also forbids host namespaces as far as I know; only
# "privileged" allows them — confirm before relaxing the level.

# "seccompProfile must be RuntimeDefault or Localhost"
# Pod or container level.
securityContext:
  seccompProfile:
    type: RuntimeDefault

# "volumes hostPath not allowed at restricted level"
# Replace hostPath with emptyDir or PVC

# Check PSA enforce-mode rejections reported as controller events.
# These are Kubernetes events, not API-server audit logs: pods admitted under
# warn/audit mode are still created, so they never show up as FailedCreate.
kubectl get events -n production --field-selector reason=FailedCreate | grep "violates"

Secret Access Errors

# "secret "payments-db-creds" not found"
# First confirm the secret exists in the namespace the pod runs in
kubectl get secret payments-db-creds --namespace production

# It may exist under the same name in a different namespace
kubectl get secrets --all-namespaces | grep payments-db-creds

# It may exist while the ServiceAccount is not allowed to read it
kubectl auth can-i get secret/payments-db-creds --namespace production \
  --as=system:serviceaccount:production:payments-api

# Secret mounted as volume but env var not set
# `kubectl describe pod` prints env entries under "Environment:" and envFrom
# sources under "Environment Variables from:", so match the common prefix.
kubectl describe pod <pod> -n production | grep -A10 "Environment"
# Check: secretKeyRef is correct key name (prints key names only, never values)
kubectl get secret payments-db-creds -n production \
  -o jsonpath='{.data}' | jq 'keys'

# External Secrets Operator — secret not syncing
kubectl get externalsecret --namespace production
kubectl describe externalsecret payments-db-creds --namespace production
# Look for: SyncError condition
# Common: IRSA role doesn't have access to Secrets Manager ARN

# Vault agent injector — secret not being injected
# (the init container is the one that fetches the secret before the app starts)
kubectl logs <pod> --namespace production --container vault-agent-init
# Common: Vault policy doesn't allow read on the path
# Check Vault policy:
# vault policy read payments-api-policy

# Rotation — pod has stale secret (old value)
# If consumed as env var: the value is read once at container start — pod restart required
# If mounted as volume: refreshed by kubelet after a delay (kubelet sync period
# plus cache propagation, so it can take longer than 60s); volumes mounted via
# subPath are never refreshed and also need a restart
kubectl rollout restart deployment payments-api -n production

IRSA / Workload Identity Issues

# AWS SDK error: "NoCredentialProviders: no valid providers in chain"
# OR: "AccessDenied: User: arn:aws:sts::123:assumed-role/... is not authorized"

# Step 1: Confirm the ServiceAccount carries the IAM role annotation
kubectl get serviceaccount payments-api --namespace production \
  --output=jsonpath='{.metadata.annotations}'
# Should have: eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/payments-api-role

# Step 2: Check projected token is mounted
# Most volumes are not projected; `[]?` skips them instead of aborting with
# "Cannot iterate over null", and the select drops sources that are not
# serviceAccountToken (configMap, downwardAPI, ...).
kubectl get pod <pod> -n production \
  -o jsonpath='{.spec.volumes}' | \
  jq '.[] | .projected.sources[]? | .serviceAccountToken | select(. != null)'

# Step 3: Inspect the IAM role's trust (assume-role) policy
aws iam get-role \
  --role-name payments-api-role \
  --query 'Role.AssumeRolePolicyDocument'
# Trust policy must have the OIDC provider condition:
# "oidc.eks.us-east-1.amazonaws.com/id/XXXX:sub": "system:serviceaccount:production:payments-api"

# Step 4: Verify the OIDC provider is registered in IAM and matches the
# issuer URL the cluster reports
aws iam list-open-id-connect-providers
aws eks describe-cluster \
  --name <cluster> \
  --query 'cluster.identity.oidc.issuer'

# Step 5: Check token audience
# A JWT payload is base64url WITHOUT padding: map the URL-safe alphabet back
# to standard base64 and re-add "=" padding, otherwise `base64 -d` rejects
# the input or truncates the JSON.
kubectl exec <pod> -n production -- cat /var/run/secrets/eks.amazonaws.com/serviceaccount/token | \
  cut -d. -f2 | tr '_-' '/+' | \
  awk '{ pad = (4 - length($0) % 4) % 4; while (pad-- > 0) $0 = $0 "="; print }' | \
  base64 -d | jq '{aud:.aud, sub:.sub}'
# aud must contain "sts.amazonaws.com"
# sub must match trust policy

Audit Log Investigation

# Find who made changes to RBAC
# grep pre-filters to binding events; jq then drops the read-only verbs so
# only mutations (create/update/patch/delete/...) remain.
grep -e '"resource":"rolebindings"' -e '"resource":"clusterrolebindings"' \
  /var/log/kubernetes/audit.log \
  | jq 'select((.verb == "get" or .verb == "list" or .verb == "watch") | not)
      | {time: .requestReceivedTimestamp, user: .user.username, verb: .verb, resource: .objectRef.resource, name: .objectRef.name}'

# Find who read secrets
# "list" and "watch" return secret contents just like "get" does, so all three
# count as reads. For list/watch requests the secret name is null because the
# request covers the whole namespace (or cluster).
grep '"resource":"secrets"' /var/log/kubernetes/audit.log | \
  jq 'select(.verb == "get" or .verb == "list" or .verb == "watch") |
    {time:.requestReceivedTimestamp, user:.user.username, verb:.verb,
     secret:.objectRef.name, ns:.objectRef.namespace}'

# Find all exec into pods (security-sensitive)
# Audit events record exec as resource "pods" + subresource "exec"; the
# literal string "pods/exec" never appears in an event (the request URI is
# .../pods/<name>/exec), so grep on the subresource and confirm with jq.
grep '"subresource":"exec"' /var/log/kubernetes/audit.log | \
  jq 'select(.objectRef.resource == "pods" and .objectRef.subresource == "exec") |
      {time:.requestReceivedTimestamp, user:.user.username,
       pod:.objectRef.name, ns:.objectRef.namespace}'

# Runtime security alerts from Falco (last 50 lines from each matching pod)
# NOTE(review): newer Falco Helm charts appear to label pods
# app.kubernetes.io/name=falco rather than app=falco — confirm with
# `kubectl get pods -n falco --show-labels` if this returns nothing.
kubectl logs --namespace falco --selector app=falco --tail=50
# Falco alerts on: container escapes, sensitive file access, unexpected network connections

Network Policy Security Check

# Verify default-deny is in place
# An empty podSelector only means "applies to every pod" — an allow-all
# policy has one too. A default-deny ingress policy additionally covers the
# Ingress policy type and defines no ingress rules.
kubectl get networkpolicy -n production -o json | \
  jq '.items[] | select(
      .spec.podSelector == {} and
      ((.spec.policyTypes // ["Ingress"]) | index("Ingress")) and
      ((.spec.ingress // []) | length == 0)
    ) | .metadata.name'
# Should see: "default-deny-ingress" or similar

# Find pods with no NetworkPolicy (open to all ingress)
# Lists the matchLabels of every policy's podSelector, one entry per policy
# ({} = selects all pods). Compare against `kubectl get pods --show-labels`
# to spot pods no policy selects. Selectors that use matchExpressions are not
# represented here. map() applies the {} default per policy; the previous
# `[gen // {}]` form silently dropped policies whose matchLabels was null.
kubectl get networkpolicy -n production -o json | \
  jq '.items | map(.spec.podSelector.matchLabels // {})'

# Test: can an unauthorized pod reach payments-api?
# --restart=Never creates a bare one-shot pod that --rm can clean up, and
# --max-time bounds the wait: dropped packets otherwise leave curl hanging
# until the TCP connect timeout.
kubectl run attacker --image=nicolaka/netshoot \
  --labels="app=attacker" -n production --restart=Never --rm -it -- \
  curl --max-time 5 http://payments-api.production.svc:8080/
# Should fail with connection reset / timeout

# Cilium: verify policy is actually enforced
# `policy trace` is a command of the Cilium agent binary, not of the
# workstation cilium CLI, so run it inside an agent pod. Both pods must exist
# while tracing — the attacker pod above is deleted by --rm, so recreate it
# (e.g. with `sleep infinity`) first.
# NOTE(review): on newer Cilium releases the in-pod binary may be named
# cilium-dbg — confirm for your version.
kubectl -n kube-system exec ds/cilium -c cilium-agent -- cilium policy trace \
  --src-k8s-pod production/attacker \
  --dst-k8s-pod production/payments-api-xxx \
  --dport 8080
# Expected: "Final verdict: DENIED"

Common Security Misconfigurations

# 1. Check for pods running as root
# Strictly: pods with at least one container that is not forced to run as
# non-root. A container-level runAsNonRoot overrides the pod-level value, so
# the effective setting is computed per container. any() prints each pod once
# instead of once per container combination.
kubectl get pods -n production -o json | \
  jq -r '.items[]
    | .spec.securityContext.runAsNonRoot as $pod_level
    | select(any(.spec.containers[];
        (if .securityContext.runAsNonRoot == null
         then $pod_level
         else .securityContext.runAsNonRoot end) != true))
    | .metadata.name'

# 2. Check for privileged containers
# Init containers can be privileged too, so both lists are checked; any()
# prints each pod once no matter how many of its containers are privileged.
kubectl get pods -A -o json | \
  jq -r '.items[] | select(any(
      (.spec.containers[], .spec.initContainers[]?);
      .securityContext.privileged == true
    )) | .metadata.namespace + "/" + .metadata.name'

# 3. List pods that share the node's network namespace (hostNetwork: true)
kubectl get pods --all-namespaces --output json \
  | jq -r '.items[]
      | select(.spec.hostNetwork == true)
      | "\(.metadata.namespace)/\(.metadata.name)"'

# 4. Check for overly permissive RBAC (wildcards)
# Flags roles with a rule granting every verb on every resource. Elements are
# compared exactly ("*"), because jq `contains` does substring matching, and
# `[]?` tolerates rules without resources (nonResourceURLs rules), which made
# `contains` raise an error. Kind and namespace are printed because role names
# repeat across namespaces.
kubectl get clusterrole,role -A -o json | \
  jq -r '.items[] | select(any(.rules[]?;
      any(.verbs[]?; . == "*") and any(.resources[]?; . == "*")
    )) | .kind + "/" + (.metadata.namespace // "cluster-scope") + "/" + .metadata.name'

# 5. Service accounts with cluster-admin
# Every subject bound to cluster-admin is listed with its kind, so
# ServiceAccounts can be told apart from Users/Groups, and with its namespace
# where it has one (ServiceAccount names are only unique per namespace).
# Append `| grep ServiceAccount/` to see service accounts only.
kubectl get clusterrolebindings -o json | \
  jq -r '.items[] | select(.roleRef.name == "cluster-admin")
    | .metadata.name as $binding
    | .subjects[]?
    | $binding + ": " + .kind + "/" +
      (if .namespace then .namespace + "/" else "" end) + .name'