About the k8s-infra, it seems cron jobs are not really getting tracked properly. I have hundreds of ...
t
About the k8s-infra, it seems cron jobs are not really getting tracked properly. I have hundreds of cron jobs running at times but
k8s_cronjob_active_jobs
doesn't seem to change accordingly. It also shows strange values like "0.0216" and goes down later for no reason. Other K8s metrics seem to be fine. Is there something I have to change in the k8s-infra config for cron jobs to be tracked correctly?
n
Hey @Tomás Arribas, could you please share your k8s-infra chart values here?
t
Thanks, this is the k8s-infra values file:
Copy code
# Cluster identity and the endpoint the k8s-infra collectors export to.
# NOTE(review): the `${...}` placeholders are presumably filled in by an outer
# template before Helm sees this file — confirm.
global:
  cloud: azure
  # Quoted so an empty or numeric-looking substitution still parses as a string.
  clusterName: "${cluster_name}"

# Configure to send data to local SigNoz instance
otelCollectorEndpoint: "${helm_release_name}-otel-collector.${namespace}.svc:4317"
otelInsecure: true

# Feature presets for the k8s-infra collectors.
presets:
  # Enable OTLP exporter
  otlpExporter:
    enabled: true
  # Enable logs collection
  logsCollection:
    enabled: true
  # Enable metrics collection (host, kubelet and cluster level)
  hostMetrics:
    enabled: true
  kubeletMetrics:
    enabled: true
  clusterMetrics:
    enabled: true
  # global.cloud is azure, so use the Azure/AKS detectors rather than the
  # AWS-only `eks` detector.
  # NOTE(review): confirm the installed chart version honours this
  # `detectors` list instead of deriving it from global.cloud.
  resourceDetection:
    detectors:
      - azure
      - aks
      - system

# Configure metrics collection
otelAgent:
  resources:
    limits:
      cpu: 500m
      memory: 512Mi
    requests:
      cpu: 100m
      memory: 256Mi

  config:
    receivers:
      # Kubelet stats, collected every 20s using service-account auth.
      kubeletstats:
        collection_interval: 20s
        auth_type: "serviceAccount"
        # NOTE(review): `$$` presumably escapes `${` for an outer template so
        # the collector receives `${env:K8S_NODE_NAME}` — confirm.
        endpoint: "$${env:K8S_NODE_NAME}:10250"
        insecure_skip_verify: true
        metric_groups:
          - container
          - pod
          - node
          - volume

      # Host-level metrics, collected every 30s.
      hostmetrics:
        collection_interval: 30s
        scrapers:
          cpu:
            metrics:
              system.cpu.time:
                enabled: true
          memory:
            metrics:
              system.memory.usage:
                enabled: true
          disk:
            metrics:
              # Plain metric name; chat link markup around it is not valid here.
              system.disk.io:
                enabled: true
          filesystem:
            metrics:
              system.filesystem.usage:
                enabled: true
          network:
            metrics:
              # Plain metric name; chat link markup around it is not valid here.
              system.network.io:
                enabled: true
              system.network.errors:
                enabled: true
And this is the main SigNoz one, just in case:
Copy code
# Default values for SigNoz
# Query service: one replica, ClusterIP service on port 8080.
queryService:
  name: query-service
  replicaCount: 1
  service:
    port: 8080
    type: ClusterIP

# Frontend: one replica, ClusterIP service on port 3301.
frontend:
  name: frontend
  replicaCount: 1
  service:
    port: 3301
    type: ClusterIP

# Alertmanager: one replica, ClusterIP service on port 9093, with a 100Mi
# persistent volume.
alertmanager:
  name: alertmanager
  replicaCount: 1
  persistence:
    enabled: true
    size: 100Mi
  service:
    port: 9093
    type: ClusterIP

# SigNoz OTel collector: one replica behind a ClusterIP service.
otelCollector:
  name: otel-collector
  replicaCount: 1
  service:
    type: ClusterIP
  config:
    receivers:
      # OTLP ingest over gRPC (4317) and HTTP (4318).
      otlp:
        protocols:
          grpc:
            endpoint: "0.0.0.0:4317"
            max_recv_msg_size_mib: 16
          http:
            endpoint: "0.0.0.0:4318"
      # MongoDB receiver for replica set rs0, collected every 30s.
      mongodb:
        collection_interval: 30s
        hosts:
          - endpoint: "mongodb-headless.mongodb.svc.cluster.local:27017"
        replica_set: "rs0"
        tls:
          insecure: true
          insecure_skip_verify: true
        metrics:
          mongodb.lock.acquire.count:
            enabled: true
          mongodb.lock.acquire.time:
            enabled: true
          mongodb.lock.acquire.wait_count:
            enabled: true
          mongodb.lock.deadlock.count:
            enabled: true
          mongodb.operation.latency.time:
            enabled: true
    service:
      telemetry:
        logs:
          encoding: json
        metrics:
          address: "0.0.0.0:8888"
      extensions:
        - health_check
        - zpages
        - pprof
      pipelines:
        # NOTE(review): unlike the `metrics` pipeline below, this one does not
        # export to signozclickhousemetrics — confirm that is intended.
        metrics/mongodb:
          receivers:
            - mongodb
          processors:
            - batch
          exporters:
            - clickhousemetricswrite
            - metadataexporter
        traces:
          receivers:
            - otlp
          processors:
            - signozspanmetrics/delta
            - batch
          exporters:
            - clickhousetraces
            - metadataexporter
        metrics:
          receivers:
            - otlp
          processors:
            - batch
          exporters:
            - clickhousemetricswrite
            - metadataexporter
            - signozclickhousemetrics
        logs:
          receivers:
            - otlp
            - httplogreceiver/heroku
            - httplogreceiver/json
          processors:
            - batch
          exporters:
            - clickhouselogsexporter
            - metadataexporter
      

# ClickHouse persistence: 100Gi volume.
clickhouse:
  persistence:
    size: 100Gi

#otelCollectorMetrics:
#  name: otel-collector-metrics
#  replicaCount: 1
#  service:
#    type: ClusterIP