Samuel Olowoyeye
10/03/2024, 11:18 AM
# Helm values override shared in the thread (ClickHouse layout, ZooKeeper,
# pod anti-affinity, storage size, schema migrator).
# NOTE(review): the export stripped all indentation; the nesting below is
# reconstructed from key order. Confirm against the chart's values schema.
clickhouse:
  layout:
    shardsCount: 2
    replicasCount: 1
  zookeeper:
    replicaCount: 3
  # Anti-affinity rules, all keyed on the node hostname label. The chat export
  # had wrapped each topologyKey in auto-link markup; the plain key is restored.
  podDistribution:
    - type: ClickHouseAntiAffinity
      topologyKey: kubernetes.io/hostname
    - type: ReplicaAntiAffinity
      topologyKey: kubernetes.io/hostname
    - type: ShardAntiAffinity
      topologyKey: kubernetes.io/hostname
  persistence:
    size: 1600Gi
  clickhouseOperator:
    zookeeperLog:
      ttl: 1
schemaMigrator:
  enableReplication: false
Srikanth Chekuri
10/08/2024, 1:40 PM
Samuel Olowoyeye
10/08/2024, 2:09 PM
Srikanth Chekuri
10/08/2024, 2:15 PM
Samuel Olowoyeye
10/08/2024, 2:18 PM
# ConfigMap holding the collector configuration shared in the thread.
# NOTE(review): the export stripped all indentation; the nesting below is
# reconstructed from key order. No values were changed.
apiVersion: v1
kind: ConfigMap
metadata:
  name: my-release-signoz-otel-collector
  namespace: platform
data:
  # Literal block scalar: everything indented beneath this key is the text of
  # the embedded config file, so indentation inside it is significant.
  otel-collector-config.yaml: |-
    exporters:
      clickhouselogsexporter:
        dsn: tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASSWORD}@${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT}/${CLICKHOUSE_LOG_DATABASE}
        timeout: 90s
        use_new_schema: true
      clickhousemetricswrite:
        endpoint: tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASSWORD}@${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT}/${CLICKHOUSE_DATABASE}
        resource_to_telemetry_conversion:
          enabled: true
        timeout: 90s
      clickhousetraces:
        datasource: tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASSWORD}@${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT}/${CLICKHOUSE_TRACE_DATABASE}
        low_cardinal_exception_grouping: ${LOW_CARDINAL_EXCEPTION_GROUPING}
        timeout: 90s
      prometheus:
        endpoint: 0.0.0.0:8889
    extensions:
      health_check:
        endpoint: 0.0.0.0:13133
      pprof:
        endpoint: localhost:1777
      zpages:
        endpoint: localhost:55679
    processors:
      batch:
        send_batch_size: 28000
        timeout: 90s
      k8sattributes:
        extract:
          metadata:
            - k8s.namespace.name
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.pod.start_time
            - k8s.deployment.name
            - k8s.node.name
        filter:
          node_from_env_var: K8S_NODE_NAME
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
      resourcedetection:
        detectors:
          - env
          - system
        system:
          hostname_sources:
            - dns
            - os
        timeout: 2s
      signozspanmetrics/cumulative:
        dimensions:
          - default: default
            name: service.namespace
          - default: default
            name: deployment.environment
          - name: signoz.collector.id
        dimensions_cache_size: 100000
        latency_histogram_buckets:
          - 100us
          - 1ms
          - 2ms
          - 6ms
          - 10ms
          - 50ms
          - 100ms
          - 250ms
          - 500ms
          - 1000ms
          - 1400ms
          - 2000ms
          - 5s
          - 10s
          - 20s
          - 40s
          - 60s
        metrics_exporter: clickhousemetricswrite
      signozspanmetrics/delta:
        aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
        dimensions:
          - default: default
            name: service.namespace
          - default: default
            name: deployment.environment
          - name: signoz.collector.id
        dimensions_cache_size: 100000
        latency_histogram_buckets:
          - 100us
          - 1ms
          - 2ms
          - 6ms
          - 10ms
          - 50ms
          - 100ms
          - 250ms
          - 500ms
          - 1000ms
          - 1400ms
          - 2000ms
          - 5s
          - 10s
          - 20s
          - 40s
          - 60s
        metrics_exporter: clickhousemetricswrite
    receivers:
      hostmetrics:
        collection_interval: 30s
        scrapers:
          cpu: {}
          disk: {}
          filesystem: {}
          load: {}
          memory: {}
          network: {}
      httplogreceiver/heroku:
        endpoint: 0.0.0.0:8081
        source: heroku
      httplogreceiver/json:
        endpoint: 0.0.0.0:8082
        source: json
      jaeger:
        protocols:
          grpc:
            endpoint: 0.0.0.0:14250
          thrift_http:
            endpoint: 0.0.0.0:14268
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
            max_recv_msg_size_mib: 16
          http:
            endpoint: 0.0.0.0:4318
      otlp/spanmetrics:
        protocols:
          grpc:
            endpoint: localhost:12345
    service:
      extensions:
        - health_check
        - zpages
        - pprof
      pipelines:
        logs:
          exporters:
            - clickhouselogsexporter
          processors:
            - batch
          receivers:
            - otlp
            - httplogreceiver/heroku
            - httplogreceiver/json
        metrics:
          exporters:
            - clickhousemetricswrite
          processors:
            - batch
          receivers:
            - otlp
        metrics/internal:
          exporters:
            - clickhousemetricswrite
          processors:
            - resourcedetection
            - k8sattributes
            - batch
          receivers:
            - hostmetrics
        traces:
          exporters:
            - clickhousetraces
          processors:
            - signozspanmetrics/cumulative
            - signozspanmetrics/delta
            - batch
          receivers:
            - otlp
            - jaeger
      telemetry:
        logs:
          encoding: json
        metrics:
          address: 0.0.0.0:8888
  # The chat export had wrapped this URL in auto-link angle brackets; they are
  # removed so the value is a plain ws:// endpoint.
  otel-collector-opamp-config.yaml: 'server_endpoint: "ws://my-release-signoz-query-service:4320/v1/opamp"'
Samuel Olowoyeye
10/08/2024, 2:23 PM
{"level":"error","ts":1728171543.5460207,"caller":"clickhousetracesexporter/writer.go:417","msg":"Could not write a batch of spans to index table: ","kind":"exporter","data_type":"traces","name":"clickhousetraces","error":"read: read tcp 100.123.66.202:55052->10.1.153.198:9000: use of closed network connection","errorVerbose":"read:\n github.com/ClickHouse/ch-go/proto.(*Reader).ReadFull\n /home/runner/go/pkg/mod/github.com/!sig!noz/ch-go@v0.61.2-dd/proto/reader.go:62\n - read tcp 100.123.66.202:55052->10.1.153.198:9000: use of closed network connection","stacktrace":"github.com/SigNoz/signoz-otel-collector/exporter/clickhousetracesexporter.(*SpanWriter).WriteBatchOfSpans\n\t/home/runner/work/signoz-otel-collector/signoz-otel-collector/exporter/clickhousetracesexporter/writer.go:417\n
Srikanth Chekuri
10/08/2024, 4:21 PM
clickhousetraces:
  datasource: tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASSWORD}@${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT}/${CLICKHOUSE_TRACE_DATABASE}
  low_cardinal_exception_grouping: ${LOW_CARDINAL_EXCEPTION_GROUPING}
  timeout: 90s
With this timeout, you should not be getting that error.
I had to increase the timeout and reduce the send_batch_size to 28,000 because at 50000 I was getting this error
The problem with 28k is that the number of writes will increase, which will put pressure on ClickHouse to do aggressive merges. Can you confirm if you are ingesting 100k spans per second?
Samuel Olowoyeye
10/08/2024, 10:31 PM
2024-10-08T22:36:52+01:00 {"level":"info","ts":1728423412.0516074,"caller":"exporterhelper/retry_sender.go:118","msg":"Exporting failed. Will retry the request after interval.","kind":"exporter","data_type":"logs","name":"clickhouselogsexporter","error":"StatementSend:code: 252, message: Too many parts (3003 with average size of 49.41 MiB) in table 'signoz_logs.logs_v2 (498a6be1-142f-414f-ad2f-3c33174764ee)'. Merges are processing significantly slower than inserts","interval":"4.46158194s"}
2024-10-08T22:45:54+01:00 {"level":"error","ts":1728423954.9264715,"caller":"clickhousetracesexporter/writer.go:417","msg":"Could not write a batch of spans to index table: ","kind":"exporter","data_type":"traces","name":"clickhousetraces","error":"read: read tcp 100.123.66.203:48178->10.1.153.198:9000: use of closed network connection","errorVerbose":"read:\n github.com/ClickHouse/ch-go/proto.(*Reader).ReadFull\n /home/runner/go/pkg/mod/github.com/!sig!noz/ch-go@v0.61.2-dd/proto/reader.go:62\n - read tcp 100.123.66.203:48178->10.1.153.198:9000: use of closed network connection","stacktrace":"github.com/SigNoz/signoz-otel-collector/exporter/clickhousetracesexporter.(*SpanWriter).WriteBatchOfSpans\n\t/home/runner/work/signoz-otel-collector/signoz-otel-collector/exporter/clickhousetracesexporter/writer.go:417\ngithub.com/SigNoz/signoz-otel-collector/exporter/clickhousetracesexporter.(*storage).pushTraceData\n\t/home/runner/work/signoz-otel-collector/signoz-otel-collector/exporter/clickhousetracesexporter/clickhouse_exporter.go:436\ngo.opentelemetry.io/collector/exporter/exporterhelper.(*tracesRequest).Export\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/traces.go:59\ngo.opentelemetry.io/collector/exporter/exporterhelper.(*timeoutSender).send\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/timeout_sender.go:49\ngo.opentelemetry.io/collector/exporter/exporterhelper.(*retrySender).send\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/retry_sender.go:89\ngo.opentelemetry.io/collector/exporter/exporterhelper.(*tracesExporterWithObservability).send\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/traces.go:159\ngo.opentelemetry.io/collector/exporter/exporterhelper.(*baseRequestSender).send\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/common.go:37\ngo.opentelemetry.
io/collector/exporter/exporterhelper.newQueueSender.func1\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/exporterhelper/queue_sender.go:99\ngo.opentelemetry.io/collector/exporter/internal/queue.(*boundedMemoryQueue[...]).Consume\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/internal/queue/bounded_memory_queue.go:52\ngo.opentelemetry.io/collector/exporter/internal/queue.(*Consumers[...]).Start.func1\n\t/home/runner/go/pkg/mod/go.opentelemetry.io/collector/exporter@v0.102.0/internal/queue/consumers.go:43"}
Srikanth Chekuri
10/09/2024, 4:26 AM
otelcol_exporter_sent_spans
Samuel Olowoyeye
10/09/2024, 8:28 AM
Srikanth Chekuri
10/09/2024, 9:14 AM
Samuel Olowoyeye
10/09/2024, 9:38 AM
Samuel Olowoyeye
10/09/2024, 10:03 AM
Samuel Olowoyeye
10/09/2024, 10:31 AM
Srikanth Chekuri
10/09/2024, 6:29 PM
Samuel Olowoyeye
10/09/2024, 9:17 PM
Srikanth Chekuri
10/09/2024, 9:32 PM
Samuel Olowoyeye
10/09/2024, 10:01 PM
Samuel Olowoyeye
10/09/2024, 10:43 PM
Samuel Olowoyeye
10/09/2024, 10:44 PM
Samuel Olowoyeye
10/10/2024, 12:40 AM
Samuel Olowoyeye
10/11/2024, 12:04 AM
SigNoz is an open-source APM. It helps developers monitor their applications & troubleshoot problems, an open-source alternative to DataDog, NewRelic, etc.
Powered by