Slackbot
11/28/2023, 2:26 AMLeigh Finch
11/28/2023, 3:51 AMVishal Sharma
11/28/2023, 5:17 AMnitya-signoz
11/28/2023, 9:05 AMDiogo Baeder
11/28/2023, 9:55 AM
{
"body": "2023-11-28 09:50:37,933 INFO [studioregistration.services.registration] [registration.py:51] [trace_id=8f6d9e783f88a869c54b07af617c8bde span_id=691154848cc4707f resource.service.name=registration trace_sampled=True] - Registered user: {\"uuid\": \"\", \"email\": \"r234r243\", \"name\": \"sdfsdr234\"}",
"id": "2Ynbjv17UEwsow3XRVzPy9V21W2",
"timestamp": "2023-11-28T09:50:37.934104792Z",
"attributes": {
"log_file_path": "/var/log/pods/default_registration-855dfb97f6-h9vxs_0544a01e-e782-410b-8530-eb43df531ba9/registration/0.log",
"log_iostream": "stderr",
"logtag": "F",
"time": "2023-11-28T09:50:37.934104792Z"
},
"resources": {
"k8s_cluster_name": "",
"k8s_container_name": "registration",
"k8s_container_restart_count": "0",
"k8s_deployment_name": "registration",
"k8s_namespace_name": "default",
"k8s_node_name": "deepopinion-worker",
"k8s_pod_name": "registration-855dfb97f6-h9vxs",
"k8s_pod_start_time": "2023-11-28 09:50:06 +0000 UTC",
"k8s_pod_uid": "0544a01e-e782-410b-8530-eb43df531ba9"
},
"severity_text": "",
"severity_number": 0,
"span_id": "",
"trace_flags": 0,
"trace_id": ""
}
@nitya-signoz thanks, in that case I'd prefer to change the collector so that I can store logs with the correct attributes from the get-go.
nitya-signoz
11/28/2023, 10:05 AMDiogo Baeder
11/28/2023, 10:17 AMDiogo Baeder
11/28/2023, 11:21 AM
`span_id`, but not yet for `trace_id`; I tried changing the regex so that, instead of matching at the start of the string, it matches with a word boundary (`\b`) on the left, but it still doesn't work (even though it matches just fine on the regex website used as a reference)
Diogo Baeder
11/28/2023, 11:22 AMnitya-signoz
11/28/2023, 11:24 AMDiogo Baeder
11/28/2023, 11:25 AMDiogo Baeder
11/30/2023, 2:30 AM(?P<severity_text>(NOTSET|DEBUG|INFO|WARN|WARNING|ERROR|FATAL|CRITICAL))\/(?P<severity_number>\d+) .+trace_id=(?P<trace_id>[a-zA-Z0-9]+) span_id=(?P<span_id>[a-zA-Z0-9]+) resource\.service\.name=(?P<service_name>[-\w]+)
(starting with a blank space)
And this is an example log that should have all the matches in the log line processed by the collector, but only has `trace_id` and `span_id`:
2023-11-30 01:14:24,681 INFO/20 [studioregistration.services.registration] [registration.py:51] [trace_id=381d38dc1d24127da0482b6b493138dc span_id=c120a55676c4fd6a resource.service.name=studio-registration trace_sampled=True] - Registered user: {\"uuid\": \"\", \"email\": \"dasa\", \"name\": \"123123\"}
Maybe the pipeline UI and the collector processor use different regular expression styles?
Diogo Baeder
11/30/2023, 5:22 AM
logstransform/internal:
# Operator chain for the `logstransform/internal` processor; nest this list
# under that processor key in the collector configuration.
operators:
  # Step 1: capture severity, trace_id, span_id and service_name from the log
  # body into the scratch map attributes.temp_trace. Guarded so it only runs
  # on bodies that contain both a trace_id= and a span_id= token.
  # Pattern playground: https://regex101.com/r/TUZhgm/5
  - id: logs_to_tags
    type: regex_parser
    if: 'body matches "trace_id=\\w+.+span_id=\\w+"'
    parse_from: body
    parse_to: attributes.temp_trace
    regex: ' (?P<severity>NOTSET|DEBUG|INFO|WARN|WARNING|ERROR|FATAL|CRITICAL).*trace_id=(?P<trace_id>[-\w]+) span_id=(?P<span_id>[-\w]+) resource\.service\.name=(?P<service_name>[-\w]+)'
    output: trace_parser

  # Step 2: read the trace and span identifiers from the scratch map.
  - id: trace_parser
    type: trace_parser
    trace_id:
      parse_from: attributes.temp_trace.trace_id
    span_id:
      parse_from: attributes.temp_trace.span_id
    output: severity_parser

  # Step 3: translate the captured level name into a severity; skipped when
  # the regex did not capture a severity.
  - id: severity_parser
    type: severity_parser
    if: '"severity" in attributes.temp_trace'
    parse_from: attributes.temp_trace.severity
    mapping:
      default: NOTSET
      debug: DEBUG
      info: INFO
      # WARN/WARNING and FATAL/CRITICAL are treated as aliases.
      warn: [WARN, WARNING]
      error: ERROR
      fatal: [FATAL, CRITICAL]
    output: move_service_name

  # Step 4: move the captured service name to resource.service_name, when one
  # was captured.
  - id: move_service_name
    type: move
    if: '"service_name" in attributes.temp_trace'
    from: attributes.temp_trace.service_name
    to: resource.service_name
    output: remove_temp

  # Step 5: drop the scratch map so it is not stored with the log record.
  - id: remove_temp
    type: remove
    if: '"temp_trace" in attributes'
    field: attributes.temp_trace