Add anonymization and the protos to the telemetry lib
Add the protos for the traces used to publish metrics to Clearcut, along with the anonymization code and tests that ensure we don't collect non-anonymized paths.

Bug: 326277821
Change-Id: Ifae4d51f59db2219995a0a8d21785729f5eeb137
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5850298
Reviewed-by: Terrence Reilly <treilly@google.com>
Commit-Queue: Struan Shrimpton <sshrimp@google.com>
changes/98/5850298/2
parent
62475a5ed8
commit
55d065cc0c
@ -0,0 +1,55 @@
|
||||
# Copyright 2024 The Chromium Authors
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
"""Util for anonymizing telemetry spans."""
|
||||
|
||||
import getpass
|
||||
import re
|
||||
|
||||
from typing import Optional, Pattern, Sequence, Tuple
|
||||
from google.protobuf import json_format
|
||||
|
||||
from .proto import trace_span_pb2
|
||||
|
||||
|
||||
class Anonymizer:
    """Redact the personally identifiable information.

    Holds an ordered list of (pattern, replacement) rules and applies them to
    text. Unless the current user is "root", a rule that replaces the current
    username with "<user>" is appended after the caller-supplied rules.
    """

    def __init__(
        self,
        replacements: Optional[Sequence[Tuple[Pattern[str],
                                              str]]] = None) -> None:
        """Builds the rule list.

        Args:
            replacements: Optional (pattern, replacement) pairs, applied in
                the given order. A pattern may be a compiled pattern or a
                plain regex string; either form is compiled once here.

        Raises:
            re.error: If a string pattern is not a valid regular expression.
        """
        # re.compile() hands back an already-compiled pattern unchanged, so
        # both forms are accepted and an invalid regex is reported here rather
        # than on the first apply() call.
        self._replacements = [(re.compile(pattern), repl)
                              for pattern, repl in (replacements or [])]

        # Look the username up once so the check and the rule agree.
        user = getpass.getuser()
        if user != "root":
            # Substituting the root user doesn't actually anonymize anything.
            self._replacements.append((re.compile(re.escape(user)), "<user>"))

    def __call__(self, *args, **kwargs):
        """Forwards to apply() so an instance can be used as a function."""
        return self.apply(*args, **kwargs)

    def apply(self, data: str) -> str:
        """Applies the replacement rules to data text.

        Args:
            data: The text to redact.

        Returns:
            data with every rule applied in order; falsy input (such as an
            empty string) is returned unchanged.
        """
        if not data:
            return data

        for pattern, repl in self._replacements:
            data = pattern.sub(repl, data)

        return data
|
||||
|
||||
|
||||
class AnonymizingFilter:
    """Callable wrapper that runs an Anonymizer over TraceSpan messages."""

    def __init__(self, anonymizer: Anonymizer) -> None:
        """Stores the anonymizer used to scrub each message."""
        self._anonymizer = anonymizer

    def __call__(self,
                 msg: trace_span_pb2.TraceSpan) -> trace_span_pb2.TraceSpan:
        """Returns a new TraceSpan rebuilt from the anonymized JSON of msg.

        The message is serialized to JSON text, the anonymizer is applied to
        that text, and the result is parsed into a fresh TraceSpan.
        """
        # NOTE(review): the rules run over the whole JSON document, field
        # names included, so a rule matching a field name would presumably
        # make the parse below fail -- confirm against the configured rules.
        scrubbed = self._anonymizer.apply(json_format.MessageToJson(msg))
        # json_format.Parse() fills in and returns the message it is given.
        return json_format.Parse(scrubbed, trace_span_pb2.TraceSpan())
|
@ -0,0 +1,53 @@
|
||||
# Copyright 2024 The Chromium Authors
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
"""Test the config and anonymizer utils."""
|
||||
|
||||
import getpass
|
||||
import re
|
||||
import pytest
|
||||
|
||||
from . import anonymization
|
||||
|
||||
|
||||
def test_default_anonymizer_to_remove_username_from_path(monkeypatch) -> None:
    """Check the default Anonymizer replaces the username inside a path."""
    # Pin the username so the expectation does not depend on who runs the test.
    monkeypatch.setattr(getpass, "getuser", lambda: "user")

    redacted = anonymization.Anonymizer().apply("/home/user/docs")

    assert redacted == "/home/<user>/docs"
|
||||
|
||||
|
||||
def test_anonymizer_to_apply_passed_replacements() -> None:
    """Test anonymizer to apply the requested replacements."""
    # Redact a token unrelated to the current username. Building both the text
    # and the rule from getpass.getuser() let the default username rule produce
    # the expected output on its own, so the test kept passing even if the
    # caller-supplied replacements were ignored.
    text = "/checkout/codename-1234/out"

    replacements = [(re.compile(re.escape("codename-1234")), "<project>")]
    a = anonymization.Anonymizer(replacements=replacements)
    output = a.apply(text)

    assert output == "/checkout/<project>/out"
|
||||
|
||||
|
||||
def test_anonymizer_to_apply_multiple_replacements() -> None:
    """Check that several passed replacements are applied one after another."""
    # The second rule also rewrites text produced by the first one
    # ("abcyz" -> "xyz" -> "t"), which is what pins the ordering.
    rules = [(re.escape(src), dst) for src, dst in (("abc", "x"), ("xyz", "t"))]
    anonymizer = anonymization.Anonymizer(replacements=rules)

    redacted = anonymizer.apply("hello abcd. how is xyz. abcyz")

    assert redacted == "hello xd. how is t. t"
|
||||
|
||||
|
||||
def test_default_anonymizer_skip_root(monkeypatch) -> None:
    """Check that text is left untouched when the current user is root."""
    monkeypatch.setattr(getpass, "getuser", lambda: "root")

    original = "/root/home service.sysroot.SetupBoard"

    assert anonymization.Anonymizer().apply(original) == original
|
@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# source: chromite/telemetry/clientanalytics.proto
|
||||
"""Generated protocol buffer code."""
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
# @@protoc_insertion_point(imports)
|
||||
|
||||
_sym_db = _symbol_database.Default()
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
||||
b'\n(chromite/telemetry/clientanalytics.proto\x12\x12\x63hromite.telemetry\";\n\x08LogEvent\x12\x15\n\revent_time_ms\x18\x01 \x01(\x03\x12\x18\n\x10source_extension\x18\x06 \x01(\x0c\"!\n\nClientInfo\x12\x13\n\x0b\x63lient_type\x18\x01 \x01(\x05\"\x9f\x01\n\nLogRequest\x12\x33\n\x0b\x63lient_info\x18\x01 \x01(\x0b\x32\x1e.chromite.telemetry.ClientInfo\x12\x12\n\nlog_source\x18\x02 \x01(\x05\x12\x17\n\x0frequest_time_ms\x18\x04 \x01(\x03\x12/\n\tlog_event\x18\x03 \x03(\x0b\x32\x1c.chromite.telemetry.LogEvent\"/\n\x0bLogResponse\x12 \n\x18next_request_wait_millis\x18\x01 \x01(\x03\x42>Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
|
||||
)
|
||||
|
||||
# Hand this module's globals() dict to the protobuf builder helpers together
# with DESCRIPTOR.
# NOTE(review): judging by their names, the helpers create the message/enum
# descriptors and message classes and publish them as module globals (the
# lines below read entries such as _globals['_LOGEVENT']) -- confirm against
# the protobuf builder documentation.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(
    DESCRIPTOR, 'chromite.telemetry.clientanalytics_pb2', _globals)
# Only runs when _descriptor._USE_C_DESCRIPTORS is False.
# NOTE(review): presumably that is the pure-Python descriptor implementation,
# which needs the options and offsets below set by hand -- confirm.
if _descriptor._USE_C_DESCRIPTORS == False:

    DESCRIPTOR._options = None
    DESCRIPTOR._serialized_options = b'Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
    # NOTE(review): _serialized_start/_serialized_end look like the byte
    # offsets of each message inside the serialized file descriptor -- confirm.
    _globals['_LOGEVENT']._serialized_start = 64
    _globals['_LOGEVENT']._serialized_end = 123
    _globals['_CLIENTINFO']._serialized_start = 125
    _globals['_CLIENTINFO']._serialized_end = 158
    _globals['_LOGREQUEST']._serialized_start = 161
    _globals['_LOGREQUEST']._serialized_end = 320
    _globals['_LOGRESPONSE']._serialized_start = 322
    _globals['_LOGRESPONSE']._serialized_end = 369
|
||||
# @@protoc_insertion_point(module_scope)
|
@ -0,0 +1,60 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# source: chromite/telemetry/trace_span.proto
|
||||
"""Generated protocol buffer code."""
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
# @@protoc_insertion_point(imports)
|
||||
|
||||
_sym_db = _symbol_database.Default()
|
||||
|
||||
from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
||||
b'\n#chromite/telemetry/trace_span.proto\x12\x12\x63hromite.telemetry\x1a\x1cgoogle/protobuf/struct.proto\"\xa3\x11\n\tTraceSpan\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x36\n\x07\x63ontext\x18\x02 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Context\x12\x16\n\x0eparent_span_id\x18\x03 \x01(\t\x12\x39\n\tspan_kind\x18\x04 \x01(\x0e\x32&.chromite.telemetry.TraceSpan.SpanKind\x12\x19\n\x11start_time_millis\x18\x05 \x01(\x03\x12\x17\n\x0f\x65nd_time_millis\x18\x06 \x01(\x03\x12+\n\nattributes\x18\x07 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x33\n\x06\x65vents\x18\x08 \x03(\x0b\x32#.chromite.telemetry.TraceSpan.Event\x12\x31\n\x05links\x18\t \x03(\x0b\x32\".chromite.telemetry.TraceSpan.Link\x12\x34\n\x06status\x18\n \x01(\x0b\x32$.chromite.telemetry.TraceSpan.Status\x12\x38\n\x08resource\x18\x0b \x01(\x0b\x32&.chromite.telemetry.TraceSpan.Resource\x12Q\n\x15instrumentation_scope\x18\x0c \x01(\x0b\x32\x32.chromite.telemetry.TraceSpan.InstrumentationScope\x12\x41\n\rtelemetry_sdk\x18\r \x01(\x0b\x32*.chromite.telemetry.TraceSpan.TelemetrySdk\x1a?\n\x0cTelemetrySdk\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x1a\x66\n\x06System\x12\x0f\n\x07os_name\x18\x01 \x01(\t\x12\x12\n\nos_version\x18\x02 \x01(\t\x12\x0f\n\x07os_type\x18\x03 \x01(\t\x12\x0b\n\x03\x63pu\x18\x04 \x01(\t\x12\x19\n\x11host_architecture\x18\x05 \x01(\t\x1a\xd0\x02\n\x07Process\x12\x0b\n\x03pid\x18\x01 \x01(\t\x12\x17\n\x0f\x65xecutable_name\x18\x02 \x01(\t\x12\x17\n\x0f\x65xecutable_path\x18\x03 \x01(\t\x12\x0f\n\x07\x63ommand\x18\x04 \x01(\t\x12\x14\n\x0c\x63ommand_args\x18\x05 \x03(\t\x12\x15\n\rowner_is_root\x18\x06 \x01(\x08\x12\x14\n\x0cruntime_name\x18\x07 \x01(\t\x12\x17\n\x0fruntime_version\x18\x08 \x01(\t\x12\x1b\n\x13runtime_description\x18\t \x01(\t\x12\x13\n\x0b\x61pi_version\x18\n \x01(\t\x12;\n\x03\x65nv\x18\x0b \x03(\x0b\x32..chromite.telemetry.TraceSpan.Process.EnvEntry\x1a*\n\x08\x45nvEntry\x12\x0b\n\x03key\x18\x01 
\x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xa5\x01\n\x08Resource\x12\x36\n\x07process\x18\x01 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Process\x12\x34\n\x06system\x18\x02 \x01(\x0b\x32$.chromite.telemetry.TraceSpan.System\x12+\n\nattributes\x18\x03 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x35\n\x14InstrumentationScope\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x1a]\n\x05\x45vent\x12\x19\n\x11\x65vent_time_millis\x18\x01 \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\t\x12+\n\nattributes\x18\x03 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x62\n\nStackFrame\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x13\n\x0bline_number\x18\x03 \x01(\x03\x12\x15\n\rcolumn_number\x18\x04 \x01(\x03\x1a\x83\x01\n\nStackTrace\x12>\n\x0cstack_frames\x18\x01 \x03(\x0b\x32(.chromite.telemetry.TraceSpan.StackFrame\x12\x1c\n\x14\x64ropped_frames_count\x18\x02 \x01(\x03\x12\x17\n\x0fstacktrace_hash\x18\x03 \x01(\t\x1a\xee\x01\n\x06Status\x12\x44\n\x0bstatus_code\x18\x01 \x01(\x0e\x32/.chromite.telemetry.TraceSpan.Status.StatusCode\x12\x0f\n\x07message\x18\x02 \x01(\t\x12=\n\x0bstack_trace\x18\x03 \x01(\x0b\x32(.chromite.telemetry.TraceSpan.StackTrace\"N\n\nStatusCode\x12\x15\n\x11STATUS_CODE_UNSET\x10\x00\x12\x12\n\x0eSTATUS_CODE_OK\x10\x01\x12\x15\n\x11STATUS_CODE_ERROR\x10\x02\x1a\x41\n\x07\x43ontext\x12\x10\n\x08trace_id\x18\x01 \x01(\t\x12\x0f\n\x07span_id\x18\x02 \x01(\t\x12\x13\n\x0btrace_state\x18\x03 \x01(\t\x1ak\n\x04Link\x12\x36\n\x07\x63ontext\x18\x01 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Context\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"i\n\x08SpanKind\x12\x19\n\x15SPAN_KIND_UNSPECIFIED\x10\x00\x12\x16\n\x12SPAN_KIND_INTERNAL\x10\x01\x12\x14\n\x10SPAN_KIND_SERVER\x10\x02\x12\x14\n\x10SPAN_KIND_CLIENT\x10\x03\x42>Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
|
||||
)
|
||||
|
||||
# Hand this module's globals() dict to the protobuf builder helpers together
# with DESCRIPTOR.
# NOTE(review): judging by their names, the helpers create the message/enum
# descriptors and message classes and publish them as module globals (the
# lines below read names such as _TRACESPAN_PROCESS_ENVENTRY and
# _globals['_TRACESPAN']) -- confirm against the protobuf builder
# documentation.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR,
                                        'chromite.telemetry.trace_span_pb2',
                                        _globals)
# Only runs when _descriptor._USE_C_DESCRIPTORS is False.
# NOTE(review): presumably that is the pure-Python descriptor implementation,
# which needs the options and offsets below set by hand -- confirm.
if _descriptor._USE_C_DESCRIPTORS == False:

    DESCRIPTOR._options = None
    DESCRIPTOR._serialized_options = b'Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
    _TRACESPAN_PROCESS_ENVENTRY._options = None
    _TRACESPAN_PROCESS_ENVENTRY._serialized_options = b'8\001'
    # NOTE(review): _serialized_start/_serialized_end look like the byte
    # offsets of each message or enum inside the serialized file descriptor;
    # nested types fall inside their parent's range -- confirm.
    _globals['_TRACESPAN']._serialized_start = 90
    _globals['_TRACESPAN']._serialized_end = 2301
    _globals['_TRACESPAN_TELEMETRYSDK']._serialized_start = 719
    _globals['_TRACESPAN_TELEMETRYSDK']._serialized_end = 782
    _globals['_TRACESPAN_SYSTEM']._serialized_start = 784
    _globals['_TRACESPAN_SYSTEM']._serialized_end = 886
    _globals['_TRACESPAN_PROCESS']._serialized_start = 889
    _globals['_TRACESPAN_PROCESS']._serialized_end = 1225
    _globals['_TRACESPAN_PROCESS_ENVENTRY']._serialized_start = 1183
    _globals['_TRACESPAN_PROCESS_ENVENTRY']._serialized_end = 1225
    _globals['_TRACESPAN_RESOURCE']._serialized_start = 1228
    _globals['_TRACESPAN_RESOURCE']._serialized_end = 1393
    _globals['_TRACESPAN_INSTRUMENTATIONSCOPE']._serialized_start = 1395
    _globals['_TRACESPAN_INSTRUMENTATIONSCOPE']._serialized_end = 1448
    _globals['_TRACESPAN_EVENT']._serialized_start = 1450
    _globals['_TRACESPAN_EVENT']._serialized_end = 1543
    _globals['_TRACESPAN_STACKFRAME']._serialized_start = 1545
    _globals['_TRACESPAN_STACKFRAME']._serialized_end = 1643
    _globals['_TRACESPAN_STACKTRACE']._serialized_start = 1646
    _globals['_TRACESPAN_STACKTRACE']._serialized_end = 1777
    _globals['_TRACESPAN_STATUS']._serialized_start = 1780
    _globals['_TRACESPAN_STATUS']._serialized_end = 2018
    _globals['_TRACESPAN_STATUS_STATUSCODE']._serialized_start = 1940
    _globals['_TRACESPAN_STATUS_STATUSCODE']._serialized_end = 2018
    _globals['_TRACESPAN_CONTEXT']._serialized_start = 2020
    _globals['_TRACESPAN_CONTEXT']._serialized_end = 2085
    _globals['_TRACESPAN_LINK']._serialized_start = 2087
    _globals['_TRACESPAN_LINK']._serialized_end = 2194
    _globals['_TRACESPAN_SPANKIND']._serialized_start = 2196
    _globals['_TRACESPAN_SPANKIND']._serialized_end = 2301
|
||||
# @@protoc_insertion_point(module_scope)
|
@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
# Downloads the generated protobuf modules from the chromite repository into
# the current directory, replacing clientanalytics_pb2.py and
# trace_span_pb2.py.

# Stop on the first failure, including a failure on the left side of a pipe,
# so a broken download is not silently decoded into an empty module.
set -euo pipefail

readonly BASE_URL="https://chromium.googlesource.com/chromiumos/chromite/+/main/api/gen_sdk/chromite/telemetry"

tmp=""

# Removes a partially written download when the script exits early.
cleanup() {
  if [[ -n "${tmp}" ]]; then
    rm -f -- "${tmp}"
  fi
}
trap cleanup EXIT

for module in clientanalytics_pb2.py trace_span_pb2.py; do
  # Write to a temporary name first so an existing module is only replaced
  # once the download and decode have both succeeded.
  tmp="${module}.tmp"
  # The URL is quoted because '?' is a glob character. The ?format=TEXT
  # response is base64-encoded, hence the decode step.
  gob-curl "${BASE_URL}/${module}?format=TEXT" | base64 --decode > "${tmp}"
  mv -- "${tmp}" "${module}"
done
|
Loading…
Reference in New Issue