Add anonymization and the protos to the telemetry lib

Add the protos for the traces used to publish metrics to Clearcut, along
with the anonymization code and tests that ensure we don't collect
non-anonymized paths.

Bug: 326277821
Change-Id: Ifae4d51f59db2219995a0a8d21785729f5eeb137
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5850298
Reviewed-by: Terrence Reilly <treilly@google.com>
Commit-Queue: Struan Shrimpton <sshrimp@google.com>
changes/98/5850298/2
Struan Shrimpton 6 months ago committed by LUCI CQ
parent 62475a5ed8
commit 55d065cc0c

@ -0,0 +1,55 @@
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Util for anonymizing telemetry spans."""
import getpass
import re
from typing import Optional, Pattern, Sequence, Tuple
from google.protobuf import json_format
from .proto import trace_span_pb2
class Anonymizer:
    """Redact personally identifiable information from text.

    Holds an ordered list of ``(pattern, replacement)`` rules and applies them
    one after another with ``re.sub``. Unless the current user is ``root``, a
    rule replacing the literal username with ``<user>`` is appended after the
    caller-supplied rules.
    """

    def __init__(
            self,
            replacements: Optional[Sequence[Tuple[Pattern[str],
                                                  str]]] = None) -> None:
        """Build the rule list.

        Args:
            replacements: Rules applied in order before the built-in username
                rule. Each pattern is handed to ``re.sub``, so an uncompiled
                pattern string works as well; each replacement is used as an
                ``re.sub`` replacement template.
        """
        self._replacements = list(replacements or [])
        # Look the username up once and reuse it for the check and the rule.
        user = getpass.getuser()
        # Substituting the root user doesn't actually anonymize anything, and
        # an empty name would compile to a pattern that matches at every
        # position of the text.
        if user and user != "root":
            self._replacements.append((re.compile(re.escape(user)), "<user>"))

    def __call__(self, *args, **kwargs):
        """Forward to apply() so an instance can be used as a callable."""
        return self.apply(*args, **kwargs)

    def apply(self, data: str) -> str:
        """Applies the replacement rules to data text.

        Args:
            data: Text to redact.

        Returns:
            The text after every rule has been applied in order. Falsy input
            (such as an empty string) is returned unchanged.
        """
        if not data:
            return data
        for repl_from, repl_to in self._replacements:
            data = re.sub(repl_from, repl_to, data)
        return data
class AnonymizingFilter:
    """Callable that runs TraceSpan messages through an Anonymizer."""

    def __init__(self, anonymizer: Anonymizer) -> None:
        """Remember the anonymizer that is applied to each message."""
        self._anonymizer = anonymizer

    def __call__(self,
                 msg: trace_span_pb2.TraceSpan) -> trace_span_pb2.TraceSpan:
        """Return a redacted TraceSpan built from the given message.

        The message is rendered to JSON text, the anonymizer's rules are
        applied to that text, and the result is parsed into a freshly created
        TraceSpan.
        """
        redacted_json = self._anonymizer.apply(json_format.MessageToJson(msg))
        # json_format.Parse fills in and returns the message it is handed.
        return json_format.Parse(redacted_json, trace_span_pb2.TraceSpan())

@ -0,0 +1,53 @@
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Test the config and anonymizer utils."""
import getpass
import re
import pytest
from . import anonymization
def test_default_anonymizer_to_remove_username_from_path(monkeypatch) -> None:
    """Check that a default Anonymizer replaces the username in a path."""
    # Pin the reported username so the expectation is machine-independent.
    monkeypatch.setattr(getpass, "getuser", lambda: "user")
    redacted = anonymization.Anonymizer().apply("/home/user/docs")
    assert redacted == "/home/<user>/docs"
def test_anonymizer_to_apply_passed_replacements() -> None:
    """Check that caller-supplied replacements are applied."""
    # NOTE(review): this uses the real username, which the Anonymizer also
    # redacts by default for non-root users, so the passed rule is not
    # exercised in isolation.
    rules = [(re.escape(getpass.getuser()), "<user>")]
    path = "/home/%s/docs" % getpass.getuser()
    anonymizer = anonymization.Anonymizer(replacements=rules)
    assert anonymizer.apply(path) == "/home/<user>/docs"
def test_anonymizer_to_apply_multiple_replacements() -> None:
    """Check that several replacements are applied in the order given."""
    rules = [
        (re.escape("abc"), "x"),
        (re.escape("xyz"), "t"),
    ]
    anonymizer = anonymization.Anonymizer(replacements=rules)
    # "abcyz" only becomes "t" if the first rule has already produced "xyz"
    # by the time the second rule runs.
    redacted = anonymizer.apply("hello abcd. how is xyz. abcyz")
    assert redacted == "hello xd. how is t. t"
def test_default_anonymizer_skip_root(monkeypatch) -> None:
    """Check that text is left untouched when the current user is root."""
    monkeypatch.setattr(getpass, "getuser", lambda: "root")
    original = "/root/home service.sysroot.SetupBoard"
    # Both "/root" and "sysroot" contain the username and must survive.
    assert anonymization.Anonymizer().apply(original) == original

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: chromite/telemetry/clientanalytics.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n(chromite/telemetry/clientanalytics.proto\x12\x12\x63hromite.telemetry\";\n\x08LogEvent\x12\x15\n\revent_time_ms\x18\x01 \x01(\x03\x12\x18\n\x10source_extension\x18\x06 \x01(\x0c\"!\n\nClientInfo\x12\x13\n\x0b\x63lient_type\x18\x01 \x01(\x05\"\x9f\x01\n\nLogRequest\x12\x33\n\x0b\x63lient_info\x18\x01 \x01(\x0b\x32\x1e.chromite.telemetry.ClientInfo\x12\x12\n\nlog_source\x18\x02 \x01(\x05\x12\x17\n\x0frequest_time_ms\x18\x04 \x01(\x03\x12/\n\tlog_event\x18\x03 \x03(\x0b\x32\x1c.chromite.telemetry.LogEvent\"/\n\x0bLogResponse\x12 \n\x18next_request_wait_millis\x18\x01 \x01(\x03\x42>Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
)
# Generated code: regenerate it from the .proto source instead of editing it.
# The builder calls below receive this module's globals() together with
# DESCRIPTOR; presumably they define the message classes and descriptor
# objects in this module's namespace.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(
DESCRIPTOR, 'chromite.telemetry.clientanalytics_pb2', _globals)
# NOTE(review): indentation was lost in this view. In protoc output the
# statements after this `if` are presumably its body, i.e. they only run when
# the C descriptor implementation is not in use -- confirm against the
# checked-in file.
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
# Start/end positions recorded per descriptor; presumably byte offsets into
# the serialized file descriptor passed to AddSerializedFile.
_globals['_LOGEVENT']._serialized_start = 64
_globals['_LOGEVENT']._serialized_end = 123
_globals['_CLIENTINFO']._serialized_start = 125
_globals['_CLIENTINFO']._serialized_end = 158
_globals['_LOGREQUEST']._serialized_start = 161
_globals['_LOGREQUEST']._serialized_end = 320
_globals['_LOGRESPONSE']._serialized_start = 322
_globals['_LOGRESPONSE']._serialized_end = 369
# @@protoc_insertion_point(module_scope)

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: chromite/telemetry/trace_span.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n#chromite/telemetry/trace_span.proto\x12\x12\x63hromite.telemetry\x1a\x1cgoogle/protobuf/struct.proto\"\xa3\x11\n\tTraceSpan\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x36\n\x07\x63ontext\x18\x02 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Context\x12\x16\n\x0eparent_span_id\x18\x03 \x01(\t\x12\x39\n\tspan_kind\x18\x04 \x01(\x0e\x32&.chromite.telemetry.TraceSpan.SpanKind\x12\x19\n\x11start_time_millis\x18\x05 \x01(\x03\x12\x17\n\x0f\x65nd_time_millis\x18\x06 \x01(\x03\x12+\n\nattributes\x18\x07 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x33\n\x06\x65vents\x18\x08 \x03(\x0b\x32#.chromite.telemetry.TraceSpan.Event\x12\x31\n\x05links\x18\t \x03(\x0b\x32\".chromite.telemetry.TraceSpan.Link\x12\x34\n\x06status\x18\n \x01(\x0b\x32$.chromite.telemetry.TraceSpan.Status\x12\x38\n\x08resource\x18\x0b \x01(\x0b\x32&.chromite.telemetry.TraceSpan.Resource\x12Q\n\x15instrumentation_scope\x18\x0c \x01(\x0b\x32\x32.chromite.telemetry.TraceSpan.InstrumentationScope\x12\x41\n\rtelemetry_sdk\x18\r \x01(\x0b\x32*.chromite.telemetry.TraceSpan.TelemetrySdk\x1a?\n\x0cTelemetrySdk\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x1a\x66\n\x06System\x12\x0f\n\x07os_name\x18\x01 \x01(\t\x12\x12\n\nos_version\x18\x02 \x01(\t\x12\x0f\n\x07os_type\x18\x03 \x01(\t\x12\x0b\n\x03\x63pu\x18\x04 \x01(\t\x12\x19\n\x11host_architecture\x18\x05 \x01(\t\x1a\xd0\x02\n\x07Process\x12\x0b\n\x03pid\x18\x01 \x01(\t\x12\x17\n\x0f\x65xecutable_name\x18\x02 \x01(\t\x12\x17\n\x0f\x65xecutable_path\x18\x03 \x01(\t\x12\x0f\n\x07\x63ommand\x18\x04 \x01(\t\x12\x14\n\x0c\x63ommand_args\x18\x05 \x03(\t\x12\x15\n\rowner_is_root\x18\x06 \x01(\x08\x12\x14\n\x0cruntime_name\x18\x07 \x01(\t\x12\x17\n\x0fruntime_version\x18\x08 \x01(\t\x12\x1b\n\x13runtime_description\x18\t \x01(\t\x12\x13\n\x0b\x61pi_version\x18\n \x01(\t\x12;\n\x03\x65nv\x18\x0b \x03(\x0b\x32..chromite.telemetry.TraceSpan.Process.EnvEntry\x1a*\n\x08\x45nvEntry\x12\x0b\n\x03key\x18\x01 
\x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xa5\x01\n\x08Resource\x12\x36\n\x07process\x18\x01 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Process\x12\x34\n\x06system\x18\x02 \x01(\x0b\x32$.chromite.telemetry.TraceSpan.System\x12+\n\nattributes\x18\x03 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x35\n\x14InstrumentationScope\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x1a]\n\x05\x45vent\x12\x19\n\x11\x65vent_time_millis\x18\x01 \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\t\x12+\n\nattributes\x18\x03 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x62\n\nStackFrame\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x13\n\x0bline_number\x18\x03 \x01(\x03\x12\x15\n\rcolumn_number\x18\x04 \x01(\x03\x1a\x83\x01\n\nStackTrace\x12>\n\x0cstack_frames\x18\x01 \x03(\x0b\x32(.chromite.telemetry.TraceSpan.StackFrame\x12\x1c\n\x14\x64ropped_frames_count\x18\x02 \x01(\x03\x12\x17\n\x0fstacktrace_hash\x18\x03 \x01(\t\x1a\xee\x01\n\x06Status\x12\x44\n\x0bstatus_code\x18\x01 \x01(\x0e\x32/.chromite.telemetry.TraceSpan.Status.StatusCode\x12\x0f\n\x07message\x18\x02 \x01(\t\x12=\n\x0bstack_trace\x18\x03 \x01(\x0b\x32(.chromite.telemetry.TraceSpan.StackTrace\"N\n\nStatusCode\x12\x15\n\x11STATUS_CODE_UNSET\x10\x00\x12\x12\n\x0eSTATUS_CODE_OK\x10\x01\x12\x15\n\x11STATUS_CODE_ERROR\x10\x02\x1a\x41\n\x07\x43ontext\x12\x10\n\x08trace_id\x18\x01 \x01(\t\x12\x0f\n\x07span_id\x18\x02 \x01(\t\x12\x13\n\x0btrace_state\x18\x03 \x01(\t\x1ak\n\x04Link\x12\x36\n\x07\x63ontext\x18\x01 \x01(\x0b\x32%.chromite.telemetry.TraceSpan.Context\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"i\n\x08SpanKind\x12\x19\n\x15SPAN_KIND_UNSPECIFIED\x10\x00\x12\x16\n\x12SPAN_KIND_INTERNAL\x10\x01\x12\x14\n\x10SPAN_KIND_SERVER\x10\x02\x12\x14\n\x10SPAN_KIND_CLIENT\x10\x03\x42>Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
)
# Generated code: regenerate it from the .proto source instead of editing it.
# The builder calls below receive this module's globals() together with
# DESCRIPTOR; presumably they define the message classes and descriptor
# objects in this module's namespace.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR,
'chromite.telemetry.trace_span_pb2',
_globals)
# NOTE(review): indentation was lost in this view. In protoc output the
# statements after this `if` are presumably its body, i.e. they only run when
# the C descriptor implementation is not in use -- confirm against the
# checked-in file.
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
# Options for the entry message of the Process.env field; presumably the
# serialized map_entry flag -- TODO confirm.
_TRACESPAN_PROCESS_ENVENTRY._options = None
_TRACESPAN_PROCESS_ENVENTRY._serialized_options = b'8\001'
# Start/end positions recorded per descriptor; presumably byte offsets into
# the serialized file descriptor passed to AddSerializedFile.
_globals['_TRACESPAN']._serialized_start = 90
_globals['_TRACESPAN']._serialized_end = 2301
_globals['_TRACESPAN_TELEMETRYSDK']._serialized_start = 719
_globals['_TRACESPAN_TELEMETRYSDK']._serialized_end = 782
_globals['_TRACESPAN_SYSTEM']._serialized_start = 784
_globals['_TRACESPAN_SYSTEM']._serialized_end = 886
_globals['_TRACESPAN_PROCESS']._serialized_start = 889
_globals['_TRACESPAN_PROCESS']._serialized_end = 1225
_globals['_TRACESPAN_PROCESS_ENVENTRY']._serialized_start = 1183
_globals['_TRACESPAN_PROCESS_ENVENTRY']._serialized_end = 1225
_globals['_TRACESPAN_RESOURCE']._serialized_start = 1228
_globals['_TRACESPAN_RESOURCE']._serialized_end = 1393
_globals['_TRACESPAN_INSTRUMENTATIONSCOPE']._serialized_start = 1395
_globals['_TRACESPAN_INSTRUMENTATIONSCOPE']._serialized_end = 1448
_globals['_TRACESPAN_EVENT']._serialized_start = 1450
_globals['_TRACESPAN_EVENT']._serialized_end = 1543
_globals['_TRACESPAN_STACKFRAME']._serialized_start = 1545
_globals['_TRACESPAN_STACKFRAME']._serialized_end = 1643
_globals['_TRACESPAN_STACKTRACE']._serialized_start = 1646
_globals['_TRACESPAN_STACKTRACE']._serialized_end = 1777
_globals['_TRACESPAN_STATUS']._serialized_start = 1780
_globals['_TRACESPAN_STATUS']._serialized_end = 2018
_globals['_TRACESPAN_STATUS_STATUSCODE']._serialized_start = 1940
_globals['_TRACESPAN_STATUS_STATUSCODE']._serialized_end = 2018
_globals['_TRACESPAN_CONTEXT']._serialized_start = 2020
_globals['_TRACESPAN_CONTEXT']._serialized_end = 2085
_globals['_TRACESPAN_LINK']._serialized_start = 2087
_globals['_TRACESPAN_LINK']._serialized_end = 2194
_globals['_TRACESPAN_SPANKIND']._serialized_start = 2196
_globals['_TRACESPAN_SPANKIND']._serialized_end = 2301
# @@protoc_insertion_point(module_scope)

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Refreshes the generated protobuf modules in the current directory from
# chromite's main branch.
#
# Each file is requested with ?format=TEXT and piped through base64 --decode,
# so gob-curl and base64 must be available on PATH.

# Stop on the first failure, including one inside a pipeline, so a failed
# download is never decoded into an empty or truncated module.
set -euo pipefail

readonly BASE_URL='https://chromium.googlesource.com/chromiumos/chromite/+/main/api/gen_sdk/chromite/telemetry'

for module in clientanalytics_pb2.py trace_span_pb2.py; do
  # Decode into a temporary file first so the existing module is replaced
  # only after both the download and the decode have succeeded.
  tmp="${module}.tmp"
  # The URL is quoted because it contains '?', which the shell would
  # otherwise treat as a glob character.
  if gob-curl "${BASE_URL}/${module}?format=TEXT" | base64 --decode > "${tmp}"; then
    mv -- "${tmp}" "${module}"
  else
    rm -f -- "${tmp}"
    echo "Failed to fetch ${module}" >&2
    exit 1
  fi
done
Loading…
Cancel
Save