From 333785b3a4e06ff74b20d88e8b979f42239d8acb Mon Sep 17 00:00:00 2001 From: Johann Schmitz Date: Tue, 25 May 2021 06:28:32 +0200 Subject: [PATCH 1/2] Convert FORMATTER_RECORD_FIELD_SKIP_LIST to set The `FORMATTER_RECORD_FIELD_SKIP_LIST` is used heavily to remove fields from the resulting document, via membership tests of the form `x in FORMATTER_RECORD_FIELD_SKIP_LIST`. This operation is `O(n)` for lists, but `O(1)` on average (`O(n)` only in the worst case) for sets. --- logstash_async/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash_async/constants.py b/logstash_async/constants.py index f04167c..37d0b21 100644 --- a/logstash_async/constants.py +++ b/logstash_async/constants.py @@ -32,11 +32,11 @@ class Constants: # Usually this list does not need to be modified. Add/Remove elements to # exclude/include them in the Logstash event, for the full list see: # http://docs.python.org/library/logging.html#logrecord-attributes - FORMATTER_RECORD_FIELD_SKIP_LIST = [ + FORMATTER_RECORD_FIELD_SKIP_LIST = { 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename', 'funcName', 'id', 'levelname', 'levelno', 'lineno', 'module', 'msecs', 'msg', 'name', 'pathname', 'process', - 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName'] + 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName'} # fields to be set on the top-level of a Logstash event/message, do not modify this # unless you know what you are doing FORMATTER_LOGSTASH_MESSAGE_FIELD_LIST = [ From 05b868e58f3aec30e5a16fa8f39bf3971bb49dd6 Mon Sep 17 00:00:00 2001 From: Johann Schmitz Date: Tue, 25 May 2021 06:36:36 +0200 Subject: [PATCH 2/2] Exclude fields early in _get_record_fields The `_get_record_fields` method extracts and formats the items of the `LogRecord` object into the message dict. After collecting the static extra fields and adding the per-record extra fields, `_remove_excluded_fields` is called to remove any keys we don't want. 
`_remove_excluded_fields` is called for the two `dict`s `message` and `extra_fields` and removes items if the key is present in `FORMATTER_RECORD_FIELD_SKIP_LIST`. We can improve this by not adding those fields in the first place, since we're iterating over `record.__dict__` anyway. --- logstash_async/formatter.py | 4 +++- tests/formatter_test.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/logstash_async/formatter.py b/logstash_async/formatter.py index 173f7b1..83d2bfa 100644 --- a/logstash_async/formatter.py +++ b/logstash_async/formatter.py @@ -133,7 +133,9 @@ def _format_timestamp(self, time_): # ---------------------------------------------------------------------- def _get_record_fields(self, record): - return {k: self._value_repr(v) for k, v in record.__dict__.items()} + return {k: self._value_repr(v) + for k, v in record.__dict__.items() + if k not in constants.FORMATTER_RECORD_FIELD_SKIP_LIST} # ---------------------------------------------------------------------- def _value_repr(self, value): diff --git a/tests/formatter_test.py b/tests/formatter_test.py index 92a6ae4..1a004c0 100644 --- a/tests/formatter_test.py +++ b/tests/formatter_test.py @@ -29,6 +29,15 @@ def test_format(self): self.assertIsNone(file_handler.exception) + def test_fields_are_excluded_in_get_record_fields(self): + formatter = LogstashFormatter() + log_record = makeLogRecord({ + 'filename': 'foo.py', + 'dummy': 'foobar' + }) + self.assertDictEqual(formatter._get_record_fields(log_record), { + 'dummy': 'foobar' + }) if __name__ == "__main__": unittest.main()