From 9fe6b6d10280f1b854a144bd27c1fb6eb432d81e Mon Sep 17 00:00:00 2001 From: Vitaly Date: Sat, 22 Apr 2023 15:51:54 +0800 Subject: [PATCH 1/3] Update mixin to latest changes from grafana/postgres_exporter Porting a bunch of PRs accumulated over time in grafana/postgres_exporter: - https://github.com/grafana/postgres_exporter/pull/11 (@v-zhuravlev) - https://github.com/grafana/postgres_exporter/pull/12 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/13 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/14 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/15 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/16 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/17 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/20 (@gaantunes) - https://github.com/grafana/postgres_exporter/pull/21 (@mshahzeb) - https://github.com/grafana/postgres_exporter/pull/22 (@mshahzeb) Signed-off-by: Cristian Greco --- postgres_mixin/.lint | 15 + postgres_mixin/alerts/postgres.libsonnet | 189 ++- postgres_mixin/config.libsonnet | 6 +- ...overview.json => postgresql-overview.json} | 1100 +++++++++-------- 4 files changed, 758 insertions(+), 552 deletions(-) create mode 100644 postgres_mixin/.lint rename postgres_mixin/dashboards/{postgres-overview.json => postgresql-overview.json} (60%) diff --git a/postgres_mixin/.lint b/postgres_mixin/.lint new file mode 100644 index 000000000..65d621f69 --- /dev/null +++ b/postgres_mixin/.lint @@ -0,0 +1,15 @@ +--- +exclusions: + panel-units-rule: + reason: Ignoring so far, need to address this in future + panel-title-description-rule: + reason: Ignoring so far, need to address this in future + panel-datasource-rule: + reason: "Loki datasource variable is being named as loki_datasource now while linter expects 'datasource'" + template-datasource-rule: + reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + alert-name-camelcase: + reason: QPS is a common acronym (Queries Per Second) and should be allowed + entries: + - alert: PostgreSQLQPS + \ No newline at end of file diff --git a/postgres_mixin/alerts/postgres.libsonnet b/postgres_mixin/alerts/postgres.libsonnet index 4b0275df1..e3c8ee278 100644 --- a/postgres_mixin/alerts/postgres.libsonnet +++ b/postgres_mixin/alerts/postgres.libsonnet @@ -7,16 +7,16 @@ { alert: 'PostgreSQLMaxConnectionsReached', annotations: { - description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy.', - summary: '{{ $labels.instance }} has maxed out Postgres connections.', + description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).', + summary: 'Postgres connections count is over the maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) >= - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) - ||| % $._config, + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '1m', labels: { severity: 'warning', @@ -26,17 +26,17 @@ alert: 'PostgreSQLHighConnections', annotations: { description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is mostly likely).', - summary: '{{ $labels.instance }} is over 80% of max Postgres connections.', + summary: 'Postgres connections count is over 80% of maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) > ( - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) ) * 0.8 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '10m', labels: { severity: 'warning', @@ -46,7 +46,7 @@ alert: 'PostgreSQLDown', annotations: { description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be effected provided at least 1 node is still alive.', - summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}', + summary: 'PostgreSQL is not processing queries.', }, expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config, 'for': '1m', @@ -58,15 +58,15 @@ alert: 'PostgreSQLSlowQueries', annotations: { description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ', - summary: 'PostgreSQL high number of slow on {{ $labels.cluster }} for database {{ $labels.datname }} ', + summary: 'PostgreSQL high number of slow queries.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( rate ( - pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m] + pg_stat_activity_max_tx_duration{%(dbNameFilter)s, %(postgresExporterSelector)s}[2m] ) ) > 2 * 60 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '2m', labels: { severity: 'warning', @@ -76,19 +76,19 @@ alert: 'PostgreSQLQPS', annotations: { description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL high number of queries per second.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( irate( - pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_commit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + irate( - pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_rollback{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) > 10000 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '5m', labels: { severity: 'warning', @@ -98,28 +98,165 @@ alert: 'PostgreSQLCacheHitRatio', annotations: { description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL low cache hit rate.', }, expr: ||| - avg by (datname) ( - rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]) + avg by (datname, %(agg)s) ( + rate(pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]) / ( rate( - pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + rate( - pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_read{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) ) < 0.98 + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasTooManyRollbacks', + annotations: { + description: 'PostgreSQL has too many rollbacks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has too many rollbacks.', + }, + expr: ||| + avg without(pod, instance) + (rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]) / + (rate(pg_stat_database_xact_commit{%(dbNameFilter)s}[5m]) + rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]))) > 0.10 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasHighDeadLocks', + annotations: { + description: 'PostgreSQL has too high deadlocks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of deadlocks.', + }, + expr: ||| + max without(pod, instance) (rate(pg_stat_database_deadlocks{%(dbNameFilter)s}[5m]) * 60) > 5 ||| % $._config, 'for': '5m', labels: { severity: 'warning', }, }, + { + alert: 'PostgresAcquiredTooManyLocks', + annotations: { + description: 'PostgreSQL has acquired too many locks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of acquired locks.', + }, + expr: ||| + max by(datname, %(agg)s) ( + (pg_locks_count{%(dbNameFilter)s}) + / + on(%(aggWithoutServer)s) group_left(server) ( + pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{} + ) + ) > 0.20 + ||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), aggWithoutServer: std.join(',', std.filter(function(x) x != "server", $._config.groupLabels + $._config.instanceLabels)) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresReplicationLaggingMore1Hour', + annotations: { + description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.', + summary: 'PostgreSQL replication lagging more than 1 hour.', + }, + expr: ||| + (pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasReplicationSlotUsed', + annotations: { + description: '{{ $labels.instance }} has replication slots that are not used, which might lead to replication lag or data inconsistency.', + summary: 'PostgreSQL has unused replication slots.', + }, + expr: 'pg_replication_slots_active{} == 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresReplicationRoleChanged', + annotations: { + description: '{{ $labels.instance }} replication role has changed. Verify if this is expected or if it indicates a failover.', + summary: 'PostgreSQL replication role change detected.', + }, + expr: 'pg_replication_is_replica{} and changes(pg_replication_is_replica{}[1m]) > 0', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasExporterErrors', + annotations: { + description: '{{ $labels.instance }} exporter is experiencing errors. Verify exporter health and configuration.', + summary: 'PostgreSQL exporter errors detected.', + }, + expr: 'pg_exporter_last_scrape_error{} > 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTablesNotVaccumed', + annotations: { + description: '{{ $labels.instance }} tables have not been vacuumed recently within the last hour, which may lead to performance degradation.', + summary: 'PostgreSQL tables not vacuumed.', + }, + expr: ||| + group without(pod, instance)( + timestamp( + pg_stat_user_tables_n_dead_tup{} > + pg_stat_user_tables_n_live_tup{} + * on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{} + + on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{} + ) + < time() - 36000 + ) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTooManyCheckpointsRequested', + annotations: { + description: '{{ $labels.instance }} is requesting too many checkpoints, which may lead to performance degradation.', + summary: 'PostgreSQL too many checkpoints requested.', + }, + expr: ||| + rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) / + (rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) + rate(pg_stat_bgwriter_checkpoints_req_total{}[5m])) + < 0.5 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, ], }, ], diff --git a/postgres_mixin/config.libsonnet b/postgres_mixin/config.libsonnet index d7bd7ac1b..d44830f87 100644 --- a/postgres_mixin/config.libsonnet +++ b/postgres_mixin/config.libsonnet @@ -1,5 +1,9 @@ { _config+:: { - postgresExporterSelector: '', + dbNameFilter: 'datname!~"template.*"', + postgresExporterSelector: 'job="integrations/postgres_exporter"', + groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'], + instanceLabels: ['instance', 'server'], + enableMultiCluster: false, }, } diff --git a/postgres_mixin/dashboards/postgres-overview.json b/postgres_mixin/dashboards/postgresql-overview.json similarity index 60% rename from postgres_mixin/dashboards/postgres-overview.json rename to postgres_mixin/dashboards/postgresql-overview.json index 9bf41be6a..8baf6fbb3 100644 --- a/postgres_mixin/dashboards/postgres-overview.json +++ b/postgres_mixin/dashboards/postgresql-overview.json @@ -3,81 +3,119 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Performance metrics for Postgres", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 455, "graphTooltip": 0, - "id": 1, - "iteration": 1603191461722, + "id": 38, "links": [], + "liveNow": false, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 2, + "w": 8, + "x": 0, + "y": 0 + }, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "span": 4, + "title": "PostgreSQL overview", + "titleSize": "h6", + "type": "row" + }, + { + + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 20, + "w": 4, "x": 0, "y": 0 }, - "hiddenSeries": false, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 11, "links": [], - "nullPointMode": "connected", + "maxDataPoints": 100, "options": { - "alertThreshold": true + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -94,7 +132,6 @@ } ], "intervalFactor": 2, - "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", "refId": "A", @@ -103,7 +140,7 @@ [ { "params": [ - "tup_fetched" + "xact_commit" ], "type": "field" }, @@ -119,7 +156,7 @@ } ] ], - "step": 120, + "step": 1800, "tags": [ { "key": "instance", @@ -127,11 +164,101 @@ "value": "/^$instance$/" } ] + } + ], + "title": "QPS", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 0 + }, + "id": 1, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.3", + "targets": [ { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -148,10 +275,10 @@ } ], "intervalFactor": 2, - "legendFormat": "returned", + "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -184,8 +311,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -202,10 +332,10 @@ } ], "intervalFactor": 2, - "legendFormat": "inserted", + "legendFormat": "returned", "measurement": "postgresql", "policy": "default", - "refId": "C", + "refId": "B", "resultFormat": "time_series", "select": [ [ @@ -238,8 +368,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -256,10 +389,10 @@ } ], "intervalFactor": 2, - "legendFormat": "updated", + "legendFormat": "inserted", "measurement": "postgresql", "policy": "default", - "refId": "D", + "refId": "C", "resultFormat": "time_series", "select": [ [ @@ -292,8 +425,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -310,10 +446,10 @@ } ], "intervalFactor": 2, - "legendFormat": "deleted", + "legendFormat": "updated", "measurement": "postgresql", "policy": "default", - "refId": "E", + "refId": "D", "resultFormat": "time_series", "select": [ [ @@ -343,124 +479,14 @@ "value": "/^$instance$/" } ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Rows", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 20, - "y": 0 - }, - "height": "55px", - "id": 11, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { + "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -477,15 +503,16 @@ } ], "intervalFactor": 2, + "legendFormat": "deleted", "measurement": "postgresql", "policy": "default", - "refId": "A", + "refId": "E", "resultFormat": "time_series", "select": [ [ { "params": [ - "xact_commit" + "tup_fetched" ], "type": "field" }, @@ -501,7 +528,7 @@ } ] ], - "step": 1800, + "step": 120, "tags": [ { "key": "instance", @@ -511,80 +538,100 @@ ] } ], - "thresholds": "", - "title": "QPS", - "transparent": true, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Rows", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 1, - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, - "hiddenSeries": false, "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_alloc{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_alloc_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -635,8 +682,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -687,8 +737,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -739,8 +792,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_clean{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_clean_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -791,8 +847,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_checkpoint{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_checkpoint_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -842,104 +901,113 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Buffers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "deadlocks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, - "hiddenSeries": false, "id": 3, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "conflicts", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -959,6 +1027,7 @@ "legendFormat": "deadlocks", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "A", "resultFormat": "time_series", "select": [ @@ -990,8 +1059,12 @@ }, { "alias": "deadlocks", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -1011,6 +1084,7 @@ "legendFormat": "conflicts", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "B", "resultFormat": "time_series", "select": [ @@ -1041,204 +1115,187 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Conflicts/Deadlocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, - "hiddenSeries": false, "id": 12, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": true, - "pluginVersion": "7.2.1", - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "expr": "sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "round(sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))*100,0.001)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{datname}} - cache hit rate", "refId": "A", "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cache hit ratio", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, - "hiddenSeries": false, "id": 13, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "pg_stat_database_numbackends{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, @@ -1247,51 +1304,13 @@ "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Number of active connections", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 26, + "revision": 1, + "schemaVersion": 38, "style": "dark", "tags": [ "postgres" @@ -1310,66 +1329,96 @@ "regex": "", "skipUrlSync": false, "type": "datasource" - }, + }, { "allValue": ".+", - "datasource": "$datasource", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, "definition": "label_values(pg_up, job)", "hide": 0, "includeAll": true, - "label": "job", + "label": "Job", "multi": true, "name": "job", "options": [], - "query": "label_values(pg_up, job)", - "refresh": 0, + "query": { + "query": "label_values(pg_up, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_up{job=~\"$job\"},instance)", "hide": 0, "includeAll": true, - "label": "instance", + "label": "Instance", "multi": true, "name": "instance", "options": [], - "query": "label_values(up{job=~\"$job\"},instance)", - "refresh": 1, + "query": { + "query": "label_values(pg_up{job=~\"$job\"},instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", "hide": 0, "includeAll": true, - "label": "db", + "label": "Database", "multi": false, "name": "db", "options": [], - "query": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", - "refresh": 1, + "query": { + "query": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1408,5 +1457,6 @@ "timezone": "browser", "title": "Postgres Overview", "uid": "wGgaPlciz", - "version": 5 -} + "version": 39, + "weekStart": "" +} \ No newline at end of file From 37157941475e1a7c6df8464d6e542a6ee612ce5b Mon Sep 17 00:00:00 2001 From: Cristian Greco Date: Mon, 21 Jul 2025 12:11:01 +0200 Subject: [PATCH 2/3] rename dashboard to old name Signed-off-by: Cristian Greco --- .../{postgresql-overview.json => postgres-overview.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename postgres_mixin/dashboards/{postgresql-overview.json => postgres-overview.json} (100%) diff --git a/postgres_mixin/dashboards/postgresql-overview.json b/postgres_mixin/dashboards/postgres-overview.json similarity index 100% rename from postgres_mixin/dashboards/postgresql-overview.json rename to postgres_mixin/dashboards/postgres-overview.json From e8deddfc4d96dab4bd190ae735379080bb973379 Mon Sep 17 00:00:00 2001 From: Cristian Greco Date: Mon, 21 Jul 2025 14:43:56 +0200 Subject: [PATCH 3/3] remove custom selector Signed-off-by: Cristian Greco --- postgres_mixin/config.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres_mixin/config.libsonnet b/postgres_mixin/config.libsonnet index d44830f87..4ea3b7e52 100644 --- a/postgres_mixin/config.libsonnet +++ b/postgres_mixin/config.libsonnet @@ -1,7 +1,7 @@ { _config+:: { dbNameFilter: 'datname!~"template.*"', - postgresExporterSelector: 'job="integrations/postgres_exporter"', + postgresExporterSelector: '', groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'], instanceLabels: ['instance', 'server'], enableMultiCluster: false,