diff --git a/postgres_mixin/.lint b/postgres_mixin/.lint new file mode 100644 index 000000000..65d621f69 --- /dev/null +++ b/postgres_mixin/.lint @@ -0,0 +1,15 @@ +--- +exclusions: + panel-units-rule: + reason: Ignored for now, needs to be addressed in the future + panel-title-description-rule: + reason: Ignored for now, needs to be addressed in the future + panel-datasource-rule: + reason: "The Loki datasource variable is now named loki_datasource, while the linter expects 'datasource'" + template-datasource-rule: + reason: "Based on the new convention we use the variable names prometheus_datasource and loki_datasource, whereas the linter expects 'datasource'" + alert-name-camelcase: + reason: QPS is a common acronym (Queries Per Second) and should be allowed + entries: + - alert: PostgreSQLQPS + \ No newline at end of file diff --git a/postgres_mixin/alerts/postgres.libsonnet b/postgres_mixin/alerts/postgres.libsonnet index 4b0275df1..e3c8ee278 100644 --- a/postgres_mixin/alerts/postgres.libsonnet +++ b/postgres_mixin/alerts/postgres.libsonnet @@ -7,16 +7,16 @@ { alert: 'PostgreSQLMaxConnectionsReached', annotations: { - description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy.', - summary: '{{ $labels.instance }} has maxed out Postgres connections.', + description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). 
Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).', + summary: 'Postgres connections count is over the maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) >= - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) - ||| % $._config, + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '1m', labels: { severity: 'warning', @@ -26,17 +26,17 @@ alert: 'PostgreSQLHighConnections', annotations: { description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}s). 
Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is mostly likely).', - summary: '{{ $labels.instance }} is over 80% of max Postgres connections.', + summary: 'Postgres connections count is over 80% of maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) > ( - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) ) * 0.8 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '10m', labels: { severity: 'warning', @@ -46,7 +46,7 @@ alert: 'PostgreSQLDown', annotations: { description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. 
User services should not be effected provided at least 1 node is still alive.', - summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}', + summary: 'PostgreSQL is not processing queries.', }, expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config, 'for': '1m', @@ -58,15 +58,15 @@ alert: 'PostgreSQLSlowQueries', annotations: { description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ', - summary: 'PostgreSQL high number of slow on {{ $labels.cluster }} for database {{ $labels.datname }} ', + summary: 'PostgreSQL high number of slow queries.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( rate ( - pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m] + pg_stat_activity_max_tx_duration{%(dbNameFilter)s, %(postgresExporterSelector)s}[2m] ) ) > 2 * 60 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '2m', labels: { severity: 'warning', @@ -76,19 +76,19 @@ alert: 'PostgreSQLQPS', annotations: { description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL high number of queries per second.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( irate( - pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_commit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + irate( - pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_rollback{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) > 10000 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + 
$._config.instanceLabels) }, 'for': '5m', labels: { severity: 'warning', @@ -98,28 +98,165 @@ alert: 'PostgreSQLCacheHitRatio', annotations: { description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL low cache hit rate.', }, expr: ||| - avg by (datname) ( - rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]) + avg by (datname, %(agg)s) ( + rate(pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]) / ( rate( - pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + rate( - pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_read{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) ) < 0.98 + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasTooManyRollbacks', + annotations: { + description: 'PostgreSQL has too many rollbacks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has too many rollbacks.', + }, + expr: ||| + avg without(pod, instance) + (rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]) / + (rate(pg_stat_database_xact_commit{%(dbNameFilter)s}[5m]) + rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]))) > 0.10 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasHighDeadLocks', + annotations: { + description: 'PostgreSQL has too many deadlocks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of 
deadlocks.', + }, + expr: ||| + max without(pod, instance) (rate(pg_stat_database_deadlocks{%(dbNameFilter)s}[5m]) * 60) > 5 ||| % $._config, 'for': '5m', labels: { severity: 'warning', }, }, + { + alert: 'PostgresAcquiredTooManyLocks', + annotations: { + description: 'PostgreSQL has acquired too many locks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of acquired locks.', + }, + expr: ||| + max by(datname, %(agg)s) ( + (pg_locks_count{%(dbNameFilter)s}) + / + on(%(aggWithoutServer)s) group_left(server) ( + pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{} + ) + ) > 0.20 + ||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), aggWithoutServer: std.join(',', std.filter(function(x) x != "server", $._config.groupLabels + $._config.instanceLabels)) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresReplicationLaggingMore1Hour', + annotations: { + description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.', + summary: 'PostgreSQL replication lagging more than 1 hour.', + }, + expr: ||| + (pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasReplicationSlotUsed', + annotations: { + description: '{{ $labels.instance }} has replication slots that are not used, which might lead to replication lag or data inconsistency.', + summary: 'PostgreSQL has unused replication slots.', + }, + expr: 'pg_replication_slots_active{} == 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresReplicationRoleChanged', + annotations: { + description: '{{ $labels.instance }} replication role has changed. 
Verify if this is expected or if it indicates a failover.', + summary: 'PostgreSQL replication role change detected.', + }, + expr: 'pg_replication_is_replica{} and changes(pg_replication_is_replica{}[1m]) > 0', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasExporterErrors', + annotations: { + description: '{{ $labels.instance }} exporter is experiencing errors. Verify exporter health and configuration.', + summary: 'PostgreSQL exporter errors detected.', + }, + expr: 'pg_exporter_last_scrape_error{} > 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTablesNotVaccumed', + annotations: { + description: '{{ $labels.instance }} tables have not been vacuumed within the last 10 hours, which may lead to performance degradation.', + summary: 'PostgreSQL tables not vacuumed.', + }, + expr: ||| + group without(pod, instance)( + timestamp( + pg_stat_user_tables_n_dead_tup{} > + pg_stat_user_tables_n_live_tup{} + * on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{} + + on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{} + ) + < time() - 36000 + ) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTooManyCheckpointsRequested', + annotations: { + description: '{{ $labels.instance }} is requesting too many checkpoints, which may lead to performance degradation.', + summary: 'PostgreSQL too many checkpoints requested.', + }, + expr: ||| + rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) / + (rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) + rate(pg_stat_bgwriter_checkpoints_req_total{}[5m])) + < 0.5 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, ], }, ], diff --git a/postgres_mixin/config.libsonnet b/postgres_mixin/config.libsonnet index d7bd7ac1b..4ea3b7e52 100644 --- a/postgres_mixin/config.libsonnet +++ 
b/postgres_mixin/config.libsonnet @@ -1,5 +1,9 @@ { _config+:: { + dbNameFilter: 'datname!~"template.*"', postgresExporterSelector: '', + groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'], + instanceLabels: ['instance', 'server'], + enableMultiCluster: false, }, } diff --git a/postgres_mixin/dashboards/postgres-overview.json b/postgres_mixin/dashboards/postgres-overview.json index 9bf41be6a..8baf6fbb3 100644 --- a/postgres_mixin/dashboards/postgres-overview.json +++ b/postgres_mixin/dashboards/postgres-overview.json @@ -3,81 +3,119 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Performance metrics for Postgres", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 455, "graphTooltip": 0, - "id": 1, - "iteration": 1603191461722, + "id": 38, "links": [], + "liveNow": false, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 2, + "w": 8, + "x": 0, + "y": 0 + }, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "span": 4, + "title": "PostgreSQL overview", + "titleSize": "h6", + "type": "row" + }, + { + + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + 
}, + "unit": "none" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 20, + "w": 4, "x": 0, "y": 0 }, - "hiddenSeries": false, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 11, "links": [], - "nullPointMode": "connected", + "maxDataPoints": 100, "options": { - "alertThreshold": true + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -94,7 +132,6 @@ } ], "intervalFactor": 2, - "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", "refId": "A", @@ -103,7 +140,7 @@ [ { "params": [ - "tup_fetched" + "xact_commit" ], "type": "field" }, @@ -119,7 +156,7 @@ } ] ], - "step": 120, + "step": 1800, "tags": [ { "key": "instance", @@ -127,11 +164,101 @@ "value": "/^$instance$/" } ] + } + ], + "title": "QPS", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 0 + }, + "id": 1, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.3", + "targets": [ { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -148,10 +275,10 @@ } ], "intervalFactor": 2, - "legendFormat": "returned", + "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -184,8 +311,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": 
"sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -202,10 +332,10 @@ } ], "intervalFactor": 2, - "legendFormat": "inserted", + "legendFormat": "returned", "measurement": "postgresql", "policy": "default", - "refId": "C", + "refId": "B", "resultFormat": "time_series", "select": [ [ @@ -238,8 +368,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -256,10 +389,10 @@ } ], "intervalFactor": 2, - "legendFormat": "updated", + "legendFormat": "inserted", "measurement": "postgresql", "policy": "default", - "refId": "D", + "refId": "C", "resultFormat": "time_series", "select": [ [ @@ -292,8 +425,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -310,10 +446,10 @@ } ], "intervalFactor": 2, - "legendFormat": "deleted", + "legendFormat": "updated", "measurement": "postgresql", "policy": "default", - "refId": "E", + "refId": "D", "resultFormat": "time_series", "select": [ [ @@ -343,124 +479,14 @@ "value": "/^$instance$/" } ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Rows", - "tooltip": { - 
"msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 20, - "y": 0 - }, - "height": "55px", - "id": 11, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { + "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + 
sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -477,15 +503,16 @@ } ], "intervalFactor": 2, + "legendFormat": "deleted", "measurement": "postgresql", "policy": "default", - "refId": "A", + "refId": "E", "resultFormat": "time_series", "select": [ [ { "params": [ - "xact_commit" + "tup_fetched" ], "type": "field" }, @@ -501,7 +528,7 @@ } ] ], - "step": 1800, + "step": 120, "tags": [ { "key": "instance", @@ -511,80 +538,100 @@ ] } ], - "thresholds": "", - "title": "QPS", - "transparent": true, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Rows", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 1, - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] 
}, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, - "hiddenSeries": false, "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_alloc{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_alloc_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -635,8 +682,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -687,8 +737,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": 
"irate(pg_stat_bgwriter_buffers_backend_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -739,8 +792,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_clean{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_clean_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -791,8 +847,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_checkpoint{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_checkpoint_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -842,104 +901,113 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Buffers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "deadlocks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, - "hiddenSeries": false, "id": 3, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "conflicts", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": 
"sum(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -959,6 +1027,7 @@ "legendFormat": "deadlocks", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "A", "resultFormat": "time_series", "select": [ @@ -990,8 +1059,12 @@ }, { "alias": "deadlocks", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -1011,6 +1084,7 @@ "legendFormat": "conflicts", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "B", "resultFormat": "time_series", "select": [ @@ -1041,204 +1115,187 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Conflicts/Deadlocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, - "hiddenSeries": false, "id": 12, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": true, - "pluginVersion": "7.2.1", - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "expr": "sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "round(sum by (datname) 
(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))*100,0.001)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{datname}} - cache hit rate", "refId": "A", "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cache hit ratio", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, - "hiddenSeries": false, "id": 13, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "pg_stat_database_numbackends{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, @@ -1247,51 +1304,13 @@ "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Number of active connections", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 26, + "revision": 1, + "schemaVersion": 38, 
"style": "dark", "tags": [ "postgres" @@ -1310,66 +1329,96 @@ "regex": "", "skipUrlSync": false, "type": "datasource" - }, + }, { "allValue": ".+", - "datasource": "$datasource", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, "definition": "label_values(pg_up, job)", "hide": 0, "includeAll": true, - "label": "job", + "label": "Job", "multi": true, "name": "job", "options": [], - "query": "label_values(pg_up, job)", - "refresh": 0, + "query": { + "query": "label_values(pg_up, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_up{job=~\"$job\"},instance)", "hide": 0, "includeAll": true, - "label": "instance", + "label": "Instance", "multi": true, "name": "instance", "options": [], - "query": "label_values(up{job=~\"$job\"},instance)", - "refresh": 1, + "query": { + "query": "label_values(pg_up{job=~\"$job\"},instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", "hide": 0, "includeAll": true, - "label": "db", + "label": "Database", "multi": false, "name": "db", "options": [], - "query": 
"label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", - "refresh": 1, + "query": { + "query": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1408,5 +1457,6 @@ "timezone": "browser", "title": "Postgres Overview", "uid": "wGgaPlciz", - "version": 5 -} + "version": 39, + "weekStart": "" +} \ No newline at end of file