Skip to content

Commit

Permalink
Uniqueify alert names
Browse files (browse the repository at this point in the history)
  • Loading branch information
jon-funk committed Oct 28, 2024
1 parent 56fdc23 commit 278cc4a
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy-sysdig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
environment: tools
name: Deploy Sysdig Terraform
runs-on: ubuntu-22.04
timeout-minutes: 3
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- name: Apply Sysdig Terraform
Expand Down
64 changes: 32 additions & 32 deletions terraform/alerts_prod.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
### Backend Alerts
resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_cpu_quota" {
name = "Backend CPU Requests Quota Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_backend_prod_cpu_quota" {
name = "Prod Backend CPU Requests Quota Alert"
description = "Alert when the CPU requests usage is too high"
severity = "medium"
query = "sysdig_container_cpu_quota_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-backend\",container_name=\"nr-compliance-enforcement-prod-backend\"} > 98"
Expand All @@ -15,8 +15,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_cpu_quota" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_mem_usage" {
name = "Backend Mem Usage Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_backend_prod_mem_usage" {
name = "Prod Backend Mem Usage Alert"
description = "Alert when the mem usage is too high"
severity = "medium"
query = "sysdig_container_memory_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-backend\",container_name=\"nr-compliance-enforcement-prod-backend\"} > 98"
Expand All @@ -31,8 +31,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_mem_usage" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_mem_limit" {
name = "Backend Mem Limit Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_backend_prod_mem_limit" {
name = "Prod Backend Mem Limit Alert"
description = "Alert when the mem usage is near the limit for too long"
severity = "high"
query = "sysdig_container_memory_limit_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-backend\",container_name=\"nr-compliance-enforcement-prod-backend\"} > 70"
Expand All @@ -47,8 +47,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_mem_limit" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_uptime_score" {
name = "Backend Uptime Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_backend_prod_uptime_score" {
name = "Prod Backend Uptime Alert"
description = "Alert when the backend container has too much downtime"
severity = "high"
query = "sysdig_container_up{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-backend\",container_name=\"nr-compliance-enforcement-prod-backend\"} < 0.7"
Expand All @@ -63,8 +63,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_uptime_score" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_http_silent" {
name = "Backend Unresponsive Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_backend_prod_http_silent" {
name = "Prod Backend Unresponsive Alert"
description = "Alert when the backend container has been unresponsive or silent for too long"
severity = "high"
query = "sysdig_container_net_http_request_count{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-backend\",container_name=\"nr-compliance-enforcement-prod-backend\"} < 0.1"
Expand All @@ -80,8 +80,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "backend_prod_http_silent" {
}
}
### Frontend Alerts
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_cpu_quota" {
name = "Frontend CPU Requests Quota Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_cpu_quota" {
name = "Prod Frontend CPU Requests Quota Alert"
description = "Alert when the CPU requests usage is too high"
severity = "medium"
query = "sysdig_container_cpu_quota_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\",container_name=\"nr-compliance-enforcement-prod-frontend\"} > 98"
Expand All @@ -96,8 +96,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_cpu_quota" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_mem_usage" {
name = "Frontend Mem Usage Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_mem_usage" {
name = "Prod Frontend Mem Usage Alert"
description = "Alert when the mem usage is too high"
severity = "medium"
query = "sysdig_container_memory_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\",container_name=\"nr-compliance-enforcement-prod-frontend\"} > 98"
Expand All @@ -112,8 +112,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_mem_usage" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_mem_limit" {
name = "Frontend Mem Limit Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_mem_limit" {
name = "Prod Frontend Mem Limit Alert"
description = "Alert when the mem usage is near the limit for too long"
severity = "high"
query = "sysdig_container_memory_limit_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\",container_name=\"nr-compliance-enforcement-prod-frontend\"} > 70"
Expand All @@ -128,8 +128,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_mem_limit" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_uptime_score" {
name = "Frontend Uptime Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_uptime_score" {
name = "Prod Frontend Uptime Alert"
description = "Alert when the frontend container has too much downtime"
severity = "high"
query = "sysdig_container_up{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\",container_name=\"nr-compliance-enforcement-prod-frontend\"} < 0.7"
Expand All @@ -144,8 +144,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_uptime_score" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_http_silent" {
name = "Frontend Unresponsive Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_http_silent" {
name = "Prod Frontend Unresponsive Alert"
description = "Alert when the frontend container has been unresponsive or silent for too long"
severity = "high"
query = "sysdig_container_net_http_request_count{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\",container_name=\"nr-compliance-enforcement-prod-frontend\"} < 0.1"
Expand All @@ -160,8 +160,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_http_silent" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_http_error_rate" {
name = "Frontend HTTP Error Rate Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_frontend_prod_http_error_rate" {
name = "Prod Frontend HTTP Error Rate Alert"
description = "Alert when the frontend container has too many HTTP errors over a period"
severity = "high"
query = "(sysdig_container_net_http_error_count{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\"} / sysdig_container_net_http_request_count{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_deployment_name=\"nr-compliance-enforcement-prod-frontend\"} ) > 0.05"
Expand All @@ -177,8 +177,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "frontend_prod_http_error_rate" {
}
}
### Database Alerts
resource "sysdig_monitor_alert_v2_prometheus" "database_prod_cpu_quota" {
name = "Database CPU Requests Quota Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_database_prod_cpu_quota" {
name = "Prod Database CPU Requests Quota Alert"
description = "Alert when the CPU requests usage is too high"
severity = "medium"
query = "sysdig_container_cpu_quota_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_statefulset_name=\"nr-compliance-enforcement-prod-bitnami-pg\"} > 98"
Expand All @@ -193,8 +193,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "database_prod_cpu_quota" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "database_prod_mem_usage" {
name = "Database Mem Usage Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_database_prod_mem_usage" {
name = "Prod Database Mem Usage Alert"
description = "Alert when the mem usage is too high"
severity = "medium"
query = "sysdig_container_memory_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_statefulset_name=\"nr-compliance-enforcement-prod-bitnami-pg\"} > 98"
Expand All @@ -209,8 +209,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "database_prod_mem_usage" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "database_prod_mem_limit" {
name = "Database Mem Limit Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_database_prod_mem_limit" {
name = "Prod Database Mem Limit Alert"
description = "Alert when the mem usage is near the limit for too long"
severity = "high"
query = "sysdig_container_memory_limit_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_statefulset_name=\"nr-compliance-enforcement-prod-bitnami-pg\"} > 70"
Expand All @@ -225,8 +225,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "database_prod_mem_limit" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "database_prod_uptime_score" {
name = "Database Uptime Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_database_prod_uptime_score" {
name = "Prod Database Uptime Alert"
description = "Alert when the database container has too much downtime"
severity = "high"
query = "sysdig_container_up{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_statefulset_name=\"nr-compliance-enforcement-prod-bitnami-pg\"} < 0.7"
Expand All @@ -241,8 +241,8 @@ resource "sysdig_monitor_alert_v2_prometheus" "database_prod_uptime_score" {
app = "NatCom"
}
}
resource "sysdig_monitor_alert_v2_prometheus" "database_prod_storage_usage" {
name = "Database Storage Alert"
resource "sysdig_monitor_alert_v2_prometheus" "nr_database_prod_storage_usage" {
name = "Prod Database Storage Alert"
description = "Alert when the database storage usage is too high"
severity = "high"
query = "sysdig_fs_used_percent{kube_cluster_name=\"silver\",kube_namespace_name=\"c1c7ed-prod\",kube_statefulset_name=\"nr-compliance-enforcement-prod-bitnami-pg\"} > 70"
Expand Down
Loading

0 comments on commit 278cc4a

Please sign in to comment.