cloudwatch (terraform)
Reference
How to Build an AWS CloudWatch Dashboard using Terraform
Create an AWS CloudWatch Metric Filter and Alarm in Terraform
https://spin.atomicobject.com/2021/04/07/aws-cloudwatch-metric-filter-alarm-terraform/
CloudWatch log metric filter and alarm
Most relevant resources: aws_cloudwatch_log_metric_filter, aws_cloudwatch_metric_alarm
JBoss "IJ000453: Unable to get managed connection for"
It creates a log metric filter and an alarm that fires when the given string is found 2 or more times in each of 3 consecutive 10-second periods (30 seconds in total).
Eg: Datacontact 3rds.
# [1/2] Metric filter for the JBoss IJ000453 error
# (see https://access.redhat.com/solutions/17782).
# Each log event in the main log group that contains the quoted phrase
# publishes a value of 1 to the JBossIJ000453 metric.
resource "aws_cloudwatch_log_metric_filter" "IJ000453" {
  name           = "JBoss IJ000453 metric filter"
  log_group_name = aws_cloudwatch_log_group.main.name

  # The escaped inner quotes make the filter match the whole phrase.
  pattern = "\"IJ000453: Unable to get managed connection for\""

  metric_transformation {
    namespace = "${local.env}/${local.ci}"
    name      = "JBossIJ000453"
    value     = "1"
  }
}
# [2/2] JBoss IJ000453 metric alarm (there is a widget for it in the CloudWatch dashboard).
# Watches the metric published by the IJ000453 log metric filter and goes into
# ALARM when the sample count is >= 2 in each of 3 consecutive 10-second periods.
resource "aws_cloudwatch_metric_alarm" "IJ000453" {
  alarm_name        = "${local.env}-${local.ci}-jboss-IJ000453"
  alarm_description = "'IJ000453: Unable to get managed connection for' found in log (task must be replaced, see https://access.redhat.com/solutions/17782)"

  # Metric name and namespace are read from the filter so the two stay in sync.
  metric_name = aws_cloudwatch_log_metric_filter.IJ000453.metric_transformation[0].name
  namespace   = aws_cloudwatch_log_metric_filter.IJ000453.metric_transformation[0].namespace

  statistic           = "SampleCount"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 2
  evaluation_periods  = 3

  # In seconds. Valid values are 10, 30, or any multiple of 60.
  # NOTE(review): 10 and 30 are intended for high-resolution metrics; on a
  # standard-resolution metric the alarm may often sit in INSUFFICIENT_DATA.
  # Confirm the resolution of the metric published by the filter.
  period = 10

  # dimensions = {
  #   "ClusterName" = aws_ecs_cluster.jboss.name
  #   "ServiceName" = aws_ecs_service.service.name
  # }

  # DOC alarm_actions: no way found to stop the failing task from here, only
  # scale-out / scale-in workarounds, which are not nice at all.
  # Placeholder: replace with the list of action ARNs to trigger, if any.
  alarm_actions             = []
  insufficient_data_actions = [] # (Optional)
}
Then add an alarm widget that references the ARN of the metric alarm to the 'widgets' array of the CloudWatch dashboard (see section 'Dashboard'):
# Alarm widget: an entry for the dashboard's 'widgets' array that shows the
# state of the metric alarm passed in as var.metric_alarm.
{
  type   = "alarm"
  x      = 0
  y      = 0
  width  = 23 # 11 + 1 + 11 (if 2 widgets in lower row)
  height = 2
  properties = {
    title  = "Alarms"
    alarms = [var.metric_alarm.arn]
  }
},
Dashboard
Eg: Data generic cat,
Fargate W/ CPUUtilization and MemoryUtilization (dashboard)
modules/cloudwatch/variables.tf
# Name given to both the CloudWatch log group and the log stream created
# by this module.
variable "cloudwatch_group" {
  description = "Name of the CloudWatch log group (also used as the log stream name)"
  type        = string
}

# Passed straight through to the log group's retention_in_days argument.
variable "retention_in_days" {
  description = "Number of days to retain log events in the CloudWatch log group"
  type        = number
}
# Required: used as the "ClusterName" dimension of the dashboard metrics.
variable "ecs_cluster_name" {
  type        = string
  nullable    = false
  description = "ECS cluster name for the dashboard widgets"
}
# Required: used as the "ServiceName" dimension of the dashboard metrics.
variable "ecs_service_name" {
  type        = string
  nullable    = false
  description = "ECS service name for the dashboard widgets"
}
modules/cloudwatch/cloudwatch.tf
# Provider-level default tags; the 'env' and 'ci' tags are read below.
data "aws_default_tags" "dt" {}

# Region the provider is currently configured for (used by the dashboard widgets).
data "aws_region" "current" {}

# Short aliases for the 'env' and 'ci' default tags, used to build resource names.
locals {
  ci  = data.aws_default_tags.dt.tags.ci
  env = data.aws_default_tags.dt.tags.env
}
# Log group named after var.cloudwatch_group, with the configured retention.
resource "aws_cloudwatch_log_group" "main" {
  retention_in_days = var.retention_in_days
  name              = var.cloudwatch_group
}
# Log stream inside the main log group; it reuses the group's name.
resource "aws_cloudwatch_log_stream" "main" {
  log_group_name = aws_cloudwatch_log_group.main.name
  name           = var.cloudwatch_group
}
# Fargate dashboard with two side-by-side time-series widgets for the ECS
# service: CPU utilization (left) and memory utilization (right).
# Each widget plots the Minimum, Maximum and Average of its metric.
resource "aws_cloudwatch_dashboard" "cloudwatch_dashboard" {
  dashboard_name = "${local.env}-${local.ci}-fargate"

  dashboard_body = jsonencode({
    widgets = [
      # One widget per panel; the panel's position in the list picks its column.
      for position, panel in [
        { metric = "CPUUtilization", title = "CPU (ecs service)" },
        { metric = "MemoryUtilization", title = "Memory (ecs service)" },
      ] : {
        type   = "metric"
        x      = position * 12 # 0 for the first widget, 12 for the second
        y      = 0
        width  = 12
        height = 6
        properties = {
          title   = panel.title
          region  = data.aws_region.current.name
          view    = "timeSeries"
          stacked = false
          stat    = "Average"
          metrics = [
            [
              "AWS/ECS", panel.metric,
              "ServiceName", var.ecs_service_name,
              "ClusterName", var.ecs_cluster_name,
              { stat = "Minimum" },
            ],
            # "..." is the dashboard shorthand for repeating the previous metric.
            ["...", { stat = "Maximum" }],
            # No per-metric stat here, so the widget-level stat above applies.
            ["..."],
          ]
        }
      }
    ]
  })
}