cloudwatch (terraform)

CloudWatch log metric filter and alarm

Most relevant resources: aws_cloudwatch_log_metric_filter, aws_cloudwatch_metric_alarm


JBoss "IJ000453: Unable to get managed connection for"

It creates a log metric filter and an alarm that fires when the given string is found 2 or more times in each of 3 consecutive 10-second periods (30 seconds in total).

Eg: Datacontact 3rds.

# [1/2] JBoss IJ000453 metric filter.
# Background on the error: https://access.redhat.com/solutions/17782
#
# Records the value 1 in the "JBossIJ000453" metric (namespace "<env>/<ci>")
# for every event in the main log group that matches the pattern below.
resource "aws_cloudwatch_log_metric_filter" "IJ000453" {
  log_group_name = aws_cloudwatch_log_group.main.name
  name           = "JBoss IJ000453 metric filter"

  # The escaped double quotes are part of the filter pattern itself, so the
  # whole phrase is matched as one term.
  pattern = "\"IJ000453: Unable to get managed connection for\""

  metric_transformation {
    namespace = "${local.env}/${local.ci}"
    name      = "JBossIJ000453"
    value     = "1"
  }
}



# [2/2] JBoss IJ000453 metric alarm (there is a widget for it in the CloudWatch dashboard).
#
# Watches the metric published by the IJ000453 log metric filter and goes to
# ALARM when the sample count is >= 2 in each of 3 consecutive 10-second
# periods (no datapoints_to_alarm is set, so every evaluated period must breach).
resource "aws_cloudwatch_metric_alarm" "IJ000453" {
  alarm_name        = "${local.env}-${local.ci}-jboss-IJ000453"
  alarm_description = "'IJ000453: Unable to get managed connection for' found in log (task must be replaced, see https://access.redhat.com/solutions/17782)"

  # Take the metric name and namespace from the filter so both resources
  # always point at the same metric.
  metric_name = aws_cloudwatch_log_metric_filter.IJ000453.metric_transformation[0].name
  namespace   = aws_cloudwatch_log_metric_filter.IJ000453.metric_transformation[0].namespace

  statistic           = "SampleCount"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 2
  evaluation_periods  = 3

  # In seconds. Valid values are 10, 30, or any multiple of 60.
  # NOTE(review): 10- and 30-second periods are intended for high-resolution
  # metrics; confirm that the metric produced by the log metric filter is
  # evaluated as expected at this resolution, otherwise use a multiple of 60.
  period = 10

  # Dimensions are intentionally not set; kept here for reference.
  # dimensions = {
  #   "ClusterName" = aws_ecs_cluster.jboss.name
  #   "ServiceName" = aws_ecs_service.service.name
  # }

  # DOC alarm_actions: no way was found to stop the failing task from an alarm
  # action, only to trigger scale out / scale in, which is not a clean solution.
  # Fill in the list of action ARNs here, if any.
  alarm_actions             = []
  insufficient_data_actions = [] # (Optional)
}

To show the alarm on the CloudWatch dashboard, add a widget like the following, which references the ARN of the metric alarm, to the 'widgets' array (see section 'Dashboard'):

      {

        "type" : "alarm",

        "x" : 0,

        "y" : 0,

        "width" : 23, #11+1+11 (if 2 widgets in lower row)

        "height" : 2,

        "properties" : {

          "title" : "Alarms",

          "alarms" : [

            var.metric_alarm.arn

          ]

        }

      },


Dashboard

Eg: Data generic cat.

Fargate W/ CPUUtilization and MemoryUtilization (dashboard)

modules/cloudwatch/variables.tf

variable "cloudwatch_group" {
  description = "Name of the CloudWatch log group (also used as the name of its log stream)"
  type        = string
}

variable "retention_in_days" {
  description = "Number of days to retain log events in the log group"
  type        = number
}


# Required: used as the "ClusterName" dimension value in the dashboard widgets.
variable "ecs_cluster_name" {
  type        = string
  nullable    = false
  description = "ECS cluster name for the dashboard widgets"
}


# Required: used as the "ServiceName" dimension value in the dashboard widgets.
variable "ecs_service_name" {
  type        = string
  nullable    = false
  description = "ECS service name for the dashboard widgets"
}

modules/cloudwatch/cloudwatch.tf

# Environment and CI identifiers, read from the provider's default tags
# ("env" and "ci"); used to build resource names and the metric namespace.
locals {
  env = data.aws_default_tags.dt.tags["env"]
  ci  = data.aws_default_tags.dt.tags["ci"]
}


# Default tags configured on the AWS provider (source of local.env / local.ci).
data "aws_default_tags" "dt" {}


# Region of the current provider configuration (used by the dashboard widgets).
data "aws_region" "current" {}


# Log group that the metric filter and the log stream are attached to.
resource "aws_cloudwatch_log_group" "main" {
  retention_in_days = var.retention_in_days
  name              = var.cloudwatch_group
}


# Log stream inside the main log group; it deliberately reuses the group's
# name (var.cloudwatch_group) as the stream name.
resource "aws_cloudwatch_log_stream" "main" {
  log_group_name = aws_cloudwatch_log_group.main.name
  name           = var.cloudwatch_group
}


# Fargate dashboard with two side-by-side time-series widgets on row 0:
# CPU utilization (x = 0) and memory utilization (x = 12) of the ECS service.
# Each widget plots the Minimum, Maximum and (widget default) Average statistic.
resource "aws_cloudwatch_dashboard" "cloudwatch_dashboard" {
  dashboard_name = "${local.env}-${local.ci}-fargate"

  dashboard_body = jsonencode({
    # The two widgets differ only in metric name, title and horizontal
    # position, so they are generated from a small list of descriptors.
    widgets = [
      for idx, w in [
        { metric = "CPUUtilization", title = "CPU (ecs service)" },
        { metric = "MemoryUtilization", title = "Memory (ecs service)" },
      ] : {
        type   = "metric"
        x      = idx * 12 # 0 for the first widget, 12 for the second
        y      = 0
        width  = 12
        height = 6
        properties = {
          title   = w.title
          region  = data.aws_region.current.name
          view    = "timeSeries"
          stacked = false
          stat    = "Average"
          metrics = [
            # Full metric definition, plotted with the Minimum statistic.
            ["AWS/ECS", w.metric, "ServiceName", var.ecs_service_name, "ClusterName", var.ecs_cluster_name, { "stat" : "Minimum" }],
            # "..." repeats the previous metric definition; here with Maximum.
            ["...", { "stat" : "Maximum" }],
            # Same metric again with the widget-level stat ("Average").
            ["..."],
          ]
        }
      }
    ]
  })
}