Eg: dagger
This example shows how to set the retention of the log groups when they already exist, or to create them (with that retention) when they do not.
AWS Glue ETL jobs of type "pythonshell" can only use the default log groups (created implicitly):
/aws-glue/python-jobs/output
/aws-glue/python-jobs/error
By default, these log groups are created without any retention configuration, so their log events never expire.
main.tf
# Look up every log group under the Glue pythonshell prefix so the
# configuration can tell which of the default groups already exist.
data "aws_cloudwatch_log_groups" "glue_existing" {
  log_group_name_prefix = "/aws-glue/python-jobs/"
}
locals {
  # Subset of the two default pythonshell log groups that the data source
  # actually found (empty when neither exists yet).
  glue_existing_log_groups = setintersection(
    data.aws_cloudwatch_log_groups.glue_existing.log_group_names,
    toset([
      "/aws-glue/python-jobs/output",
      "/aws-glue/python-jobs/error",
    ]),
  )
}
# Glue: adopt the log groups that already exist into the Terraform state,
# one import per entry of local.glue_existing_log_groups.
# Note: import blocks are only allowed in the root module.
import {
  for_each = local.glue_existing_log_groups

  # Iterating a set: each.key and each.value are the same log group name.
  id = each.key
  to = aws_cloudwatch_log_group.python_jobs[each.key]
}
# Manage both default pythonshell log groups: a group that does not exist is
# created, an existing (imported) one only gets its retention updated.
resource "aws_cloudwatch_log_group" "python_jobs" {
  for_each = toset([
    "/aws-glue/python-jobs/output",
    "/aws-glue/python-jobs/error",
  ])

  # Iterating a set: each.key is the log group name itself.
  name              = each.key
  retention_in_days = local.cloudwatch_retention_in_days
}
Eg: behaviours
# Glue job of type "pythonshell" that runs the script stored in the code
# repository bucket.
# pythonshell jobs ONLY use the default log groups
# /aws-glue/python-jobs/output & /aws-glue/python-jobs/error
resource "aws_glue_job" "deploy_script" {
  # ---- Identity ----
  name        = var.glue-job-name
  description = "${local.ci} Glue job"
  role_arn    = var.glue_service_role_arn

  # ---- Runtime / capacity ----
  # Glue version <-> Python version matrix: https://docs.aws.amazon.com/glue/latest/dg/release-notes.html
  glue_version            = null       # Null for PythonShell. Glue 4.0 --> Python 3.9, Glue 3.0 --> Python 3.9
  job_mode                = "SCRIPT"   # (Optional) Describes how a job was created
  execution_class         = "STANDARD" # STANDARD or FLEX
  job_run_queuing_enabled = true
  max_capacity            = 0.0625 # Conflicts with worker_type 'G.1X'
  max_retries             = 0
  timeout                 = 480 # Minutes. 480 (8 hours) is the default for a Glue 5.0 ETL job

  connections = [aws_glue_connection.rds_connection.name]

  # ---- Script to run ----
  command {
    name           = "pythonshell"
    python_version = "3.9" # 3.9 with Glue v4 or v3, see the release notes linked above
    # The bucket name is obtained by stripping the ARN prefix from the bucket ARN.
    script_location = "s3://${replace(var.s3_coderepository_arn, "arn:aws:s3:::", "")}/${var.glue_bucket_coderepository_script_key}"
  }

  # ---- Job arguments ----
  default_arguments = {
    # Informational entry only: the key has no '--' prefix on purpose (nothing uses it).
    "!WARNING" = "All values are managed by Terraform: next apply will replace them!"

    # [1/2] Special parameters used by AWS Glue
    # https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
    "library-set" = "analytics"
    # The continuous-logging options below only work for Spark ETL jobs:
    #"--continuous-log-logGroup" = aws_cloudwatch_log_group.glue_lg.name
    #"--continuous-log-logStreamPrefix" = "${local.ci}-${local.env}"
    #"--enable-continuous-cloudwatch-log" = "true"
    #"--enable-continuous-log-filter" = "true"
    #"--enable-auto-scaling" = "true"
    "--enable-job-insights"          = "false"
    "--enable-metrics"               = "true"
    "--enable-observability-metrics" = "false"
    "--job-language"                 = "python"

    # [2/2] Arguments consumed by the job script
    # https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_job#default_arguments-1
    # NOTE(review): the access keys and the DB password are passed as plain job
    # arguments; AWS advises against plaintext secrets in job arguments - consider
    # AWS Secrets Manager or the Glue connection instead. TODO confirm with script owner.
    "--AWS_ACCESS_KEY_ID"     = var.glue_job_aws_ak  # Argument 1
    "--AWS_SECRET_ACCESS_KEY" = var.glue_job_aws_sak # Argument 2
    # Used by the script for the PostgreSQL connection:
    "--BBDD_HOST"                     = var.db_host
    "--BBDD_PORT"                     = var.db_port
    "--BBDD_USER"                     = var.db_user
    "--BBDD_PASSWORD"                 = var.db_pass
    "--BBDD_GLUE_ENTITIES_TABLE_NAME" = var.db_table_glue_entities # DB table with the field 'entity_name'
    "--BBDD_GLUE_ENTITIES_DELTA_DAYS" = "7"
    "--ENV"                           = local.env              # Argument 9
    "--GLUE_ROLE_ARN"                 = var.glue_role_arn      # Argument 10
    "--GLUE_S3_AUDITPROCESS_NAME"     = var.audit_bucket_name  # Argument 11
    "--CI"                            = local.ci               # Argument 12
    # "--extra-py-files" = "s3://${aws_s3_object.snowflake_library.bucket}/${aws_s3_object.snowflake_library.key}"
  }

  # ---- Optional arguments left unset ----
  #execution_property = (Optional) Execution property of the job
  #maintenance_window= (Optional)
  #non_overridable_arguments=
  #notification_property=
  #number_of_workers = 1 #Required for FLEX jobs
  #region=
  #tags=
  #security_configuration=
  #source_control_details=
  #worker_type = "G.1X" #Only [G.1X, G.2X] worker types are supported for FLEX jobs
}