Skip to content

Commit 19959fc

Browse files
Merge branch 'develop' of github.com:NHSDigital/NRLF into NRL-2015-option-4
2 parents 29e9315 + 9e90f3f commit 19959fc

File tree

21 files changed

+276
-144
lines changed

21 files changed

+276
-144
lines changed

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
run: |
6161
DOWNLOAD_URL="https://github.com/anchore/syft/releases/download/v${{ env.SYFT_VERSION }}/syft_${{ env.SYFT_VERSION }}_linux_${{ steps.os-arch.outputs.arch }}.tar.gz"
6262
echo "Downloading: ${DOWNLOAD_URL}"
63-
curl -L -o syft.tar.gz "${DOWNLOAD_URL}"
63+
curl --proto '=https' --tlsv1.2 --location --output syft.tar.gz "${DOWNLOAD_URL}"
6464
tar -xzf syft.tar.gz
6565
chmod +x syft
6666
# Add to PATH for subsequent steps
@@ -111,7 +111,7 @@ jobs:
111111
112112
- name: Upload SBOM to release
113113
if: ${{ github.event.release.tag_name }}
114-
uses: svenstaro/upload-release-action@v2.11.3
114+
uses: svenstaro/upload-release-action@b98a3b12e86552593f3e4e577ca8a62aa2f3f22b # v2.11.4
115115
with:
116116
file: sbom.spdx.json
117117
asset_name: sbom-${{ github.event.release.tag_name }}

Dockerfile.ci-build

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
FROM ubuntu:22.04
22

3+
34
RUN apt update && \
45
apt upgrade -y && \
56
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y \
@@ -30,7 +31,8 @@ RUN apt update && \
3031
zip \
3132
zlib1g-dev && \
3233
apt clean && \
33-
rm -rf /var/lib/apt/lists/*
34+
rm -rf /var/lib/apt/lists/* # NOSONAR (S6500) - Auto installing the defined packages is acceptable here
35+
3436

3537
WORKDIR /root
3638
RUN git clone https://github.com/asdf-vm/asdf.git ~/.asdf --branch v0.13.1 && \

layer/nrlf/core/validators.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ def validate_type(type_: Optional[RequestQueryType], pointer_types: List[str]) -
4141
return type_.root in pointer_types
4242

4343

44-
# TODO - Validate category is in set permissions once permissioning by category is done.
4544
def validate_category(categories: Optional[RequestQueryCategory]) -> bool:
4645
"""
4746
Validates if the given category is valid.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ env = [
104104
"AUTH_STORE=auth-store",
105105
"TABLE_NAME=unit-test-document-pointer"
106106
]
107-
pythonpath = [".", "./scripts"]
107+
pythonpath = [".", "./scripts", "./terraform/account-wide-infrastructure/modules/glue/src"]
108108

109109
[tool.datamodel-codegen]
110110
target-python-version = "3.12"

terraform/account-wide-infrastructure/dev/aws-backup.tf

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,22 @@
11

2-
resource "aws_s3_bucket" "backup_reports" {
2+
resource "aws_s3_bucket" "backup_reports" { # NOSONAR (S6258) - Logging not required for this bucket
33
bucket_prefix = "${local.prefix}-backup-reports"
44
}
55

6-
resource "aws_s3_bucket_public_access_block" "backup_reports" {
7-
bucket = aws_s3_bucket.backup_reports.id
8-
9-
block_public_acls = true
10-
block_public_policy = true
11-
ignore_public_acls = true
12-
restrict_public_buckets = true
13-
}
14-
15-
resource "aws_s3_bucket_server_side_encryption_configuration" "backup_reports" {
16-
bucket = aws_s3_bucket.backup_reports.bucket
17-
18-
rule {
19-
apply_server_side_encryption_by_default {
20-
sse_algorithm = "AES256"
21-
}
22-
}
23-
}
24-
25-
resource "aws_s3_bucket_policy" "backup_reports_bucket_policy" {
6+
resource "aws_s3_bucket_policy" "backup_reports_https_only" {
267
bucket = aws_s3_bucket.backup_reports.id
278

289
policy = jsonencode({
2910
Version = "2012-10-17"
30-
Id = "backup_reports_bucket_policy"
11+
Id = "backup_reports_https_only_policy"
3112
Statement = [
3213
{
33-
Sid = "HTTPSOnly"
34-
Effect = "Deny"
35-
Principal = "*"
36-
Action = "s3:*"
14+
Sid = "HTTPSOnly"
15+
Effect = "Deny"
16+
Principal = {
17+
"AWS" : "*"
18+
}
19+
Action = "s3:*"
3720
Resource = [
3821
aws_s3_bucket.backup_reports.arn,
3922
"${aws_s3_bucket.backup_reports.arn}/*",
@@ -43,7 +26,18 @@ resource "aws_s3_bucket_policy" "backup_reports_bucket_policy" {
4326
"aws:SecureTransport" = "false"
4427
}
4528
}
46-
},
29+
}
30+
]
31+
})
32+
}
33+
34+
resource "aws_s3_bucket_policy" "backup_reports_write_access" {
35+
bucket = aws_s3_bucket.backup_reports.id
36+
37+
policy = jsonencode({
38+
Version = "2012-10-17"
39+
Id = "backup_reports_write_access_policy"
40+
Statement = [
4741
{
4842
Sid = "AllowBackupReportsWrite"
4943
Effect = "Allow"
@@ -64,6 +58,24 @@ resource "aws_s3_bucket_policy" "backup_reports_bucket_policy" {
6458
})
6559
}
6660

61+
resource "aws_s3_bucket_public_access_block" "backup_reports" {
62+
bucket = aws_s3_bucket.backup_reports.id
63+
64+
block_public_acls = true
65+
block_public_policy = true
66+
ignore_public_acls = true
67+
restrict_public_buckets = true
68+
}
69+
70+
resource "aws_s3_bucket_server_side_encryption_configuration" "backup_reports" {
71+
bucket = aws_s3_bucket.backup_reports.bucket
72+
73+
rule {
74+
apply_server_side_encryption_by_default {
75+
sse_algorithm = "AES256"
76+
}
77+
}
78+
}
6779

6880
resource "aws_s3_bucket_ownership_controls" "backup_reports" {
6981
bucket = aws_s3_bucket.backup_reports.id

terraform/account-wide-infrastructure/mgmt/s3.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
resource "aws_s3_bucket" "ci_data" {
1+
resource "aws_s3_bucket" "ci_data" { # NOSONAR (S6258) - Logging not required for this bucket
22
bucket = "${local.prefix}--ci-data"
33
}
44

terraform/account-wide-infrastructure/modules/athena/s3.tf

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
resource "aws_s3_bucket" "athena" {
1+
resource "aws_s3_bucket" "athena" { # NOSONAR (S6258) - Logging not required for this bucket
22
bucket = "${var.name_prefix}-athena"
33
}
44

5-
resource "aws_s3_bucket_policy" "athena" {
6-
bucket = "${var.name_prefix}-athena"
5+
resource "aws_s3_bucket_policy" "athena-https-only" {
6+
bucket = aws_s3_bucket.athena.id
77

88
policy = jsonencode({
99
Version = "2012-10-17"
10-
Id = "athena-policy"
10+
Id = "athena-https-only-policy"
1111
Statement = [
1212
{
1313
Sid = "HTTPSOnly"
@@ -25,7 +25,18 @@ resource "aws_s3_bucket_policy" "athena" {
2525
"aws:SecureTransport" = "false"
2626
}
2727
}
28-
},
28+
}
29+
]
30+
})
31+
}
32+
33+
resource "aws_s3_bucket_policy" "athena-access" {
34+
bucket = aws_s3_bucket.athena.id
35+
36+
policy = jsonencode({
37+
Version = "2012-10-17"
38+
Id = "athena-access-policy"
39+
Statement = [
2940
{
3041
Sid : "AllowAthenaAccess",
3142
Effect : "Allow",

terraform/account-wide-infrastructure/modules/glue/s3.tf

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# S3 Bucket for Raw Data
2-
resource "aws_s3_bucket" "source-data-bucket" {
2+
resource "aws_s3_bucket" "source-data-bucket" { # NOSONAR (S6258) - Logging not required for this bucket
33
bucket = "${var.name_prefix}-source-data-bucket"
44
}
55

66
resource "aws_s3_bucket_policy" "source-data-bucket" {
7-
bucket = "${var.name_prefix}-source-data-bucket"
7+
bucket = aws_s3_bucket.source-data-bucket.id
88

99
policy = jsonencode({
1010
Version = "2012-10-17"
@@ -68,18 +68,18 @@ resource "aws_s3_bucket_lifecycle_configuration" "source-data-bucket-lifecycle"
6868
resource "aws_s3_bucket_versioning" "source-data-bucket-versioning" {
6969
bucket = aws_s3_bucket.source-data-bucket.id
7070
versioning_configuration {
71-
status = "Disabled"
71+
status = "Disabled" # NOSONAR (S6252) - Versioning is not required for this bucket
7272
}
7373
}
7474

7575

7676
# S3 Bucket for Processed Data
77-
resource "aws_s3_bucket" "target-data-bucket" {
77+
resource "aws_s3_bucket" "target-data-bucket" { # NOSONAR (S6258) - Logging not required for this bucket
7878
bucket = "${var.name_prefix}-target-data-bucket"
7979
}
8080

8181
resource "aws_s3_bucket_policy" "target-data-bucket" {
82-
bucket = "${var.name_prefix}-target-data-bucket"
82+
bucket = aws_s3_bucket.target-data-bucket.id
8383

8484
policy = jsonencode({
8585
Version = "2012-10-17"
@@ -127,12 +127,12 @@ resource "aws_s3_bucket_public_access_block" "target-data-bucket-public-access-b
127127
}
128128

129129
# S3 Bucket for Code
130-
resource "aws_s3_bucket" "code-bucket" {
130+
resource "aws_s3_bucket" "code-bucket" { # NOSONAR (S6258) - Logging not required for this bucket
131131
bucket = "${var.name_prefix}-code-bucket"
132132
}
133133

134134
resource "aws_s3_bucket_policy" "code-bucket" {
135-
bucket = "${var.name_prefix}-code-bucket"
135+
bucket = aws_s3_bucket.code-bucket.id
136136

137137
policy = jsonencode({
138138
Version = "2012-10-17"

terraform/account-wide-infrastructure/modules/glue/src/pipeline.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import os
12
import time
23

34
import boto3
45

6+
AWS_REGION = os.getenv("AWS_REGION", "eu-west-2")
7+
58

69
class LogPipeline:
710
def __init__(
@@ -13,8 +16,8 @@ def __init__(
1316
target_path,
1417
host_prefixes,
1518
job_name,
16-
partition_cols=[],
17-
transformations=[],
19+
partition_cols=None,
20+
transformations=None,
1821
):
1922
"""Initialize Glue context, Spark session, logger, and paths"""
2023
self.glue_context = glue_context
@@ -23,12 +26,12 @@ def __init__(
2326
self.source_path = source_path
2427
self.target_path = target_path
2528
self.host_prefixes = host_prefixes
26-
self.partition_cols = partition_cols
27-
self.transformations = transformations
29+
self.partition_cols = partition_cols if partition_cols else []
30+
self.transformations = transformations if transformations else []
2831
self.glue = boto3.client(
2932
service_name="glue",
30-
region_name="eu-west-2",
31-
endpoint_url="https://glue.eu-west-2.amazonaws.com",
33+
region_name=AWS_REGION,
34+
endpoint_url=f"https://glue.{AWS_REGION}.amazonaws.com",
3235
)
3336
self.job_name = job_name
3437
self.name_prefix = "-".join(job_name.split("-")[:4])
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from moto import mock_aws
2+
from pipeline import LogPipeline
3+
4+
5+
@mock_aws
6+
def test_pipeline_init_defaults():
7+
glue_context = "mock_glue_context"
8+
spark = "mock_spark_session"
9+
logger = "mock_logger"
10+
source_path = "s3://mock-source-path"
11+
target_path = "s3://mock-target-path"
12+
host_prefixes = ["host1", "host2"]
13+
job_name = "test-job-name"
14+
15+
pipeline = LogPipeline(
16+
glue_context, spark, logger, source_path, target_path, host_prefixes, job_name
17+
)
18+
19+
assert pipeline.glue_context == glue_context
20+
assert pipeline.spark == spark
21+
assert pipeline.logger == logger
22+
assert pipeline.source_path == source_path
23+
assert pipeline.target_path == target_path
24+
assert pipeline.host_prefixes == host_prefixes
25+
assert pipeline.job_name == job_name
26+
assert pipeline.name_prefix == "test-job-name"
27+
assert pipeline.partition_cols == []
28+
assert pipeline.transformations == []
29+
30+
31+
@mock_aws
32+
def test_pipeline_init_with_custom_values():
33+
glue_context = "mock_glue_context"
34+
spark = "mock_spark_session"
35+
logger = "mock_logger"
36+
source_path = "s3://mock-source-path"
37+
target_path = "s3://mock-target-path"
38+
host_prefixes = ["host1", "host2"]
39+
job_name = "test-job-name"
40+
partition_cols = ["col1", "col2"]
41+
transformations = ["transformation1", "transformation2"]
42+
43+
pipeline = LogPipeline(
44+
glue_context,
45+
spark,
46+
logger,
47+
source_path,
48+
target_path,
49+
host_prefixes,
50+
job_name,
51+
partition_cols=partition_cols,
52+
transformations=transformations,
53+
)
54+
55+
assert pipeline.partition_cols == partition_cols
56+
assert pipeline.transformations == transformations

0 commit comments

Comments
 (0)