Skip to content

Commit d12acbc

Browse files
NRL-2004 Migrate v1 permissions script (#1176)
1 parent 9e90f3f commit d12acbc

File tree

2 files changed

+255
-0
lines changed

2 files changed

+255
-0
lines changed

scripts/migrate_v1_perms_by_app.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Reads JSON files from a given source folder in the environment's S3 authorization
4+
bucket, transforms each from a flat array into {"types": [...]} format, and
5+
writes the results to both the consumer and producer folders in the same bucket
6+
under a sub-folder matching the source folder name.
7+
8+
Usage:
9+
python scripts/migrate_v1_perms_by_app.py <env> <folder>
10+
11+
Arguments:
12+
env - NRLF environment name (e.g. dev, qa, int, prod)
13+
folder - Source folder name within the authorization bucket
14+
(e.g. an app identifier)
15+
16+
Example:
17+
python scripts/migrate_v1_perms_by_app.py dev my-app-folder
18+
19+
The script reads from:
20+
s3://nhsd-nrlf--<env>-authorization-store/<folder>/*.json
21+
22+
And writes to:
23+
s3://nhsd-nrlf--<env>-authorization-store/consumer/<folder>/<filename>.json
24+
s3://nhsd-nrlf--<env>-authorization-store/producer/<folder>/<filename>.json
25+
26+
The bucket name defaults to nhsd-nrlf--<env>-authorization-store and can be
27+
overridden via the NRL_AUTH_BUCKET_NAME environment variable.
28+
"""
29+
30+
import json
31+
import os
32+
import sys
33+
34+
from aws_session_assume import get_boto_session
35+
from botocore.exceptions import ClientError
36+
37+
# Actor-type prefixes: each transformed file is written under both of these.
CONSUMER_OR_PRODUCER = ("consumer", "producer")
38+
39+
40+
def _get_bucket_name(env: str) -> str:
41+
return os.getenv("NRL_AUTH_BUCKET_NAME", f"nhsd-nrlf--{env}-authorization-store")
42+
43+
44+
def _get_s3_client(env: str):
    """Return an S3 client built from the boto session assumed for *env*."""
    session = get_boto_session(env)
    return session.client("s3")
46+
47+
48+
def _list_json_files(s3, bucket: str, folder: str) -> list[str]:
49+
paginator = s3.get_paginator("list_objects_v2")
50+
return sorted(
51+
item["Key"]
52+
for page in paginator.paginate(Bucket=bucket, Prefix=f"{folder}/")
53+
for item in page.get("Contents", [])
54+
if item["Key"].endswith(".json")
55+
)
56+
57+
58+
def _read_and_transform(s3, bucket: str, file_path: str) -> tuple[str, int]:
59+
try:
60+
response = s3.get_object(Bucket=bucket, Key=file_path)
61+
except ClientError as e:
62+
raise RuntimeError(
63+
f"Failed to read s3://{bucket}/{file_path}: {e.response['Error']['Message']}"
64+
) from e
65+
data = json.loads(response["Body"].read())
66+
67+
if not isinstance(data, list):
68+
raise ValueError(
69+
f"{file_path}: Expected a JSON array, got {type(data).__name__}"
70+
)
71+
72+
return json.dumps({"types": data}, indent=2), len(data)
73+
74+
75+
def _write_v2_consumer_and_producer_files(
    s3, bucket: str, file_path: str, body: str, entry_count: int
) -> None:
    """Write *body* to both the consumer and producer copies of *file_path*.

    The destination key is ``<actor_type>/<file_path>`` for each actor type
    in CONSUMER_OR_PRODUCER. Raises RuntimeError if either put fails.
    """
    for actor_type in CONSUMER_OR_PRODUCER:
        destination = f"{actor_type}/{file_path}"
        try:
            s3.put_object(Bucket=bucket, Key=destination, Body=body)
        except ClientError as err:
            message = err.response["Error"]["Message"]
            raise RuntimeError(
                f"Failed to write s3://{bucket}/{destination}: {message}"
            ) from err
        print(f" Written {entry_count} entries → s3://{bucket}/{destination}")
87+
88+
89+
def migrate_v1_perms_by_app(env: str, app_id_folder: str) -> None:
    """Migrate all v1 permission files under *app_id_folder* to v2 format.

    Reads every ``.json`` file under ``<app_id_folder>/`` in the environment's
    authorization bucket, wraps each flat array as ``{"types": [...]}``, and
    writes the result to both the ``consumer/`` and ``producer/`` copies of
    the same path in the same bucket.

    Args:
        env: NRLF environment name (e.g. ``dev``, ``qa``, ``int``, ``prod``).
        app_id_folder: Source folder name within the authorization bucket.
    """
    bucket = _get_bucket_name(env)
    s3 = _get_s3_client(env)

    print(f"Source bucket : {bucket}")
    print(f"Source folder : {app_id_folder}/")

    json_file_paths = _list_json_files(s3, bucket, app_id_folder)
    if not json_file_paths:
        print(f"No JSON files found under s3://{bucket}/{app_id_folder}/")
        return
    print(f"Found {len(json_file_paths)} JSON files in s3://{bucket}/{app_id_folder}/:")

    for file_path in json_file_paths:
        body, entry_count = _read_and_transform(s3, bucket, file_path)
        # Fix: the progress message ran the path and count together with no
        # separator; use the same arrow style as the "Written" message.
        print(f" Transforming {file_path} → {entry_count} entries")

        _write_v2_consumer_and_producer_files(s3, bucket, file_path, body, entry_count)
107+
108+
109+
if __name__ == "__main__":
    # Fix: usage errors belong on stderr (with a non-zero exit) so that
    # shell pipelines and CI jobs fail loudly instead of capturing the
    # usage text as normal output.
    if len(sys.argv) != 3:
        print(f"Usage: {sys.argv[0]} <env> <folder>", file=sys.stderr)
        sys.exit(1)

    migrate_v1_perms_by_app(sys.argv[1], sys.argv[2])
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import io
2+
import json
3+
from unittest.mock import MagicMock, patch
4+
5+
import pytest
6+
from botocore.exceptions import ClientError
7+
from migrate_v1_perms_by_app import (
8+
CONSUMER_OR_PRODUCER,
9+
_read_and_transform,
10+
migrate_v1_perms_by_app,
11+
)
12+
13+
# Environment and folder fixtures shared across the tests below.
ENV = "dev"
FOLDER = "my-app-folder"
# Mirrors the default bucket naming scheme used by the migration script.
BUCKET = f"nhsd-nrlf--{ENV}-authorization-store"

# Representative v1 payload: a flat JSON array of SNOMED-coded type strings.
SAMPLE_V1_PERMS = [
    "http://snomed.info/sct|736253002",
    "http://snomed.info/sct|1363501000000100",
    "http://snomed.info/sct|736366004",
]
22+
23+
# ---------------------------------------------------------------------------
24+
# Helper functions
25+
# ---------------------------------------------------------------------------
26+
27+
28+
def _make_client_error(code: str, message: str) -> ClientError:
    """Build a botocore ClientError carrying the given error code and message."""
    error_response = {"Error": {"Code": code, "Message": message}}
    return ClientError(error_response, operation_name="S3Operation")
33+
34+
35+
def _mock_s3_client_with_response(data_to_return: bytes) -> MagicMock:
36+
s3 = MagicMock()
37+
s3.get_object.return_value = {"Body": io.BytesIO(data_to_return)}
38+
return s3
39+
40+
41+
# ---------------------------------------------------------------------------
42+
# Unit tests for _read_and_transform
43+
# ---------------------------------------------------------------------------
44+
45+
# Key of the sample permissions object used by the _read_and_transform tests.
FILE_PATH = f"{FOLDER}/perms.json"
46+
47+
48+
def test_read_and_transform_returns_wrapped_json_and_count():
    """Happy path: a flat array is wrapped as {"types": [...]} and counted."""
    mock_s3 = _mock_s3_client_with_response(json.dumps(SAMPLE_V1_PERMS).encode())

    transformed_body, entry_count = _read_and_transform(mock_s3, BUCKET, FILE_PATH)

    assert entry_count == len(SAMPLE_V1_PERMS)
    assert json.loads(transformed_body) == {"types": SAMPLE_V1_PERMS}
    mock_s3.get_object.assert_called_once_with(Bucket=BUCKET, Key=FILE_PATH)
56+
57+
58+
def test_read_and_transform_empty_list():
    """An empty v1 array still yields a valid, empty v2 document."""
    mock_s3 = _mock_s3_client_with_response(b"[]")

    transformed_body, entry_count = _read_and_transform(mock_s3, BUCKET, FILE_PATH)

    assert entry_count == 0
    assert json.loads(transformed_body) == {"types": []}
65+
66+
67+
def test_read_and_transform_raises_value_error_for_non_list():
    """A JSON object (not an array) is rejected with a descriptive ValueError."""
    mock_s3 = _mock_s3_client_with_response(b'{"key": "value"}')

    with pytest.raises(ValueError) as exc_info:
        _read_and_transform(mock_s3, BUCKET, FILE_PATH)

    assert "Expected a JSON array, got dict" in str(exc_info.value)
72+
73+
74+
def test_read_and_transform_raises_runtime_error_on_client_error():
    """S3 read failures are surfaced as RuntimeError with bucket/key context."""
    mock_s3 = MagicMock()
    mock_s3.get_object.side_effect = _make_client_error(
        "NoSuchKey", "The specified key does not exist"
    )

    expected = (
        f"Failed to read s3://{BUCKET}/{FILE_PATH}.*The specified key does not exist"
    )
    with pytest.raises(RuntimeError, match=expected):
        _read_and_transform(mock_s3, BUCKET, FILE_PATH)
85+
86+
87+
# ---------------------------------------------------------------------------
88+
# Unit tests for migrate_v1_perms_by_app
89+
# ---------------------------------------------------------------------------
90+
91+
# Import path used as the patch target prefix in the migration tests below.
MODULE = "migrate_v1_perms_by_app"

# Canned _read_and_transform result returned by the mocked transform.
TRANSFORMED_BODY = '{"types": ["http://snomed.info/sct|736253002"]}'
ENTRY_COUNT = 1
95+
96+
97+
@patch(f"{MODULE}._write_v2_consumer_and_producer_files")
@patch(f"{MODULE}._read_and_transform")
@patch(f"{MODULE}._list_json_files")
@patch(f"{MODULE}._get_s3_client")
@patch(f"{MODULE}._get_bucket_name")
def test_migrate_processes_each_file(
    mock_get_bucket, mock_get_s3, mock_list_files, mock_read, mock_write
):
    """Every listed file is transformed and written exactly once."""
    # NOTE: patch decorators apply bottom-up, so the first parameter
    # corresponds to the last decorator (_get_bucket_name).
    mock_get_bucket.return_value = BUCKET
    fake_s3 = MagicMock()
    mock_get_s3.return_value = fake_s3
    file_keys = [f"{FOLDER}/a.json", f"{FOLDER}/b.json"]
    mock_list_files.return_value = file_keys
    mock_read.return_value = (TRANSFORMED_BODY, ENTRY_COUNT)

    migrate_v1_perms_by_app(ENV, FOLDER)

    mock_get_bucket.assert_called_once_with(ENV)
    mock_get_s3.assert_called_once_with(ENV)
    mock_list_files.assert_called_once_with(fake_s3, BUCKET, FOLDER)
    assert mock_read.call_count == len(file_keys)
    assert mock_write.call_count == len(file_keys)
    for key in file_keys:
        mock_write.assert_any_call(
            fake_s3, BUCKET, key, TRANSFORMED_BODY, ENTRY_COUNT
        )
124+
125+
126+
@patch(f"{MODULE}._write_v2_consumer_and_producer_files")
@patch(f"{MODULE}._read_and_transform")
@patch(f"{MODULE}._list_json_files")
@patch(f"{MODULE}._get_s3_client")
@patch(f"{MODULE}._get_bucket_name")
def test_migrate_no_files_skips_transform_and_write(
    mock_get_bucket, mock_get_s3, mock_list_files, mock_read, mock_write
):
    """An empty folder short-circuits: nothing is transformed or written."""
    mock_get_bucket.return_value = BUCKET
    mock_get_s3.return_value = MagicMock()
    mock_list_files.return_value = []

    migrate_v1_perms_by_app(ENV, FOLDER)

    mock_read.assert_not_called()
    mock_write.assert_not_called()

0 commit comments

Comments
 (0)