Skip to content

Commit c8e0b75

Browse files
Merge pull request #1122 from NHSDigital/NRL-1875-seed-script-make-better-output
NRL-1875 Make output better for tests & seed script
2 parents f955882 + 09bfba3 commit c8e0b75

File tree

12 files changed

+97
-66
lines changed

12 files changed

+97
-66
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ dist
7777
allure-results/*
7878
allure-report/*
7979

80-
# Performance test ref data
80+
# Performance test ref data & output
8181
tests/performance/reference-data.json
8282
tests/performance/producer/expanded_pointer_distributions.json
83+
producer-internal-*.json
84+
producer-public-*.json
85+
consumer-internal-*.json
86+
consumer-public-*.json

Makefile

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ perftest-prepare: ## Prepare input files for producer & consumer perf tests
314314

315315
perftest-producer-internal: ## Run producer perf tests
316316
@echo "Running producer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
317-
k6 run tests/performance/producer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
317+
k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
318318

319319
perftest-producer-public: check-warn ## Run the producer perftests for the external access points
320320
@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
@@ -329,12 +329,12 @@ perftest-producer-public: check-warn ## Run the producer perftests for the exter
329329
TEST_CONNECT_MODE=public \
330330
TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \
331331
TEST_CONFIG_FILE=$$CONFIG_FILE \
332-
k6 run tests/performance/producer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
332+
k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
333333
kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))
334334

335335
perftest-consumer-internal:
336336
@echo "Running consumer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
337-
k6 run tests/performance/consumer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
337+
k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
338338

339339
perftest-consumer-public: check-warn ## Run the consumer perftests for the external access points
340340
@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
@@ -349,22 +349,14 @@ perftest-consumer-public: check-warn ## Run the consumer perftests for the exter
349349
TEST_CONNECT_MODE=public \
350350
TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \
351351
TEST_CONFIG_FILE=$$CONFIG_FILE \
352-
k6 run tests/performance/consumer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
352+
k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
353353
kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))
354354

355355
perftest-generate-pointer-table-extract:
356-
@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
356+
@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and ENV=$(ENV) and DIST_PATH=$(DIST_PATH)"
357357
rm -rf "${DIST_PATH}/nft"
358358
mkdir -p "${DIST_PATH}/nft"
359-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft"
359+
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft" --extract-size=2000000
360360
./scripts/get-current-info.sh > "${DIST_PATH}/nft/info.json"
361361
zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
362362
aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"
363-
364-
perftest-run-token-refresher:
365-
@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
366-
ENV=$(ENV) TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT) PYTHONPATH=. poetry run python ./tests/performance/token_refresher.py &
367-
trap "kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) 2>/dev/null" EXIT
368-
369-
make perftest-consumer-public
370-
kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))

scripts/seed_nft_tables.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,21 @@ def _write_pointer_extract_to_file(table_name, pointer_data):
102102
create_extract_metadata_file(table_name, nft_dist_path)
103103

104104

105+
# Extracted into a helper to avoid a SonarQube maintainability warning
106+
def get_pointer_processor(unprocessed_items):
    """
    Build a predicate that reports whether a pointer row was processed.

    Args:
        unprocessed_items: write requests that were not processed. Each entry
            is indexed as entry["PutRequest"]["Item"], and the item's "id" is
            read with .get(), so an item without an "id" never matches a
            real pointer id.

    Returns:
        A function that takes a pointer row (whose first element is the
        pointer id) and returns True when no unprocessed item carries that id,
        False otherwise.
    """

    def pointer_is_processed(pointer):
        pointer_id = pointer[0]
        # any() stops at the first match instead of building a throwaway list
        # only to check whether it is empty.
        return not any(
            unprocessed_item["PutRequest"]["Item"].get("id") == pointer_id
            for unprocessed_item in unprocessed_items
        )

    return pointer_is_processed
118+
119+
105120
def _populate_seed_table(
106121
table_name: str,
107122
patients_with_pointers: int,
@@ -143,6 +158,7 @@ def _populate_seed_table(
143158
unprocessed_count = 0
144159

145160
pointer_data: list[list[str]] = []
161+
batch_pointer_data: list[list[str]] = []
146162

147163
start_time = datetime.now(tz=timezone.utc)
148164
batch_upsert_items: list[dict[str, Any]] = []
@@ -158,11 +174,20 @@ def _populate_seed_table(
158174
RequestItems={table_name: batch_upsert_items}
159175
)
160176

177+
processed_pointers = batch_pointer_data
178+
161179
if response.get("UnprocessedItems"):
162-
unprocessed_count += len(
163-
response.get("UnprocessedItems").get(table_name, [])
180+
unprocessed_items = response.get("UnprocessedItems").get(table_name, [])
181+
unprocessed_count += len(unprocessed_items)
182+
pointer_is_processed = get_pointer_processor(unprocessed_items)
183+
184+
processed_pointers = list(
185+
filter(pointer_is_processed, batch_pointer_data)
164186
)
165187

188+
pointer_data.extend(processed_pointers)
189+
190+
batch_pointer_data = []
166191
batch_upsert_items = []
167192
batch_counter = 0
168193

@@ -178,7 +203,7 @@ def _populate_seed_table(
178203
)
179204
put_req = {"PutRequest": {"Item": pointer.model_dump()}}
180205
batch_upsert_items.append(put_req)
181-
pointer_data.append(
206+
batch_pointer_data.append(
182207
[
183208
pointer.id,
184209
new_type, # not full type url

terraform/bastion/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Before deploying a bastion, you will need:
1212

1313
## Deploying a bastion
1414

15-
The bastions are emphemeral resources that should be deploy when you need them.
15+
The bastions are ephemeral resources that should be deployed when you need them.
1616

1717
To deploy a bastion, you will first need to login to the AWS mgmt account on the CLI.
1818

@@ -46,7 +46,6 @@ terraform apply ./bastion.tfplan
4646
Once the bastion is deployed, you can connect to it via SSH with:
4747

4848
```sh
49-
assume nhsd-nrlf-test
5049
make ssh-connection ENV={env}
5150
```
5251

terraform/infrastructure/etc/perftest.tfvars

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
account_name = "perftest"
22
aws_account_name = "test"
33

4-
dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table"
4+
dynamodb_pointers_table_name = "nhsd-nrlf--perftest-15m-pointers-table"
55

66
domain = "perftest.record-locator.national.nhs.uk"
77
public_domain = "perftest.api.service.nhs.uk"

tests/performance/README.md

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ We have performance tests which give us a benchmark of how NRLF performs under l
66

77
### Prep the environment
88

9-
Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-1million (todo: update with real names!).
9+
Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios, e.g. perftest-baseline vs perftest-15m vs perftest-55m.
1010

1111
#### Pull certs for perftest
1212

@@ -17,45 +17,44 @@ make truststore-pull-all ENV=perftest
1717

1818
#### Point perftest at a different pointers table
1919

20-
We (will) have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers.
21-
22-
In order to run performance tests to get figures for these different states, we can point the perftest environment at one of these tables.
23-
24-
Currently, this requires tearing down the existing environment and restoring from scratch:
25-
26-
1. Follow instructions in terraform/infrastructure/readme.md to tear down the perf test environment.
27-
- Do **not** tear down shared account-wide infrastructure
28-
2. Update `perftest-pointers-table.name_prefix` in `terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf` to be the table name you want, minus "-pointers-table"
29-
- e.g. to use the baseline table `nhsd-nrlf--perftest-baseline-pointers-table`, set `name_prefix = "nhsd-nrlf--perftest-baseline"`
30-
3. Update `dynamodb_pointers_table_prefix` in `terraform/infrastructure/etc/perftest.tfvars` same as above.
31-
- e.g. to use the baseline table `dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline"`
32-
4. Commit changes to a branch & push
33-
5. Run the [Deploy Account-wide infrastructure](https://github.com/NHSDigital/NRLF/actions/workflows/deploy-account-wide-infra.yml) workflow against your branch & `account-test`.
34-
- If you get a terraform failure like "tried to create table but it already exists", you will need to do some fanangaling:
35-
1. make sure there is a backup of your chosen table or create one if not. In the AWS console: dynamodb > tables > your perftest table > backups > create backup > Create on-demand backup > leave all settings as defaults > create backup. This might take up to an hour to complete.
36-
2. once backed up, delete your table. In the AWS console: dynamodb > tables > your perftest table > actions > delete table
37-
3. Rerun the Deploy Account-wide infrastructure action.
38-
4. Terraform will create an empty table with the correct name & (most importantly!) read/write IAM policies.
39-
5. Delete the empty table created by terraform and restore from the backup, specifying the same table name you've defined in code & selecting the matching customer managed encryption key.
40-
6. Run the [Persistent Environment Deploy](https://github.com/NHSDigital/NRLF/actions/workflows/persistent-environment.yml) workflow against your branch & `perftest` to restore the environment with lambdas pointed at your chosen table.
41-
7. You can check this has been successful by checking the table name in the lambdas.
42-
- In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table`
20+
We have multiple tables representing different possible future states of NRLF, e.g. all patients receiving an IPS (International Patient Summary), or onboarding particular high-volume suppliers.
21+
22+
In order to run performance tests to get figures for these different volumes, we can point the perftest environment at one of these tables.
23+
24+
To do this, we change an environment variable which defines which table our lambdas talk to and deploy changes.
25+
26+
1. Update `dynamodb_pointers_table_name` to be the desired table name in [terraform/infrastructure/etc/perftest.tfvars](terraform/infrastructure/etc/perftest.tfvars) e.g.
27+
28+
```sh
29+
dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table"
30+
```
4331

44-
If you've followed these steps, you will also need to [generate permissions](#generate-permissions) as the organisation permissions will have been lost when the environment was torn down.
32+
2. To avoid erasing the test permissions when you deploy these changes, make sure to run through the steps to [generate permissions](#generate-permissions).
33+
3. Apply your changes
34+
35+
```sh
36+
cd ./terraform/infrastructure
37+
make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest
38+
make ENV=perftest USE_SHARED_RESOURCES=true apply
39+
```
40+
41+
4. You can verify this has been successful by checking the table name in the lambdas.
42+
- In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table`
4543

4644
#### Generate permissions
4745

4846
You will need to generate pointer permissions the first time performance tests are run in an environment e.g. if the perftest environment is destroyed & recreated.
4947

5048
```sh
49+
assume nhsd-nrlf-mgmt
50+
5151
# In project root
5252
make perftest-generate-permissions # makes a bunch of json permission files for test organisations
5353
make get-s3-perms ENV=perftest # will take all permissions & create nrlf_permissions.zip file
5454
make build
5555

5656
# apply this new permissions zip file to your environment
5757
cd ./terraform/infrastructure
58-
assume nhsd-nrlf-mgmt
5958
make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest
6059
make ENV=perftest USE_SHARED_RESOURCES=true apply
6160
```
@@ -111,9 +110,11 @@ Regenerates the input files from the current state of a given perftest table & u
111110
112111
```sh
113112
make perftest-generate-pointer-table-extract \
114-
PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table
113+
PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table ENV=perftest
115114
```
116115

116+
This will generate a CSV extract of the given pointer table containing a row per pointer. To run the perf tests, you will need an extract larger than the number of test iterations. The default extract size is 2 million - this can be changed in the Makefile command by updating the value of `--extract-size`. If the extract is too big, the test runners will take a long time to load the file.
117+
117118
## Assumptions / Caveats
118119

119120
- Run performance tests in the perftest environment only\*

tests/performance/constants.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ export const POINTERS_TO_DELETE = ALL_POINTER_IDS.slice(0, 3500);
1515
export const POINTER_IDS = ALL_POINTER_IDS.slice(3500);
1616
export const NHS_NUMBERS = REFERENCE_DATA["nhs_numbers"];
1717

18-
// filter only 736253001, 736253002, 1363501000000100, 861421000000109, 749001000000101 for now
18+
// filter only 736253002, 1363501000000100, 861421000000109, 749001000000101 for now
1919
export const FILTERED_POINTER_TYPES = [
20-
// "736253001",
2120
"736253002",
2221
"1363501000000100",
2322
"861421000000109",

tests/performance/consumer/perftest.config.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,43 @@
66
},
77
"scenarios": {
88
"countDocumentReference": {
9-
"tps": 5,
9+
"tps": 1,
1010
"duration": "5m",
1111
"hold": "30m",
1212
"rampDown": "1m"
1313
},
1414
"countPostDocumentReference": {
15-
"tps": 5,
15+
"tps": 1,
1616
"duration": "5m",
1717
"hold": "30m",
1818
"rampDown": "1m"
1919
},
2020
"readDocumentReference": {
21-
"tps": 5,
21+
"tps": 1,
2222
"duration": "5m",
2323
"hold": "30m",
2424
"rampDown": "1m"
2525
},
2626
"searchDocumentReference": {
27-
"tps": 5,
27+
"tps": 1,
2828
"duration": "5m",
2929
"hold": "30m",
3030
"rampDown": "1m"
3131
},
3232
"searchDocumentReferenceByCategory": {
33-
"tps": 5,
33+
"tps": 1,
3434
"duration": "5m",
3535
"hold": "30m",
3636
"rampDown": "1m"
3737
},
3838
"searchPostDocumentReference": {
39-
"tps": 5,
39+
"tps": 1,
4040
"duration": "5m",
4141
"hold": "30m",
4242
"rampDown": "1m"
4343
},
4444
"searchPostDocumentReferenceByCategory": {
45-
"tps": 5,
45+
"tps": 1,
4646
"duration": "5m",
4747
"hold": "30m",
4848
"rampDown": "1m"

tests/performance/perftest_environment.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def __next__(self):
6666

6767

6868
def generate_pointer_table_extract(
69+
extract_size=2000000, # must be a multiple of buffer_size (1 million)
6970
output_dir=".",
7071
):
7172
"""
@@ -79,6 +80,7 @@ def generate_pointer_table_extract(
7980
start_key = None
8081
buffer = []
8182
buffer_size = 1_000_000 # 10k rows needs ~3MB of RAM, so 1M rows needs ~300MB
83+
buffers_written = 0
8284

8385
with open(out, "w", newline="") as csv_file:
8486
writer = csv.writer(csv_file)
@@ -109,12 +111,20 @@ def generate_pointer_table_extract(
109111
if len(buffer) >= buffer_size:
110112
print("Writing buffer to CSV...") # noqa: T201
111113
writer.writerows(buffer)
114+
buffers_written += 1
112115
buffer.clear()
113116
start_key = response.get("LastEvaluatedKey", None)
114-
done = start_key is None
117+
118+
no_more_to_read = start_key is None
119+
reached_desired_extract_size = (
120+
buffers_written * buffer_size
121+
) >= extract_size
122+
123+
done = no_more_to_read or reached_desired_extract_size
115124
# Write any remaining rows in buffer
116125
if buffer:
117126
writer.writerows(buffer)
127+
buffers_written += 1
118128
print(f"Pointer extract CSV data written to {out}") # noqa: T201
119129

120130
create_extract_metadata_file(table_name, output_dir)

tests/performance/producer/client_perftest.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import http from "k6/http";
22
import { ODS_CODE } from "../constants.js";
33
import { check } from "k6";
44
import { randomItem } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";
5-
import { crypto } from "k6/experimental/webcrypto";
65
import { createRecord } from "../setup.js";
76
import { getHeaders, getFullUrl } from "../test-config.js";
87
import exec from "k6/execution";

0 commit comments

Comments
 (0)