Merge pull request #1122 from NHSDigital/NRL-1875-seed-script-make-better-output

anjalitrace2-nhs · web-flow · commit c8e0b75f75f5 · 2026-01-27T15:13:28.000Z
NRL-1875 Make output better for tests & seed script
diff --git a/.gitignore b/.gitignore
@@ -77,6 +77,10 @@ dist
 allure-results/*
 allure-report/*
 
-# Performance test ref data
+# Performance test ref data & output
 tests/performance/reference-data.json
 tests/performance/producer/expanded_pointer_distributions.json
+producer-internal-*.json
+producer-public-*.json
+consumer-internal-*.json
+consumer-public-*.json
diff --git a/Makefile b/Makefile
@@ -314,7 +314,7 @@ perftest-prepare:	## Prepare input files for producer & consumer perf tests
 
 perftest-producer-internal:	## Run producer perf tests
 	@echo "Running producer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
-	k6 run tests/performance/producer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
+	k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
 
 perftest-producer-public: check-warn ## Run the producer perftests for the external access points
 	@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
@@ -329,12 +329,12 @@ perftest-producer-public: check-warn ## Run the producer perftests for the exter
 	TEST_CONNECT_MODE=public \
 	TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \
 	TEST_CONFIG_FILE=$$CONFIG_FILE \
-		k6 run tests/performance/producer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
+		k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
 	kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))
 
 perftest-consumer-internal:
 	@echo "Running consumer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
-	k6 run tests/performance/consumer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
+	k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
 
 perftest-consumer-public: check-warn ## Run the consumer perftests for the external access points
 	@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
@@ -349,22 +349,14 @@ perftest-consumer-public: check-warn ## Run the consumer perftests for the exter
 	TEST_CONNECT_MODE=public \
 	TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \
 	TEST_CONFIG_FILE=$$CONFIG_FILE \
-		k6 run tests/performance/consumer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
+		k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
 	kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))
 
 perftest-generate-pointer-table-extract:
-	@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
+	@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and ENV=$(ENV) and DIST_PATH=$(DIST_PATH)"
 	rm -rf "${DIST_PATH}/nft"
 	mkdir -p "${DIST_PATH}/nft"
-	PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft"
+	PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft" --extract-size=2000000
 	./scripts/get-current-info.sh > "${DIST_PATH}/nft/info.json"
 	zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
 	aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"
-
-perftest-run-token-refresher:
-	@echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)"
-	ENV=$(ENV) TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT) PYTHONPATH=. poetry run python ./tests/performance/token_refresher.py &
-	trap "kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) 2>/dev/null" EXIT
-
-	make perftest-consumer-public
-	kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT))
diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py
@@ -102,6 +102,21 @@ def _write_pointer_extract_to_file(table_name, pointer_data):
     create_extract_metadata_file(table_name, nft_dist_path)
 
 
+# To avoid a SonarQube maintainability warning
+def get_pointer_processor(unprocessed_items):
+    def pointer_is_processed(pointer):
+        pointer_id = pointer[0]
+        matches = [
+            unprocessed_item
+            for unprocessed_item in unprocessed_items
+            if unprocessed_item["PutRequest"]["Item"].get("id") == pointer_id
+        ]
+
+        return len(matches) == 0
+
+    return pointer_is_processed
+
+
 def _populate_seed_table(
     table_name: str,
     patients_with_pointers: int,
@@ -143,6 +158,7 @@ def _populate_seed_table(
     unprocessed_count = 0
 
     pointer_data: list[list[str]] = []
+    batch_pointer_data: list[list[str]] = []
 
     start_time = datetime.now(tz=timezone.utc)
     batch_upsert_items: list[dict[str, Any]] = []
@@ -158,11 +174,20 @@ def _populate_seed_table(
                 RequestItems={table_name: batch_upsert_items}
             )
 
+            processed_pointers = batch_pointer_data
+
             if response.get("UnprocessedItems"):
-                unprocessed_count += len(
-                    response.get("UnprocessedItems").get(table_name, [])
+                unprocessed_items = response.get("UnprocessedItems").get(table_name, [])
+                unprocessed_count += len(unprocessed_items)
+                pointer_is_processed = get_pointer_processor(unprocessed_items)
+
+                processed_pointers = list(
+                    filter(pointer_is_processed, batch_pointer_data)
                 )
 
+            pointer_data.extend(processed_pointers)
+
+            batch_pointer_data = []
             batch_upsert_items = []
             batch_counter = 0
 
@@ -178,7 +203,7 @@ def _populate_seed_table(
             )
             put_req = {"PutRequest": {"Item": pointer.model_dump()}}
             batch_upsert_items.append(put_req)
-            pointer_data.append(
+            batch_pointer_data.append(
                 [
                     pointer.id,
                     new_type,  # not full type url
diff --git a/terraform/bastion/README.md b/terraform/bastion/README.md
@@ -12,7 +12,7 @@ Before deploying a bastion, you will need:
 
 ## Deploying a bastion
 
-The bastions are emphemeral resources that should be deploy when you need them.
+The bastions are ephemeral resources that should be deployed when you need them.
 
 To deploy a bastion, you will first need to login to the AWS mgmt account on the CLI.
 
@@ -46,7 +46,6 @@ terraform apply ./bastion.tfplan
 Once the bastion is deployed, you can connect to it via SSH with:
 
 ```sh
-assume nhsd-nrlf-test
 make ssh-connection ENV={env}
 ```
 
diff --git a/terraform/infrastructure/etc/perftest.tfvars b/terraform/infrastructure/etc/perftest.tfvars
@@ -1,7 +1,7 @@
 account_name     = "perftest"
 aws_account_name = "test"
 
-dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table"
+dynamodb_pointers_table_name = "nhsd-nrlf--perftest-15m-pointers-table"
 
 domain        = "perftest.record-locator.national.nhs.uk"
 public_domain = "perftest.api.service.nhs.uk"
diff --git a/tests/performance/README.md b/tests/performance/README.md
@@ -6,7 +6,7 @@ We have performance tests which give us a benchmark of how NRLF performs under l
 
 ### Prep the environment
 
-Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-1million (todo: update with real names!).
+Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios, e.g. perftest-baseline vs perftest-15m vs perftest-55m.
 
 #### Pull certs for perftest
 
@@ -17,45 +17,44 @@ make truststore-pull-all ENV=perftest
 
 #### Point perftest at a different pointers table
 
-We (will) have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers.
-
-In order to run performance tests to get figures for these different states, we can point the perftest environment at one of these tables.
-
-Currently, this requires tearing down the existing environment and restoring from scratch:
-
-1. Follow instructions in terraform/infrastructure/readme.md to tear down the perf test environment.
-   - Do **not** tear down shared account-wide infrastructure
-2. Update `perftest-pointers-table.name_prefix` in `terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf` to be the table name you want, minus "-pointers-table"
-   - e.g. to use the baseline table `nhsd-nrlf--perftest-baseline-pointers-table`, set `name_prefix = "nhsd-nrlf--perftest-baseline"`
-3. Update `dynamodb_pointers_table_prefix` in `terraform/infrastructure/etc/perftest.tfvars` same as above.
-   - e.g. to use the baseline table `dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline"`
-4. Commit changes to a branch & push
-5. Run the [Deploy Account-wide infrastructure](https://github.com/NHSDigital/NRLF/actions/workflows/deploy-account-wide-infra.yml) workflow against your branch & `account-test`.
-   - If you get a terraform failure like "tried to create table but it already exists", you will need to do some fanangaling:
-     1. make sure there is a backup of your chosen table or create one if not. In the AWS console: dynamodb > tables > your perftest table > backups > create backup > Create on-demand backup > leave all settings as defaults > create backup. This might take up to an hour to complete.
-     2. once backed up, delete your table. In the AWS console: dynamodb > tables > your perftest table > actions > delete table
-     3. Rerun the Deploy Account-wide infrastructure action.
-     4. Terraform will create an empty table with the correct name & (most importantly!) read/write IAM policies.
-     5. Delete the empty table created by terraform and restore from the backup, specifying the same table name you've defined in code & selecting the matching customer managed encryption key.
-6. Run the [Persistent Environment Deploy](https://github.com/NHSDigital/NRLF/actions/workflows/persistent-environment.yml) workflow against your branch & `perftest` to restore the environment with lambdas pointed at your chosen table.
-7. You can check this has been successful by checking the table name in the lambdas.
-   - In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table`
+We have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers.
+
+In order to run performance tests to get figures for these different volumes, we can point the perftest environment at one of these tables.
+
+To do this, we change an environment variable which defines which table our lambdas talk to and deploy changes.
+
+1. Update `dynamodb_pointers_table_name` to be the desired table name in [terraform/infrastructure/etc/perftest.tfvars](../../terraform/infrastructure/etc/perftest.tfvars), e.g.
+
+```sh
+dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table"
+```
 
-If you've followed these steps, you will also need to [generate permissions](#generate-permissions) as the organisation permissions will have been lost when the environment was torn down.
+2. To avoid erasing the test permissions when you deploy these changes, make sure to run through the steps to [generate permissions](#generate-permissions)
+3. Apply your changes
+
+```sh
+cd ./terraform/infrastructure
+make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest
+make ENV=perftest USE_SHARED_RESOURCES=true apply
+```
+
+4. You can verify this has been successful by checking the table name in the lambdas.
+   - In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table`
 
 #### Generate permissions
 
 You will need to generate pointer permissions the first time performance tests are run in an environment e.g. if the perftest environment is destroyed & recreated.
 
 ```sh
+assume nhsd-nrlf-mgmt
+
 # In project root
 make perftest-generate-permissions   # makes a bunch of json permission files for test organisations
 make get-s3-perms ENV=perftest   # will take all permissions & create nrlf_permissions.zip file
 make build
 
 # apply this new permissions zip file to your environment
 cd ./terraform/infrastructure
-assume nhsd-nrlf-mgmt
 make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest
 make ENV=perftest USE_SHARED_RESOURCES=true apply
 ```
@@ -111,9 +110,11 @@ Regenerates the input files from the current state of a given perftest table & u
 
 ```sh
 make perftest-generate-pointer-table-extract \
-   PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table
+   PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table ENV=perftest
 ```
 
+This will generate a CSV extract of the given pointer table containing a row per pointer. To run the perf tests, you will need an extract larger than the number of test iterations. The default extract size is 2 million - this can be changed in the Makefile command by updating the value of `--extract-size`. If the extract is too big, the test runners will take a long time to load the file.
+
 ## Assumptions / Caveats
 
 - Run performance tests in the perftest environment only\*
diff --git a/tests/performance/constants.js b/tests/performance/constants.js
@@ -15,9 +15,8 @@ export const POINTERS_TO_DELETE = ALL_POINTER_IDS.slice(0, 3500);
 export const POINTER_IDS = ALL_POINTER_IDS.slice(3500);
 export const NHS_NUMBERS = REFERENCE_DATA["nhs_numbers"];
 
-// filter only 736253001, 736253002, 1363501000000100, 861421000000109, 749001000000101 for now
+// filter only 736253002, 1363501000000100, 861421000000109, 749001000000101 for now
 export const FILTERED_POINTER_TYPES = [
-  // "736253001",
   "736253002",
   "1363501000000100",
   "861421000000109",
diff --git a/tests/performance/consumer/perftest.config.json b/tests/performance/consumer/perftest.config.json
@@ -6,43 +6,43 @@
   },
   "scenarios": {
     "countDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "countPostDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "readDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchDocumentReferenceByCategory": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchPostDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchPostDocumentReferenceByCategory": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
diff --git a/tests/performance/perftest_environment.py b/tests/performance/perftest_environment.py
@@ -66,6 +66,7 @@ def __next__(self):
 
 
 def generate_pointer_table_extract(
+    extract_size=2000000,  # must be a multiple of buffer_size (1 million)
     output_dir=".",
 ):
     """
@@ -79,6 +80,7 @@ def generate_pointer_table_extract(
     start_key = None
     buffer = []
     buffer_size = 1_000_000  # 10k rows needs ~3MB of RAM, so 1M rows needs ~300MB
+    buffers_written = 0
 
     with open(out, "w", newline="") as csv_file:
         writer = csv.writer(csv_file)
@@ -109,12 +111,20 @@ def generate_pointer_table_extract(
                 if len(buffer) >= buffer_size:
                     print("Writing buffer to CSV...")  # noqa: T201
                     writer.writerows(buffer)
+                    buffers_written += 1
                     buffer.clear()
             start_key = response.get("LastEvaluatedKey", None)
-            done = start_key is None
+
+            no_more_to_read = start_key is None
+            reached_desired_extract_size = (
+                buffers_written * buffer_size
+            ) >= extract_size
+
+            done = no_more_to_read or reached_desired_extract_size
         # Write any remaining rows in buffer
         if buffer:
             writer.writerows(buffer)
+            buffers_written += 1
     print(f"Pointer extract CSV data written to {out}")  # noqa: T201
 
     create_extract_metadata_file(table_name, output_dir)
diff --git a/tests/performance/producer/client_perftest.js b/tests/performance/producer/client_perftest.js
@@ -2,7 +2,6 @@ import http from "k6/http";
 import { ODS_CODE } from "../constants.js";
 import { check } from "k6";
 import { randomItem } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";
-import { crypto } from "k6/experimental/webcrypto";
 import { createRecord } from "../setup.js";
 import { getHeaders, getFullUrl } from "../test-config.js";
 import exec from "k6/execution";
diff --git a/tests/performance/producer/perftest.config.json b/tests/performance/producer/perftest.config.json
@@ -6,31 +6,31 @@
   },
   "scenarios": {
     "createDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "readDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "upsertDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
     },
     "searchPostDocumentReference": {
-      "tps": 5,
+      "tps": 1,
       "duration": "5m",
       "hold": "30m",
       "rampDown": "1m"
diff --git a/tests/performance/test-config.js b/tests/performance/test-config.js