-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathResolveCDCSchemaDrift.sql
More file actions
278 lines (239 loc) · 14.2 KB
/
ResolveCDCSchemaDrift.sql
File metadata and controls
278 lines (239 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
Script: Automate CDC Schema Drift Resolution
Description: Detects tables where the source schema has drifted since Change Data
Capture (CDC) was originally enabled -- columns added to or removed from
the source table that are not reflected in the CDC capture instance.
For each drifted table the script:
1) Backs up the existing CDC change data into a staging table.
2) Disables CDC on the source table (drops the capture instance).
3) Re-enables CDC so the new capture instance picks up the current
column layout.
4) Restores the historical change data, mapping only the columns that
exist in both the old backup and the new capture instance (safely
handles both added and removed columns).
Tables that are not yet tracked by CDC but otherwise meet the filter
criteria are enrolled automatically.
Prerequisites: - SQL Server 2017+ (uses STRING_AGG which requires 2017+)
- CDC must already be enabled at the database level
(sys.sp_cdc_enable_db)
- The executing login must be a member of the db_owner fixed database
role (required by sys.sp_cdc_enable_table / sys.sp_cdc_disable_table)
Configuration: Set the variables in the "Configuration" section below:
@RoleName - Database role granted SELECT on CDC change
tables (default: cdc_reader)
@SupportsNetChanges - Whether to enable net-change querying (0 or 1)
@DryRun - When 1, reports drifted tables without making
changes. Set to 0 to apply.
Usage Notes: ***** CRITICAL: MAINTENANCE WINDOW REQUIRED *****
Must be run during a maintenance window. Between disabling and
re-enabling CDC, any changes to the source tables will NOT be captured.
Data changes during this window are permanently lost from the CDC
perspective. Schedule this script when application write activity is
stopped or acceptably low.
Each table is processed independently. If CDC is disabled but
re-enabling fails, the script preserves the backup table and
prints a detailed error message for manual recovery.
Run in dry-run mode first (@DryRun = 1) to review which tables have
drifted before committing to changes.
License: MIT License - https://opensource.org/licenses/MIT
Source: https://github.com/mbentham/sql-server-scripts
*/
SET NOCOUNT ON;
SET XACT_ABORT ON;
-- ============================================================================
-- Configuration
-- ============================================================================
DECLARE @RoleName NVARCHAR(128) = N'cdc_reader'; -- Database role granted read access to CDC change tables
DECLARE @SupportsNetChanges BIT = 0; -- 0 = store every change; 1 = store net changes only (requires a primary key/unique index)
DECLARE @DryRun BIT = 1; -- 1 = report only (no changes); 0 = apply changes
-- ============================================================================
-- Validate that CDC is enabled on this database
-- ============================================================================
IF NOT EXISTS (SELECT 1 FROM sys.databases WHERE database_id = DB_ID() AND is_cdc_enabled = 1)
BEGIN
RAISERROR('CDC is not enabled on database [%s]. Enable it with sys.sp_cdc_enable_db before running this script.', 16, 1, DB_NAME());
RETURN;
END;
-- ============================================================================
-- Detect schema drift and untracked tables
-- ============================================================================
/*
The query below finds two categories of tables:
1) Tables with CDC enabled where columns have been ADDED to or REMOVED from
the source table since CDC was configured (cc.column_name IS NULL means
a source column is not in the capture instance).
2) Tables that are user tables, not in the cdc schema, not MS-shipped, and
have no capture instance at all (ct.capture_instance IS NULL) -- these
are candidates for initial CDC enrolment.
*/
DECLARE @schema_name NVARCHAR(128);
DECLARE @table_name NVARCHAR(128);
DECLARE @capture_instance NVARCHAR(128);
DECLARE tables_for_cdc CURSOR LOCAL FAST_FORWARD FOR
SELECT DISTINCT
s.name AS schema_name,
t.name AS table_name,
ct.capture_instance
FROM sys.schemas AS s
INNER JOIN sys.tables AS t ON s.schema_id = t.schema_id
INNER JOIN sys.columns AS c ON t.object_id = c.object_id
INNER JOIN sys.types AS ty ON c.system_type_id = ty.system_type_id
AND c.user_type_id = ty.user_type_id
LEFT JOIN cdc.change_tables AS ct ON t.object_id = ct.source_object_id
LEFT JOIN cdc.captured_columns AS cc ON ct.object_id = cc.object_id
AND c.name = cc.column_name
AND ty.name = cc.column_type
WHERE t.type = 'U'
AND s.name <> 'cdc'
AND t.is_ms_shipped = 0
AND cc.column_name IS NULL;
OPEN tables_for_cdc;
FETCH NEXT FROM tables_for_cdc INTO @schema_name, @table_name, @capture_instance;
WHILE @@FETCH_STATUS = 0
BEGIN
-- ==========================================================================
-- Branch 1: Table is not currently tracked by CDC -- enable it
-- ==========================================================================
IF @capture_instance IS NULL
BEGIN
SET @capture_instance = CONCAT(@schema_name, N'_', @table_name);
IF @DryRun = 1
BEGIN
PRINT CONCAT('DRY RUN -- Would ENABLE CDC on [', @schema_name, '].[', @table_name, '] as capture instance [', @capture_instance, ']');
END
ELSE
BEGIN
BEGIN TRY
EXEC sys.sp_cdc_enable_table
@source_schema = @schema_name,
@source_name = @table_name,
@role_name = @RoleName,
@supports_net_changes = @SupportsNetChanges,
@capture_instance = @capture_instance;
PRINT CONCAT('ENABLED CDC on [', @schema_name, '].[', @table_name, '] as capture instance [', @capture_instance, ']');
END TRY
BEGIN CATCH
PRINT CONCAT('ERROR enabling CDC on [', @schema_name, '].[', @table_name, ']: ', ERROR_MESSAGE());
END CATCH;
END;
END
-- ==========================================================================
-- Branch 2: Table is tracked but schema has drifted -- recycle the capture
-- instance and preserve historical change data
-- ==========================================================================
ELSE
BEGIN
IF @DryRun = 1
BEGIN
PRINT CONCAT('DRY RUN -- Would RECYCLE CDC capture instance [', @capture_instance, '] on [', @schema_name, '].[', @table_name, '] due to schema drift');
END
ELSE
BEGIN
DECLARE @backup_table NVARCHAR(256) = CONCAT(N'temp_', @capture_instance, N'_CT');
DECLARE @ct_table NVARCHAR(256) = CONCAT(@capture_instance, N'_CT');
DECLARE @old_columns NVARCHAR(MAX);
DECLARE @common_cols NVARCHAR(MAX);
DECLARE @sql NVARCHAR(MAX);
DECLARE @cdc_disabled BIT = 0;
-- Reset aggregation variables to avoid stale values from a prior iteration
SET @old_columns = NULL;
SET @common_cols = NULL;
BEGIN TRY
-- ---------------------------------------------------------------
-- Step 1: Drop any leftover staging table from a prior failed run
-- ---------------------------------------------------------------
SET @sql = N'IF OBJECT_ID(N''[dbo].' + QUOTENAME(@backup_table) + N''') IS NOT NULL DROP TABLE [dbo].' + QUOTENAME(@backup_table) + N';';
EXEC sp_executesql @stmt = @sql;
-- ---------------------------------------------------------------
-- Step 2: Build the column list from the EXISTING CT table and
-- back up the change data into a staging table
-- ---------------------------------------------------------------
SELECT @old_columns = STRING_AGG(QUOTENAME(col.name), N', ')
FROM sys.columns AS col
INNER JOIN sys.tables AS tbl ON col.object_id = tbl.object_id
INNER JOIN sys.schemas AS sch ON tbl.schema_id = sch.schema_id
WHERE sch.name = N'cdc'
AND tbl.name = @ct_table;
SET @sql = N'SELECT ' + @old_columns
+ N' INTO [dbo].' + QUOTENAME(@backup_table)
+ N' FROM [cdc].' + QUOTENAME(@ct_table) + N';';
EXEC sp_executesql @stmt = @sql;
-- ---------------------------------------------------------------
-- Step 3: Disable CDC (drops the old capture instance)
-- ---------------------------------------------------------------
EXEC sys.sp_cdc_disable_table
@source_schema = @schema_name,
@source_name = @table_name,
@capture_instance = @capture_instance;
SET @cdc_disabled = 1;
-- ---------------------------------------------------------------
-- Step 4: Re-enable CDC (creates a new capture instance with the
-- current source-table column layout)
-- ---------------------------------------------------------------
EXEC sys.sp_cdc_enable_table
@source_schema = @schema_name,
@source_name = @table_name,
@role_name = @RoleName,
@supports_net_changes = @SupportsNetChanges,
@capture_instance = @capture_instance;
-- ---------------------------------------------------------------
-- Step 5: Determine the COMMON columns between the backup table
-- and the newly created CT table. This safely handles
-- both added columns (exist in new CT but not backup)
-- and removed columns (exist in backup but not new CT).
-- ---------------------------------------------------------------
SELECT @common_cols = STRING_AGG(QUOTENAME(new_col.name), N', ')
FROM sys.columns AS new_col
INNER JOIN sys.tables AS new_tbl ON new_col.object_id = new_tbl.object_id
INNER JOIN sys.schemas AS new_sch ON new_tbl.schema_id = new_sch.schema_id
INNER JOIN (
SELECT col.name
FROM sys.columns AS col
INNER JOIN sys.tables AS tbl ON col.object_id = tbl.object_id
INNER JOIN sys.schemas AS sch ON tbl.schema_id = sch.schema_id
WHERE sch.name = N'dbo'
AND tbl.name = @backup_table
) AS backup_col ON new_col.name = backup_col.name
WHERE new_sch.name = N'cdc'
AND new_tbl.name = @ct_table;
-- ---------------------------------------------------------------
-- Step 6: Restore historical change data into the new CT table
-- ---------------------------------------------------------------
IF @common_cols IS NOT NULL
BEGIN
SET @sql = N'INSERT INTO [cdc].' + QUOTENAME(@ct_table)
+ N' (' + @common_cols + N')'
+ N' SELECT ' + @common_cols
+ N' FROM [dbo].' + QUOTENAME(@backup_table) + N';';
EXEC sp_executesql @stmt = @sql;
END;
-- ---------------------------------------------------------------
-- Step 7: Drop the staging table
-- ---------------------------------------------------------------
SET @sql = N'DROP TABLE [dbo].' + QUOTENAME(@backup_table) + N';';
EXEC sp_executesql @stmt = @sql;
PRINT CONCAT('RECYCLED CDC capture instance [', @capture_instance, '] on [', @schema_name, '].[', @table_name, '] -- schema drift resolved');
END TRY
BEGIN CATCH
IF @cdc_disabled = 1
BEGIN
PRINT CONCAT(
'CRITICAL ERROR on [', @schema_name, '].[', @table_name, ']: ',
'The old CDC capture instance [', @capture_instance, '] was DISABLED but re-enabling FAILED. ',
'This table is NO LONGER TRACKED by CDC -- changes are NOT being captured. ',
'The backup table [dbo].', QUOTENAME(@backup_table), ' has been preserved and contains the historical change data. ',
'You must manually re-enable CDC on this table and restore the backup data. ',
'Error: ', ERROR_MESSAGE());
END
ELSE
BEGIN
PRINT CONCAT('ERROR recycling CDC on [', @schema_name, '].[', @table_name, ']: ', ERROR_MESSAGE());
END;
END CATCH;
END;
END;
FETCH NEXT FROM tables_for_cdc INTO @schema_name, @table_name, @capture_instance;
END;
CLOSE tables_for_cdc;
DEALLOCATE tables_for_cdc;
PRINT 'CDC schema drift resolution complete.';