Skip to content

Commit 402301f

Browse files
committed
Merge branch 'features/sql-cleanup-scripts' into develop
2 parents 6616794 + f379d7b commit 402301f

11 files changed

+321
-9
lines changed

OnTopic.Data.Sql.Database/OnTopic.Data.Sql.Database.sqlproj

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,19 @@
6868
<Folder Include="Functions\" />
6969
<Folder Include="Stored Procedures\" />
7070
<Folder Include="Types" />
71+
<Folder Include="Utilities\" />
72+
<Folder Include="Utilities\Stored Procedures" />
73+
</ItemGroup>
74+
<ItemGroup>
75+
<Build Include="Utilities\Schema.sql" />
76+
<Build Include="Utilities\Stored Procedures\GenerateNestedSet.sql" />
77+
<Build Include="Utilities\Stored Procedures\CompressHierarchy.sql" />
78+
<Build Include="Utilities\Stored Procedures\DisableForeignKeys.sql" />
79+
<Build Include="Utilities\Stored Procedures\ValidateHierarchy.sql" />
80+
<Build Include="Utilities\Stored Procedures\DeleteConsecutiveExtendedAttributes.sql" />
81+
<Build Include="Utilities\Stored Procedures\DeleteOrphanedLastModifiedAttributes.sql" />
82+
<Build Include="Utilities\Stored Procedures\DeleteConsecutiveAttributes.sql" />
83+
<Build Include="Utilities\Stored Procedures\ConsolidateVersions.sql" />
7184
</ItemGroup>
7285
<ItemGroup>
7386
<Build Include="Tables\ExtendedAttributes.sql" />
@@ -89,13 +102,9 @@
89102
<Build Include="Stored Procedures\UpdateRelationships.sql" />
90103
<Build Include="Stored Procedures\GetTopics.sql" />
91104
<Build Include="Stored Procedures\GetAttributes.sql" />
92-
<Build Include="Stored Procedures\GenerateNestedSet.sql" />
93105
<Build Include="Stored Procedures\DeleteTopic.sql" />
94106
<Build Include="Stored Procedures\CreateTopic.sql" />
95-
<Build Include="Stored Procedures\CompressHierarchy.sql" />
96-
<Build Include="Stored Procedures\pr_Disable_Foreign_Keys.sql" />
97107
<Build Include="Stored Procedures\GetTopicVersion.sql" />
98-
<Build Include="Stored Procedures\ValidateHierarchy.sql" />
99108
<Build Include="Types\AttributeValues.sql" />
100109
<Build Include="Types\TopicList.sql" />
101110
<Build Include="Views\AttributeIndex.sql" />
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CREATE SCHEMA [Utilities]

OnTopic.Data.Sql.Database/Stored Procedures/CompressHierarchy.sql renamed to OnTopic.Data.Sql.Database/Utilities/Stored Procedures/CompressHierarchy.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
-- Remove gaps within nested set model created when non-leaf nodes are deleted
55
--------------------------------------------------------------------------------------------------------------------------------
66

7-
CREATE PROCEDURE [dbo].[CompressTopics]
7+
CREATE PROCEDURE [Utilities].[CompressTopics]
88
AS
99

1010
SET NOCOUNT ON;
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
--------------------------------------------------------------------------------------------------------------------------------
2+
-- CONSOLIDATE VERSIONS
3+
--------------------------------------------------------------------------------------------------------------------------------
4+
-- Given a start date and an end date, will consolidate all versions within that range into a single, new version. This has the
5+
-- benefit of reducing the number of versions in the database, reducing the database size, and making some database queries
6+
-- faster. If the end date parameter is not specified, it defaults to 2000-01-01, in which case ALL historical data will be
7+
-- collapsed prior to the start date.
8+
--------------------------------------------------------------------------------------------------------------------------------
9+
10+
CREATE PROCEDURE [Utilities].[ConsolidateVersions]
11+
@StartDate datetime = 20000101,
12+
@EndDate datetime = null
13+
AS
14+
15+
--------------------------------------------------------------------------------------------------------------------------------
16+
-- DECLARE AND SET VARIABLES
17+
--------------------------------------------------------------------------------------------------------------------------------
18+
DECLARE @IsNestedTransaction BIT;
19+
20+
BEGIN TRY
21+
22+
--------------------------------------------------------------------------------------------------------------------------------
23+
-- BEGIN TRANSACTION
24+
--------------------------------------------------------------------------------------------------------------------------------
25+
IF (@@TRANCOUNT = 0)
26+
BEGIN
27+
SET @IsNestedTransaction = 0;
28+
BEGIN TRANSACTION;
29+
END
30+
ELSE
31+
BEGIN
32+
SET @IsNestedTransaction = 1;
33+
END
34+
35+
--------------------------------------------------------------------------------------------------------------------------------
36+
-- SELECT TOPIC ATTRIBUTES
37+
--------------------------------------------------------------------------------------------------------------------------------
38+
;WITH TopicAttributes
39+
AS (
40+
SELECT Version,
41+
RowNumber = ROW_NUMBER() OVER (
42+
PARTITION BY TopicID,
43+
AttributeKey
44+
ORDER BY Version DESC
45+
)
46+
FROM Attributes
47+
WHERE Version > @StartDate
48+
AND Version <= @EndDate
49+
)
50+
UPDATE TopicAttributes
51+
SET Version = @EndDate
52+
WHERE RowNumber = 1
53+
54+
--------------------------------------------------------------------------------------------------------------------------------
55+
-- DELETE CONSOLIDATED ATTRIBUTES
56+
--------------------------------------------------------------------------------------------------------------------------------
57+
DELETE
58+
FROM Attributes
59+
WHERE Version > @StartDate
60+
AND Version < @EndDate
61+
62+
--------------------------------------------------------------------------------------------------------------------------------
63+
-- SELECT EXTENDED ATTRIBUTES
64+
--------------------------------------------------------------------------------------------------------------------------------
65+
;WITH TopicExtendedAttributes
66+
AS (
67+
SELECT Version,
68+
RowNumber = ROW_NUMBER() OVER (
69+
PARTITION BY TopicID
70+
ORDER BY Version DESC
71+
)
72+
FROM ExtendedAttributes
73+
WHERE Version > @StartDate
74+
AND Version <= @EndDate
75+
)
76+
UPDATE TopicExtendedAttributes
77+
SET Version = @EndDate
78+
WHERE RowNumber = 1
79+
80+
--------------------------------------------------------------------------------------------------------------------------------
81+
-- DELETE CONSOLIDATED ATTRIBUTES
82+
--------------------------------------------------------------------------------------------------------------------------------
83+
DELETE
84+
FROM ExtendedAttributes
85+
WHERE Version > @StartDate
86+
AND Version < @EndDate
87+
88+
--------------------------------------------------------------------------------------------------------------------------------
89+
-- COMMIT TRANSACTION
90+
--------------------------------------------------------------------------------------------------------------------------------
91+
IF (@@TRANCOUNT > 0 AND @IsNestedTransaction = 0)
92+
BEGIN
93+
COMMIT
94+
END
95+
END TRY
96+
97+
--------------------------------------------------------------------------------------------------------------------------------
98+
-- HANDLE ERRORS
99+
--------------------------------------------------------------------------------------------------------------------------------
100+
BEGIN CATCH
101+
IF (@@TRANCOUNT > 0 AND @IsNestedTransaction = 0)
102+
BEGIN
103+
ROLLBACK;
104+
END;
105+
THROW
106+
RETURN;
107+
END CATCH
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
--------------------------------------------------------------------------------------------------------------------------------
2+
-- DELETE CONSECUTIVE ATTRIBUTES
3+
--------------------------------------------------------------------------------------------------------------------------------
4+
-- Current versions of the OnTopic Library evaluate whether or not an attribute value has changed since the previous version,
5+
-- and doesn't create a new attribute version if it has. This wasn't true in previous versions, however. As a result, there are
6+
-- some cases, and especially in older databases, where unnecessary duplicates occur for attribute values. This script will
7+
-- detect concurrent duplicates and remove them from the database. This reduces the size of the database, without interfering
8+
-- with the data integrity. If attribute values are not consecutive, the duplicates aren't deleted; e.g., if the value of
9+
-- <c>Title</c> gets changed, then gets reverted, all three versions will be retained in the database.
10+
--------------------------------------------------------------------------------------------------------------------------------
11+
12+
CREATE PROCEDURE [Utilities].[DeleteConsecutiveAttributes]
13+
AS
14+
15+
SET NOCOUNT ON;
16+
17+
--------------------------------------------------------------------------------------------------------------------------------
18+
-- CHECK INITIAL VALUES
19+
--------------------------------------------------------------------------------------------------------------------------------
20+
DECLARE @Count INT
21+
22+
SELECT @Count = Count(TopicID)
23+
FROM Attributes
24+
25+
Print('Initial Count: ' + CAST(@Count AS VARCHAR) + ' Attributes in the database.');
26+
27+
--------------------------------------------------------------------------------------------------------------------------------
28+
-- IDENTIFY GROUPS OF CONCURRENT DUPLICATES
29+
--------------------------------------------------------------------------------------------------------------------------------
30+
WITH GroupedValues AS (
31+
SELECT TopicID,
32+
AttributeKey,
33+
AttributeValue,
34+
DateModified,
35+
Version,
36+
ValueGroup = ROW_NUMBER() OVER(PARTITION BY TopicID, AttributeKey ORDER BY TopicID, AttributeKey, Version)
37+
- ROW_NUMBER() OVER(PARTITION BY TopicID, AttributeKey, AttributeValue ORDER BY TopicID, AttributeKey, Version)
38+
FROM Attributes
39+
),
40+
41+
--------------------------------------------------------------------------------------------------------------------------------
42+
-- RANK DUPLICATES BY DATE
43+
--------------------------------------------------------------------------------------------------------------------------------
44+
RankedValues AS (
45+
SELECT TopicID,
46+
AttributeKey,
47+
AttributeValue,
48+
DateModified,
49+
Version,
50+
ValueGroup,
51+
ValueRank = ROW_NUMBER() OVER(PARTITION BY ValueGroup, TopicID, AttributeKey, AttributeValue ORDER BY TopicID, AttributeKey, Version)
52+
FROM GroupedValues
53+
)
54+
55+
--------------------------------------------------------------------------------------------------------------------------------
56+
-- DELETE NEWER DUPLICATES
57+
--------------------------------------------------------------------------------------------------------------------------------
58+
DELETE
59+
FROM RankedValues
60+
WHERE ValueRank > 1;
61+
62+
--------------------------------------------------------------------------------------------------------------------------------
63+
-- CHECK FINAL VALUES
64+
--------------------------------------------------------------------------------------------------------------------------------
65+
SELECT @Count = @Count - Count(TopicID)
66+
FROM Attributes
67+
68+
Print('Final Count: ' + CAST(@Count AS VARCHAR) + ' Attributes were identified and deleted.')
69+
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
--------------------------------------------------------------------------------------------------------------------------------
2+
-- DELETE CONSECUTIVE ATTRIBUTES
3+
--------------------------------------------------------------------------------------------------------------------------------
4+
-- Current versions of the OnTopic Library evaluate whether or not the composite XML for the extended attribute values has
5+
-- changed since the previous version, and only creates a new version if it has. This wasn't true in previous versions, however.
6+
-- As a result, there are some cases, and especially in older databases, where unnecessary duplicates occur for attribute
7+
-- values. These dramatically increase the size of the database and can slow down the processing time of certain queries. This
8+
-- procedure will detect concurrent duplicates and remove them from the database. This reduces the size of the database, without
9+
-- interfering with the data integrity.
10+
--------------------------------------------------------------------------------------------------------------------------------
11+
-- NOTE: Because this query must cast the XML values as VARCHAR in order to compare them, it takes a LONG time to run. Please
12+
-- be patient!
13+
--------------------------------------------------------------------------------------------------------------------------------
14+
15+
CREATE PROCEDURE [Utilities].[DeleteConsecutiveExtendedAttributes]
16+
AS
17+
18+
SET NOCOUNT ON;
19+
20+
--------------------------------------------------------------------------------------------------------------------------------
21+
-- CHECK INITIAL VALUES
22+
--------------------------------------------------------------------------------------------------------------------------------
23+
DECLARE @Count INT
24+
25+
SELECT @Count = Count(TopicID)
26+
FROM ExtendedAttributes
27+
28+
Print('Initial Count: ' + CAST(@Count AS VARCHAR) + ' Extended Attributes in the database.');
29+
30+
--------------------------------------------------------------------------------------------------------------------------------
31+
-- IDENTIFY GROUPS OF CONCURRENT DUPLICATES
32+
--------------------------------------------------------------------------------------------------------------------------------
33+
WITH GroupedValues AS (
34+
SELECT TopicID,
35+
AttributesXml,
36+
DateModified,
37+
Version,
38+
ValueGroup = ROW_NUMBER() OVER(PARTITION BY TopicID ORDER BY TopicID, Version)
39+
- ROW_NUMBER() OVER(PARTITION BY TopicID, CAST(AttributesXml AS NVARCHAR(MAX)) ORDER BY TopicID, Version)
40+
FROM ExtendedAttributes
41+
),
42+
43+
--------------------------------------------------------------------------------------------------------------------------------
44+
-- RANK DUPLICATES BY DATE
45+
--------------------------------------------------------------------------------------------------------------------------------
46+
RankedValues AS (
47+
SELECT TopicID,
48+
AttributesXml,
49+
DateModified,
50+
Version,
51+
ValueGroup,
52+
ValueRank = ROW_NUMBER() OVER(PARTITION BY ValueGroup, TopicID, CAST(AttributesXml AS NVARCHAR(MAX)) ORDER BY TopicID, Version)
53+
FROM GroupedValues
54+
)
55+
56+
--------------------------------------------------------------------------------------------------------------------------------
57+
-- DELETE NEWER DUPLICATES
58+
--------------------------------------------------------------------------------------------------------------------------------
59+
DELETE
60+
FROM RankedValues
61+
WHERE ValueRank > 1;
62+
63+
PRINT('Concurrent duplicates have been deleted.')
64+
65+
--------------------------------------------------------------------------------------------------------------------------------
66+
-- CHECK FINAL VALUES
67+
--------------------------------------------------------------------------------------------------------------------------------
68+
SELECT @Count = @Count - Count(TopicID)
69+
FROM ExtendedAttributes
70+
71+
Print('Final Count: ' + CAST(@Count AS VARCHAR) + ' duplicate Extended Attributes were identified and deleted.')
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
--------------------------------------------------------------------------------------------------------------------------------
2+
-- DELETE ORPHANED LAST MODIFIED ATTRIBUTES
3+
--------------------------------------------------------------------------------------------------------------------------------
4+
-- Current versions of the OnTopic Library evaluate whether or not an attribute value has changed since the previous version,
5+
-- and doesn't create a new attribute version if it has. This process doesn't work for <c>LastModified</c>, however, as that
6+
-- value changes every time a value is saved—at least via the OnTopic Editor. If you save a topic five times in the editor, it
7+
-- will generate five <c>LastModified</c> values, <i>even if no other attribute values changed</i>. Over time, this can create a
8+
-- lot of clutter in the database, and potentially slow down some queries. This script identifies <c>LastModified</c> attributes
9+
-- which don't correspond to any other updates of <i>indexed</i> attributes, and deletes them.
10+
--------------------------------------------------------------------------------------------------------------------------------
11+
-- NOTE: There are legitimate scenarios where a topic is updated but it doesn't show up in other attribute values, such as when
12+
-- relationships are updated. These situations are the exception, however, and it's usually not an issue to lose that level of
13+
-- granularity.
14+
--------------------------------------------------------------------------------------------------------------------------------
15+
16+
CREATE PROCEDURE [Utilities].[DeleteOrphanedLastModifiedAttributes]
17+
AS
18+
19+
SET NOCOUNT ON;
20+
21+
--------------------------------------------------------------------------------------------------------------------------------
22+
-- CHECK INITIAL VALUES
23+
--------------------------------------------------------------------------------------------------------------------------------
24+
DECLARE @Count INT
25+
26+
SELECT @Count = Count(TopicID)
27+
FROM Attributes
28+
WHERE AttributeKey = 'LastModified'
29+
30+
Print('Initial Count: ' + CAST(@Count AS VARCHAR) + ' LastModified records in the database.');
31+
32+
--------------------------------------------------------------------------------------------------------------------------------
33+
-- DELETE ORPHANED LAST MODIFIED ATTRIBUTES
34+
--------------------------------------------------------------------------------------------------------------------------------
35+
DELETE Attributes
36+
FROM Attributes
37+
LEFT JOIN Attributes Unmatched
38+
ON Attributes.TopicID = Unmatched.TopicID
39+
AND Attributes.Version = Unmatched.Version
40+
AND Attributes.AttributeKey != Unmatched.AttributeKey
41+
LEFT JOIN ExtendedAttributes UnmatchedExtended
42+
ON Attributes.TopicID = UnmatchedExtended.TopicID
43+
AND Attributes.Version = UnmatchedExtended.Version
44+
WHERE Unmatched.AttributeKey is null
45+
AND UnmatchedExtended.TopicID is null
46+
AND Attributes.AttributeKey = 'LastModified'
47+
48+
--------------------------------------------------------------------------------------------------------------------------------
49+
-- CHECK FINAL VALUES
50+
--------------------------------------------------------------------------------------------------------------------------------
51+
SELECT @Count = @Count - Count(TopicID)
52+
FROM Attributes
53+
WHERE AttributeKey = 'LastModified'
54+
55+
Print('Final Count: ' + CAST(@Count AS VARCHAR) + ' orphaned LastModified records were identified and deleted.')

OnTopic.Data.Sql.Database/Stored Procedures/pr_Disable_Foreign_Keys.sql renamed to OnTopic.Data.Sql.Database/Utilities/Stored Procedures/DisableForeignKeys.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CREATE PROCEDURE pr_Disable_Foreign_Keys
1+
CREATE PROCEDURE [Utilities].[DisableForeignKeys]
22
@disable BIT = 1
33
AS
44

OnTopic.Data.Sql.Database/Stored Procedures/GenerateHierarchyID.sql renamed to OnTopic.Data.Sql.Database/Utilities/Stored Procedures/GenerateHierarchyID.sql

File renamed without changes.

OnTopic.Data.Sql.Database/Stored Procedures/GenerateNestedSet.sql renamed to OnTopic.Data.Sql.Database/Utilities/Stored Procedures/GenerateNestedSet.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
-- and uses it to generate a nested set based table in Topics. Useful for recovering from a corrupted nested set model.
66
--------------------------------------------------------------------------------------------------------------------------------
77

8-
CREATE PROCEDURE [dbo].[GenerateNestedSet]
8+
CREATE PROCEDURE [Utilities].[GenerateNestedSet]
99
AS
1010

1111
SET IDENTITY_INSERT Topics ON

0 commit comments

Comments
 (0)