This question is linked to my previous one ( posted as an anonymous user - now I have an account ) and, before I begin, I would like to give the credit to Rob Farley for providing the right indexing schema.
But the problem is not the indexing schema.
It's the Query Optimizer !
The query :
SELECT s.ID_i
, s.ShortName_v
, sp.Path_v
, ( SELECT TOP 1 1 -- has also user access on subsites ?
FROM SitePath_T usp
, UserSiteRight_t usr
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%_'
AND usr.UserID_i = 1 )
FROM Site_T s
, SitePath_T sp
WHERE sp.SiteID_i = s.ID_i
AND s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = 1
AND EXISTS ( SELECT *
FROM SitePath_T usp
, UserSiteRight_t usr
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%'
AND usr.UserID_i = 1 )
... runs in :
CPU Reads Writes Duration
2073 49572 0 2241 -- more than 2 sec
Execution plan :
|--Compute Scalar(DEFINE:([Expr1014]=[Expr1014]))
|--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v]))
|--Nested Loops(Left Semi Join, OUTER REFERENCES:([Expr1016], [Expr1017], [Expr1018], [Expr1019]))
| |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i]))
| | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%')))
| | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID_<Path>] AS [sp]), ORDERED FORWARD)
| | |--Sort(ORDER BY:([s].[ID_i] ASC))
| | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9þþþþþ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[a-y]%') ORDERED FORWARD)
| |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1020]) WITH UNORDERED PREFETCH)
| |--Clustered Index Scan(OBJECT:([dbo].[SitePath_T].[IDXC_SitePath_Path+SiteID] AS [usp]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016]))
| |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD)
|--Compute Scalar(DEFINE:([Expr1014]=(1)))
|--Top(TOP EXPRESSION:((1)))
|--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1021]) WITH UNORDERED PREFETCH)
|--Clustered Index Scan(OBJECT:([dbo].[SitePath_T].[IDXC_SitePath_Path+SiteID] AS [usp]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_')开发者_开发技巧)
|--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD)
But if I enforce the indexes, the following query :
SELECT s.ID_i
, s.ShortName_v
, sp.Path_v
, ( SELECT TOP 1 1 -- has also user access on subsites ?
FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) )
-- same performance when using WITH ( INDEX ( [IDX_SitePath_Path_INC<SiteID>] ) )
, UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) )
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%_'
AND usr.UserID_i = 1)
FROM Site_T s
, SitePath_T sp WITH ( INDEX ( [IDX_SitePath_SiteID+Path] ) )
-- same performance when using WITH ( INDEX ( [IDX_SitePath_SiteID_INC<Path>] ) )
WHERE sp.SiteID_i = s.ID_i
AND s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = 1
AND EXISTS ( SELECT *
FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) )
-- same performance when using WITH ( INDEX ( [IDX_SitePath_Path_INC<SiteID>] ) )
, UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) )
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%'
AND usr.UserID_i = 1 )
will run in :
CPU Reads Writes Duration
50 11237 0 55
the duration will drop to 55 milliseconds ( from more than 2 sec ) !!!!
And I'm happy with this result !
Execution plan :
|--Compute Scalar(DEFINE:([Expr1014]=[Expr1014]))
|--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v]))
|--Nested Loops(Left Semi Join, OUTER REFERENCES:([Expr1016], [Expr1017], [Expr1018], [Expr1019]))
| |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i]))
| | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%')))
| | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID_<Path>] AS [sp]), ORDERED FORWARD)
| | |--Sort(ORDER BY:([s].[ID_i] ASC))
| | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9þþþþþ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[a-y]%') ORDERED FORWARD)
| |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1023]) WITH UNORDERED PREFETCH)
| |--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1017], [Expr1018], [Expr1019]))
| | |--Compute Scalar(DEFINE:([Expr1017]=[Expr1017], [Expr1018]=[Expr1018], [Expr1019]=[Expr1019]))
| | | |--Constant Scan
| | |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1017] AND [usp].[Path_v] < [Expr1018]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016]) ORDERED FORWARD)
| |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD)
|--Compute Scalar(DEFINE:([Expr1014]=(1)))
|--Top(TOP EXPRESSION:((1)))
|--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1027]) WITH UNORDERED PREFETCH)
|--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1024], [Expr1025], [Expr1026]))
| |--Compute Scalar(DEFINE:([Expr1024]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1025]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1026]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_')))
| | |--Constant Scan
| |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1024] AND [usp].[Path_v] < [Expr1025]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_') ORDERED FORWARD)
|--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD)
The next step is to run it for different users, thus I will declare UserID_i as a variable :
DECLARE @UserID_i INT
SELECT @UserID_i = 1
BUT NOW THE BELOW QUERY BECOMES CRAZY SLOW !!!
SELECT s.ID_i
, s.ShortName_v
, sp.Path_v
, ( SELECT TOP 1 1 -- has also user access on subsites ?
FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) )
, UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) )
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%_'
AND usr.UserID_i = @UserID_i)
FROM Site_T s
, SitePath_T sp WITH ( INDEX ( [IDX_SitePath_SiteID+Path] ) )
WHERE sp.SiteID_i = s.ID_i
AND s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = 1
AND EXISTS ( SELECT *
FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) )
, UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) )
WHERE usr.SiteID_i = usp.SiteID_i
AND usp.Path_v LIKE sp.Path_v + '%'
AND usr.UserID_i = @UserID_i )
The duration is now over 7 seconds !!!
CPU Reads Writes Duration
7421 149984 35 7625
And the execution plan :
|--Compute Scalar(DEFINE:([Expr1014]=[Expr1014]))
|--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v]))
|--Nested Loops(Left Semi Join, WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016]))
| |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i]))
| | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%')))
| | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID+Path] AS [sp]), ORDERED FORWARD)
| | |--Sort(ORDER BY:([s].[ID_i] ASC))
| | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9þþþþþ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[a-y]%') ORDERED FORWARD)
| |--Table Spool
| |--Hash Match(Inner Join, HASH:([usr].[SiteID_i])=([usp].[SiteID_i]))
| |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=[@UserID_i]) ORDERED FORWARD)
| |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]))
|--Compute Scalar(DEFINE:([Expr1014]=(1)))
|--Top(TOP EXPRESSION:((1)))
|--Nested Loops(Inner Join, WHERE:([dbo].[UserSiteRight_T].[SiteID_i] as [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]))
|--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1020], [Expr1021], [Expr1022]))
| |--Compute Scalar(DEFINE:([Expr1020]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1021]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1022]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_')))
| | |--Constant Scan
| |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1020] AND [usp].[Path_v] < [Expr1021]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_') ORDERED FORWARD)
|--Table Spool
|--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=[@UserID_i]) ORDERED FORWARD)
The execution plan is changing completely when I'm using a variable instead of hard coding the UserID_i value !
Why the query optimizer is behaving like this ?
How can I enforce the execution plan to be the same as the second fast query ?
Thank you.
UPDATE 1
Deleted ( irrelevant )
UPDATE 2
It seems that I am not the only one having this problem.
Please check the following topics :
Why does the SqlServer optimizer get so confused with parameters? Known issue?: SQL Server 2005 stored procedure fails to complete with a parameterUPDATE 3
An excellent article from SQL Server Query Optimization Team covering parameter sniffing : I Smell a Parameter !
Is there a reason you can't use index hints (as in your second query) when you're using the variable (in the third query)? It's odd that the query optimizer makes such a bad decision when there's an index available, but it only knows a limited amount about your data, and it chooses as best it can.
Some statistics on the indexed columns might help you out, actually - they keep track of the data, the data layout, and some other information about what the table actually contains, whereas the indexes themselves are only built on top of the table metadata, and the query optimizer doesn't choose on the data itself (unless there are statistics there to help it do so).
Have you run the "Database Tuning Advisor" on the query? Highlighting the query and selecting "Analyze Query in Database Engine Tuning Advisor" from the "Query" menu in SSMS will use the table data to suggest some statistics for you - that might make a huge difference.
After reading the above articles ( provided in Update 2 and Update 3 ) I finally understood more about how Sql Server is treating / caching execution plans.
Adding OPTION ( RECOMPILE )
at the end of my SELECT
statements will force Sql Server to recalculate the execution plan ( and not using the cached one ) every time the query will be run, thus choosing the best plan matching the variable.
As Peter was asking why I did not use recursion, I am providing below the recursive cte which will return the correct result :
; WITH Site_R AS (
SELECT s.ID_i
, sp.Path_v
, s.ID_i AS SubSiteID_i
, sp.Path_v AS SubPath_v
, 0 AS Depth_i
FROM Site_T s
, SitePath_T sp
WHERE sp.SiteID_i = s.ID_i
AND s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = 1
UNION ALL
SELECT sr.ID_i
, sr.Path_v
, s.ID_i
, sp.Path_v
, Depth_i+1
FROM Site_T s
, Site_R sr
, SitePath_T sp
WHERE sp.SiteID_i = s.ID_i
AND s.ParentID_i = sr.SubSiteID_i
)
SELECT us.*
, ( SELECT usr.UserID_i FROM UserSiteRight_T usr WHERE usr.SiteID_i = us.SubSiteID_i AND UseriD_i = 1 ) AS UserID_i
FROM Site_R us
Result's first rows with added UserSiteRight_T.UserID_i
column showing the access to the SubSiteID_i :
ID_i Path_v SubSiteID_i SubPath_v Depth_i UserSiteRight_T.UserID_i
------- ----------- ----------- --------------- ----------- -----------
2 1.2. 2 1.2. 0 1
3 1.3. 3 1.3. 0 NULL
3 1.3. 4 1.3.4. 1 1
3 1.3. 5 1.3.15863. 1 1
3 1.3. 6 1.3.6. 1 NULL
3 1.3. 7 1.3.6.7. 2 1
3 1.3. 8 1.3.8. 1 1
9 1.9. 9 1.9. 0 NULL
9 1.9. 10 1.9.10. 1 NULL
9 1.9. 11 1.9.10.11. 2 1
9 1.9. 12 1.9.10.12. 2 1
9 1.9. 13 1.9.13. 1 NULL
9 1.9. 14 1.9.13.14. 2 NULL
9 1.9. 15 1.9.13.14.15. 3 1
9 1.9. 16 1.9.13.14.16. 3 1
9 1.9. 17 1.9.13.17. 2 NULL
9 1.9. 18 1.9.13.17.18. 3 1
9 1.9. 19 1.9.19. 1 1
9 1.9. 20 1.9.20. 1 NULL
My final result should be a Group By on the first column having last column NOT NULL.
Or the following recursive query :
; WITH Site_R AS (
SELECT s.ID_i
, sp.Path_v
, s.ID_i AS SubSiteID_i
, sp.Path_v AS SubPath_v
, 0 AS Depth_i
FROM Site_T s
, SitePath_T sp
WHERE sp.SiteID_i = s.ID_i
AND s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = 1
UNION ALL
SELECT sr.ID_i
, sr.Path_v
, s.ID_i
, sp.Path_v
, Depth_i+1
FROM Site_T s
, Site_R sr
, SitePath_T sp
WHERE sp.SiteID_i = s.ID_i
AND s.ParentID_i = sr.SubSiteID_i
)
SELECT us.ID_i
FROM Site_R us
, UserSiteRight_T usr
WHERE usr.SiteID_i = us.SubSiteID_i
AND UseriD_i = 1
GROUP BY ID_i
which basically builds the whole tree and selects only the ancestors having SubSiteID_i accessible by UserID_i. Or :
; WITH Site_R AS (
SELECT s.ID_i
, s.ID_i AS SubSiteID_i
, 0 AS Depth_i
, ( SELECT 1 FROM UserSiteRight_T usr WHERE usr.SiteID_i = s.ID_i AND usr.UserID_i = @UserID_i ) AS HasRight_b
FROM Site_T s
WHERE s.ShortName_v LIKE '[a-y]%'
AND s.ParentID_i = @ParentID_i
UNION ALL
SELECT sr.ID_i
, s.ID_i
, Depth_i+1
, ( SELECT 1 FROM UserSiteRight_T usr WHERE usr.SiteID_i = s.ID_i AND usr.UserID_i = @UserID_i )
FROM Site_T s
, Site_R sr
WHERE s.ParentID_i = sr.SubSiteID_i
AND ( sr.HasRight_b IS NULL OR Depth_i = 0 )
)
SELECT * FROM Site_R Where HasRight_b IS NOT NULL
EDIT:
First, you need a covering index on (ParentID_i, ID_i). Do you have one?
Second:
I am trying to obtain all the sites with depth = 0 which are having subsites accessible by a user.
This description doesn't match the queries you provided here.
This will return all sites w/ depth = 0 (ie, no more parents) that have subsites accessible to the user:
; WITH Site_R AS (
SELECT s.ID_i
, s.ParentID_i
FROM Site_T s
, UserSiteRight_T usr
WHERE usr.SiteID_i = s.ID_i
AND usr.UserID_i = @UserID_i -- plus any other filters
UNION ALL
SELECT s.ID_i
, s.ParentID_i
FROM Site_T s
, Site_R sr
WHERE s.ID_i = sr.ParentID_i
)
SELECT DISTINCT ID_i
FROM Site_R
WHERE ParentID_i IS NULL
Is this the result set you want?
Don't add any unnecessary columns to the recursive CTE. Join them in later, post-recurse, post-reduce.
精彩评论