开发者

Join One Table To Get Random Rows from 2nd Table

开发者 https://www.devze.com 2023-02-10 21:50 出处:网络
I have two tables: #tmpParams, which lists 29 combinations of Disability and Grade. The 2nd table, #tmpSource contains many students with their StudentID, Disability, and Grade.I need to select 29 ran

I have two tables: #tmpParams, which lists 29 combinations of Disability and Grade. The 2nd table, #tmpSource contains many students with their StudentID, Disability, and Grade. I need to select 29 random rows from #tmpSource, joining on #tmpParams by Disability and Grade.

In other words, someone handed me a list of parameters for 29 students (Disability and Grade) and they want 29 random students from #tmpSource, including StudentID, matching on the two fields.

This is the DDL:

  IF OBJECT_ID('tempdb..#tmpParams') IS NOT NULL 
        BEGIN
              DROP TABLE #tmpParams
        END
  GO

  IF OBJECT_ID('tempdb..#tmpSource') IS NOT NULL 
        BEGIN
              DROP TABLE #tmpSource
        END
  GO

    /* Create parameter table */
    CREATE TABLE #tmpParams
         ( 
            Disability        varchar(10)
            ,Grade      varchar(10)
         )
    GO        

    /* populate it */
    INSERT INTO #tmpParams
        (Disability, Grade)
    SELECT '13 - AUT' ,'Grade 1'  UNION ALL
    SELECT '08 - SL' ,'Grade 1'  UNION ALL
    SELECT '11 - PSD' ,'Grade 1'  UNION ALL
    SELECT '04 - SLD' ,'Grade 2'  UNION ALL
    SELECT '04 - SLD' ,'Grade 2'  UNION ALL
    SELECT '08 - SL' ,'Grade 2'  UNION ALL
    SELECT '08 - SL' ,'Grade 2'  UNION ALL
    SELECT '08 - SL' ,'Grade 2'  UNION ALL
    SELECT '14 - TBI' ,'Grade 2'  UNION ALL
    SELECT '07 - PD' ,'Grade 2'  UNION ALL
    SELECT '08 - SL' ,'Grade 3'  UNION ALL
    SELECT '08 - SL' ,'Grade 3'  UNION ALL
    SELECT '08 - SL' ,'Grade 3'  UNION ALL
    SELECT '08 - SL' ,'Grade 3'  UNION ALL
    SELECT '08 - SL' ,'Grade 3'  UNION ALL
    SELECT '07 - PD' ,'Grade 3'  UNION ALL
    SELECT '04 - SLD' ,'Grade 4'  UNION ALL
    SELECT '04 - SLD' ,'Grade 4'  UNION ALL
    SELECT '08 - SL' ,'Grade 4'  UNION ALL
    SELECT '08 - SL' ,'Grade 4'  UNION ALL
    SELECT '08 - SL' ,'Grade 4'  UNION ALL
    SELECT '08 - SL' ,'Grade 4'  UNION ALL
    SELECT '08 - SL' ,'Grade 4'  UNION ALL
    SELECT '04 - SLD' ,'Grade 5'  UNION ALL
    SELECT '04 - SLD' ,'Grade 5'  UNION ALL
    SELECT '04 - SLD' ,'Grade 5'  UNION ALL
    SELECT '04 - SLD' ,'Grade 5'  UNION ALL
    SELECT '08 - SL' ,'Grade 5'  UNION ALL
    SELECT '08 - SL' ,'Grade 5';
    GO


    /* Create table to hold source */
    CREATE TABLE #tmpSource
         ( 
            StudentID          int
            ,Disability     varchar(10)
            ,Grade          varchar(10)
         )
    GO 

    INSERT INTO #tmpSource
        (StudentID, Disability, Grade)
    SELECT '3', '04 - SLD', 'Grade 2'  UNION ALL
    SELECT '31', '04 - SLD', 'Grade 2'  UNION ALL
    SELECT '39', '04 - SLD', 'Grade 2'  UNION ALL
    SELECT '46', '04 - SLD', 'Grade 2'  UNION ALL
    SELECT '10', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '13', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '25', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '33', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '57', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '60', '04 - SLD', 'Grade 4'  UNION ALL
    SELECT '8', '04 - SLD', 'Grade 5'  UNION ALL
    SELECT '19', '04 - SLD', 'Grade 5'  UNION ALL
    SELECT '23', '04 - SLD', 'Grade 5'  UNION ALL
    SELECT '51', '04 - SLD', 'Grade 5'  UNION ALL
    SELECT '55', '04 - SLD', 'Grade 5'  UNION ALL
    SELECT '16', '07 - PD', 'Grade 2'  UNION ALL
    SELECT '28', '07 - PD', 'Grade 2'  UNION ALL
    SELECT '36', '07 - PD', 'Grade 2'  UNION ALL
    SELECT '43', '07 - PD', 'Grade 2'  UNION ALL
    SELECT '11', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '14', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '26', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '34', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '58', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '61', '07 - PD', 'Grade 3'  UNION ALL
    SELECT '5', '08 - SL', 'Grade 1'  UNION ALL
    SELECT '21', '08 - SL', 'Grade 1'  UNION ALL
    SELECT '41', '08 - SL', 'Grade 1'  UNION ALL
    SELECT '48', '08 - SL', 'Grade 1'  UNION ALL
    SELECT '2', '08 - SL', 'Grade 2'  UNION ALL
    SELECT '30', '08 - SL', 'Grade 2'  UNION ALL
    SELECT '38', '08 - SL', 'Grade 2'  UNION ALL
    SELECT '45', '08 - SL', 'Grade 2'  UNION ALL
    SELECT '12', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '15', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '27', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '35', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '59', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '62', '08 - SL', 'Grade 3'  UNION ALL
    SELECT '9', '08 - SL', 'Grade 4'  UNION ALL
    SELECT '20', '08 - SL', 'Grade 4'  UNION ALL
    SELECT '24', '08 - SL', 'Grade 4'  开发者_运维技巧UNION ALL
    SELECT '52', '08 - SL', 'Grade 4'  UNION ALL
    SELECT '56', '08 - SL', 'Grade 4'  UNION ALL
    SELECT '7', '08 - SL', 'Grade 5'  UNION ALL
    SELECT '18', '08 - SL', 'Grade 5'  UNION ALL
    SELECT '22', '08 - SL', 'Grade 5'  UNION ALL
    SELECT '50', '08 - SL', 'Grade 5'  UNION ALL
    SELECT '54', '08 - SL', 'Grade 5'  UNION ALL
    SELECT '4', '11 - PSD', 'Grade 1'  UNION ALL
    SELECT '32', '11 - PSD', 'Grade 1'  UNION ALL
    SELECT '40', '11 - PSD', 'Grade 1'  UNION ALL
    SELECT '47', '11 - PSD', 'Grade 1'  UNION ALL
    SELECT '6', '13 - AUT', 'Grade 1'  UNION ALL
    SELECT '17', '13 - AUT', 'Grade 1'  UNION ALL
    SELECT '42', '13 - AUT', 'Grade 1'  UNION ALL
    SELECT '49', '13 - AUT', 'Grade 1'  UNION ALL
    SELECT '53', '13 - AUT', 'Grade 1'  UNION ALL
    SELECT '1', '14 - TBI', 'Grade 2'  UNION ALL
    SELECT '29', '14 - TBI', 'Grade 2'  UNION ALL
    SELECT '37', '14 - TBI', 'Grade 2'  UNION ALL
    SELECT '44', '14 - TBI', 'Grade 2';
    GO

That populates 29 students into #tmpParams and 62 into #tmpSource. My real-world scenario is, of course, much more complex.

I can get 29 random students from #tmpSource easily enough...but how do I JOIN on #tmpParams to get 29 students that match on the two fields?

   SELECT TOP 29
       TS.StudentID
       ,TS.Disability
       ,TS.Grade
   FROM #tmpSource AS TS
   ORDER BY NEWID();
   GO

As always, thank you for your help, and please let me know if I can clarify something or make this easier to read.


I may be missing something but why not

SELECT TOP 29 TS.StudentID,
              TS.Disability,
              TS.Grade
FROM   #tmpParams P
       JOIN #tmpSource TS
         ON TS.Disability = P.Disability
            AND TS.Grade = P.Grade
ORDER  BY NEWID()

Edit

You want this?

SELECT TS.StudentID,
       TS.Disability,
       TS.Grade
FROM   #tmpParams P
       CROSS APPLY (SELECT TOP 1 *
                    FROM   #tmpSource TS
                    WHERE  TS.Disability = P.Disability
                           AND TS.Grade = P.Grade
                    ORDER  BY NEWID()) TS  

Edit 2

You want this?

 ;WITH T1
     AS (SELECT *,
                Row_number() OVER (PARTITION BY Disability, Grade ORDER BY Newid()) RN
         FROM   #tmpSource),
     T2
     AS (SELECT Disability,
                Grade,
                Row_number() OVER (PARTITION BY Disability, Grade 
                                       ORDER BY (SELECT 0)) AS RN
         FROM   #tmpParams)
SELECT T1.Disability,
       T1.Grade,
       T1.StudentID
FROM   T1
       JOIN T2
         ON T1.Disability = T2.Disability
            AND T2.Grade = T1.Grade
            AND T1.RN = T2.RN  


with sample as
(
   select p.disability, p.grade, count(*) as numOfSamples from #tmpParams P
   group by p.disability, p.grade
),
rownumedStuds as
(
   select std.id, std.disability, std.grade, row_number() over (partition by std.disability, std.grade order by newID()) num
)
select a.id,a.disability, a.grade 
from sample b
inner join rownumedStuds a on a.grade = b.grade and a.disability = b.disability
where num <= numOfSamples

Basically it does calculate number of buckets and number of matches then randomizes each group (if this is perfomance heavy You might loop join it with only the categoriez that are need in the rownumedStuds to randomize only groups You need.

the actual select I hope is pretty straighforward

regards luke


I may have figured this out, although it doesn't appear very elegant:

 /* Add identity column to #tmpParams */
 ALTER TABLE #tmpParams ADD ID INT IDENTITY (1,1)

 /* drop #tmpResults if exists */
       IF OBJECT_ID('tempdb..#tmpResults') IS NOT NULL 
             BEGIN
                   DROP TABLE #tmpResults
             END
       GO

 /* Create table to hold results */
 CREATE TABLE #tmpResults
  ( 
     StudentID          int
     ,Disability     varchar(10)
     ,Grade          varchar(10)
  )
 GO 

 DECLARE @i AS INT; -- set variable to parse through 
 SET @i = 1
 WHILE @i <= (SELECT MAX(ID) FROM #tmpParams) -- Max number in table
 BEGIN
     INSERT INTO #tmpResults
         SELECT TOP 1
             TS.StudentID
             ,TS.Disability
             ,TS.Grade
         FROM #tmpSource AS TS
         JOIN #tmpParams AS TP
             ON TS.Disability = TP.Disability
             AND TS.Grade = TP.Grade
         WHERE TP.ID = @i 
             AND TS.StudentID NOT IN (SELECT StudentID FROM #tmpResults)  -- so no duplicates
         ORDER BY NEWID()
     SET @i = @i + 1
 END;
 GO

Thanks for your help, Martin, it got me on the right track.

0

精彩评论

暂无评论...
验证码 换一张
取 消