开发者

TSQL not generating a new value per row

开发者 https://www.devze.com 2023-03-01 09:38 出处:网络
I\'m trying to anonymize all the data in my database, so I\'m renaming all the people in it. I asked a similar question earlier, and was told to use NewID to force the creation of a new value per upda

I'm trying to anonymize all the data in my database, so I'm renaming all the people in it. I asked a similar question earlier, and was told to use NewID to force the creation of a new value per updated row, but in this situation it doesn't seem to be working.

What am I doing wrong?

-- Create Table Customer
CREATE TABLE #FirstName
(
    ID int,
    FirstName nvarchar(255) NULL,
    Gender nvarchar(255) NULL
)  

CREATE TABLE #LastName (
    ID int,
    LastName nvarchar(255)
)

-- BULK INSERT to import data from Text or CSV File
BULK INSERT #FirstName
FROM 'C:\Users\jhollon\Desktop\tmp\names\firstnames.lined.txt'
WITH
(
 FIRSTROW = 1,
 FIELDTERMINATOR = ',',
 ROWTERMINATOR = '\n'
)

BULK INSERT #LastName
FROM 'C:\Users\jhollon\Desktop\tmp\names\lastnames.lined.txt'
WITH
(
 FIRSTROW = 1,
 FIELDTERMINATOR = ',',
 ROWTERMINATOR = '\n'
)

/*SELECT FirstName FROM #FirstName WHERE ID = (
    SELECT RandomNumber FROM (
        SELECT ABS(CHECKSUM(NewID())) % 1500 AS RandomNumber FROM tblTenant WHERE Sex = '1'
        ) AS A
    );*/

UPDATE tblTenant SET TenantName = ( 
    SELECT LastName + ', ' + FirstName FROM 
        (SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT AB开发者_StackOverflow社区S(CHECKSUM(NewID())) % 500 + 1501)) AS A,
        (SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '2';

UPDATE tblTenant SET TenantName = ( 
    SELECT LastName + ', ' + FirstName FROM 
        (SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1)) AS A,
        (SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '1';

DROP TABLE #FirstName;
DROP TABLE #LastName;


Correct. The subquery is evaluated once which is as advertised ("cachable scalar subquery")

Try this which uses NEWID as a derived table

UPDATE T
SET
    TenantName =  L.LastName + ', ' + F.FirstName
FROM
   tblTenant T
   CROSS APPLY
   (SELECT TOP 1 UPPER(FirstName) as FirstName FROM #FirstName 
           WHERE CHECKSUM(NEWID()) <> T.ID
           ORDER BY NEWID()) F
   CROSS APPLY
   (SELECT TOP 1 LastName FROM #LastName
           WHERE CHECKSUM(NEWID()) <> T.ID
           ORDER BY NEWID()) L


I'm not sure I understand your question, but if you want the ID to be unique values, you can make it an identity column. Ex:

[ID] [int] IDENTITY(1,1) NOT NULL


The code below demonstrates that without an inner to outer correlation, that the old name is not guaranteed to differ from the new name when using the CROSS APPLY answer above.
WHERE F.Id <> T.Id ORDER BY NEWID() would be better within the FirstName CROSS APPLY

USE tempdb
GO           
IF OBJECT_ID('tblTenant') IS NOT NULL 
  DROP TABLE tblTenant
GO
CREATE TABLE tblTenant
(
  Id        int,
  FirstName nvarchar(20),
  LastName  nvarchar(20),
  Gender    bit
)
INSERT INTO tblTenant
VALUES (1, 'Bob'   , 'Marley', 1),
       (2, 'Boz'   , 'Skaggs', 1)

SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant

SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant

    -- There is a probability > 0 that a tenant's new name = tenants old name
    SELECT
      OldFirst = T.FirstName,
      OldLast  = T.LastName,
      NewFirst = F.FirstName,
      NewLast  = L.LastName
    FROM  
      tblTenant T

      CROSS APPLY
      (
        SELECT TOP 1 UPPER(FirstName) AS FirstName 
        FROM #FirstNames 
        WHERE CHECKSUM(NEWID()) <> T.ID
        ORDER BY NEWID()
      ) F

      CROSS APPLY
      (
        SELECT TOP 1 LastName 
        FROM #LastNames
        WHERE CHECKSUM(NEWID()) <> T.ID
        ORDER BY NEWID()
      ) L
0

精彩评论

暂无评论...
验证码 换一张
取 消