I'm trying to anonymize all the data in my database, so I'm renaming all the people in it. I asked a similar question earlier, and was told to use NewID to force the creation of a new value per updated row, but in this situation it doesn't seem to be working.
What am I doing wrong?
-- Create Table Customer
CREATE TABLE #FirstName
(
ID int,
FirstName nvarchar(255) NULL,
Gender nvarchar(255) NULL
)
CREATE TABLE #LastName (
ID int,
LastName nvarchar(255)
)
-- BULK INSERT to import data from Text or CSV File
BULK INSERT #FirstName
FROM 'C:\Users\jhollon\Desktop\tmp\names\firstnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
BULK INSERT #LastName
FROM 'C:\Users\jhollon\Desktop\tmp\names\lastnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
/*SELECT FirstName FROM #FirstName WHERE ID = (
SELECT RandomNumber FROM (
SELECT ABS(CHECKSUM(NewID())) % 1500 AS RandomNumber FROM tblTenant WHERE Sex = '1'
) AS A
);*/
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT AB开发者_StackOverflow社区S(CHECKSUM(NewID())) % 500 + 1501)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '2';
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '1';
DROP TABLE #FirstName;
DROP TABLE #LastName;
Correct. The subquery is evaluated once which is as advertised ("cachable scalar subquery")
Try this which uses NEWID as a derived table
UPDATE T
SET
TenantName = L.LastName + ', ' + F.FirstName
FROM
tblTenant T
CROSS APPLY
(SELECT TOP 1 UPPER(FirstName) as FirstName FROM #FirstName
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()) F
CROSS APPLY
(SELECT TOP 1 LastName FROM #LastName
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()) L
I'm not sure I understand your question, but if you want the ID to be unique values, you can make it an identity column. Ex:
[ID] [int] IDENTITY(1,1) NOT NULL
The code below demonstrates that without an inner to outer correlation, that the old name is not guaranteed to differ from the new name when using the CROSS APPLY answer above.
WHERE F.Id <> T.Id ORDER BY NEWID()
would be better within the FirstName CROSS APPLY
USE tempdb
GO
IF OBJECT_ID('tblTenant') IS NOT NULL
DROP TABLE tblTenant
GO
CREATE TABLE tblTenant
(
Id int,
FirstName nvarchar(20),
LastName nvarchar(20),
Gender bit
)
INSERT INTO tblTenant
VALUES (1, 'Bob' , 'Marley', 1),
(2, 'Boz' , 'Skaggs', 1)
SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant
SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant
-- There is a probability > 0 that a tenant's new name = tenants old name
SELECT
OldFirst = T.FirstName,
OldLast = T.LastName,
NewFirst = F.FirstName,
NewLast = L.LastName
FROM
tblTenant T
CROSS APPLY
(
SELECT TOP 1 UPPER(FirstName) AS FirstName
FROM #FirstNames
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()
) F
CROSS APPLY
(
SELECT TOP 1 LastName
FROM #LastNames
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()
) L
精彩评论