I have a quite long and (for me) complex question. I have voting data from the Council of the European Union, where the voting behaviour for each country has been coded according to a nominal scale:
0: yes
1: no
2: Abstention
3: no with a negative statement
4: Abstention with a negative statement
5: yes with a negative statement
The data is in the following format (see the end of the post for a dump of 20 observations from the data set):
Country1 Country2 Country3 ... Date
1 0 0 ... 2004-12-12
1 2 0 ... 2003-02-14
2 0 1 ... 2004-05-22
... ... ... ... ...
First of all I would like to aggregate the data into monthly intervals, where for each month we have a sum of how many 0,1,2 etc there were for each country. Ideally the data should look like this:
Month Country sum of 0s sum of 1s Sum of 2s
January Country1 2 0 1
January Country2 4 0 0
... ... ... ... ...
Once this has been done I would like to put the data into panel format like this:
Country Month sum of 0s sum of 1s sum of 2s
Country1 January 2 0 1
Country1 February 0 1 3
... ... ... ... ...
Country2 January 4 0 0
Country2 February 2 2 0
... ... ... ... ...
I am sorry if this is a very time consuming question, but I have been playing around with aggregate, by and different apply functions forever, without being able to get the desired result. Any help will be greatly appreciated!
20 observations from the data set (output from the dput() function):
structure(list(Recitals = c(29L, 13L, 2L, 20L, 10L, 18L, 29L,
6L, 4L, 16L, 7L, 6L, 12L, 23L, 6L, 10L, 2L, 6L, 9L, 8L, 7L),
Voting_Rule = structure(c(4L, 4L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 4L, 4L), .Label = c("0",
"Qualified Majority", "Simple Majority", "Unanimity", "Qualified majority",
"Simple majority"), class = "factor"), Belgium = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Denmark = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Czech.Republic = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Germany = c(0L,
0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Estonia = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Greece = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Spain = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), France = c(0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Ireland = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Italy = c(0L, 0L, 0L, 0L, 0L, 0L, 5L,
0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Cyprus = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Latvia = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Lithuania = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Luxembourg = c(0L,
0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Hungary = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Malta = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Netherlands = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Austria = c(0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Poland = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Portugal = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Slovenia = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 开发者_C百科0L, 0L, 0L, 0L, 0L
), Slovakia = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Finland = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Sweden = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
UK = c(0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 5L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Dates = structure(c(12716,
12716, 12716, 12674, 12674, 12698, 12705, 12724, 12738, 12738,
12716, 12741, 12744, 12754, 12754, 12758, 12758, 12758, 12759,
12759, 12759), class = "Date")), .Names = c("Recitals", "Voting_Rule",
"Belgium", "Denmark", "Czech.Republic", "Germany", "Estonia",
"Greece", "Spain", "France", "Ireland", "Italy", "Cyprus", "Latvia",
"Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands",
"Austria", "Poland", "Portugal", "Slovenia", "Slovakia", "Finland",
"Sweden", "UK", "Dates"), row.names = c(752L, 753L, 762L, 774L,
775L, 776L, 777L, 780L, 789L, 790L, 793L, 794L, 797L, 816L, 817L,
818L, 819L, 820L, 824L, 825L, 826L), class = "data.frame")
Hadley's reshape2 and plyr package would greatly help your work:
library(reshape2)
# load your data into variable d1
# d1 <- struct( ... your data ...)
# first, melt the data.
# molten data is very very useful for further transformation.
d2 <- reshape2:::melt.data.frame(d1[,-(1:2)], # drop unused variables
id.vars="Dates",
variable.name="Country")
# create a variable "Month" from Date
d2$Month <- months(d2$Date)
# cast the data to the desired format using appropriate function (here, length())
d3 <- dcast(d2, Country+Month~value, length)
# finally change the names of newly created variables
names(d3)[-(1:2)] <- sprintf("sum of %ss", names(d3)[-(1:2)])
精彩评论