create a loop

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

create a loop

Marna Wagley
Hi R Users,
I have very large data sets and want to run some of the analyses many
times with randomization (1000 times).
I have done the analysis using an example data set, but it needs to be done
with randomized data (1000 times). I am currently doing this manually 10000
times, but it is taking so much time. I wonder whether it is possible to
perform the analysis by creating a loop over many replicated datasets? The
code and the example data sets are attached.

I would be very grateful if someone could help me create the loop for the
following example data and analyses.

I appreciate  your help.


MW

#####

# Example site-level data: 19 sites (s1..s19) spread over six regions, each
# with an integer `temp` value and a sampling `group` (A, B or C).
# Factor levels are spelled out explicitly so the level order (and therefore
# the underlying integer codes) does not depend on the locale's sort order.
dat1 <- data.frame(
  RegionA = factor(
    rep(c("Ra", "Rb", "Rc", "Rd", "Re", "Rf"),
        times = c(2L, 1L, 2L, 1L, 2L, 11L)),
    levels = c("Ra", "Rb", "Rc", "Rd", "Re", "Rf")
  ),
  site = factor(
    paste0("s", 1:19),
    levels = c("s1", paste0("s", 10:19), paste0("s", 2:9))
  ),
  temp = c(23L, 21L, 10L, 15L, 16L, 8L, 13L, 1L, 23L, 19L,
           25L, 19L, 12L, 16L, 19L, 21L, 12L, 5L, 7L),
  group = factor(
    rep(c("A", "B", "C"), times = c(3L, 7L, 9L)),
    levels = c("A", "B", "C")
  )
)

head(dat1)


# Per-group population figures used to weight the group summaries:
# `total.pop` is the population size of each group and `sampled.pop` the
# number of units sampled from it.
dat2 <- data.frame(
  group = factor(c("A", "B", "C"), levels = c("A", "B", "C")),
  total.pop = c(250L, 375L, 180L),
  sampled.pop = c(25L, 37L, 27L)
)


##

# Row indices of dat1, partitioned by sampling group. `lll` is a named list
# (one element per level of dat1$group) holding the row numbers that belong
# to that group; it drives the within-group resampling below.
# seq_len() is used instead of 1:nrow() so a zero-row data frame gives an
# empty index vector rather than c(1, 0).
idx <- seq_len(nrow(dat1))

lll <- split(idx, dat1$group)


##########################
# Replication 1: create a resampled data set
############################

# Bootstrap within each group: draw that group's row indices with
# replacement. Indexing through sample.int() avoids the sample() pitfall
# where a group holding a single row index k would be resampled from 1:k.
# For groups with more than one row the random draws are the same as
# sample(rows, replace = TRUE).
Replication1 <- dat1[unlist(lapply(
  lll,
  function(rows) rows[sample.int(length(rows), replace = TRUE)]
)), ]

# Per-group summary of the resampled data. plyr is referenced by namespace
# so the script does not depend on library(plyr) having been called (and
# does not pick up dplyr::summarise if dplyr is attached).
# plyr::summarise evaluates its arguments in order, so `se` and `variance`
# use the `sd` column computed just before them.
Summary.Rep1 <- plyr::ddply(
  Replication1, "group", plyr::summarise,
  N = length(group),
  mean = mean(temp, na.rm = TRUE),
  sd = sd(temp),
  se = sd / sqrt(N),
  variance = sd^2
)

# Merge the group summaries with the population figures (Summary.Rep1, dat2)
Rep1 <- merge(Summary.Rep1, dat2, by = "group")

# Calculate the adjusted mean and variance.
# adj.mean: group mean weighted by the group's share of the total population.
Rep1$adj.mean <- (Rep1$total.pop * Rep1$mean) / sum(Rep1$total.pop)

# adj.var: variance / n, scaled by (1 - sampled.pop / total.pop).
Rep1$adj.var <- Rep1$variance /
  (Rep1$sampled.pop / (1 - (Rep1$sampled.pop / Rep1$total.pop)))

# over.adj.var: adj.var weighted by the squared population share.
Rep1$over.adj.var <- (Rep1$total.pop / sum(Rep1$total.pop))^2 * Rep1$adj.var

Rep1$total <- Rep1$adj.mean * Rep1$total.pop

## Overall estimates for this replication
Estimated.TotalTemp <- sum(Rep1$adj.mean) * sum(Rep1$total.pop)

# NOTE(review): over.adj.var is computed above but never used; the total
# variance sums the unweighted adj.var. Confirm whether sum(Rep1$over.adj.var)
# was intended here.
Estimated.totalvar <- sum(Rep1$adj.var)

Estimated.SE <- sqrt(Estimated.totalvar) * sum(Rep1$total.pop)

RESULTS.R1 <- data.frame(Estimated.TotalTemp, SE = Estimated.SE)

RESULTS.R1




##########################
# Replication 2: create a resampled data set
############################

# Bootstrap within each group: draw that group's row indices with
# replacement. Indexing through sample.int() avoids the sample() pitfall
# where a group holding a single row index k would be resampled from 1:k.
Replication2 <- dat1[unlist(lapply(
  lll,
  function(rows) rows[sample.int(length(rows), replace = TRUE)]
)), ]

# Per-group summary of the resampled data. plyr is referenced by namespace
# so the script does not depend on library(plyr) having been called.
# plyr::summarise evaluates its arguments in order, so `se` and `variance`
# use the `sd` column computed just before them.
Summary.Rep2 <- plyr::ddply(
  Replication2, "group", plyr::summarise,
  N = length(group),
  mean = mean(temp, na.rm = TRUE),
  sd = sd(temp),
  se = sd / sqrt(N),
  variance = sd^2
)

# Merge the group summaries with the population figures.
# Fix: the result must be stored in Rep2 (it was assigned to Rep1, which left
# Rep2 undefined and overwrote the first replication's table).
Rep2 <- merge(Summary.Rep2, dat2, by = "group")

# Calculate the adjusted mean and variance.
# adj.mean: group mean weighted by the group's share of the total population.
Rep2$adj.mean <- (Rep2$total.pop * Rep2$mean) / sum(Rep2$total.pop)

# adj.var: variance / n, scaled by (1 - sampled.pop / total.pop).
Rep2$adj.var <- Rep2$variance /
  (Rep2$sampled.pop / (1 - (Rep2$sampled.pop / Rep2$total.pop)))

# over.adj.var: adj.var weighted by the squared population share.
Rep2$over.adj.var <- (Rep2$total.pop / sum(Rep2$total.pop))^2 * Rep2$adj.var

Rep2$total <- Rep2$adj.mean * Rep2$total.pop

## Overall estimates for this replication
Estimated.TotalTemp <- sum(Rep2$adj.mean) * sum(Rep2$total.pop)

# NOTE(review): over.adj.var is computed above but never used; the total
# variance sums the unweighted adj.var. Confirm whether sum(Rep2$over.adj.var)
# was intended here.
Estimated.totalvar <- sum(Rep2$adj.var)

Estimated.SE <- sqrt(Estimated.totalvar) * sum(Rep2$total.pop)

RESULTS.R2 <- data.frame(Estimated.TotalTemp, SE = Estimated.SE)


##############################
# Combine the results from all runs into one data frame, one row per
# replication. The per-run results are gathered in a list and bound once;
# to cover more runs, add the further RESULTS.R* objects to the list.
ALL.Results <- do.call(rbind, list(RESULTS.R1, RESULTS.R2))

        [[alternative HTML version deleted]]

______________________________________________
[hidden email] mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Reply | Threaded
Open this post in threaded view
|

Re: create a loop

Ismail SEZEN
You should look at the "boot" package. Also search for the keywords
"bootstrap R" on Google.

20 Eki 2017 23:12 tarihinde "Marna Wagley" <[hidden email]> yazdı:

> Hi R Users,
> I do have very big data sets and wanted to run some of the analyses many
> times with randomization (1000 times).
> I have done the analysis using an example data but it need to be done with
> randomized data (1000 times). I am doing manually for 10000 times but
> taking so much time, I wonder whether it is possible to perform the
> analysis with creating a loop for many replicated datasets?  The code and
> the example data sets are attached.
>
> I will be very grateful if someone help me to create the loop for the
> following example data and the analyses.
>
> I appreciate  your help.
>
>
> MW
>
> #####
>
> dat1<-structure(list(RegionA = structure(c(1L, 1L, 2L, 3L, 3L, 4L, 5L, 5L,
> 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("Ra", "Rb", "Rc",
> "Rd", "Re", "Rf"), class = "factor"), site = structure(c(1L, 12L, 13L, 14L,
> 15L, 16L, 17L, 18L, 19L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), .Label
> = c("s1", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18",
> "s19", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9"), class = "factor"),
> temp = c(23L, 21L, 10L, 15L, 16L, 8L, 13L, 1L, 23L, 19L, 25L, 19L, 12L,
> 16L,
> 19L, 21L, 12L, 5L, 7L), group = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
>
> 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B",
> "C"), class = "factor")), .Names = c("RegionA", "site", "temp", "group"),
> class = "data.frame", row.names = c(NA, -19L))
>
> head(dat1)
>
>
> dat2<-structure(list(group = structure(1:3, .Label = c("A", "B", "C"
>
> ), class = "factor"), totalP = c(250L, 375L, 180L), sampled = c(25L,
>
> 37L, 27L)), .Names = c("group", "total.pop", "sampled.pop"), class =
> "data.frame", row.names = c(NA,
>
> -3L))
>
>
> ##
>
> idx <- 1:nrow(dat1)
>
> lll <- split(idx, dat1$group)
>
>
> ##########################
>
> #Replication 1 create a resampled data
>
> ############################
>
> Replication1<-dat1[unlist(lapply(lll, sample, rep=TRUE)),]
>
>
> Summary.Rep1<-ddply(Replication1, c("group"), summarise,
>
> N    = length(group),
>
>        mean = mean(temp, na.rm=TRUE),
>
>                sd   = sd(temp),
>
>                se   = sd / sqrt(N),
>
>                variance=sd^2
>
> )
>
> #merge two datasets (dat1 and dat2)
>
> Rep1<-merge(Summary.Rep1, dat2, by="group")
>
>
> #calclate adjusted mean. variance
>
> Rep1$adj.mean<-(Rep1$total.pop*Rep1$mean)/sum(Rep1$total.pop)
>
> Rep1$adj.var<-(Rep1$variance)/(Rep1$sampled.pop/(1-(Rep1$sampled.pop/Rep1$
> total.pop)))
>
> Rep1$over.adj.var<-(Rep1$total.pop/sum(Rep1$total.pop))^2*Rep1$adj.var
>
>
> Rep1$total<-Rep1$adj.mean*(Rep1$total.pop)
>
> ##
>
> Estimated.TotalTemp<-sum(Rep1$adj.mean)*sum(Rep1$total.pop)
>
> Estimated.totalvar<-sum(Rep1$adj.var)
>
> Estimated.SE<-sqrt(Estimated.totalvar)*sum(Rep1$total.pop)
>
> RESULTS.R1<-data.frame(Estimated.TotalTemp, SE=Estimated.SE)
>
> RESULTS.R1
>
>
>
>
> ##########################
>
> #Replication 2 create a resampled data
>
> ############################
>
> Replication2<-dat1[unlist(lapply(lll, sample, rep=TRUE)),]
>
>
> Summary.Rep2<-ddply(Replication2, c("group"), summarise,
>
> N    = length(group),
>
>        mean = mean(temp, na.rm=TRUE),
>
>                sd   = sd(temp),
>
>                se   = sd / sqrt(N),
>
>                variance=sd^2
>
> )
>
> #merge two datasets
>
> Rep1<-merge(Summary.Rep2, dat2, by="group")
>
>
> #calclate adjusted mean. variance
>
> Rep2$adj.mean<-(Rep2$total.pop*Rep2$mean)/sum(Rep2$total.pop)
>
> Rep2$adj.var<-(Rep2$variance)/(Rep2$sampled.pop/(1-(Rep2$sampled.pop/Rep2$
> total.pop)))
>
> Rep2$over.adj.var<-(Rep2$total.pop/sum(Rep2$total.pop))^2*Rep2$adj.var
>
>
> Rep2$total<-Rep2$adj.mean*(Rep2$total.pop)
>
>
> ##
>
> Estimated.TotalTemp<-sum(Rep2$adj.mean)*sum(Rep2$total.pop)
>
> Estimated.totalvar<-sum(Rep2$adj.var)
>
> Estimated.SE<-sqrt(Estimated.totalvar)*sum(Rep2$total.pop)
>
> RESULTS.R2<-data.frame(Estimated.TotalTemp, SE=Estimated.SE)
>
>
> ##############################
>
> #combined all results from 1000 runs
>
> ALL.Results(Restult.R1, Result.R2....)
>
>         [[alternative HTML version deleted]]
>
> ______________________________________________
> [hidden email] mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/
> posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

        [[alternative HTML version deleted]]

______________________________________________
[hidden email] mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.