Summary functions to dataframe

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

Summary functions to dataframe

Mike Bock
I have written a few different summary functions. I want to calculate
the statistics by groups and I am having trouble getting the output as a
dataframe. I have attached one example with a small dataset that
calculates summary stats and percentiles, I have others that calculate
upper confidence limits etc. I would like the output to be converted to
a dataframe with one of the columns as the grouping variable. This seems
simple but my attempts with do.call("cbind") and rbind have not worked
so I have concluded I am missing something obvious. Any help is
appreciated.

Thanks,
Mike



# Example data set for the question: a data frame `areas` with two columns,
#   N_Type - factor with levels "All", "Inside 370", "Not Applicable",
#            "Outside 370" (used below as the grouping variable)
#   AdRes  - numeric values to be summarised within each N_Type group
# Row names are kept as the character labels listed in `row.names`.
# NOTE(review): the factor codes below are written as doubles (4, 1, ...);
# recent R versions may expect integer codes for a factor - confirm that
# this structure() call still yields a usable factor before relying on it.
areas <- structure (list(N_Type = structure(c(4, 1, 4, 1, 1, 4, 1, 4, 4,
1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1,
4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1,
4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4,
1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 1, 4, 1, 4, 4, 1, 4,
1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1,
4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4,
1, 4, 1, 4, 1, 1, 4, 1, 4, 2, 1, 2, 1, 1, 4, 1, 4, 1, 4, 4, 1,
4, 1), .Label = c("All", "Inside 370", "Not Applicable", "Outside 370"
), class = "factor"), AdRes = c(23.7, 23.7, 42.4, 42.4, 630,
630, 990, 990, 72.85, 72.85, 70.6, 70.6, 10, 10, 21.7, 21.7,
171.66, 171.66, 306, 306, 62.1, 62.1, 53.25, 53.25, 208, 208,
64.8, 64.8, 87.3, 87.3, 356, 356, 25.8, 25.8, 156, 156, 166,
166, 135.5, 135.5, 170.5, 170.5, 203, 203, 227.5, 227.5, 224,
224, 123, 123, 140.66, 140.66, 142.5, 142.5, 44.65, 44.65, 50.3,
50.3, 1320, 1320, 577, 577, 71.1, 71.1, 411, 411, 104, 104, 122,
122, 201, 201, 230, 230, 192, 192, 304, 304, 184.5, 184.5, 350,
350, 536, 536, 470.5, 470.5, 172, 172, 166, 166, 205, 205, 595,
595, 227.5, 227.5, 9.1, 9.1, 14.6, 14.6, 10.9, 10.9, 11.1, 11.1,
313.5, 313.5, 53.8, 53.8, 29.8, 29.8, 29.5, 29.5, 34.05, 34.05,
21.8, 21.8, 385.5, 385.5, 541, 541, 168, 168, 119, 119, 376,
376, 91.9, 91.9, 97.76, 97.76, 164, 164, 244, 244, 303.5, 303.5,
388, 388, 59.8, 59.8, 227.5, 227.5, 165, 165, 19.15, 19.15, 651,
651, 195, 195, 190, 190, 164, 164, 190, 190, 334, 334)), .Names =
c("N_Type",
"AdRes"), row.names = c("8956", "8957", "8972", "8973", "8974",
"8975", "8976", "8977", "8978", "8979", "8980", "8981", "8982",
"8983", "8984", "8985", "9159", "9160", "9175", "9176", "9177",
"9178", "9185", "9186", "9201", "9202", "9203", "9204", "9205",
"9206", "9207", "9208", "9209", "9210", "9217", "9218", "9233",
"9234", "9241", "9242", "9261", "9262", "9277", "9278", "9285",
"9286", "9301", "9302", "9309", "9310", "9329", "9330", "9345",
"9346", "9353", "9354", "9369", "9370", "9371", "9372", "9373",
"9374", "9410", "9411", "9412", "9413", "9414", "9415", "9422",
"9423", "9424", "9425", "9426", "9427", "9428", "9429", "9430",
"9431", "9432", "9433", "9434", "9435", "9436", "9437", "9444",
"9445", "9452", "9453", "9454", "9455", "9456", "9457", "9458",
"9459", "9460", "9461", "9468", "9469", "9470", "9471", "9472",
"9473", "9474", "9475", "9476", "9477", "9478", "9479", "9480",
"9481", "9488", "9489", "9496", "9497", "9498", "9499", "9720",
"9721", "9722", "9723", "9724", "9725", "9726", "9727", "9728",
"9729", "9730", "9731", "9732", "9733", "9734", "9735", "9736",
"9737", "9738", "9739", "9740", "9741", "9742", "9743", "9744",
"9745", "9746", "9747", "9748", "9749", "9750", "9751", "9752",
"9753", "9754", "9755", "9756", "9757", "9758", "9759", "9760",
"9761"), class = "data.frame")


# Summarise a numeric vector as a one-row data frame.
#
# Args:
#   x: numeric vector of observations.
#
# Returns:
#   A data.frame with exactly one row and the columns Samples, AMean,
#   AStdev, Min, Max and p10, p20, ..., p90 (the 10th to 90th percentiles
#   in steps of ten).
#
# Missing values are dropped only for the percentiles (na.rm = TRUE).
# Min, Max, AMean and AStdev are computed without na.rm, and Samples is
# length(x), so any NA in x is counted and propagates into those columns.
Pstats <- function(x) {
  # One vectorised quantile() call replaces nine separate ones; (1:9) / 10
  # produces the same probabilities as the literals 0.1, 0.2, ..., 0.9.
  deciles <- quantile(x, probs = (1:9) / 10, na.rm = TRUE, names = FALSE)
  names(deciles) <- paste0("p", (1:9) * 10)

  # as.list() spreads the named percentiles into one column each.
  Result <- data.frame(
    Samples = length(x),
    AMean = mean(x),
    AStdev = sd(x),
    Min = min(x),
    Max = max(x),
    as.list(deciles)
  )

  # Optional export, left disabled. It is kept on a single line so that no
  # part of the call escapes the comment and breaks parsing:
  # write.table(Result, file = "Results.csv", sep = ",", row.names = FALSE)
  Result
}

# Compute Pstats separately for each N_Type group.
# by() hands each group's rows to the function as a data frame, so the
# summary must be taken from that subset (grp$AdRes). Referring to the
# attached, full-length AdRes instead returns the same whole-data statistics
# for every group. Columns are now reached through `areas` directly, so the
# attach() call is no longer needed and is dropped.
# The index is passed as a named list so the groups stay labelled "N_Type".
res <- by(areas, list(N_Type = areas$N_Type), function(grp) Pstats(grp$AdRes))

#need to convert res to a dataframe



Michael Bock, PhD
ENVIRON International Corporation
136 Commercial Street, Suite 402
Portland, ME 04101
phone: 207.347.4413
fax: 207.347.4384




This message contains information that may be confidential, ...{{dropped}}

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
Reply | Threaded
Open this post in threaded view
|

Re: Summary functions to dataframe

Gabor Grothendieck
Try this:

# Summary statistics for a numeric vector, returned as one named vector:
# Max, Min, AMean, AStdev, Samples, followed by the nine deciles, which keep
# quantile()'s own names ("10%", ..., "90%"). Only the deciles drop NAs.
Pstats <- function(x) {
  basics <- c(
    Max = max(x),
    Min = min(x),
    AMean = mean(x),
    AStdev = sd(x),
    Samples = length(x)
  )
  decile_probs <- 1:9 / 10
  deciles <- quantile(x, decile_probs, na.rm = TRUE)
  c(basics, deciles)
}

# Apply Pstats to the AdRes values within each level of N_Type; with()
# looks both columns up inside `areas`, so no attach() is required.
res <- with(areas, by(AdRes, N_Type, Pstats))
# Row-bind the per-group named vectors returned by Pstats into one matrix
# (wrap the call in as.data.frame() if a data frame is needed).
do.call("rbind", res)

Also, check out summaryBy in the doBy package at
http://genetics.agrsci.dk/~sorenh/misc/index.html



On 1/3/06, Mike Bock <[hidden email]> wrote:

> I have written a few different summary functions. I want to calculate
> the statistics by groups and I am having trouble getting the output as a
> dataframe. I have attached one example with a small dataset that
> calculates summary stats and percentiles, I have others that calculate
> upper confidence limits etc. I would like the output to be converted to
> a dataframe with one of the columns as the grouping variable. This seems
> simple but my attempts with do.call("cbind") and rbind have not worked
> so I have concluded I am missing something obvious. Any help is
> appreciated.
>
> Thanks,
> Mike
>
>
>
> areas <- structure (list(N_Type = structure(c(4, 1, 4, 1, 1, 4, 1, 4, 4,
> 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1,
> 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1,
> 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4,
> 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 1, 4, 1, 4, 4, 1, 4,
> 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1,
> 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4,
> 1, 4, 1, 4, 1, 1, 4, 1, 4, 2, 1, 2, 1, 1, 4, 1, 4, 1, 4, 4, 1,
> 4, 1), .Label = c("All", "Inside 370", "Not Applicable", "Outside 370"
> ), class = "factor"), AdRes = c(23.7, 23.7, 42.4, 42.4, 630,
> 630, 990, 990, 72.85, 72.85, 70.6, 70.6, 10, 10, 21.7, 21.7,
> 171.66, 171.66, 306, 306, 62.1, 62.1, 53.25, 53.25, 208, 208,
> 64.8, 64.8, 87.3, 87.3, 356, 356, 25.8, 25.8, 156, 156, 166,
> 166, 135.5, 135.5, 170.5, 170.5, 203, 203, 227.5, 227.5, 224,
> 224, 123, 123, 140.66, 140.66, 142.5, 142.5, 44.65, 44.65, 50.3,
> 50.3, 1320, 1320, 577, 577, 71.1, 71.1, 411, 411, 104, 104, 122,
> 122, 201, 201, 230, 230, 192, 192, 304, 304, 184.5, 184.5, 350,
> 350, 536, 536, 470.5, 470.5, 172, 172, 166, 166, 205, 205, 595,
> 595, 227.5, 227.5, 9.1, 9.1, 14.6, 14.6, 10.9, 10.9, 11.1, 11.1,
> 313.5, 313.5, 53.8, 53.8, 29.8, 29.8, 29.5, 29.5, 34.05, 34.05,
> 21.8, 21.8, 385.5, 385.5, 541, 541, 168, 168, 119, 119, 376,
> 376, 91.9, 91.9, 97.76, 97.76, 164, 164, 244, 244, 303.5, 303.5,
> 388, 388, 59.8, 59.8, 227.5, 227.5, 165, 165, 19.15, 19.15, 651,
> 651, 195, 195, 190, 190, 164, 164, 190, 190, 334, 334)), .Names =
> c("N_Type",
> "AdRes"), row.names = c("8956", "8957", "8972", "8973", "8974",
> "8975", "8976", "8977", "8978", "8979", "8980", "8981", "8982",
> "8983", "8984", "8985", "9159", "9160", "9175", "9176", "9177",
> "9178", "9185", "9186", "9201", "9202", "9203", "9204", "9205",
> "9206", "9207", "9208", "9209", "9210", "9217", "9218", "9233",
> "9234", "9241", "9242", "9261", "9262", "9277", "9278", "9285",
> "9286", "9301", "9302", "9309", "9310", "9329", "9330", "9345",
> "9346", "9353", "9354", "9369", "9370", "9371", "9372", "9373",
> "9374", "9410", "9411", "9412", "9413", "9414", "9415", "9422",
> "9423", "9424", "9425", "9426", "9427", "9428", "9429", "9430",
> "9431", "9432", "9433", "9434", "9435", "9436", "9437", "9444",
> "9445", "9452", "9453", "9454", "9455", "9456", "9457", "9458",
> "9459", "9460", "9461", "9468", "9469", "9470", "9471", "9472",
> "9473", "9474", "9475", "9476", "9477", "9478", "9479", "9480",
> "9481", "9488", "9489", "9496", "9497", "9498", "9499", "9720",
> "9721", "9722", "9723", "9724", "9725", "9726", "9727", "9728",
> "9729", "9730", "9731", "9732", "9733", "9734", "9735", "9736",
> "9737", "9738", "9739", "9740", "9741", "9742", "9743", "9744",
> "9745", "9746", "9747", "9748", "9749", "9750", "9751", "9752",
> "9753", "9754", "9755", "9756", "9757", "9758", "9759", "9760",
> "9761"), class = "data.frame")
>
>
> Pstats <- function(x)
>                {
>    Max = max(x)
>    Min = min(x)
>                AMean = mean(x)
>                AStdev = sd(x)
>                Samples <- length(x)
>                p10 <- quantile(x,0.1,na.rm = TRUE, names = FALSE)
>                p20 <- quantile(x,0.2,na.rm = TRUE, names = FALSE)
>                p30 <- quantile(x,0.3,na.rm = TRUE, names = FALSE)
>                p40 <- quantile(x,0.4,na.rm = TRUE, names = FALSE)
>                p50 <- quantile(x,0.5,na.rm = TRUE, names = FALSE)
>                p60 <- quantile(x,0.6,na.rm = TRUE, names = FALSE)
>                p70 <- quantile(x,0.7,na.rm = TRUE, names = FALSE)
>                p80 <- quantile(x,0.8,na.rm = TRUE, names = FALSE)
>                p90 <- quantile(x,0.9,na.rm = TRUE, names = FALSE)
>    Result <- data.frame(Samples,AMean,AStdev,
> Min,Max,p10,p20,p30,p40,p50,p60,p70,p80,p90)
>    return(Result)
>    #write.table(Result, file = "Results.csv", sep = ",",row.names =
> FALSE)
>        }
>
> attach(areas)
> res <- by(areas, N_Type, function (x)
>  (Pstats(AdRes)))
>
> #need to convert res to a dataframe
>
>
>
> Michael Bock, PhD
> ENVIRON International Corporation
> 136 Commercial Street, Suite 402
> Portland, ME 04101
> phone: 207.347.4413
> fax: 207.347.4384
>
>
>
>
> This message contains information that may be confidential, ...{{dropped}}
>
> ______________________________________________
> [hidden email] mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
>

______________________________________________
[hidden email] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html