r - Sum of longest string of non-zero values -


i have dataframe containing daily rainfall values @ 76 stations 1964-2013. each row different month particular station. here snippet of dataframe-

 station     year month days 1   2   3  4 5   6  7  8 9 10 11 12 13 14 15 16 17 18  19 20 21 22 23  24 25 26 27 28 29  30  31 usc00020750 1964     1   31 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0  25  0 23 51 36   0  0  0  0  0  0   0   0 usc00020750 1964     2   29 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0  48  0  0  0  3   0  0  0  0  0  0 inf inf usc00020750 1964     3   31 0  46  51  0 0  36 41 46 0  0  0  0 43  0  0  0  0  0   0  0  0 53 99 140 36  0  0  0  0   0   0 usc00020750 1964     4   30 5  69  23 30 0  18  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0  33 13  0  0  0 15   0 inf usc00020750 1964     5   31 0   0   0  0 0   0 43  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0 51  8  0  0   0   0 usc00020750 1964     6   30 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0  0 38  0  0   0 inf usc00020750 1964     7   31 0   0   0  0 0   0  0  0 0  0  0  0 41  0 13 13  0  0   0  0  8 51  0  71  0 10  0  0 20 165  25 usc00020750 1964     8   31 8  30 137  0 0   5 89  0 0  0 18 64  5  0  0  0  0  0   0  0  0  0  0   0  0 76  0  0  0   0   0 usc00020750 1964     9   30 0   0   0  0 0 119  0  0 0  0  0  0  0 41 25  0  0  0   0  0 25  0  0   0  0  0  0  0  0   0 inf usc00020750 1964    10   31 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0  0  0  0  0   0   0 usc00020750 1964    11   30 0   5   0  0 0   0  0  0 0  0 91  0  0  0 36 94  0  0   0  0  0  0  0   0  0  0  0  0  0   0 inf usc00020750 1964    12   31 0 107  20  0 0   0  0  0 0  0  0  0  0  0  0  0  0 79 152  0  0  0  0   0  0  0  0  0  0   0   0 

...

station year month days  1  2   3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28  29  30  31 usw00093129 2013    10   31  0  0   0  0  0  0  0  0 43 15  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0 41   3   8   0 usw00093129 2013    11   30  0  0   0 23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3 79 18 20  0  0  0  0  0   0   0 inf usw00093129 2013    12   31  0  0 175 33  0  0  3  0  0  0  0  0  0  0  0  0  0  0  5 15  0  0  0  0  0  0  0  0   0   0   0 

i trying find length of longest stretch of non-zero rainfall values each row , total rainfall in stretch. easiest way find length of longest stretch convert dataframe 0s , 1s, use rle , apply max(y$lengths[y$values!=0]) along each row. how find sum of values? helping out, in advance!

not one-liner, works :

df <- read.table(header=true,stringsasfactors=false,check.names=false,text= "station     year month days 1   2   3  4 5   6  7  8 9 10 11 12 13 14 15 16 17 18  19 20 21 22 23  24 25 26 27 28 29  30  31 usc00020750 1964     1   31 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0  25  0 23 51 36   0  0  0  0  0  0   0   0 usc00020750 1964     2   29 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0  48  0  0  0  3   0  0  0  0  0  0 inf inf usc00020750 1964     3   31 0  46  51  0 0  36 41 46 0  0  0  0 43  0  0  0  0  0   0  0  0 53 99 140 36  0  0  0  0   0   0 usc00020750 1964     4   30 5  69  23 30 0  18  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0  33 13  0  0  0 15   0 inf usc00020750 1964     5   31 0   0   0  0 0   0 43  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0 51  8  0  0   0   0 usc00020750 1964     6   30 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0  0 38  0  0   0 inf usc00020750 1964     7   31 0   0   0  0 0   0  0  0 0  0  0  0 41  0 13 13  0  0   0  0  8 51  0  71  0 10  0  0 20 165  25 usc00020750 1964     8   31 8  30 137  0 0   5 89  0 0  0 18 64  5  0  0  0  0  0   0  0  0  0  0   0  0 76  0  0  0   0   0 usc00020750 1964     9   30 0   0   0  0 0 119  0  0 0  0  0  0  0 41 25  0  0  0   0  0 25  0  0   0  0  0  0  0  0   0 inf usc00020750 1964    10   31 0   0   0  0 0   0  0  0 0  0  0  0  0  0  0  0  0  0   0  0  0  0  0   0  0  0  0  0  0   0   0 usc00020750 1964    11   30 0   5   0  0 0   0  0  0 0  0 91  0  0  0 36 94  0  0   0  0  0  0  0   0  0  0  0  0  0   0 inf usc00020750 1964    12   31 0 107  20  0 0   0  0  0 0  0  0  0  0  0  0  0  0 79 152  0  0  0  0   0  0  0  0  0  0   0   0")  res <- lapply(1:nrow(df), function(r){   monthdays <- df[r,'days']   rain <- as.numeric(df[r,(1:monthdays) + 4])   enc <- rle(rain > 0)   if(all(!enc$values))     return(c(0,0))   len <- enc$lengths   len[!enc$values] <- 0   max.idx <- which.max(len)   lastidx <- cumsum(enc$lengths)[max.idx]   firstidx <- lastidx - enc$lengths[max.idx] + 1   tot <- sum(rain[firstidx:lastidx])   stretch <- lastidx - firstidx + 1   return(c(stretch,tot))   }) columnstoadd <- do.call(rbind,res) colnames(columnstoadd) <- c('stretchlen','stretchrain')  df2 <- cbind(df,columnstoadd) 

result :

# print result without months values better readability > df2[,-(5:35)]        station year month days stretchlen stretchrain 1  usc00020750 1964     1   31          3         110 2  usc00020750 1964     2   29          1          48 3  usc00020750 1964     3   31          4         328 4  usc00020750 1964     4   30          4         127 5  usc00020750 1964     5   31          2          59 6  usc00020750 1964     6   30          1          38 7  usc00020750 1964     7   31          3         210 8  usc00020750 1964     8   31          3         175 9  usc00020750 1964     9   30          2          66 10 usc00020750 1964    10   31          0           0 11 usc00020750 1964    11   30          2         130 12 usc00020750 1964    12   31          2         127 

btw, if want stick apply, :

columnstoadd <-  t(apply(df[,-(1:3)],margin=1,function(r){   monthdays <- r[1]   rain <- as.numeric(r[-1])   enc <- rle(rain > 0)   if(all(!enc$values))     return(c(0,0))   len <- enc$lengths   len[!enc$values] <- 0   max.idx <- which.max(len)   lastidx <- cumsum(enc$lengths)[max.idx]   firstidx <- lastidx - enc$lengths[max.idx] + 1   tot <- sum(rain[firstidx:lastidx])   stretch <- lastidx - firstidx + 1   return(c(stretch,tot))   }))  colnames(columnstoadd) <- c('stretchlen','stretchrain')  df2 <- cbind(df,columnstoadd) 

i don't using apply on data.frame's since has been created matrices , coerces columns same type before calling function (hence if work on columns of different types need careful).


Comments

Popular posts from this blog

javascript - Slick Slider width recalculation -

jsf - PrimeFaces Datatable - What is f:facet actually doing? -

angular2 services - Angular 2 RC 4 Http post not firing -