Here is an option with data.table_1.9.5
. Create the "data.table" from "data.frame" using setDT
, remove the NA
values in "dialled" column (!is.na(dialled)
), generate grouping variable by using rleid
on "Dialled_nbr", get the row index of the first and last rows for the levels of grouping variable (.I(c(1L, .N)]
), finally subset the "dt1" based on the row index.
library(data.table)
dt1 <- setDT(df)[!is.na(dialled)]
dt1[dt1[,.I[c(1L, .N)],rleid(dialled)]$V1]
# dialled Ringing state duration
#1: 123 NA NA 0
#2: 123 NA NA 60
#3: 222 NA inactive 0
#4: 222 NA inactive 37
#5: 123 NA inactive 0
#6: 123 NA active 60
Or using base R
df1 <- df[!is.na(df$dialled),]
grp<- inverse.rle(within.list(rle(df1$dialled),
values <- seq_along(values)))
df1[!duplicated(grp)|!duplicated(grp,fromLast=TRUE),]
# dialled Ringing state duration
#5 123 NA <NA> 0
#8 123 NA <NA> 60
#19 222 NA inactive 0
#21 222 NA inactive 37
#25 123 NA inactive 0
#27 123 NA active 60
Update
Based on the new dataset,
grp <- cumsum(c(TRUE,df$duration[-nrow(df)]!=0))
df[!duplicated(grp)|!duplicated(grp,fromLast=TRUE),]
# dialled Ringing state duration
#1 123 NA <NA> 0
#3 123 NA <NA> 60
#4 123 NA <NA> 0
#6 123 NA <NA> 70
#7 222 NA inactive 0
#9 222 NA inactive 37
#10 123 NA inactive 0
#12 123 NA active 60
data
df <- structure(list(dialled = c(NA, NA, NA, NA, 123L, 123L, 123L,
123L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 222L, 222L, 222L,
NA, NA, NA, 123L, 123L, 123L, NA), Ringing = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 145L, 145L, 145L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), state = c(NA, NA, NA,
NA, NA, NA, NA, NA, "active", "active", "inactive", "inactive",
"inactive", "inactive", "inactive", "active", "active", "inactive",
"inactive", "inactive", "inactive", "active", "active", "inactive",
"inactive", "inactive", "active", "active"), duration = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 60L, 0L, 0L, 0L, 0L, 0L, 0L, 56L, 0L,
0L, 0L, 0L, 0L, 37L, 0L, 0L, 0L, 0L, 0L, 60L, 0L)), .Names =
c("dialled", "Ringing", "state", "duration"), class = "data.frame",
row.names = c(NA, -28L))
newdata
df <- structure(list(dialled = c(123L, 123L, 123L, 123L, 123L, 123L,
222L, 222L, 222L, 123L, 123L, 123L), Ringing = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), state = c(NA, NA, NA, NA,
NA, NA, "inactive", "inactive", "inactive", "inactive", "inactive",
"active"), duration = c(0L, 0L, 60L, 0L, 0L, 70L, 0L, 0L, 37L,
0L, 0L, 60L)), .Names = c("dialled", "Ringing", "state", "duration"
), class = "data.frame", row.names = c(NA, -12L))