Hi
I've just written an R script which will do this. The output of the 'combo' command should be saved as a text file. The script asks you to choose that file interactively (via 'file.choose') and reads it using the 'scan' function. The tidyverse needs to be installed, because the script loads it (dplyr commands are used).
Here's the script:
# Read in the data interactively: file.choose() lets the user pick the file
# (it should be a text file) and scan() returns one element per line.
options(stringsAsFactors = FALSE)  # keep text columns as character vectors
script <- scan(file = file.choose(), what = "string", sep = "\n")
df <- data.frame(script = script)
# Line-type label for every row, e.g. "utt" = UTTERANCE, "mor" = MOR.
# Starts out empty and is filled in by the steps further down.
df$type <- ""
# Identify 'mor' lines by searching for downpipes.
#
# string: character vector of lines to test.
# Returns a logical vector of the same length, TRUE where the element
# contains a downpipe (the character written as \x7C in the pattern).
has_downpipe <- function(string) {
  grepl(pattern = "[\x7C]", x = string)
}
library(tidyverse)
# Label every line with its type, working on the whole column at once.
# The assignments run in this order, so a later label overwrites an earlier
# one: any line containing a downpipe ends up as "mor" (the 'mor' tier).
first_char <- substr(df$script, 1, 1)
fifth_char <- substr(df$script, 5, 5)
df$type[substr(df$script, 1, 8) == "*** File"] <- "file"
df$type[first_char == "*" & fifth_char == ":"] <- "utt"
df$type[has_downpipe(df$script)] <- "mor"
# Fill down 'utt' (utterance over more than one line): an unlabelled line
# directly below an utterance line is labelled as part of that utterance.
# This runs row by row on purpose, so a label just written to row i - 1 is
# seen when row i is checked and a run of several lines is filled completely.
# NOTE(review): every unlabelled line that directly follows an utterance is
# treated as a continuation - confirm no other kind of line can appear there.
for (i in seq_len(nrow(df))[-1]) {  # rows 2..n; empty when there is one row
  if (df$type[i] == "" && df$type[i - 1] == "utt") {
    df$type[i] <- "utt"  # assignment (<-), not a comparison (==)
  }
}
# Remove non-essential lines, i.e. those that received no type label
df <- df[df$type != "", ]
# Fail with a clear message when nothing was recognised, instead of an
# obscure row-name error from numbering zero rows.
if (nrow(df) == 0) {
  stop(
    "No file, utterance or mor lines were found in the input.",
    call. = FALSE
  )
}
# Rename rows: NULL resets the row names to the default 1..n numbering
row.names(df) <- NULL
# Create variable for filename: on every "file" line the name is the text
# between the first and the second double quote (\x22 in the pattern).
# Rows of any other type keep the empty string.
df$file <- ""
for (row in which(df$type == "file")) {
  # Positions of all double quotes on this line
  quote_at <- gregexpr("[\x22]", df$script[row])[[1]]
  # With fewer than two quotes quote_at[2] is NA, and so is the result
  df$file[row] <- substr(df$script[row], quote_at[1] + 1, quote_at[2] - 1)
}
# Create variables of utterance and mor line. The text of each "utt" and
# "mor" row is appended to the row of the most recent "file" line above it.
# paste() joins with a space, so each collected string starts with a space.
df$utt <- ""
df$mor <- ""
target_row <- 1  # rows seen before any "file" line are collected on row 1
for (row in seq_len(nrow(df))) {
  line_type <- df$type[row]
  if (line_type == "file") {
    target_row <- row
  } else if (line_type == "utt") {
    df$utt[target_row] <- paste(df$utt[target_row], df$script[row])
  } else if (line_type == "mor") {
    df$mor[target_row] <- paste(df$mor[target_row], df$script[row])
  }
}
# Remove blank lines: keep only rows with a file name. which() also drops
# rows whose file name is NA.
df <- df[which(df$file != ""), ]
# An empty result is reported rather than failing on the row renumbering.
if (nrow(df) == 0) {
  warning(
    "No file lines with a quoted file name were found; the result is empty.",
    call. = FALSE
  )
}
# Rename rows: NULL resets them to 1..n and also works for zero rows
row.names(df) <- NULL
# Drop unnecessary variables
df <- df %>% select(file, utt, mor)