使用單迴圈做資料處理之問題

16 views
Skip to first unread message

本本

unread,
Sep 7, 2023, 2:51:57 PM9/7/23
to R軟體使用者論壇

下面是我想詢問的問題
我想用 data1 (col 3) 的數值取代 data2 (col 1) 的數值,前提是 data1 的 col 1 或 col 2 的數值等於 data2 (col 1)。我目前想到的方法只有雙迴圈,有沒有辦法用單迴圈或 which() 的方式找出解答?


data 1 (49892 row 3 col )


r/rstats - data base address question without nested loop

data 2 (49778 row 3 col )


r/rstats - data base address question without nested loop

here is my first approach


# First approach, vectorised: look up the replacement value (column 3 of
# genes_correspondence) for every key in outside_data_overlape without any
# explicit loop.
#
# Inputs defined elsewhere: outside_data, genes_correspondence,
# outside_data_overlape.
# NOTE(review): outside_data_overlape is presumably outside_data[loc, 3];
# confirm, because the final cbind() pairs the result with outside_data[loc, ].

# Rows of outside_data whose column 3 appears in either key column.
loc1 <- which(outside_data[, 3] %in% genes_correspondence[, 1])
loc2 <- which(outside_data[, 3] %in% genes_correspondence[, 2])
loc <- unique(c(loc1, loc2))

# Compare as character so factor columns are matched by label, not by code.
lookup_key <- as.character(outside_data_overlape)
row_by_col1 <- match(lookup_key, as.character(genes_correspondence[, 1]))
row_by_col2 <- match(lookup_key, as.character(genes_correspondence[, 2]))

# First row of genes_correspondence holding the key in column 1 or column 2
# (the row at which the nested loop used to `break`); NA when there is none.
first_row <- pmin(row_by_col1, row_by_col2, na.rm = TRUE)

# Always has one entry per key; keys without a match yield NA instead of
# leaving the vector too short.
outside_data_address <- genes_correspondence[first_row, 3]

cbind(outside_data_address, outside_data[loc, c(5, 6, 8, 9)])



this is the second approach


# Second approach, vectorised: replace column 1 of the matching rows of
# outside_data_address with column 3 of the corresponding row of
# genes_correspondence_final.
#
# Inputs defined elsewhere: genes_correspondence_final, outside_data_address.
# The original nested loop only scanned rows j >= i (so matches located in an
# earlier row were missed) and was limited to hard-coded 6000 / 6500 rows;
# match() handles every row in one pass.

# Compare as character so factor columns are matched by label.
lookup_col1 <- as.character(genes_correspondence_final[, 1])
lookup_col2 <- as.character(genes_correspondence_final[, 2])
address_key <- as.character(outside_data_address[, 1])

# Rows of the lookup table that are referenced by at least one address key.
loc <- which(lookup_col1 %in% address_key | lookup_col2 %in% address_key)

# Rows of outside_data_address whose key exists in either lookup column.
loc2 <- which(address_key %in% lookup_col1 | address_key %in% lookup_col2)

# NOTE: `sample` shadows base::sample(); the name is kept so that code
# following this script keeps working.
# drop = FALSE keeps the table shape even when a single row is selected.
sample <- genes_correspondence_final[loc, , drop = FALSE]
sample2 <- outside_data_address[loc2, , drop = FALSE]

if (length(loc2) > 0) {
  sample_key <- address_key[loc2]
  row_by_col1 <- match(sample_key, lookup_col1[loc])
  row_by_col2 <- match(sample_key, lookup_col2[loc])

  # First row of `sample` that holds the key in column 1 or column 2. Every
  # key kept in sample2 has at least one such row by construction of loc/loc2.
  first_row <- pmin(row_by_col1, row_by_col2, na.rm = TRUE)

  # Convert to character first so a factor column can receive new labels.
  sample2[, 1] <- as.character(sample2[, 1])
  sample2[, 1] <- as.character(sample[first_row, 3])
}

辛苦您們了

WEPA ^_^

unread,
Sep 19, 2023, 12:53:35 AM9/19/23
to R軟體使用者論壇
Hi friend,

參考以下程式碼, 本例使用一層 for-loop, 判斷 data1的第1,2行資料.

另以下網址亦有提供相關方法 

# title: for-loop
# date: 2023.9.18
set.seed(168)

# Prepare data1 (lookup table: x1 / x2 are keys, x3 is the replacement value)
# and data2 (table whose y1 column is to be replaced).
gene_no <- c(2, 3, 5, 6, 7, 11, 15, 17, 18)
data1 <- data.frame(
  x1 = c("no correspond", "Glyma03g", "Glyma0003s50", "Glyma06g", "Glyma07g",
         "Glyma11g", "Glyma15g", "no correspond", "Glyma18g"),
  x2 = paste0("Glyma", sprintf("%04d", gene_no)),
  x3 = paste0("Glyma.", sprintf("%02d", gene_no), "g")
)

data1$x1[7] <- "Glyma0006s50"
data1$x2[8] <- "Glyma0007s50"

data2 <- data.frame(
  y1 = paste0("Glyma", sprintf("%04d", c(2, 3, 5, 6, 7)), "s50"),
  y2 = round(runif(5) * -1, 5),
  y3 = round(runif(5), 9)
)

# Method 1
# Match on x1. match() returns, for every data2$y1, the row of data1 whose x1
# equals it (NA when there is none). Deriving both index vectors from this one
# result keeps them aligned pairwise, so the replacement stays correct even
# when the keys appear in a different order in data1 and data2.
match_row <- match(data2$y1, data1$x1)

# Rows of data2$y1 that have a counterpart in data1$x1
data2_replace1_index <- which(!is.na(match_row)) # 2 4

# The corresponding rows of data1, in the same order
data1_col1_index <- match_row[data2_replace1_index]
data1_col1_index # 3 7

# Values of data1$x1 that satisfied the condition
data1$x1[data1_col1_index] # "Glyma0003s50" "Glyma0006s50"

# Replace data2$y1 with the matching data1$x3
data2$y1[data2_replace1_index] <- data1$x3[data1_col1_index]

# Show the result of the replacement
data2

# The same steps apply when matching on x2 instead of x1.

# Method 2: the same replacement written as a single for-loop over the key
# columns of data1.

# Prepare data1 (lookup table: x1 / x2 are keys, x3 is the replacement value)
# and data2 (table whose y1 column is to be replaced).
set.seed(168)

gene_no <- c(2, 3, 5, 6, 7, 11, 15, 17, 18)
data1 <- data.frame(
  x1 = c("no correspond", "Glyma03g", "Glyma0003s50", "Glyma06g", "Glyma07g",
         "Glyma11g", "Glyma15g", "no correspond", "Glyma18g"),
  x2 = paste0("Glyma", sprintf("%04d", gene_no)),
  x3 = paste0("Glyma.", sprintf("%02d", gene_no), "g")
)

data1$x1[7] <- "Glyma0006s50"
data1$x2[8] <- "Glyma0007s50"

data2 <- data.frame(
  y1 = paste0("Glyma", sprintf("%04d", c(2, 3, 5, 6, 7)), "s50"),
  y2 = round(runif(5) * -1, 5),
  y3 = round(runif(5), 9)
)

# Columns of data1 to compare against, in priority order
check_index <- c(1, 2)

# Work on a copy so that data2 stays available for comparison
data2_new <- data2

# Tracks which rows were already replaced, so a later column never overwrites
# (or re-matches) a value written by an earlier column.
already_replaced <- rep(FALSE, nrow(data2))

for (i in check_index) {
  # For every original key, the row of data1 whose column i equals it
  # (NA when there is none). Both index vectors below come from this single
  # match() result, so they are aligned pairwise regardless of row order.
  match_row <- match(data2$y1, data1[[i]])
  data2_replace_index <- which(!is.na(match_row) & !already_replaced)
  data1_check_index <- match_row[data2_replace_index]

  data2_new$y1[data2_replace_index] <- data1$x3[data1_check_index]
  already_replaced[data2_replace_index] <- TRUE
}
data1
data2
data2_new
# end

2023.09.18-for-loop.png

chrisli...@gmail.com 在 2023年9月8日 星期五凌晨2:51:57 [UTC+8] 的信中寫道:
Reply all
Reply to author
Forward
0 new messages