Code>cloneでurlをコピーNew Project>Version Control>Git>Repositry URL
にurl貼り付けてCreate Project## [1] 1 2 3
## [1] 1 3 5 7 9
## [1] 1 1 1
## [1] 1 2 3 1 2 3 1 2 3
## [1] 1 1 1 2 2 2 3 3 3
## [1] "a" "d"
## [1] "a" "d"
## [1] │ <1>
## [2] │ <1><1>
## [3] │ <1><1><1>
## [1] │ <1>
## [2] │ <1><1>
## [3] │ <1><1><1>
## [1] │ <1>
## [2] │ <1><1>
## [3] │ <1><1><1>
## [1] │ 1
## [2] │ 11
## [3] │ 111
## [4] │ <l>
## [5] │ |
## [6] │ <o><n><e>
## [7] │ 一
## [1] │ <1>
## [2] │ <1><1>
## [3] │ <1><1><1>
## [4] │ <l>
## [6] │ <o><n><e>
## [1] │ <1>
## [2] │ <1><1>
## [3] │ <1><1><1>
## [4] │ <l>
## [5] │ |
## [6] │ <o><n><e>
## [7] │ 一
## [2] │ <test_test>
## [3] │ <test_test>_test
## [5] │ <test_test>test
## [6] │ test<test_test>
## [7] │ <test_test>_
## [2] │ <test_test>
## [3] │ <test_test>_test
## [5] │ <test_test>test
## [6] │ test<test_test>
## [7] │ <test_test>_
## [8] │ <teest_teest>
## [4] │ <testtest>
## [5] │ test_<testtest>
## [6] │ <testtest>_test
## [6] │ <testtest_test>
mtcars_vsam <-
mtcars |>
summarise(across(c(vs, am),
list(
p = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(x, na.rm = TRUE),
total_n = \(x) sum(!is.na(x))
))
)
mtcars_vsam## vs_p vs_n vs_total_n am_p am_n am_total_n
## 1 0.4375 14 32 0.40625 13 32
# 降順に並び替えて1行目を取得
max_value <-
names(mtcars_vsam) |>
str_count() |>
as_tibble() |>
arrange(desc(value))
max_value## # A tibble: 6 × 1
## value
## <int>
## 1 10
## 2 10
## 3 4
## 4 4
## 5 4
## 6 4
# 文字数の最大値取得
max_value <-
max_value |>
slice(1)
# total_pの前までで切る情報を追加したいため、最大値-total_pの文字数計算
max_value - str_count("total_p")## value
## 1 3
mtcars_vsam |>
pivot_longer(everything(),
names_to = c("items", ".value"),
names_pattern = "(.{1,2})_(.+)") # _の前は1~2字までに限定## # A tibble: 2 × 4
## items p n total_n
## <chr> <dbl> <dbl> <int>
## 1 vs 0.438 14 32
## 2 am 0.406 13 32
https://r4ds.had.co.nz/strings.html#repetition
## [1] │ <x><x><x><x><x>
## [1] │ <xxx>xx
## [1] │ <xx>_<xxx>
## [2] │ x_<xx>_<xxx>_<xxxx>_<xxxx>x
## [1] │ <xx>_<xxx>
## [2] │ x_<xx>_<xxx>_<xxxx>_<xxxx>x
df_peng_res <-
df_peng |>
summarise(across(ends_with("_m"),
list( p = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(x, na.rm = TRUE), # bill_length_mm_mが1のn
total_n = \(x) sum(!is.na(x)))) # bill_length_mm_mのna以外のn(total)
)
# 変数名の文字の長さ確認
# 降順に並び替えて1行目を取得
max_value <-
names(df_peng_res) |>
str_count() |>
as_tibble() |>
arrange(desc(value))
max_value## # A tibble: 9 × 1
## value
## <int>
## 1 27
## 2 24
## 3 23
## 4 21
## 5 21
## 6 18
## 7 18
## 8 17
## 9 17
# 文字数の最大値取得
max_value <-
max_value |>
slice(1)
# total_pの前までで切る情報を追加したいため、最大値-total_pの文字数計算
max_value - str_count("total_p")## value
## 1 20
df_peng_res |>
pivot_longer(everything(),
names_to = c("items", ".value"),
names_pattern = "(.{1,20})_(.+)") # _の前は1~20字までに限定## # A tibble: 3 × 4
## items p n total_n
## <chr> <dbl> <dbl> <int>
## 1 bill_length_mm_m 0.512 175 342
## 2 bill_depth_mm_m 0.532 182 342
## 3 flipper_length_mm_m 0.433 148 342
## # A tibble: 3 × 2
## moji n
## <chr> <int>
## 1 a 3
## 2 c 2
## 3 <NA> 1
## moji n percent valid_percent
## a 3 0.5000000 0.6
## c 2 0.3333333 0.4
## <NA> 1 0.1666667 NA
## # A tibble: 3 × 2
## moji n
## <fct> <int>
## 1 a 3
## 2 c 2
## 3 <NA> 1
## moji n percent valid_percent
## a 3 0.5000000 0.6
## b 0 0.0000000 0.0
## c 2 0.3333333 0.4
## <NA> 1 0.1666667 NA
## # A tibble: 4 × 2
## moji n
## <fct> <int>
## 1 a 3
## 2 b 0
## 3 c 2
## 4 <NA> 1
## gear vs n
## 1 3 0 12
## 2 3 1 3
## 3 4 0 2
## 4 4 1 10
## 5 5 0 4
## 6 5 1 1
## # A tibble: 8 × 3
## species sex n
## <fct> <fct> <int>
## 1 Adelie female 73
## 2 Adelie male 73
## 3 Adelie <NA> 6
## 4 Chinstrap female 34
## 5 Chinstrap male 34
## 6 Gentoo female 58
## 7 Gentoo male 61
## 8 Gentoo <NA> 5
## # A tibble: 8 × 4
## # Groups: species [3]
## species sex n perc
## <fct> <fct> <int> <dbl>
## 1 Adelie female 73 48.0
## 2 Adelie male 73 48.0
## 3 Adelie <NA> 6 3.95
## 4 Chinstrap female 34 50
## 5 Chinstrap male 34 50
## 6 Gentoo female 58 46.8
## 7 Gentoo male 61 49.2
## 8 Gentoo <NA> 5 4.03
penguins |>
drop_na(species, sex) |>
count(species, sex) |>
group_by(species) |>
mutate(perc = n/sum(n)*100)## # A tibble: 6 × 4
## # Groups: species [3]
## species sex n perc
## <fct> <fct> <int> <dbl>
## 1 Adelie female 73 50
## 2 Adelie male 73 50
## 3 Chinstrap female 34 50
## 4 Chinstrap male 34 50
## 5 Gentoo female 58 48.7
## 6 Gentoo male 61 51.3
## gear 0 1
## 3 12 3
## 4 2 10
## 5 4 1
## gear 0 1
## 3 0.8000000 0.2000000
## 4 0.1666667 0.8333333
## 5 0.8000000 0.2000000
## gear 0 1
## 3 0.8 (12) 0.2 (3)
## 4 0.166666666666667 (2) 0.833333333333333 (10)
## 5 0.8 (4) 0.2 (1)
## gear 0 1
## 3 80.0% (12) 20.0% (3)
## 4 16.7% (2) 83.3% (10)
## 5 80.0% (4) 20.0% (1)
## # A tibble: 13 × 5
## # Groups: island, species [5]
## island species sex n perc
## <fct> <fct> <fct> <int> <dbl>
## 1 Biscoe Adelie female 22 50
## 2 Biscoe Adelie male 22 50
## 3 Biscoe Gentoo female 58 46.8
## 4 Biscoe Gentoo male 61 49.2
## 5 Biscoe Gentoo <NA> 5 4.03
## 6 Dream Adelie female 27 48.2
## 7 Dream Adelie male 28 50
## 8 Dream Adelie <NA> 1 1.79
## 9 Dream Chinstrap female 34 50
## 10 Dream Chinstrap male 34 50
## 11 Torgersen Adelie female 24 46.2
## 12 Torgersen Adelie male 23 44.2
## 13 Torgersen Adelie <NA> 5 9.62
| species | female | male | All | |
|---|---|---|---|---|
| Adelie | N | 73 | 73 | 152 |
| % row | 48.0 | 48.0 | 100.0 | |
| Chinstrap | N | 34 | 34 | 68 |
| % row | 50.0 | 50.0 | 100.0 | |
| Gentoo | N | 58 | 61 | 124 |
| % row | 46.8 | 49.2 | 100.0 | |
| All | N | 165 | 168 | 344 |
| % row | 48.0 | 48.8 | 100.0 |
## [1] 2
## [1] 3
{}不要## [1] 11
## [1] 101
## [1] 5
( )で囲むと実行できる## [1] 2
## function (.data, ...)
## {
## UseMethod("select")
## }
## <bytecode: 0x0000021e49b55b70>
## <environment: namespace:dplyr>
methods()で調べるといくつか候補が出る## [1] select.data.frame* select.list
## see '?methods' for accessing help and source code
:::(注:コロン3つ)をつけてmethodsで調べた対象を入力して実行するとコードが確認できる## function (.data, ...)
## {
## error_call <- dplyr_error_call()
## loc <- tidyselect::eval_select(expr(c(...)), data = .data,
## error_call = error_call)
## loc <- ensure_group_vars(loc, .data, notify = TRUE)
## out <- dplyr_col_select(.data, loc)
## out <- set_names(out, names(loc))
## out
## }
## <bytecode: 0x0000021e529122b0>
## <environment: namespace:dplyr>
## A single object matching 'select.data.frame' was found
## It was found in the following places
## registered S3 method for select from namespace dplyr
## namespace:dplyr
## with value
##
## function (.data, ...)
## {
## error_call <- dplyr_error_call()
## loc <- tidyselect::eval_select(expr(c(...)), data = .data,
## error_call = error_call)
## loc <- ensure_group_vars(loc, .data, notify = TRUE)
## out <- dplyr_col_select(.data, loc)
## out <- set_names(out, names(loc))
## out
## }
## <bytecode: 0x0000021e529122b0>
## <environment: namespace:dplyr>
mean_n <-
function(data, variable){
data |>
summarise(across({{variable}},
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))))
)
}
mean_n(penguins, where(is.numeric))## # A tibble: 1 × 10
## bill_length_mm_mean bill_length_mm_n bill_depth_mm_mean bill_depth_mm_n
## <dbl> <int> <dbl> <int>
## 1 43.9 342 17.2 342
## # ℹ 6 more variables: flipper_length_mm_mean <dbl>, flipper_length_mm_n <int>,
## # body_mass_g_mean <dbl>, body_mass_g_n <int>, year_mean <dbl>, year_n <int>
mean_ns <-
\(data, variable){
data |>
summarise(across({{variable}},
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))))
)
}
mean_ns(penguins, where(is.numeric))## # A tibble: 1 × 10
## bill_length_mm_mean bill_length_mm_n bill_depth_mm_mean bill_depth_mm_n
## <dbl> <int> <dbl> <int>
## 1 43.9 342 17.2 342
## # ℹ 6 more variables: flipper_length_mm_mean <dbl>, flipper_length_mm_n <int>,
## # body_mass_g_mean <dbl>, body_mass_g_n <int>, year_mean <dbl>, year_n <int>
penguins |>
summarise(across(where(is.numeric),
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))))
)## # A tibble: 1 × 10
## bill_length_mm_mean bill_length_mm_n bill_depth_mm_mean bill_depth_mm_n
## <dbl> <int> <dbl> <int>
## 1 43.9 342 17.2 342
## # ℹ 6 more variables: flipper_length_mm_mean <dbl>, flipper_length_mm_n <int>,
## # body_mass_g_mean <dbl>, body_mass_g_n <int>, year_mean <dbl>, year_n <int>
cmean_n <-
function(data, variable){
data |>
summarise(across({{variable}},
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))))
) |>
pivot_longer(everything(),
names_to = c("variables", ".value"), # ".value"の部分を列名に
names_pattern = "(.*)_(.*)")
}
cmean_n(penguins, where(is.numeric))## # A tibble: 5 × 3
## variables mean n
## <chr> <dbl> <int>
## 1 bill_length_mm 43.9 342
## 2 bill_depth_mm 17.2 342
## 3 flipper_length_mm 201. 342
## 4 body_mass_g 4202. 342
## 5 year 2008. 344
penguins |>
summarise(across(where(is.numeric),
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))))
) |>
pivot_longer(everything(),
names_to = c("variables", ".value"), # ".value"の部分を列名に
names_pattern = "(.*)_(.*)")## # A tibble: 5 × 3
## variables mean n
## <chr> <dbl> <int>
## 1 bill_length_mm 43.9 342
## 2 bill_depth_mm 17.2 342
## 3 flipper_length_mm 201. 342
## 4 body_mass_g 4202. 342
## 5 year 2008. 344
https://community.rstudio.com/t/using-map-with-a-vector-of-variables-and-dplyr-programming/93088
pickを使う理由
across()はその後に関数を適用する仕様だから,そうじゃない場合のよりよい選択肢としてall_group = function(data, outcome_vars, group_vars = NULL) {
data |>
group_by(pick({{group_vars}})) |>
summarise(across({{outcome_vars}},
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x)))))
}
all_group(penguins, bill_length_mm)## # A tibble: 1 × 2
## bill_length_mm_mean bill_length_mm_n
## <dbl> <int>
## 1 43.9 342
## # A tibble: 3 × 3
## species bill_length_mm_mean bill_length_mm_n
## <fct> <dbl> <int>
## 1 Adelie 38.8 151
## 2 Chinstrap 48.8 68
## 3 Gentoo 47.5 123
penguins |>
summarise(across(bill_length_mm,
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x)))))## # A tibble: 1 × 2
## bill_length_mm_mean bill_length_mm_n
## <dbl> <int>
## 1 43.9 342
penguins |>
group_by(species) |>
summarise(across(bill_length_mm,
list(mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x)))))## # A tibble: 3 × 3
## species bill_length_mm_mean bill_length_mm_n
## <fct> <dbl> <int>
## 1 Adelie 38.8 151
## 2 Chinstrap 48.8 68
## 3 Gentoo 47.5 123
## [[1]]
## # A tibble: 1 × 2
## bill_length_mm_mean bill_length_mm_n
## <dbl> <int>
## 1 43.9 342
##
## [[2]]
## # A tibble: 3 × 3
## species bill_length_mm_mean bill_length_mm_n
## <fct> <dbl> <int>
## 1 Adelie 38.8 151
## 2 Chinstrap 48.8 68
## 3 Gentoo 47.5 123
quos(NULL, species, island, c(species, island)) |>
map( \(x) all_group(penguins, bill_length_mm, !!x))## [[1]]
## # A tibble: 1 × 2
## bill_length_mm_mean bill_length_mm_n
## <dbl> <int>
## 1 43.9 342
##
## [[2]]
## # A tibble: 3 × 3
## species bill_length_mm_mean bill_length_mm_n
## <fct> <dbl> <int>
## 1 Adelie 38.8 151
## 2 Chinstrap 48.8 68
## 3 Gentoo 47.5 123
##
## [[3]]
## # A tibble: 3 × 3
## island bill_length_mm_mean bill_length_mm_n
## <fct> <dbl> <int>
## 1 Biscoe 45.3 167
## 2 Dream 44.2 124
## 3 Torgersen 39.0 51
##
## [[4]]
## # A tibble: 5 × 4
## # Groups: species [3]
## species island bill_length_mm_mean bill_length_mm_n
## <fct> <fct> <dbl> <int>
## 1 Adelie Biscoe 39.0 44
## 2 Adelie Dream 38.5 56
## 3 Adelie Torgersen 39.0 51
## 4 Chinstrap Dream 48.8 68
## 5 Gentoo Biscoe 47.5 123
## # A tibble: 4 × 3
## bill_length_mm_mean bill_length_mm_n species
## <dbl> <int> <fct>
## 1 43.9 342 <NA>
## 2 38.8 151 Adelie
## 3 48.8 68 Chinstrap
## 4 47.5 123 Gentoo
quos(NULL, species, island, c(species, island)) |>
map(\(x) all_group(penguins, bill_length_mm, !!x)) |>
bind_rows()## # A tibble: 12 × 4
## bill_length_mm_mean bill_length_mm_n species island
## <dbl> <int> <fct> <fct>
## 1 43.9 342 <NA> <NA>
## 2 38.8 151 Adelie <NA>
## 3 48.8 68 Chinstrap <NA>
## 4 47.5 123 Gentoo <NA>
## 5 45.3 167 <NA> Biscoe
## 6 44.2 124 <NA> Dream
## 7 39.0 51 <NA> Torgersen
## 8 39.0 44 Adelie Biscoe
## 9 38.5 56 Adelie Dream
## 10 39.0 51 Adelie Torgersen
## 11 48.8 68 Chinstrap Dream
## 12 47.5 123 Gentoo Biscoe
nest_tabyl = function(df, outcome_var, nest_vars = NULL){
df |>
nest(data = !c({{nest_vars}})) |>
mutate(freq =
map(data, \(x) tabyl(x, {{outcome_var}}))) |> # show_na = FALSEで欠損無しのpercentのみ算出
select(!data) |>
unnest(cols = c(freq)) |>
as_tibble()
}
# island x species
nest_tabyl(penguins, species, island)## # A tibble: 9 × 4
## island species n percent
## <fct> <fct> <int> <dbl>
## 1 Torgersen Adelie 52 1
## 2 Torgersen Chinstrap 0 0
## 3 Torgersen Gentoo 0 0
## 4 Biscoe Adelie 44 0.262
## 5 Biscoe Chinstrap 0 0
## 6 Biscoe Gentoo 124 0.738
## 7 Dream Adelie 56 0.452
## 8 Dream Chinstrap 68 0.548
## 9 Dream Gentoo 0 0
## # A tibble: 3 × 3
## species n percent
## <fct> <int> <dbl>
## 1 Adelie 152 0.442
## 2 Chinstrap 68 0.198
## 3 Gentoo 124 0.360
## [[1]]
## # A tibble: 3 × 3
## species n percent
## <fct> <int> <dbl>
## 1 Adelie 152 0.442
## 2 Chinstrap 68 0.198
## 3 Gentoo 124 0.360
##
## [[2]]
## # A tibble: 9 × 4
## island species n percent
## <fct> <fct> <int> <dbl>
## 1 Torgersen Adelie 52 1
## 2 Torgersen Chinstrap 0 0
## 3 Torgersen Gentoo 0 0
## 4 Biscoe Adelie 44 0.262
## 5 Biscoe Chinstrap 0 0
## 6 Biscoe Gentoo 124 0.738
## 7 Dream Adelie 56 0.452
## 8 Dream Chinstrap 68 0.548
## 9 Dream Gentoo 0 0
# 層:全体,island, sex, island x sex
quos(NULL, island, sex, c(island, sex)) |>
map( \(x) nest_tabyl(penguins, species, !!x))## [[1]]
## # A tibble: 3 × 3
## species n percent
## <fct> <int> <dbl>
## 1 Adelie 152 0.442
## 2 Chinstrap 68 0.198
## 3 Gentoo 124 0.360
##
## [[2]]
## # A tibble: 9 × 4
## island species n percent
## <fct> <fct> <int> <dbl>
## 1 Torgersen Adelie 52 1
## 2 Torgersen Chinstrap 0 0
## 3 Torgersen Gentoo 0 0
## 4 Biscoe Adelie 44 0.262
## 5 Biscoe Chinstrap 0 0
## 6 Biscoe Gentoo 124 0.738
## 7 Dream Adelie 56 0.452
## 8 Dream Chinstrap 68 0.548
## 9 Dream Gentoo 0 0
##
## [[3]]
## # A tibble: 9 × 4
## sex species n percent
## <fct> <fct> <int> <dbl>
## 1 male Adelie 73 0.435
## 2 male Chinstrap 34 0.202
## 3 male Gentoo 61 0.363
## 4 female Adelie 73 0.442
## 5 female Chinstrap 34 0.206
## 6 female Gentoo 58 0.352
## 7 <NA> Adelie 6 0.545
## 8 <NA> Chinstrap 0 0
## 9 <NA> Gentoo 5 0.455
##
## [[4]]
## # A tibble: 27 × 5
## island sex species n percent
## <fct> <fct> <fct> <int> <dbl>
## 1 Torgersen male Adelie 23 1
## 2 Torgersen male Chinstrap 0 0
## 3 Torgersen male Gentoo 0 0
## 4 Torgersen female Adelie 24 1
## 5 Torgersen female Chinstrap 0 0
## 6 Torgersen female Gentoo 0 0
## 7 Torgersen <NA> Adelie 5 1
## 8 Torgersen <NA> Chinstrap 0 0
## 9 Torgersen <NA> Gentoo 0 0
## 10 Biscoe female Adelie 22 0.275
## # ℹ 17 more rows
penguins |>
nest(data = !c(island)) |>
mutate(freq =
map(data, \(x) tabyl(x, species))) |>
select(!data) |>
unnest(cols = c(freq)) |>
as_tibble()## # A tibble: 9 × 4
## island species n percent
## <fct> <fct> <int> <dbl>
## 1 Torgersen Adelie 52 1
## 2 Torgersen Chinstrap 0 0
## 3 Torgersen Gentoo 0 0
## 4 Biscoe Adelie 44 0.262
## 5 Biscoe Chinstrap 0 0
## 6 Biscoe Gentoo 124 0.738
## 7 Dream Adelie 56 0.452
## 8 Dream Chinstrap 68 0.548
## 9 Dream Gentoo 0 0
## [[1]]
## # A tibble: 3 × 3
## species n percent
## <fct> <int> <dbl>
## 1 Adelie 152 0.442
## 2 Chinstrap 68 0.198
## 3 Gentoo 124 0.360
##
## [[2]]
## # A tibble: 3 × 3
## island n percent
## <fct> <int> <dbl>
## 1 Biscoe 168 0.488
## 2 Dream 124 0.360
## 3 Torgersen 52 0.151
## [[1]]
## # A tibble: 3 × 3
## species n percent
## <fct> <int> <dbl>
## 1 Adelie 152 0.442
## 2 Chinstrap 68 0.198
## 3 Gentoo 124 0.360
##
## [[2]]
## # A tibble: 3 × 3
## island n percent
## <fct> <int> <dbl>
## 1 Biscoe 168 0.488
## 2 Dream 124 0.360
## 3 Torgersen 52 0.151
## [[1]]
## # A tibble: 9 × 4
## sex species n percent
## <fct> <fct> <int> <dbl>
## 1 male Adelie 73 0.435
## 2 male Chinstrap 34 0.202
## 3 male Gentoo 61 0.363
## 4 female Adelie 73 0.442
## 5 female Chinstrap 34 0.206
## 6 female Gentoo 58 0.352
## 7 <NA> Adelie 6 0.545
## 8 <NA> Chinstrap 0 0
## 9 <NA> Gentoo 5 0.455
##
## [[2]]
## # A tibble: 9 × 4
## sex island n percent
## <fct> <fct> <int> <dbl>
## 1 male Biscoe 83 0.494
## 2 male Dream 62 0.369
## 3 male Torgersen 23 0.137
## 4 female Biscoe 80 0.485
## 5 female Dream 61 0.370
## 6 female Torgersen 24 0.145
## 7 <NA> Biscoe 5 0.455
## 8 <NA> Dream 1 0.0909
## 9 <NA> Torgersen 5 0.455
library(psychTools) # bfiの格納パッケージ
ac <-
bfi |>
select(A1,A2,C1,C2) |>
names()
# 上で定義した変数についてnest_tabyl実行
ac_res <-
map(all_of(ac), \(x) nest_tabyl(bfi, x, gender)) |>
set_names(ac) # 各要素に項目名の名前を設定
# 各リスト要素の上から3つを表示
map(ac_res, \(x) head(x, 3))## $A1
## # A tibble: 3 × 5
## gender A1 n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 202 0.220 0.220
## 2 1 2 284 0.309 0.309
## 3 1 3 160 0.174 0.174
##
## $A2
## # A tibble: 3 × 5
## gender A2 n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 26 0.0283 0.0286
## 2 1 2 63 0.0686 0.0694
## 3 1 3 67 0.0729 0.0738
##
## $C1
## # A tibble: 3 × 5
## gender C1 n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 23 0.0250 0.0252
## 2 1 2 53 0.0577 0.0581
## 3 1 3 103 0.112 0.113
##
## $C2
## # A tibble: 3 × 5
## gender C2 n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 31 0.0337 0.0340
## 2 1 2 95 0.103 0.104
## 3 1 3 112 0.122 0.123
# 各変数名を統一した名前にrename
ac_res <-
ac_res |>
map(\(x) rename_with(x,
\(x) "response", # 統一名をここに
starts_with(c("A", "C")))
)
# 各リスト要素の上から3つを表示
map(ac_res, \(x) head(x, 3))## $A1
## # A tibble: 3 × 5
## gender response n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 202 0.220 0.220
## 2 1 2 284 0.309 0.309
## 3 1 3 160 0.174 0.174
##
## $A2
## # A tibble: 3 × 5
## gender response n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 26 0.0283 0.0286
## 2 1 2 63 0.0686 0.0694
## 3 1 3 67 0.0729 0.0738
##
## $C1
## # A tibble: 3 × 5
## gender response n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 23 0.0250 0.0252
## 2 1 2 53 0.0577 0.0581
## 3 1 3 103 0.112 0.113
##
## $C2
## # A tibble: 3 × 5
## gender response n percent valid_percent
## <int> <int> <int> <dbl> <dbl>
## 1 1 1 31 0.0337 0.0340
## 2 1 2 95 0.103 0.104
## 3 1 3 112 0.122 0.123
## # A tibble: 56 × 6
## items gender response n percent valid_percent
## <chr> <int> <int> <int> <dbl> <dbl>
## 1 A1 1 1 202 0.220 0.220
## 2 A1 1 2 284 0.309 0.309
## 3 A1 1 3 160 0.174 0.174
## 4 A1 1 4 139 0.151 0.151
## 5 A1 1 5 99 0.108 0.108
## 6 A1 1 6 34 0.0370 0.0370
## 7 A1 1 NA 1 0.00109 NA
## 8 A1 2 1 720 0.383 0.386
## 9 A1 2 2 534 0.284 0.286
## 10 A1 2 3 242 0.129 0.130
## # ℹ 46 more rows
## [[1]]
## # A tibble: 5 × 4
## vs cyl n percent
## <dbl> <dbl> <int> <dbl>
## 1 0 4 1 0.0556
## 2 0 6 3 0.167
## 3 0 8 14 0.778
## 4 1 4 10 0.714
## 5 1 6 4 0.286
##
## [[2]]
## # A tibble: 6 × 4
## vs gear n percent
## <dbl> <dbl> <int> <dbl>
## 1 0 3 12 0.667
## 2 0 4 2 0.111
## 3 0 5 4 0.222
## 4 1 3 3 0.214
## 5 1 4 10 0.714
## 6 1 5 1 0.0714
## [[1]]
## # A tibble: 7 × 5
## vs am cyl n percent
## <dbl> <dbl> <dbl> <int> <dbl>
## 1 0 1 4 1 0.167
## 2 0 1 6 3 0.5
## 3 0 1 8 2 0.333
## 4 1 1 4 7 1
## 5 1 0 4 3 0.429
## 6 1 0 6 4 0.571
## 7 0 0 8 12 1
##
## [[2]]
## # A tibble: 7 × 5
## vs am gear n percent
## <dbl> <dbl> <dbl> <int> <dbl>
## 1 0 1 4 2 0.333
## 2 0 1 5 4 0.667
## 3 1 1 4 6 0.857
## 4 1 1 5 1 0.143
## 5 1 0 3 3 0.429
## 6 1 0 4 4 0.571
## 7 0 0 3 12 1
groups <- quos(NULL, NULL, vs, am)
outcomes <- quos(cyl, gear, cyl, gear) # 層別と数をそろえるため対応する対象を繰り返す
map2(outcomes, groups, \(x, y) nest_tabyl(mtcars, !!x, !!y))## [[1]]
## # A tibble: 3 × 3
## cyl n percent
## <dbl> <int> <dbl>
## 1 4 11 0.344
## 2 6 7 0.219
## 3 8 14 0.438
##
## [[2]]
## # A tibble: 3 × 3
## gear n percent
## <dbl> <int> <dbl>
## 1 3 15 0.469
## 2 4 12 0.375
## 3 5 5 0.156
##
## [[3]]
## # A tibble: 5 × 4
## vs cyl n percent
## <dbl> <dbl> <int> <dbl>
## 1 0 4 1 0.0556
## 2 0 6 3 0.167
## 3 0 8 14 0.778
## 4 1 4 10 0.714
## 5 1 6 4 0.286
##
## [[4]]
## # A tibble: 4 × 4
## am gear n percent
## <dbl> <dbl> <int> <dbl>
## 1 1 4 8 0.615
## 2 1 5 5 0.385
## 3 0 3 15 0.789
## 4 0 4 4 0.211
引数の文字そのままだけだとエラーになるので特別な指定が必要
{{}}演算子を使えば関数の中で,データフレームの変数名を入れられる
参照
mean_by <- function(df, by, var){
df |>
group_by({{by}}) |>
summarise(mean = mean({{var}}, na.rm = TRUE))
}
mean_by(penguins, species, bill_length_mm)## # A tibble: 3 × 2
## species mean
## <fct> <dbl>
## 1 Adelie 38.8
## 2 Chinstrap 48.8
## 3 Gentoo 47.5
mean_by2 <- function(df, by, var){
df |>
group_by(!!enquo(by)) |>
summarise(mean = mean(!!enquo(var), na.rm = TRUE))
}
mean_by2(penguins, species, bill_length_mm)## # A tibble: 3 × 2
## species mean
## <fct> <dbl>
## 1 Adelie 38.8
## 2 Chinstrap 48.8
## 3 Gentoo 47.5
:=はセイウチ演算子(walrus operator)というらしいmean_by3 <- function(df, by, var, prefix = "mean"){
df |>
group_by({{ by }}) |>
summarise("{prefix}_{{ var }}" := mean({{ var }}, na.rm = TRUE))
}
mean_by3(penguins, species, bill_length_mm, prefix = "mean")## # A tibble: 3 × 2
## species mean_bill_length_mm
## <fct> <dbl>
## 1 Adelie 38.8
## 2 Chinstrap 48.8
## 3 Gentoo 47.5
# 使用する変数格納
vars <-
mtcars |>
select(vs,am,gear) |>
names()
map(vars, \(x) count(mtcars, .data[[x]])) |>
set_names(vars) # listの要素名付与## $vs
## vs n
## 1 0 18
## 2 1 14
##
## $am
## am n
## 1 0 19
## 2 1 13
##
## $gear
## gear n
## 1 3 15
## 2 4 12
## 3 5 5
# 割合も
map(vars, \(x) count(mtcars, .data[[x]])) |>
set_names(vars) |>
map(\(x) mutate(x, percent = n/sum(n))) ## $vs
## vs n percent
## 1 0 18 0.5625
## 2 1 14 0.4375
##
## $am
## am n percent
## 1 0 19 0.59375
## 2 1 13 0.40625
##
## $gear
## gear n percent
## 1 3 15 0.46875
## 2 4 12 0.37500
## 3 5 5 0.15625
# renameしたい変数の型を因子型に
mtcars_f <-
mtcars |>
mutate(across(all_of(vars),
factor))
tables <-
map(vars, \(x) count(mtcars_f, .data[[x]], .drop = FALSE)) |>
set_names(vars) |>
map(\(x) mutate(x, percent = n/sum(n)))
# 各変数に対し関数を適用(tidyselect 1.2.0 >= でwarningが出るように)
# tables <-
# map(vars, \(x) tabyl(mtcars_f, .data[[x]])) |>
# set_names(vars)
# 各変数のtabyl結果で連番作成
tables <-
map(tables, \(x) mutate(x, vid = row_number()))
# 各リスト要素の変数名の列名に文字を追加
tables <-
tables |>
map(\(x) rename_with(x,
\(y) str_c(y, "_test"),
starts_with("per")))
# purrr 1.0.0より前
# tables <-
# tables |>
# map(~rename_with(.,
# ~ str_c(., "_test"),
# starts_with("per")))
# 各リスト要素の変数名の列を同じ名前に
tables <-
tables |>
map(\(x) rename_with(x,
~ "levels",
where(is.factor)))
# purrr 1.0.0より前
# tables <-
# tables |>
# map(~rename_with(.,
# ~ "levels",
# where(is.factor)))
bind_rows(tables, .id = "var_name")## var_name levels n percent_test vid
## 1 vs 0 18 0.56250 1
## 2 vs 1 14 0.43750 2
## 3 am 0 19 0.59375 1
## 4 am 1 13 0.40625 2
## 5 gear 3 15 0.46875 1
## 6 gear 4 12 0.37500 2
## 7 gear 5 5 0.15625 3
df_2020 <- tribble(
~id, ~var1, ~var2,
1, 1, 0,
2, 0, 1,
3, 1, 1
)
df_2021 <- tribble(
~id, ~var1, ~var2,
1, 0, 0,
2, 1, 0,
3, 1, 1
)
# データフレームをリストにまとめて要素名をつける
df_list <-
list(df_2020, df_2021) |>
set_names("y20", "y21")
# リスト内のデータフレームのid以外の変数に要素名の接尾辞をつける
df_list <-
imap(df_list,
function(a,b){
rename_with(a,
\(x) {str_c(x, b, sep = "_")}, !id)}
)
df_list## $y20
## # A tibble: 3 × 3
## id var1_y20 var2_y20
## <dbl> <dbl> <dbl>
## 1 1 1 0
## 2 2 0 1
## 3 3 1 1
##
## $y21
## # A tibble: 3 × 3
## id var1_y21 var2_y21
## <dbl> <dbl> <dbl>
## 1 1 0 0
## 2 2 1 0
## 3 3 1 1
## # A tibble: 3 × 5
## id var1_y20 var2_y20 var1_y21 var2_y21
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 0 0 0
## 2 2 0 1 1 0
## 3 3 1 1 1 1
# アウトカム変数名格納
outcomes <-
penguins |>
select(ends_with("_mm")) |>
names()
# 回帰
res_l <-
str_c(outcomes, "~ species*sex") |>
map(\(x) lm(as.formula(x), data = penguins)) |>
set_names(outcomes)
# 結果の要約
res_l |>
map(\(x) broom::tidy(x, conf.int = TRUE)) |>
bind_rows(.id = "outcome") |>
print(n = 18)## # A tibble: 18 × 8
## outcome term estimate std.error statistic p.value conf.low conf.high
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 bill_length_… (Int… 37.3 0.271 137. 2.30e-291 36.7 37.8
## 2 bill_length_… spec… 9.32 0.481 19.4 3.06e- 56 8.37 10.3
## 3 bill_length_… spec… 8.31 0.407 20.4 3.18e- 60 7.50 9.11
## 4 bill_length_… sexm… 3.13 0.383 8.17 6.64e- 15 2.38 3.89
## 5 bill_length_… spec… 1.39 0.680 2.04 4.21e- 2 0.0501 2.73
## 6 bill_length_… spec… 0.777 0.572 1.36 1.75e- 1 -0.348 1.90
## 7 bill_depth_mm (Int… 17.6 0.0976 180. 0 17.4 17.8
## 8 bill_depth_mm spec… -0.0337 0.173 -0.194 8.46e- 1 -0.374 0.307
## 9 bill_depth_mm spec… -3.38 0.147 -23.1 1.55e- 70 -3.67 -3.10
## 10 bill_depth_mm sexm… 1.45 0.138 10.5 1.96e- 22 1.18 1.72
## 11 bill_depth_mm spec… 0.214 0.245 0.874 3.83e- 1 -0.268 0.696
## 12 bill_depth_mm spec… 0.0294 0.206 0.143 8.87e- 1 -0.376 0.435
## 13 flipper_leng… (Int… 188. 0.662 284. 0 186. 189.
## 14 flipper_leng… spec… 3.94 1.17 3.36 8.84e- 4 1.63 6.25
## 15 flipper_leng… spec… 24.9 0.995 25.0 4.93e- 78 23.0 26.9
## 16 flipper_leng… sexm… 4.62 0.936 4.93 1.30e- 6 2.77 6.46
## 17 flipper_leng… spec… 3.56 1.66 2.14 3.28e- 2 0.293 6.83
## 18 flipper_leng… spec… 4.22 1.40 3.02 2.74e- 3 1.47 6.97
## vars n mean sd median trimmed mad min max
## species* 1 344 1.92 0.89 2.00 1.90 1.48 1.0 3.0
## island* 2 344 1.66 0.73 2.00 1.58 1.48 1.0 3.0
## bill_length_mm 3 342 43.92 5.46 44.45 43.91 7.04 32.1 59.6
## bill_depth_mm 4 342 17.15 1.97 17.30 17.17 2.22 13.1 21.5
## flipper_length_mm 5 342 200.92 14.06 197.00 200.34 16.31 172.0 231.0
## body_mass_g 6 342 4201.75 801.95 4050.00 4154.01 889.56 2700.0 6300.0
## sex* 7 333 1.50 0.50 2.00 1.51 0.00 1.0 2.0
## year 8 344 2008.03 0.82 2008.00 2008.04 1.48 2007.0 2009.0
## range skew kurtosis se
## species* 2.0 0.16 -1.73 0.05
## island* 2.0 0.61 -0.91 0.04
## bill_length_mm 27.5 0.05 -0.89 0.30
## bill_depth_mm 8.4 -0.14 -0.92 0.11
## flipper_length_mm 59.0 0.34 -1.00 0.76
## body_mass_g 3600.0 0.47 -0.74 43.36
## sex* 1.0 -0.02 -2.01 0.03
## year 2.0 -0.05 -1.51 0.04
## # A tibble: 8 × 14
## skim_type skim_variable n_missing complete_rate factor.ordered factor.n_unique
## <chr> <chr> <int> <dbl> <lgl> <int>
## 1 factor species 0 1 FALSE 3
## 2 factor island 0 1 FALSE 3
## 3 factor sex 11 0.968 FALSE 2
## 4 numeric bill_length_… 2 0.994 NA NA
## 5 numeric bill_depth_mm 2 0.994 NA NA
## 6 numeric flipper_leng… 2 0.994 NA NA
## 7 numeric body_mass_g 2 0.994 NA NA
## 8 numeric year 0 1 NA NA
## # ℹ 8 more variables: factor.top_counts <chr>, numeric.mean <dbl>,
## # numeric.sd <dbl>, numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>,
## # numeric.p75 <dbl>, numeric.p100 <dbl>
## # A tibble: 5 × 11
## skim_variable n_missing complete_rate mean sd p0 p25 p50
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 bill_length_mm 2 0.994 43.9 5.46 32.1 39.2 44.4
## 2 bill_depth_mm 2 0.994 17.2 1.97 13.1 15.6 17.3
## 3 flipper_length_mm 2 0.994 201. 14.1 172 190 197
## 4 body_mass_g 2 0.994 4202. 802. 2700 3550 4050
## 5 year 0 1 2008. 0.818 2007 2007 2008
## # ℹ 3 more variables: p75 <dbl>, p100 <dbl>, hist <chr>
| Name | penguins |
| Number of rows | 344 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| factor | 3 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n | ordered | n_unique | top_counts |
|---|---|---|---|---|
| species | 344 | FALSE | 3 | Ade: 152, Gen: 124, Chi: 68 |
| island | 344 | FALSE | 3 | Bis: 168, Dre: 124, Tor: 52 |
| sex | 333 | FALSE | 2 | mal: 168, fem: 165 |
Variable type: numeric
| skim_variable | n | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|
| bill_length_mm | 342 | 43.92 | 5.46 | 32.1 | 39.23 | 44.45 | 48.5 | 59.6 | ▃▇▇▆▁ |
| bill_depth_mm | 342 | 17.15 | 1.97 | 13.1 | 15.60 | 17.30 | 18.7 | 21.5 | ▅▅▇▇▂ |
| flipper_length_mm | 342 | 200.92 | 14.06 | 172.0 | 190.00 | 197.00 | 213.0 | 231.0 | ▂▇▃▅▂ |
| body_mass_g | 342 | 4201.75 | 801.95 | 2700.0 | 3550.00 | 4050.00 | 4750.0 | 6300.0 | ▃▇▆▃▂ |
| year | 344 | 2008.03 | 0.82 | 2007.0 | 2007.00 | 2008.00 | 2009.0 | 2009.0 | ▇▁▇▁▇ |
## # A tibble: 21 × 16
## skim_type skim_variable sex n_missing complete_rate factor.ordered
## <chr> <chr> <fct> <int> <dbl> <lgl>
## 1 factor species female 0 1 FALSE
## 2 factor species male 0 1 FALSE
## 3 factor species <NA> 0 1 FALSE
## 4 factor island female 0 1 FALSE
## 5 factor island male 0 1 FALSE
## 6 factor island <NA> 0 1 FALSE
## 7 numeric bill_length_mm female 0 1 NA
## 8 numeric bill_length_mm male 0 1 NA
## 9 numeric bill_length_mm <NA> 2 0.818 NA
## 10 numeric bill_depth_mm female 0 1 NA
## # ℹ 11 more rows
## # ℹ 10 more variables: factor.n_unique <int>, factor.top_counts <chr>,
## # numeric.mean <dbl>, numeric.sd <dbl>, numeric.p0 <dbl>, numeric.p25 <dbl>,
## # numeric.p50 <dbl>, numeric.p75 <dbl>, numeric.p100 <dbl>,
## # numeric.hist <chr>
penguins |>
group_by(species, sex) |>
skim(bill_length_mm, bill_depth_mm) |> # selectを先にもってきてもよい
as_tibble()## # A tibble: 16 × 14
## skim_type skim_variable species sex n_missing complete_rate numeric.mean
## <chr> <chr> <fct> <fct> <int> <dbl> <dbl>
## 1 numeric bill_length_mm Adelie fema… 0 1 37.3
## 2 numeric bill_length_mm Adelie male 0 1 40.4
## 3 numeric bill_length_mm Adelie <NA> 1 0.833 37.8
## 4 numeric bill_length_mm Chinstrap fema… 0 1 46.6
## 5 numeric bill_length_mm Chinstrap male 0 1 51.1
## 6 numeric bill_length_mm Gentoo fema… 0 1 45.6
## 7 numeric bill_length_mm Gentoo male 0 1 49.5
## 8 numeric bill_length_mm Gentoo <NA> 1 0.8 45.6
## 9 numeric bill_depth_mm Adelie fema… 0 1 17.6
## 10 numeric bill_depth_mm Adelie male 0 1 19.1
## 11 numeric bill_depth_mm Adelie <NA> 1 0.833 18.3
## 12 numeric bill_depth_mm Chinstrap fema… 0 1 17.6
## 13 numeric bill_depth_mm Chinstrap male 0 1 19.3
## 14 numeric bill_depth_mm Gentoo fema… 0 1 14.2
## 15 numeric bill_depth_mm Gentoo male 0 1 15.7
## 16 numeric bill_depth_mm Gentoo <NA> 1 0.8 14.6
## # ℹ 7 more variables: numeric.sd <dbl>, numeric.p0 <dbl>, numeric.p25 <dbl>,
## # numeric.p50 <dbl>, numeric.p75 <dbl>, numeric.p100 <dbl>,
## # numeric.hist <chr>
my_skim <- skim_with(base = sfl(n = length))
penguins |>
group_by(species, sex) |>
my_skim(bill_length_mm) |>
as_tibble()## # A tibble: 8 × 13
## skim_type skim_variable species sex n numeric.mean numeric.sd numeric.p0
## <chr> <chr> <fct> <fct> <int> <dbl> <dbl> <dbl>
## 1 numeric bill_length_… Adelie fema… 73 37.3 2.03 32.1
## 2 numeric bill_length_… Adelie male 73 40.4 2.28 34.6
## 3 numeric bill_length_… Adelie <NA> 6 37.8 2.80 34.1
## 4 numeric bill_length_… Chinst… fema… 34 46.6 3.11 40.9
## 5 numeric bill_length_… Chinst… male 34 51.1 1.56 48.5
## 6 numeric bill_length_… Gentoo fema… 58 45.6 2.05 40.9
## 7 numeric bill_length_… Gentoo male 61 49.5 2.72 44.4
## 8 numeric bill_length_… Gentoo <NA> 5 45.6 1.37 44.5
## # ℹ 5 more variables: numeric.p25 <dbl>, numeric.p50 <dbl>, numeric.p75 <dbl>,
## # numeric.p100 <dbl>, numeric.hist <chr>
## # A tibble: 65 × 4
## skim_type skim_variable stat formatted
## <chr> <chr> <chr> <chr>
## 1 factor species n_missing 0
## 2 factor island n_missing 0
## 3 factor sex n_missing 11
## 4 numeric bill_length_mm n_missing 2
## 5 numeric bill_depth_mm n_missing 2
## 6 numeric flipper_length_mm n_missing 2
## 7 numeric body_mass_g n_missing 2
## 8 numeric year n_missing 0
## 9 factor species complete_rate 1
## 10 factor island complete_rate 1
## # ℹ 55 more rows
## [1] 1e+10
## [1] "10000000000"
## [1] 0.12346
## [1] 0.12345
# library(plyr) # dplyr読んだ後に読み込むとdplyrの関数で動かなくなるのがあるので読むならtidyverseの先に
plyr::round_any(0.123456, 0.00001, floor) # 切り捨て## [1] 0.12345
## [1] 0.12346
## drat wt
## Mazda RX4 3.90 2.620
## Mazda RX4 Wag 3.90 2.875
## Datsun 710 3.85 2.320
## Hornet 4 Drive 3.08 3.215
## Hornet Sportabout 3.15 3.440
## Valiant 2.76 3.460
## Duster 360 3.21 3.570
## Merc 240D 3.69 3.190
## Merc 230 3.92 3.150
## Merc 280 3.92 3.440
## Merc 280C 3.92 3.440
## Merc 450SE 3.07 4.070
## Merc 450SL 3.07 3.730
## Merc 450SLC 3.07 3.780
## Cadillac Fleetwood 2.93 5.250
## Lincoln Continental 3.00 5.424
## Chrysler Imperial 3.23 5.345
## Fiat 128 4.08 2.200
## Honda Civic 4.93 1.615
## Toyota Corolla 4.22 1.835
## Toyota Corona 3.70 2.465
## Dodge Challenger 2.76 3.520
## AMC Javelin 3.15 3.435
## Camaro Z28 3.73 3.840
## Pontiac Firebird 3.08 3.845
## Fiat X1-9 4.08 1.935
## Porsche 914-2 4.43 2.140
## Lotus Europa 3.77 1.513
## Ford Pantera L 4.22 3.170
## Ferrari Dino 3.62 2.770
## Maserati Bora 3.54 3.570
## Volvo 142E 4.11 2.780
mtcars |>
mutate(across(c(drat,wt),
\(x) plyr::round_any(x, 0.1, floor))) |>
select(drat, wt) |>
head()## drat wt
## Mazda RX4 3.9 2.6
## Mazda RX4 Wag 3.9 2.8
## Datsun 710 3.8 2.3
## Hornet 4 Drive 3.0 3.2
## Hornet Sportabout 3.1 3.4
## Valiant 2.7 3.4
mtcars |>
select(drat, wt) |>
mutate(across(c(drat,wt),
list(fl = \(x) plyr::round_any(x, 0.1, floor)))) |>
head()## drat wt drat_fl wt_fl
## Mazda RX4 3.90 2.620 3.9 2.6
## Mazda RX4 Wag 3.90 2.875 3.9 2.8
## Datsun 710 3.85 2.320 3.8 2.3
## Hornet 4 Drive 3.08 3.215 3.0 3.2
## Hornet Sportabout 3.15 3.440 3.1 3.4
## Valiant 2.76 3.460 2.7 3.4
moji_vec <-
as.character(1:3) |> # ベクトルの値: 変更先の文字
set_names(c("one", "two", "three")) # 名前: 元の文字
moji_vec## one two three
## "1" "2" "3"
## [1] "item_one" "item_two" "item_three" "item_two_three"
## [1] "item_1" "item_2" "item_3" "item_2_3"
## [1] "文字列 このテキストより後を削除"
## [1] "species" "island" "bill_length_mm"
## [4] "bill_depth_mm" "flipper_length_mm" "body_mass_g"
## [7] "sex" "year"
## c("species", "island", "bill_length_mm", "bill_depth_mm", "flipper_length_mm",
## "body_mass_g", "sex", "year")
## c(
## "species", "island", "bill_length_mm", "bill_depth_mm", "flipper_length_mm",
## "body_mass_g", "sex", "year"
## )
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
## [1] species island bill_length_mm bill_depth_mm
## [5] flipper_length_mm body_mass_g sex year
色々方法があるがutils::stack()が早い
## values ind
## 1 1 a
## 2 2 a
## 3 1 b
## 4 2 b
## 5 3 b
## 6 1 c
## 7 2 c
## 8 3 c
## 9 4 c
p1 <-
penguins |>
select(species, starts_with("bill")) |>
slice(1:3) |>
mutate(id = row_number())
p2 <-
penguins |>
select(species, starts_with("bill")) |>
slice(4:6) |>
mutate(id = row_number())## # A tibble: 3 × 6
## species bill_length_mm.x bill_depth_mm.x id bill_length_mm.y
## <fct> <dbl> <dbl> <int> <dbl>
## 1 Adelie 39.1 18.7 1 NA
## 2 Adelie 39.5 17.4 2 36.7
## 3 Adelie 40.3 18 3 39.3
## # ℹ 1 more variable: bill_depth_mm.y <dbl>
# 対象となる変数名のベクトル作成
var_name <-
penguins |>
select(starts_with("bill")) |>
names()
# 差分を計算する関数作成
make_dif <- function(df, var){
df |>
mutate("dif_,{var}" :=
!!sym(str_c(var, ".x")) - !!sym(str_c(var, ".y"))
)
}
make_dif2 <- function(df, var){
df |>
mutate("dif_{var}" :=
!!sym(str_c(var, ".x")) - !!sym(str_c(var, ".y"))
)
}
# 動く
# make_dif <- function(df, var){
# df |>
# mutate(!!sym(str_c("dif_",{{var}})) :=
# !!sym(str_c(var, ".x")) - !!sym(str_c(var, ".y")))
# }
#
# make_dif2 <- function(df, var){
# df |>
# mutate("dif_{{var}}" := !!sym(str_c(var, ".x")) - !!sym(str_c(var, ".y")))
# }
# 動く
# make_dif <- function(df, var){
# df |>
# mutate(!!sym(str_c("dif_",{{var}})) :=
# !!sym(str_c({{var}}, ".x")) - !!sym(str_c({{var}}, ".y")))
# }
#
# make_dif2 <- function(df, var){
# df |>
# mutate("dif_{{var}}" := !!sym(str_c({{var}}, ".x")) - !!sym(str_c({{var}}, ".y")))
# }
# 動く
# make_di2 <- function(df, var){
# var1 <- sym(str_c({{var}}, ".x"))
# var2 <- sym(str_c({{var}}, ".y"))
# df |>
# mutate("dif_{{var}}" := !!var1 - !!var2)
# }
# 動く
# make_di2 <- function(df, var){
# var1 <- sym("{{var}}.x")
# var2 <- sym("{{var}}.y")
# df |>
# mutate("dif_{{var}}" := !!var1 - !!var2)
# }
# 動かない
# make_dif3 <- function(df, var){
# df |>
# mutate("dif_{{var}}" := !!sym("{{var}}.x") - !!sym("{{var}}.y"))
# }
# 動かない
# make_dif3 <- function(df, var){
# form <- "{{var}}.x - {{var}}.y"
# df |>
# mutate("dif_{{var}}" := !! rlang::parse_expr(form) )
# }
p1_2 |>
reduce(var_name, make_dif, .init = _) |>
select(id, species, contains("bill_length"), contains("bill_depth")) |>
glimpse()## Rows: 3
## Columns: 8
## $ id <int> 1, 2, 3
## $ species <fct> Adelie, Adelie, Adelie
## $ bill_length_mm.x <dbl> 39.1, 39.5, 40.3
## $ bill_length_mm.y <dbl> NA, 36.7, 39.3
## $ `dif_,bill_length_mm` <dbl> NA, 2.8, 1.0
## $ bill_depth_mm.x <dbl> 18.7, 17.4, 18.0
## $ bill_depth_mm.y <dbl> NA, 19.3, 20.6
## $ `dif_,bill_depth_mm` <dbl> NA, -1.9, -2.6
p1_2 |>
reduce(var_name, make_dif2, .init = _) |>
select(id, species, contains("bill_length"), contains("bill_depth")) |>
glimpse()## Rows: 3
## Columns: 8
## $ id <int> 1, 2, 3
## $ species <fct> Adelie, Adelie, Adelie
## $ bill_length_mm.x <dbl> 39.1, 39.5, 40.3
## $ bill_length_mm.y <dbl> NA, 36.7, 39.3
## $ dif_bill_length_mm <dbl> NA, 2.8, 1.0
## $ bill_depth_mm.x <dbl> 18.7, 17.4, 18.0
## $ bill_depth_mm.y <dbl> NA, 19.3, 20.6
## $ dif_bill_depth_mm <dbl> NA, -1.9, -2.6
p1_long <-
pivot_longer(p1, !c(species, id),
values_to = "old")
p2_long <-
pivot_longer(p2, !c(species, id),
values_to = "new")
p1_2_long <-
p1_long |>
left_join(p2_long, by = c("id", "species", "name"))
p1_2_long |>
mutate(diff = old - new) |>
pivot_wider(
names_from = name,
values_from = old:diff,
names_glue = "{name}_{.value}"
) |>
select(id, species, contains("bill_length"), contains("bill_depth")) |>
glimpse()## Rows: 3
## Columns: 8
## $ id <int> 1, 2, 3
## $ species <fct> Adelie, Adelie, Adelie
## $ bill_length_mm_old <dbl> 39.1, 39.5, 40.3
## $ bill_length_mm_new <dbl> NA, 36.7, 39.3
## $ bill_length_mm_diff <dbl> NA, 2.8, 1.0
## $ bill_depth_mm_old <dbl> 18.7, 17.4, 18.0
## $ bill_depth_mm_new <dbl> NA, 19.3, 20.6
## $ bill_depth_mm_diff <dbl> NA, -1.9, -2.6
## tibble::tibble(
## species = factor(c("Adelie", "Adelie"), levels = c("Adelie", "Chinstrap", "Gentoo")),
## island = factor(c("Torgersen", "Torgersen"), levels = c("Biscoe", "Dream", "Torgersen")),
## bill_length_mm = c(39.1, 39.5),
## bill_depth_mm = c(18.7, 17.4),
## flipper_length_mm = c(181L, 186L),
## body_mass_g = c(3750L, 3800L),
## sex = factor(c("male", "female")),
## year = c(2007L, 2007L),
## )
btheb <-
HSAUR3::BtheB |>
clean_names() |>
as_tibble()
# id付ける
btheb <-
btheb |>
rowid_to_column(var = "id")
btheb## # A tibble: 100 × 9
## id drug length treatment bdi_pre bdi_2m bdi_3m bdi_5m bdi_8m
## <int> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 No >6m TAU 29 2 2 NA NA
## 2 2 Yes >6m BtheB 32 16 24 17 20
## 3 3 Yes <6m TAU 25 20 NA NA NA
## 4 4 No >6m BtheB 21 17 16 10 9
## 5 5 Yes >6m BtheB 26 23 NA NA NA
## 6 6 Yes <6m BtheB 7 0 0 0 0
## 7 7 Yes <6m TAU 17 7 7 3 7
## 8 8 No >6m TAU 20 20 21 19 13
## 9 9 Yes <6m BtheB 18 13 14 20 11
## 10 10 Yes >6m BtheB 20 5 5 8 12
## # ℹ 90 more rows
btheb_long <-
btheb |>
pivot_longer(!c(id:treatment),
names_to = "time",
names_prefix = "bdi_", # timeの値から"bdi_"を削除
values_to = "bdi")
# timeをfactorにして順序指定
btheb_long <-
btheb_long |>
mutate(time = fct_relevel(time, "pre", "2m", "3m", "5m"))
btheb_long## # A tibble: 500 × 6
## id drug length treatment time bdi
## <int> <fct> <fct> <fct> <fct> <dbl>
## 1 1 No >6m TAU pre 29
## 2 1 No >6m TAU 2m 2
## 3 1 No >6m TAU 3m 2
## 4 1 No >6m TAU 5m NA
## 5 1 No >6m TAU 8m NA
## 6 2 Yes >6m BtheB pre 32
## 7 2 Yes >6m BtheB 2m 16
## 8 2 Yes >6m BtheB 3m 24
## 9 2 Yes >6m BtheB 5m 17
## 10 2 Yes >6m BtheB 8m 20
## # ℹ 490 more rows
Brown, V. A. (2021). An Introduction to Linear Mixed-Effects Modeling in R. Advances in Methods and Practices in Psychological Science, 4(1), 2515245920960351.
## Linear mixed model fit by REML ['lmerMod']
## Formula: bdi ~ treatment * time + (1 | id)
## Data: btheb_long
##
## REML criterion at convergence: 2632.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1787 -0.5372 -0.1079 0.4510 3.3967
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 83.59 9.143
## Residual 36.44 6.037
## Number of obs: 380, groups: id, 100
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 24.188 1.581 15.295
## treatmentBtheB -1.649 2.193 -0.752
## time2m -4.497 1.267 -3.551
## time3m -6.115 1.375 -4.448
## time5m -7.585 1.485 -5.108
## time8m -10.454 1.566 -6.676
## treatmentBtheB:time2m -3.330 1.734 -1.920
## treatmentBtheB:time3m -3.573 1.922 -1.859
## treatmentBtheB:time5m -3.284 2.088 -1.573
## treatmentBtheB:time8m -1.181 2.172 -0.543
##
## Correlation of Fixed Effects:
## (Intr) trtmBB time2m time3m time5m time8m trBB:2 trBB:3 trBB:5
## tretmntBthB -0.721
## time2m -0.379 0.273
## time3m -0.349 0.252 0.456
## time5m -0.323 0.233 0.422 0.430
## time8m -0.307 0.221 0.401 0.408 0.408
## trtmntBtB:2 0.277 -0.384 -0.731 -0.333 -0.309 -0.293
## trtmntBtB:3 0.250 -0.346 -0.326 -0.715 -0.308 -0.292 0.449
## trtmntBtB:5 0.230 -0.319 -0.300 -0.306 -0.711 -0.290 0.413 0.423
## trtmntBtB:8 0.221 -0.306 -0.289 -0.294 -0.294 -0.721 0.397 0.406 0.407
## # A tibble: 12 × 8
## effect group term estimate std.error statistic conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 24.2 1.58 15.3 21.1 27.3
## 2 fixed <NA> treatmentB… -1.65 2.19 -0.752 -5.95 2.65
## 3 fixed <NA> time2m -4.50 1.27 -3.55 -6.98 -2.01
## 4 fixed <NA> time3m -6.11 1.37 -4.45 -8.81 -3.42
## 5 fixed <NA> time5m -7.58 1.49 -5.11 -10.5 -4.67
## 6 fixed <NA> time8m -10.5 1.57 -6.68 -13.5 -7.38
## 7 fixed <NA> treatmentB… -3.33 1.73 -1.92 -6.73 0.0686
## 8 fixed <NA> treatmentB… -3.57 1.92 -1.86 -7.34 0.193
## 9 fixed <NA> treatmentB… -3.28 2.09 -1.57 -7.38 0.808
## 10 fixed <NA> treatmentB… -1.18 2.17 -0.543 -5.44 3.08
## 11 ran_pars id sd__(Inter… 9.14 NA NA NA NA
## 12 ran_pars Residual sd__Observ… 6.04 NA NA NA NA
## # A tibble: 2 × 6
## grp var1 var2 vcov sdcor icc
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 id (Intercept) <NA> 83.6 9.14 0.696
## 2 Residual <NA> <NA> 36.4 6.04 0.304
btheb_long |>
mutate(id = factor(id)) |>
filter(id %in% c(1:20)) |> # IDが1~20までに限定
ggplot(aes(time, bdi,
group = id)) + # color = id
geom_smooth(method = "lm", se = FALSE) +
geom_point(size = 1) +
theme_minimal() +
facet_wrap(vars(treatment))## $emmeans
## time treatment emmean SE df lower.CL upper.CL
## pre TAU 24.2 1.58 151 21.06 27.3
## 2m TAU 19.7 1.61 158 16.51 22.9
## 3m TAU 18.1 1.69 186 14.73 21.4
## 5m TAU 16.6 1.79 216 13.08 20.1
## 8m TAU 13.7 1.85 239 10.08 17.4
## pre BtheB 22.5 1.52 151 19.54 25.5
## 2m BtheB 14.7 1.52 151 11.71 17.7
## 3m BtheB 12.9 1.65 193 9.60 16.1
## 5m BtheB 11.7 1.75 229 8.22 15.1
## 8m BtheB 10.9 1.78 240 7.39 14.4
##
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## 2m TAU - pre TAU -4.50 1.27 275 -3.551 0.0005
## 3m TAU - pre TAU -6.11 1.37 279 -4.448 <.0001
## 5m TAU - pre TAU -7.58 1.49 280 -5.108 <.0001
## 8m TAU - pre TAU -10.45 1.57 280 -6.676 <.0001
## pre BtheB - pre TAU -1.65 2.19 151 -0.752 0.4532
## 2m BtheB - pre TAU -9.48 2.19 151 -4.321 <.0001
## 3m BtheB - pre TAU -11.34 2.28 171 -4.967 <.0001
## 5m BtheB - pre TAU -12.52 2.36 190 -5.309 <.0001
## 8m BtheB - pre TAU -13.28 2.38 196 -5.576 <.0001
##
## Degrees-of-freedom method: satterthwaite
\[ d = \frac{2t}{\sqrt{df}} \]
mmt <-
lmerTest::lmer(bdi ~ treatment*time + (1 | id),
data = btheb_long)
# 交互作用項だけに
summary(mmt)$coefficients |>
as_tibble(rownames = "terms") |>
filter(terms == "treatmentBtheB:time8m")## # A tibble: 1 × 6
## terms Estimate `Std. Error` df `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 treatmentBtheB:time8m -1.18 2.17 280. -0.543 0.587
(2*-0.543)/sqrt(280) = -0.0649009btheb |>
group_by(treatment) |>
summarise(mean = mean(bdi_8m, na.rm = TRUE),
sd = sd(bdi_8m, na.rm = TRUE),
n = sum(!is.na(bdi_8m)))## # A tibble: 2 × 4
## treatment mean sd n
## <fct> <dbl> <dbl> <int>
## 1 TAU 13.6 11.5 25
## 2 BtheB 8.85 6.09 27
## Mean Differences ES:
##
## d [ 95 %CI] = 0.52 [ -0.03 , 1.08 ]
## var(d) = 0.08
## p-value(d) = 0.07
## U3(d) = 69.92 %
## CLES(d) = 64.4 %
## Cliff's Delta = 0.29
##
## g [ 95 %CI] = 0.51 [ -0.03 , 1.06 ]
## var(g) = 0.08
## p-value(g) = 0.07
## U3(g) = 69.65 %
## CLES(g) = 64.19 %
##
## Correlation ES:
##
## r [ 95 %CI] = 0.26 [ -0.02 , 0.5 ]
## var(r) = 0.02
## p-value(r) = 0.07
##
## z [ 95 %CI] = 0.26 [ -0.02 , 0.54 ]
## var(z) = 0.02
## p-value(z) = 0.07
##
## Odds Ratio ES:
##
## OR [ 95 %CI] = 2.58 [ 0.95 , 7.03 ]
## p-value(OR) = 0.07
##
## Log OR [ 95 %CI] = 0.95 [ -0.06 , 1.95 ]
## var(lOR) = 0.26
## p-value(Log OR) = 0.07
##
## Other:
##
## NNT = 5.73
## Total N = 52
Evans, C., Margison, F., & Barkham, M. (1998). The contribution of reliable and clinically significant change methods to evidence-based mental health. Evidence-Based Mental Health, 1(3), 70-72.
パッケージ
BDI-IIの情報
# data.frame型にする必要あり
btheb <-
btheb |>
as.data.frame()
# インストール
# devtools::install_github("AWKruijt/JT-RCI")
# jacobson & truaxのreliable and clinically significant change
JTRCI::JTRCI(data = btheb,
pre = "bdi_pre", post = "bdi_8m",
group = "treatment",
reliability = .75,
JTcrit = "C",
normM = 13.7, normSD = 12.2 # 健康な集団の平均値とSD,
# dysfM = , dysfSD =
)## Jacobson-Truax classification TAU BtheB
## <char> <char> <char>
## 1: deteriorated 0 0
## 2: unchanged 7 2
## 3: improved 1 0
## 4: non reliably recovered 8 15
## 5: recovered 9 10
## ppid group pre post change_abs SEmeasurement Sdiff crittype critval
## 1 1 BtheB 32 20 -12 4.804502 6.794592 criterion C 18.9132
## 2 2 BtheB 21 9 -12 4.804502 6.794592 criterion C 18.9132
## 3 3 BtheB 7 0 -7 4.804502 6.794592 criterion C 18.9132
## 4 4 TAU 17 7 -10 4.804502 6.794592 criterion C 18.9132
## 5 5 TAU 20 13 -7 4.804502 6.794592 criterion C 18.9132
## 6 6 BtheB 18 11 -7 4.804502 6.794592 criterion C 18.9132
## RCI class_JTRCI
## 1 -1.766111 unchanged
## 2 -1.766111 non reliably recovered
## 3 -1.030231 non reliably recovered
## 4 -1.471759 non reliably recovered
## 5 -1.030231 non reliably recovered
## 6 -1.030231 non reliably recovered
suddengainsパッケージ# メタ分析による再検査信頼性
test_retest_bdi <- 0.75
# 8mの欠損を除外したtime1でのBDIのSD
bdi_pre_sd <-
btheb |>
drop_na(bdi_8m) |>
summarise(sd(bdi_pre, na.rm = TRUE)) |>
pull()
# RCI算出
suddengains::define_crit1_cutoff(sd = bdi_pre_sd,
reliability = test_retest_bdi)## $sd
## [1] 9.609004
##
## $reliability
## [1] 0.75
##
## $standard_error_measurement
## [1] 4.804502
##
## $standard_error_difference
## [1] 6.794592
##
## $reliable_change_value
## [1] 13.3174
## # A tibble: 344 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <chr>, year <dbl>
## # A tibble: 344 × 2
## species sex
## <chr> <chr>
## 1 Adelie male
## 2 Adelie female
## 3 Adelie female
## 4 Adelie <NA>
## 5 Adelie female
## 6 Adelie male
## 7 Adelie female
## 8 Adelie male
## 9 Adelie <NA>
## 10 Adelie <NA>
## # ℹ 334 more rows
## # A tibble: 344 × 2
## bill_length_mm bill_depth_mm
## <dbl> <dbl>
## 1 39.1 18.7
## 2 39.5 17.4
## 3 40.3 18
## 4 NA NA
## 5 36.7 19.3
## 6 39.3 20.6
## 7 38.9 17.8
## 8 39.2 19.6
## 9 34.1 18.1
## 10 42 20.2
## # ℹ 334 more rows
## # A tibble: 344 × 6
## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
## <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 39.1 18.7 181 3750 male 2007
## 2 39.5 17.4 186 3800 female 2007
## 3 40.3 18 195 3250 female 2007
## 4 NA NA NA NA <NA> 2007
## 5 36.7 19.3 193 3450 female 2007
## 6 39.3 20.6 190 3650 male 2007
## 7 38.9 17.8 181 3625 female 2007
## 8 39.2 19.6 195 4675 male 2007
## 9 34.1 18.1 193 3475 <NA> 2007
## 10 42 20.2 190 4250 <NA> 2007
## # ℹ 334 more rows
## # A tibble: 344 × 4
## island flipper_length_mm body_mass_g year
## <chr> <dbl> <dbl> <dbl>
## 1 Torgersen 181 3750 2007
## 2 Torgersen 186 3800 2007
## 3 Torgersen 195 3250 2007
## 4 Torgersen NA NA 2007
## 5 Torgersen 193 3450 2007
## 6 Torgersen 190 3650 2007
## 7 Torgersen 181 3625 2007
## 8 Torgersen 195 4675 2007
## 9 Torgersen 193 3475 2007
## 10 Torgersen 190 4250 2007
## # ℹ 334 more rows
## # A tibble: 344 × 7
## species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## <fct> <chr> <dbl> <dbl> <dbl> <chr>
## 1 Adelie 39.1 18.7 181 3750 male
## 2 Adelie 39.5 17.4 186 3800 female
## 3 Adelie 40.3 18 195 3250 female
## 4 Adelie <NA> NA NA NA <NA>
## 5 Adelie 36.7 19.3 193 3450 female
## 6 Adelie 39.3 20.6 190 3650 male
## 7 Adelie 38.9 17.8 181 3625 female
## 8 Adelie 39.2 19.6 195 4675 male
## 9 Adelie 34.1 18.1 193 3475 <NA>
## 10 Adelie 42 20.2 190 4250 <NA>
## # ℹ 334 more rows
## # ℹ 1 more variable: year <dbl>
## # A tibble: 344 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen <NA> <NA> <NA> <NA>
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <chr>, year <chr>
## # A tibble: 3 × 4
## a b c total
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA NA
## 2 1 2 NA NA
## 3 1 2 3 6
## # A tibble: 3 × 4
## a b c total
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 10
## 2 1 2 NA 10
## 3 1 2 3 10
## # A tibble: 3 × 4
## a b c mean_abc
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1.67
## 2 1 2 NA 1.67
## 3 1 2 3 1.67
## [1] 1.666667
## # A tibble: 3 × 4
## # Rowwise:
## a b c total
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 3
## 3 1 2 3 6
## # A tibble: 3 × 4
## # Rowwise:
## a b c mean_abc
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 1.5
## 3 1 2 3 2
## # A tibble: 3 × 4
## # Rowwise:
## a b c mean_abc
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 1.5
## 3 1 2 3 2
## # A tibble: 3 × 4
## # Rowwise:
## a b c mean_abc
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 1.5
## 3 1 2 3 2
## # A tibble: 3 × 4
## a b c total
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 3
## 3 1 2 3 6
## # A tibble: 3 × 4
## a b c total
## <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1
## 2 1 2 NA 1.5
## 3 1 2 3 2
old <- c("species", "island", "bill_length_mm")
new <- c("種", "島", "くちばし長さ")
newvars <- setNames(old, new)
penguins |>
rename(!!!newvars)## # A tibble: 344 × 8
## 種 島 くちばし長さ bill_depth_mm flipper_length_mm body_mass_g sex
## <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 Adelie Torger… 39.1 18.7 181 3750 male
## 2 Adelie Torger… 39.5 17.4 186 3800 fema…
## 3 Adelie Torger… 40.3 18 195 3250 fema…
## 4 Adelie Torger… NA NA NA NA <NA>
## 5 Adelie Torger… 36.7 19.3 193 3450 fema…
## 6 Adelie Torger… 39.3 20.6 190 3650 male
## 7 Adelie Torger… 38.9 17.8 181 3625 fema…
## 8 Adelie Torger… 39.2 19.6 195 4675 male
## 9 Adelie Torger… 34.1 18.1 193 3475 <NA>
## 10 Adelie Torger… 42 20.2 190 4250 <NA>
## # ℹ 334 more rows
## # ℹ 1 more variable: year <int>
tribble(~a, ~b,
0, 1,
1, 2,
2, 3) |>
mutate(b = case_when(a == 0 ~ (\(x) x+1)(b),
a == 1 ~ (\(x) x+2)(b),
.default = b)
)## # A tibble: 3 × 2
## a b
## <dbl> <dbl>
## 1 0 2
## 2 1 4
## 3 2 3
penguins |>
mutate(speciesj = case_when(
species == "Adelie" ~ "アデリー",
species == "Chinstrap" ~ "ヒゲ",
species == "Gentoo" ~ "ジェンツー",
.default = "その他"),
.keep = "used"
) |>
count(species, speciesj)## # A tibble: 3 × 3
## species speciesj n
## <fct> <chr> <int>
## 1 Adelie アデリー 152
## 2 Chinstrap ヒゲ 68
## 3 Gentoo ジェンツー 124
penguins |>
mutate(speciesj = case_match(species,
"Adelie" ~ "アデリー",
"Chinstrap" ~ "ヒゲ",
"Gentoo" ~ "ジェンツー",
.default = "その他"
),
.keep = "used"
) |>
count(species, speciesj)## # A tibble: 3 × 3
## species speciesj n
## <fct> <chr> <int>
## 1 Adelie アデリー 152
## 2 Chinstrap ヒゲ 68
## 3 Gentoo ジェンツー 124
old <-
penguins |>
count(species) |>
pull(species)
new <- c("アデリー", "ヒゲ", "ジェンツー")
taisho <-
tibble(species = old,
speciesj = new)
penguins |>
left_join(taisho) |>
count(species, speciesj)## # A tibble: 3 × 3
## species speciesj n
## <fct> <chr> <int>
## 1 Adelie アデリー 152
## 2 Chinstrap ヒゲ 68
## 3 Gentoo ジェンツー 124
new_old <-
new |>
set_names(old)
# オブジェクト作らずnewを上書き
# names(new) <- old
penguins |>
mutate(speciesj = new_old[species]) |>
count(species, speciesj)## # A tibble: 3 × 3
## species speciesj n
## <fct> <chr> <int>
## 1 Adelie アデリー 152
## 2 Chinstrap ヒゲ 68
## 3 Gentoo ジェンツー 124
# 区切りにスペースなし
df <- tribble(~id, ~multi_answer,
1, "りんご,みかん,ぶどう",
2, "りんご,ぶどう",
3, "メロン,ぶどう",
4, "ぶどう,りんご,みかん,メロン")
df## # A tibble: 4 × 2
## id multi_answer
## <dbl> <chr>
## 1 1 りんご,みかん,ぶどう
## 2 2 りんご,ぶどう
## 3 3 メロン,ぶどう
## 4 4 ぶどう,りんご,みかん,メロン
## # A tibble: 4 × 3
## id multi_answer multi_answer_l
## <dbl> <chr> <list>
## 1 1 りんご,みかん,ぶどう <chr [3]>
## 2 2 りんご,ぶどう <chr [2]>
## 3 3 メロン,ぶどう <chr [2]>
## 4 4 ぶどう,りんご,みかん,メロン <chr [4]>
## # A tibble: 4 × 6
## id multi_answer f1 f2 f3 f4
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 りんご,みかん,ぶどう りんご みかん ぶどう <NA>
## 2 2 りんご,ぶどう りんご ぶどう <NA> <NA>
## 3 3 メロン,ぶどう メロン ぶどう <NA> <NA>
## 4 4 ぶどう,りんご,みかん,メロン ぶどう りんご みかん メロン
df |>
mutate(りんご = case_when(f1 == "りんご" |
f2 == "りんご" |
f3 == "りんご" |
f4 == "りんご" ~ 1,
TRUE ~ 0),
みかん = case_when(f1 == "みかん" |
f2 == "みかん" |
f3 == "みかん" |
f4 == "みかん" ~ 1,
TRUE ~ 0),
ぶどう = case_when(f1 == "ぶどう" |
f2 == "ぶどう" |
f3 == "ぶどう" |
f4 == "ぶどう" ~ 1,
TRUE ~ 0),
メロン = case_when(f1 == "メロン" |
f2 == "メロン" |
f3 == "メロン" |
f4 == "メロン" ~ 1,
TRUE ~ 0)
)## # A tibble: 4 × 10
## id multi_answer f1 f2 f3 f4 りんご みかん ぶどう メロン
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 りんご,みかん,ぶどう りん… みか… ぶど… <NA> 1 1 1 0
## 2 2 りんご,ぶどう りん… ぶど… <NA> <NA> 1 0 1 0
## 3 3 メロン,ぶどう メロ… ぶど… <NA> <NA> 0 0 1 1
## 4 4 ぶどう,りんご,みか… ぶど… りん… みか… メロ… 1 1 1 1
# 回答選択肢ごとに列を作る関数作成
multi_answer <- function(data, answer){
data |>
mutate(!!sym({{answer}}) := case_when(f1 == answer |
f2 == answer |
f3 == answer |
f4 == answer ~ 1,
TRUE ~ 0))
}
multi_answer(df, "りんご")## # A tibble: 4 × 7
## id multi_answer f1 f2 f3 f4 りんご
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1 りんご,みかん,ぶどう りんご みかん ぶどう <NA> 1
## 2 2 りんご,ぶどう りんご ぶどう <NA> <NA> 1
## 3 3 メロン,ぶどう メロン ぶどう <NA> <NA> 0
## 4 4 ぶどう,りんご,みかん,メロン ぶどう りんご みかん メロン 1
## # A tibble: 4 × 10
## id multi_answer f1 f2 f3 f4 りんご みかん ぶどう メロン
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 りんご,みかん,ぶどう りん… みか… ぶど… <NA> 1 1 1 0
## 2 2 りんご,ぶどう りん… ぶど… <NA> <NA> 1 0 1 0
## 3 3 メロン,ぶどう メロ… ぶど… <NA> <NA> 0 0 1 1
## 4 4 ぶどう,りんご,みか… ぶど… りん… みか… メロ… 1 1 1 1
contents <-
c("りんご", "みかん", "ぶどう", "メロン")
df_multi <-
map(contents, \(x) multi_answer(df, x)) |>
reduce(full_join)
df_multi## # A tibble: 4 × 10
## id multi_answer f1 f2 f3 f4 りんご みかん ぶどう メロン
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 りんご,みかん,ぶどう りん… みか… ぶど… <NA> 1 1 1 0
## 2 2 りんご,ぶどう りん… ぶど… <NA> <NA> 1 0 1 0
## 3 3 メロン,ぶどう メロ… ぶど… <NA> <NA> 0 0 1 1
## 4 4 ぶどう,りんご,みか… ぶど… りん… みか… メロ… 1 1 1 1
across()の中で.names = 引数を入れるpenguins |>
summarise(across(c(bill_length_mm, bill_depth_mm),
list(
mean = \(x) mean(x, na.rm = TRUE),
n = \(x) sum(!is.na(x))),
.names = "{.fn}_{.col}"
)
)## # A tibble: 1 × 4
## mean_bill_length_mm n_bill_length_mm mean_bill_depth_mm n_bill_depth_mm
## <dbl> <int> <dbl> <int>
## 1 43.9 342 17.2 342
## vs n
## 1 0 18
## 2 1 14
## # A tibble: 1 × 2
## `0` `1`
## <int> <int>
## 1 18 14
## vs n name
## 1 0 18 vs
## 2 1 14 vs
## # A tibble: 1 × 2
## vs_0 vs_1
## <int> <int>
## 1 18 14
mtcars |>
count(vs) |>
pivot_wider(names_from = vs,
names_glue ="{.value}_{.name}",
values_from = n)## # A tibble: 1 × 2
## n_0 n_1
## <int> <int>
## 1 18 14
## # A tibble: 1 × 2
## n_0 n_1
## <int> <int>
## 1 18 14
## # A tibble: 11 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 12.3 18.7 0.657 0.518
## 2 cyl -0.111 1.05 -0.107 0.916
## 3 disp 0.0133 0.0179 0.747 0.463
## 4 hp -0.0215 0.0218 -0.987 0.335
## 5 drat 0.787 1.64 0.481 0.635
## 6 wt -3.72 1.89 -1.96 0.0633
## 7 qsec 0.821 0.731 1.12 0.274
## 8 vs 0.318 2.10 0.151 0.881
## 9 am 2.52 2.06 1.23 0.234
## 10 gear 0.655 1.49 0.439 0.665
## 11 carb -0.199 0.829 -0.241 0.812
## # A tibble: 11 × 6
## term estimate std_estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 12.3 NA 18.7 0.657 0.518
## 2 cyl -0.111 -0.0330 1.05 -0.107 0.916
## 3 disp 0.0133 0.274 0.0179 0.747 0.463
## 4 hp -0.0215 -0.244 0.0218 -0.987 0.335
## 5 drat 0.787 0.0698 1.64 0.481 0.635
## 6 wt -3.72 -0.603 1.89 -1.96 0.0633
## 7 qsec 0.821 0.243 0.731 1.12 0.274
## 8 vs 0.318 0.0266 2.10 0.151 0.881
## 9 am 2.52 0.209 2.06 1.23 0.234
## 10 gear 0.655 0.0802 1.49 0.439 0.665
## 11 carb -0.199 -0.0534 0.829 -0.241 0.812
## # A tibble: 11 × 12
## term mpg cyl disp hp drat wt qsec vs am
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mpg NA -0.852 -0.848 -0.776 0.681 -0.868 0.419 0.664 0.600
## 2 cyl -0.852 NA 0.902 0.832 -0.700 0.782 -0.591 -0.811 -0.523
## 3 disp -0.848 0.902 NA 0.791 -0.710 0.888 -0.434 -0.710 -0.591
## 4 hp -0.776 0.832 0.791 NA -0.449 0.659 -0.708 -0.723 -0.243
## 5 drat 0.681 -0.700 -0.710 -0.449 NA -0.712 0.0912 0.440 0.713
## 6 wt -0.868 0.782 0.888 0.659 -0.712 NA -0.175 -0.555 -0.692
## 7 qsec 0.419 -0.591 -0.434 -0.708 0.0912 -0.175 NA 0.745 -0.230
## 8 vs 0.664 -0.811 -0.710 -0.723 0.440 -0.555 0.745 NA 0.168
## 9 am 0.600 -0.523 -0.591 -0.243 0.713 -0.692 -0.230 0.168 NA
## 10 gear 0.480 -0.493 -0.556 -0.126 0.700 -0.583 -0.213 0.206 0.794
## 11 carb -0.551 0.527 0.395 0.750 -0.0908 0.428 -0.656 -0.570 0.0575
## # ℹ 2 more variables: gear <dbl>, carb <dbl>
## # A tibble: 3 × 4
## term mpg disp wt
## <chr> <dbl> <dbl> <dbl>
## 1 mpg NA -0.848 -0.868
## 2 disp -0.848 NA 0.888
## 3 wt -0.868 0.888 NA
## # A tibble: 3 × 4
## term mpg disp wt
## <chr> <dbl> <dbl> <dbl>
## 1 mpg NA NA NA
## 2 disp -0.848 NA NA
## 3 wt -0.868 0.888 NA
## A1 A2 C1 C2
## A1 2784 2757 2764 2761
## A2 2757 2773 2752 2752
## C1 2764 2752 2779 2755
## C2 2761 2752 2755 2776
## attr(,"class")
## [1] "n_mat" "matrix"
## # A tibble: 4 × 4
## A1 A2 C1 C2
## <n_mat> <n_mat> <n_mat> <n_mat>
## 1 2784 2757 2764 2761
## 2 2757 2773 2752 2752
## 3 2764 2752 2779 2755
## 4 2761 2752 2755 2776
mtcars |>
select(mpg, disp, wt, cyl) |>
nest(data = !cyl) |>
mutate(
correlations = map(data, correlate)
) |>
unnest(correlations)## # A tibble: 9 × 6
## cyl data term mpg disp wt
## <dbl> <list> <chr> <dbl> <dbl> <dbl>
## 1 6 <tibble [7 × 3]> mpg NA 0.103 -0.682
## 2 6 <tibble [7 × 3]> disp 0.103 NA 0.473
## 3 6 <tibble [7 × 3]> wt -0.682 0.473 NA
## 4 4 <tibble [11 × 3]> mpg NA -0.805 -0.713
## 5 4 <tibble [11 × 3]> disp -0.805 NA 0.857
## 6 4 <tibble [11 × 3]> wt -0.713 0.857 NA
## 7 8 <tibble [14 × 3]> mpg NA -0.520 -0.650
## 8 8 <tibble [14 × 3]> disp -0.520 NA 0.755
## 9 8 <tibble [14 × 3]> wt -0.650 0.755 NA
## # A tibble: 9 × 5
## # Groups: cyl [3]
## cyl term mpg disp wt
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 4 mpg NA -0.805 -0.713
## 2 4 disp -0.805 NA 0.857
## 3 4 wt -0.713 0.857 NA
## 4 6 mpg NA 0.103 -0.682
## 5 6 disp 0.103 NA 0.473
## 6 6 wt -0.682 0.473 NA
## 7 8 mpg NA -0.520 -0.650
## 8 8 disp -0.520 NA 0.755
## 9 8 wt -0.650 0.755 NA
# groupにする変数の水準を抜き出す
cyl_level <-
mtcars |>
count(cyl) |>
pull(cyl)
mtcars |>
select(mpg, disp, wt, cyl) |>
group_by(cyl) |>
group_map(~ correlate(.)) |> # ここは旧表記じゃないと動かない
map(\(x) shave(x)) |> # 相関行列の上側を
set_names(cyl_level) |> # group変数の水準を適用
bind_rows(.id = "cyl_level")## # A tibble: 9 × 5
## cyl_level term mpg disp wt
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 4 mpg NA NA NA
## 2 4 disp -0.805 NA NA
## 3 4 wt -0.713 0.857 NA
## 4 6 mpg NA NA NA
## 5 6 disp 0.103 NA NA
## 6 6 wt -0.682 0.473 NA
## 7 8 mpg NA NA NA
## 8 8 disp -0.520 NA NA
## 9 8 wt -0.650 0.755 NA
## 1.03 sec elapsed
## 1秒待つ: 1.02 sec elapsed
## 2秒待つ: 2.03 sec elapsed
remedyパッケージを読み込んだ後に修正したい範囲を指定し以下を実行
<- Addins > Align= Addins > AlignEqualcitation()citation("パッケージ名")RStudio.Version()## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# 空のワークブックとシートを作成
library(openxlsx)
wb <- createWorkbook()
addWorksheet(wb, "ペンギン")
addWorksheet(wb, "ペンギンraw")
# フォントをデフォルトからMS Pゴシックに変更
modifyBaseFont(wb, "MS PGothic")
# データをwbに入れる
writeData(wb, sheet = "ペンギン", penguins, startCol = 1, startRow = 1)
writeData(wb, sheet = "ペンギンraw", penguins_raw, startCol = 1, startRow = 1)
# 先頭行および先頭列固定
freezePane(wb, "ペンギン", firstRow = TRUE, firstCol = TRUE)
freezePane(wb, "ペンギンraw", firstRow = TRUE, firstCol = TRUE)
# 保存
saveWorkbook(wb, "out/penguins.xlsx", overwrite = TRUE)