Title: | Tools for Cleaning Up Messy Files |
---|---|
Description: | Some tools for cleaning up messy 'Excel' files to make them suitable for R. People who have been working with 'Excel' for years have built more or less complicated sheets with names, characters and formats that are not homogeneous. To be able to use them in R nowadays, we built a set of functions that avoid the majority of import problems and preserve as much of the data as possible. |
Authors: | Vincent Guyader [aut, cre] , Sébastien Rochette [aut] , ThinkR [cph] |
Maintainer: | Vincent Guyader <[email protected]> |
License: | GPL-3 |
Version: | 0.16 |
Built: | 2024-12-14 03:21:38 UTC |
Source: | https://github.com/ThinkR-open/thinkr |
Only useful during package development using the testthat package
.efface_test()
.efface_test()
not in
x %ni% table
x %ni% table
x |
vector or NULL: the values to be matched |
table |
the values to be matched against |
"a" %ni% letters "coucou" %ni% letters
"a" %ni% letters "coucou" %ni% letters
Save all ggplot in a pptx
all_ggplot_to_pptx( out = "tous_les_graphs.pptx", open = TRUE, png = TRUE, folder = "dessin", global = TRUE )
all_ggplot_to_pptx( out = "tous_les_graphs.pptx", open = TRUE, png = TRUE, folder = "dessin", global = TRUE )
out |
output file name |
open |
boolean: open the file after creation |
png |
boolean: also save as png |
folder |
png's folder |
global |
boolean: use .GlobalEnv |
## Not run: all_ggplot_to_pptx() ## End(Not run)
## Not run: all_ggplot_to_pptx() ## End(Not run)
Transform a vector into numeric if meaningful, even with bad decimal, space or %
as_mon_numeric(vec)
as_mon_numeric(vec)
vec |
a vector |
Note that text and factors are not transformed as numeric (except FALSE, TRUE, F, T), contrary to R default behavior with 'as.numeric(factor())'
a numeric vector
as_mon_numeric(c("1", "0", "1")) as_mon_numeric(c("1.3", "1,5", "1;6", "16%", "17 87 ")) as_mon_numeric(c(TRUE, "A", "F")) as_mon_numeric(c(TRUE, TRUE, FALSE)) as_mon_numeric(factor(c("toto", "tata", "toto")))
as_mon_numeric(c("1", "0", "1")) as_mon_numeric(c("1.3", "1,5", "1;6", "16%", "17 87 ")) as_mon_numeric(c(TRUE, "A", "F")) as_mon_numeric(c(TRUE, TRUE, FALSE)) as_mon_numeric(factor(c("toto", "tata", "toto")))
Clean levels label
clean_levels(vec, verbose = FALSE, translit = FALSE, punct = FALSE)
clean_levels(vec, verbose = FALSE, translit = FALSE, punct = FALSE)
vec |
a factor |
verbose |
boolean: is the function verbose |
translit |
boolean: remove non-ASCII characters |
punct |
boolean: remove punctuation |
clean_names
clean_names(dataset, verbose = FALSE, translit = TRUE)
clean_names(dataset, verbose = FALSE, translit = TRUE)
dataset |
a dataframe |
verbose |
logical |
translit |
logical remove non ascii character |
a dataframe
data(iris) clean_names(iris)
data(iris) clean_names(iris)
Clean character vector
clean_vec( vec, verbose = FALSE, unique = TRUE, keep_number = FALSE, translit = TRUE, punct = TRUE )
clean_vec( vec, verbose = FALSE, unique = TRUE, keep_number = FALSE, translit = TRUE, punct = TRUE )
vec |
character vector to clean |
verbose |
logical is the function verbose |
unique |
logical do we have to apply make_unique |
keep_number |
logical keep number at beginning |
translit |
logical remove non ascii character |
punct |
logical do you remove punctuation |
return R instruction to create levels
dput_levels(vec)
dput_levels(vec)
vec |
a factor or character vector |
an R instruction
dput_levels(iris$Species)
dput_levels(iris$Species)
ncol_to_excel returns the Excel column name from a position number.
excel_to_ncol returns the Excel column position number from a column name.
excel_col returns all Excel column names.
ncol_to_excel(n) excel_to_ncol(col_name) excel_col()
ncol_to_excel(n) excel_to_ncol(col_name) excel_col()
n |
the column position |
col_name |
the column name |
ncol_to_excel(35) excel_to_ncol("BF") excel_col() ncol_to_excel(1:6) excel_to_ncol(c('AF', 'AG', 'AH'))
ncol_to_excel(35) excel_to_ncol("BF") excel_col() ncol_to_excel(1:6) excel_to_ncol(c('AF', 'AG', 'AH'))
find a pattern in the names of a dataset
find_name(dataset, pattern)
find_name(dataset, pattern)
dataset |
a data.frame (or list or anything with a names attribute) |
pattern |
pattern we are looking for |
a list with position and value
find_name(iris,"Sepal")
find_name(iris,"Sepal")
transform the excel numeric date format into POSIXct
from_excel_to_posixt(vec, origin = "1904-01-01")
from_excel_to_posixt(vec, origin = "1904-01-01")
vec |
a vector |
origin |
a date-time object, or something which can be coerced by as.POSIXct(tz = "GMT") to such an object. |
like gsub but keep a factor as factor
gsub2(x, ...)
gsub2(x, ...)
x |
a vector |
... |
the parameters of the gsub function |
a vector
detects if a character vector is only made with figures. Useful when you
is_full_figures(.)
is_full_figures(.)
. |
a character vector (possibly containing NA's) |
a boolean
is_full_figures(c(NA,"0","25.3")) is_full_figures((c(NA,"0","25_3")))
is_full_figures(c(NA,"0","25.3")) is_full_figures((c(NA,"0","25_3")))
is_full_na tests if the vector is full of NA's
is_full_na(.)
is_full_na(.)
. |
a vector |
a vector of boolean
is_full_na(c(NA, NA, NA))
is_full_na(c(NA, NA, NA))
is a factor a likert scale
is_likert(vec, lev)
is_likert(vec, lev)
vec |
a factor |
lev |
the scale |
boolean
is_likert(iris$Species,c("setosa","versicolor","virginica")) is_likert(iris$Species,c("setosa","versicolor","virginica","banana")) is_likert(iris$Species,c("setosa","versicolor"))
is_likert(iris$Species,c("setosa","versicolor","virginica")) is_likert(iris$Species,c("setosa","versicolor","virginica","banana")) is_likert(iris$Species,c("setosa","versicolor"))
does this vector contain only 0 and 1
is.01(x)
is.01(x)
x |
a vector |
a boolean
is.01(c(0,1,0,0,1)) is.01(c(0,1,0,0,5))
is.01(c(0,1,0,0,1)) is.01(c(0,1,0,0,5))
does this vector contain only 1 and 2
is.12(x)
is.12(x)
x |
a vector |
a boolean
is.12(c(1,1,2,1,2)) is.12(c(1,1,2,1,5))
is.12(c(1,1,2,1,2)) is.12(c(1,1,2,1,5))
return TRUE if this looks like a number
look_like_a_number(vec)
look_like_a_number(vec)
vec |
a vector |
a boolean
make.unique improvement
make_unique(vec, sep = "_")
make_unique(vec, sep = "_")
vec |
a vector |
sep |
char separator to use |
a vector
make_unique(c("a","a","a","b","a","b","c"))
make_unique(c("a","a","a","b","a","b","c"))
peep some data at one step of a pipeline.
peep(data, ..., printer = print, verbose = FALSE)
peep(data, ..., printer = print, verbose = FALSE)
data |
some data |
... |
function names or expressions that use `.` |
printer |
which function use to print |
verbose |
TRUE to include what is printed |
the input data
if( require(magrittr) ){ # just symbols iris %>% peep(head,tail) %>% summary # expressions with . iris %>% peep(head(., n=2),tail(., n=3) ) %>% summary # or both iris %>% peep(head,tail(., n=3) ) %>% summary # use verbose to see what happens iris %>% peep(head,tail(., n=3), verbose = TRUE) %>% summary }
if( require(magrittr) ){ # just symbols iris %>% peep(head,tail) %>% summary # expressions with . iris %>% peep(head(., n=2),tail(., n=3) ) %>% summary # or both iris %>% peep(head,tail(., n=3) ) %>% summary # use verbose to see what happens iris %>% peep(head,tail(., n=3), verbose = TRUE) %>% summary }
Replace pattern everywhere in a data.frame
replace_pattern(dataset, pattern, replacement, exact = FALSE)
replace_pattern(dataset, pattern, replacement, exact = FALSE)
dataset |
a data.frame |
pattern |
Pattern to look for. |
replacement |
A character of replacements. |
exact |
a boolean; if TRUE, the whole value needs to match |
a data.frame
dataset <- data.frame( col_a = as.factor(letters)[1:7], col_b = letters[1:7], col_c = 1:7, col_d = paste0(letters[1:7], letters[1:7]), stringsAsFactors = FALSE ) # replace pattern replace_pattern(dataset, "a", "XXX-") # With exact matching replace_pattern(dataset, "a", "XXX-", exact = TRUE)
dataset <- data.frame( col_a = as.factor(letters)[1:7], col_b = letters[1:7], col_c = 1:7, col_d = paste0(letters[1:7], letters[1:7]), stringsAsFactors = FALSE ) # replace pattern replace_pattern(dataset, "a", "XXX-") # With exact matching replace_pattern(dataset, "a", "XXX-", exact = TRUE)
export a data.frame to csv
save_as_csv(dataset, path, row.names = FALSE, ...)
save_as_csv(dataset, path, row.names = FALSE, ...)
dataset |
a data.frame |
path |
the path |
row.names |
boolean: do we have to save the row names |
... |
other write.csv parameters |
file name as character
## Not run: iris %>% save_as_csv(file.path(tempdir(),'coucou.csv')) %>% browseURL() ## End(Not run)
## Not run: iris %>% save_as_csv(file.path(tempdir(),'coucou.csv')) %>% browseURL() ## End(Not run)
set a given coltype to each column in a data.frame
set_col_type(dataset, col_type)
set_col_type(dataset, col_type)
dataset |
a data.frame |
col_type |
a character vector containing the class to apply |
a data.frame