# Expense claims submitted by staff (sheet 1)
exp_claim_raw <- readxl::read_excel("isca_cpe_2023/1. Anomalies in Payroll data.xlsx",
                                    sheet = 1,
                                    range = "A1:G33") %>%
  janitor::clean_names()

# HR master data (sheet 2)
hr_data_raw <- readxl::read_excel("isca_cpe_2023/1. Anomalies in Payroll data.xlsx",
                                  sheet = 2) %>%
  janitor::clean_names()

# Payroll payments (sheet 3)
pay_data_raw <- readxl::read_excel("isca_cpe_2023/1. Anomalies in Payroll data.xlsx",
                                   sheet = 3,
                                   skip = 2, range = "A3:D25") %>%
  janitor::clean_names()

# Combine the three data sets on staff_id
df_comb <- exp_claim_raw %>%
  full_join(hr_data_raw, by = c('staff_id' = 'staff_id')) %>%
  left_join(pay_data_raw, by = c('staff_id' = 'staff_id'))
7 Audit
To my understanding, an audit comprises the tools and the work stipulated by the Standards. Audit Data Analytics (ADA) replaces Excel-based tools with R/Python to improve efficiency and effectiveness; it does not necessarily reduce the audit work required by ISCA. The following example audits expense claims using data from the payroll, HR and finance departments, and illustrates why ADA is a worthwhile move for auditors from several perspectives.
Compared to Excel-based tools, R makes it straightforward to test audit assertions (e.g., occurrence, existence, completeness, cut-off, valuation, classification) once the data have been reconciled across the P2P, O2C, Payroll, R2R and GL cycles; a short sketch of such a test follows the list below.
1. benefits: version control with diffs, lightweight file sizes, no trouble handling more than 1m rows, and automation via scripts.
2. pattern recognition: spotting deviations and inconsistencies.
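As an illustration of the assertion point above, a completeness-style cross-check takes only a few lines once the data sets share a key. This is just a sketch using the staff_id key from the import code above, not one of the prescribed procedures.

# completeness sketch: staff paid in the payroll listing should exist in the HR master data, and vice versa
pay_data_raw %>%
  dplyr::anti_join(hr_data_raw, by = "staff_id")

hr_data_raw %>%
  dplyr::anti_join(pay_data_raw, by = "staff_id")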
It also addresses common mistakes throughout the audit process. For instance:
1. version control: which version of the PBC data is the latest?
2. reproducibility: my result differs from yours after a rerun.
3. reporting: do the numbers in the working papers tally with those in the financial statements? (see the sketch after this list)
4. automation: rolling out next year's audit work by copy and paste.
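On point 3, the tie-out itself can be scripted so that it reruns on every data refresh. A minimal sketch, where fs_payroll_expense is a made-up placeholder for the figure reported in the financial statements:

# tie the working-paper total to the financial statement figure (placeholder value)
fs_payroll_expense <- 250000
wp_total <- sum(df_comb$amount_s.x, na.rm = TRUE)
all.equal(wp_total, fs_payroll_expense)  # TRUE, or a message showing the difference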
7.1 Cleaning
# parse date columns and fill in missing staff names
df_clean <- df_comb %>%
  mutate(across(contains("date"), lubridate::dmy)) %>%
  mutate(on_leave = lubridate::dmy(on_leave)) %>%
  mutate(staff_name = coalesce(staff_name, name.x))
# check if amount is correct
sum(df_clean$amount_s.x, na.rm = TRUE)
df_clean %>%
  distinct(staff_id, amount_s.y) %>%
  summarise(app_c = sum(amount_s.y, na.rm = TRUE))
# export the combined and cleaned data, open the workbook, then read it back in
sheets <- list("comb" = df_comb, "clean" = df_clean)
writexl::write_xlsx(sheets, here::here(paste0('audit_sit/audit_payroll', Sys.Date(), '.xlsx')))
openxlsx::openXL(here::here("audit_sit/audit_payroll2023-12-22.xlsx"))

df_clean <- readxl::read_excel(here::here("audit_sit/audit_payroll2023-12-22.xlsx")) %>%
  mutate(across(c(contains("date"), on_leave), lubridate::dmy))
7.2 Procedure
# cross check payroll amount against finance amount
df_clean %>%
  group_by(staff_id, staff_name) %>%
  summarise(amt_exp = sum(amount_s.x),
            amt_paid = sum(amount_s.y) / n(),
            amt_diff = amt_exp - amt_paid,
            .groups = 'drop')
# compare dates to ensure no claim happens before the expense was incurred or after the staff member resigned
df_clean %>%
  dplyr::filter(claim_date > expense_date)

df_clean %>%
  dplyr::filter(claim_date > last_date | claim_date == on_leave)
# identify multiple claims for the same expense
df_clean %>%
  group_by(staff_id, staff_name, purpose, amount_s.x) %>%
  dplyr::filter(n() > 1)
# ensure staff names and bank account numbers are updated in a timely manner
df_clean %>%
  dplyr::filter(!is.na(edits_to_hr_data),
                bank_account_no.x == bank_account_no.y)

df_clean %>%
  dplyr::filter(name.x != name.y)
# produce an audit working paper with pointblank
library(pointblank)

ag <- df_clean %>%
  create_agent(label = "A very *simple* example.", tbl_name = "payroll") %>%
  col_vals_between(columns = claim_date, left = vars(expense_date), right = vars(last_date)) %>%
  interrogate()

ag
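The interrogation report above can also be saved alongside the other output files as evidence. A small sketch, assuming pointblank's export_report() helper; the file name is illustrative.

# save the validation report as a standalone HTML working paper (file name illustrative)
export_report(ag, filename = paste0("wp_payroll_", Sys.Date(), ".html"), path = "audit_sit")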
7.3 Enhanced
# count claim rows per staff member, most frequent first
df_clean %>%
  count(staff_name, sort = TRUE)
# strip digits from purpose, recode the "AB99" placeholder to NA, and default missing staff_id to 0
df_clean %>%
  dplyr::filter(grepl("\\d+?", purpose)) %>%
  mutate(purpose = gsub("\\d+?", "", purpose)) %>%
  mutate(across(where(is.character), ~na_if(., "AB99"))) %>%
  mutate(staff_id = replace_na(staff_id, 0))
# flag taxi claims, bucket claim dates by week, and keep the latest claims
df_clean %>%
  select(contains("date"), purpose) %>%
  mutate(if_taxi = case_when(str_detect(purpose, "Taxi") ~ "taxi",
                             TRUE ~ "other"),
         total_date = lubridate::floor_date(claim_date, "week"),
         first_date = first(total_date)) %>%
  slice_max(order_by = claim_date, n = 3)
# bucket claim amounts into 100-dollar bands and average them, split at 300
df_clean %>%
  dplyr::filter(!is.na(amount_s.x)) %>%
  mutate(new = (amount_s.x %/% 100) * 100) %>%
  group_by(new, amount_s.x > 300) %>%
  summarise(new1 = mean(amount_s.x), .groups = 'drop')
# nest rows per staff member, pull the fourth nested column and collapse it into one string
df_clean %>%
  dplyr::filter(!is.na(staff_name)) %>%
  group_nest(staff_id, staff_name) %>%
  mutate(new = map(data, ~pluck(.x, 4))) %>%
  mutate(new1 = map(new, ~paste(.x, collapse = '|'))) %>%
  select(-data, -new) %>%
  unnest(new1)
# the same collapse per staff member, using summarise() with .by instead of nesting
df_clean %>%
  dplyr::filter(!is.na(staff_name)) %>%
  select(staff_id, staff_name, purpose) %>%
  summarise(new1 = paste(purpose, collapse = '|'), .by = c(staff_id, staff_name))
# split purpose into an expense type and detail, then total amounts by division and type
df_clean %>%
  select(staff_id, staff_name, division, purpose, amount_s.x) %>%
  dplyr::filter(!is.na(purpose)) %>%
  separate(purpose, into = c("type", "info"),
           extra = 'merge', remove = FALSE, fill = 'right') %>%
  group_by(division, type) %>%
  summarise(n = n(),
            amt_type = sum(amount_s.x), .groups = 'drop') %>%
  arrange(-amt_type)
library(lubridate)

# reshape all date columns into long form; is.Date() comes from lubridate
df_clean %>%
  pivot_longer(cols = where(is.Date),
               names_to = 'activity_date',
               values_to = 'detail_date',
               names_pattern = "(.*)_.*",
               names_transform = list(activity_date = toupper))