SimGBS is a rapid method for simulating Genotyping-By-Sequencing data (GBS). It can be implemented with any genome of choice. Users can modify different parameters to customise GBS settings, such as the choice of restriction enzyme and sequencing depth. By taking the gene-drop approach, users can also specify the demographic history and define population structure (by supplying a pedigree file). Like real sequencers, SimGBS outputs data in FASTQ format.
This document aims to demonstrate how to run SimGBS
in R using the JuliaCall
package.
All Julia commands are executed seamlessly within the R environment,
allowing you to access the full functionality of SimGBS without leaving
R.It mirrors the Julia-based
Colab notebook, but executes all operations through R, delegating
computation to Julia behind the scenes.
# ===============================================================
# JuliaCall requires access to a local Julia installation.
# Below are examples for macOS, Windows, and Linux systems.
# See: https://github.com/JuliaInterop/JuliaCall?tab=readme-ov-file#julia-is-not-found
# ===============================================================
## --- macOS (recommended: Juliaup via Homebrew) ---
# Install Juliaup (official Julia version manager)
# Run these commands in Terminal (not R):
# brew install juliaup
# juliaup default release
# This typically installs Julia under:
# /opt/homebrew/Cellar/juliaup/<version>/bin/
# Verify path with:
# which julia
# Then specify in R:
# julia <- julia_setup(JULIA_HOME = "/opt/homebrew/Cellar/juliaup/1.18.5/bin/")
## --- Windows ---
# Download from: https://julialang.org/downloads/
# During installation, tick "Add Julia to PATH".
# Then specify in R:
# julia <- julia_setup(JULIA_HOME = "C:/Program Files/Julia-1.10.5/bin/")
## --- Linux ---
# Option 1: Install via package manager
# sudo apt update
# sudo apt install julia
#
# Option 2: Manual installation
# wget https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.5-linux-x86_64.tar.gz
# tar -xzf julia-1.10.5-linux-x86_64.tar.gz
# sudo mv julia-1.10.5 /opt/
# sudo ln -s /opt/julia-1.10.5/bin/julia /usr/local/bin/julia
#
# Then specify in R:
# julia <- julia_setup(JULIA_HOME = "/opt/julia-1.10.5/bin/")
## --- Verify connection ---
# After installation, check JuliaCall can see Julia:
# JuliaCall::julia_command("VERSION")
# Expected output: v"1.10.5" or similar
if (!requireNamespace("JuliaCall", quietly = TRUE))
install.packages("JuliaCall", repos = "https://cloud.r-project.org")
library(JuliaCall)
# Initialise JuliaCall and point explicitly to your local Julia binary.
# Replace the path below with the actual location of your Julia executable if different.
# (e.g. on macOS installed via Juliaup: "/opt/homebrew/Cellar/juliaup/1.18.5/bin/")
# For troubleshooting "Julia is not found" errors, see:
# https://github.com/JuliaInterop/JuliaCall?tab=readme-ov-file#julia-is-not-found
suppressWarnings(try(JuliaCall::julia_stop(), silent = TRUE))
julia <- julia_setup(JULIA_HOME = "/opt/homebrew/Cellar/juliaup/1.18.5/bin/")
# Confirm Julia version and executable path
julia_command('println("Julia version: ", VERSION)')
# Try the registered release first; fall back to GitHub if needed
julia_command('
begin
import Pkg
try
Pkg.add("SimGBS")
catch
Pkg.add(url = "https://github.com/kanji709/SimGBS.jl")
end
using SimGBS
println("✔ SimGBS loaded successfully")
end
')
julia_eval('
println((
digestGenome = isdefined(SimGBS, :digestGenome),
definePopulation = isdefined(SimGBS, :definePopulation),
GBS = isdefined(SimGBS, :GBS),
ApeKI = isdefined(SimGBS, :ApeKI)
))
')
## NULL
julia_command('
begin
using Downloads
cd(pwd())
Downloads.download("https://github.com/kanji709/SimGBS.jl/raw/master/example/ref.fa.gz", "ref.fa.gz")
Downloads.download("https://github.com/kanji709/SimGBS.jl/raw/master/example/GBS_Barcodes.txt", "GBS_Barcodes.txt")
println("✔ Example data downloaded: ", readdir())
end
')
julia_command('
begin
using SimGBS
# ---------- Step 1 : Generate GBS Fragments ----------
genofile = "ref.fa.gz"
re = [SimGBS.ApeKI]
useChr = [1]
useChrLen = Float64[]
lower = 65
upper = 195
winSize = 1_000_000
# ---------- Step 2 : Define Population Structure ----------
numFounders = 100
endSize = 1000
numGenCha = 20
numGenCon = 50
numGenFinal = 4
numInd = 96
useWeights = Float64[]
usePedigree = false
pedFile = "newPed.ped"
pedOutput = false
# ---------- Step 3 : Simulate GBS Process ----------
totalQTL = 100
totalSNP = 0
muDensity = -2.0
sigmasqDensity = 0.001
muAlleleFreq = 0.5
sigmasqAlleleFreq = 0.0061
meanDepth = 20.0
barcodeFile = "GBS_Barcodes.txt"
plotOutput = true # set to FALSE if GR backend fails
writeOutput = true
onlyOutputGBS = true
println("Running SimGBS example ...")
@time digestGenome(genofile, re, useChr, useChrLen, lower, upper, winSize, plotOutput, writeOutput)
@time definePopulation(numFounders, endSize, numGenCha, numGenCon, numGenFinal,
numInd, useWeights, usePedigree, pedFile, pedOutput)
@time GBS(totalQTL, totalSNP, muDensity, sigmasqDensity, winSize,
muAlleleFreq, sigmasqAlleleFreq, re, meanDepth,
barcodeFile, useChr, plotOutput, writeOutput, onlyOutputGBS)
println("✔ SimGBS run completed.")
end
')
list.files(pattern = "txt|csv|gz|pdf|png")
## [1] "ABC12AAXX_1_fastq.txt.gz" "GBS_Barcodes.txt"
## [3] "GBSCoverage.png" "GBSCoverage.txt"
## [5] "GBSFrag.txt" "GBSFragSize.png"
## [7] "keyFile_ABC12AAXX_1.txt" "qtlAF.png"
## [9] "qtlGeno.txt" "qtlInfo.txt"
## [11] "RawFrag.txt" "RawFragSize.png"
## [13] "readDepth.txt" "ref.fa.gz"
## [15] "shortHap.txt" "snpAF.png"
## [17] "snpDepth.txt" "snpFragGBS.txt"
## [19] "snpGeno.txt" "snpInfo.txt"
## [21] "snpPerTag.png"
sessionInfo()
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.0.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Australia/Sydney
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] JuliaCall_0.17.6
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.37 R6_2.6.1 fastmap_1.2.0 xfun_0.52
## [5] cachem_1.1.0 knitr_1.50 htmltools_0.5.8.1 rmarkdown_2.29
## [9] lifecycle_1.0.4 cli_3.6.5 sass_0.4.10 jquerylib_0.1.4
## [13] compiler_4.5.0 rstudioapi_0.17.1 tools_4.5.0 evaluate_1.0.4
## [17] bslib_0.9.0 Rcpp_1.1.0 yaml_2.3.10 rlang_1.1.6
## [21] jsonlite_2.0.0