diff --git a/Scripts/pgs_methods/dbslmm.R b/Scripts/pgs_methods/dbslmm.R
index 2df25cc0..7943d4e7 100644
--- a/Scripts/pgs_methods/dbslmm.R
+++ b/Scripts/pgs_methods/dbslmm.R
@@ -167,10 +167,10 @@ if(any(ldsc_h2*opt$h2f > 1)){
 # Save in plink1 format for DBSLMM
 if(!is.null(opt$ref_keep)){
   log_add(log_file = log_file, message = 'ref_keep used to subset reference genotype data.')
-  plink_subset(pfile = opt$ref_plink_chr, make_bed = T, out = paste0(tmp_dir,'/ref_subset_chr'), plink2 = opt$plink2, chr = CHROMS, keep = opt$ref_keep, memory = opt$memory)
+  plink_subset(pfile = opt$ref_plink_chr, make_bed = T, out = paste0(tmp_dir,'/ref_subset_chr'), extract = gwas$SNP, plink2 = opt$plink2, chr = CHROMS, keep = opt$ref_keep, memory = opt$memory)
   opt$ref_plink_chr_subset<-paste0(tmp_dir,'/ref_subset_chr')
 } else {
-  plink_subset(pfile = opt$ref_plink_chr, make_bed = T, out = paste0(tmp_dir,'/ref_subset_chr'), plink2 = opt$plink2, chr = CHROMS, memory = opt$memory)
+  plink_subset(pfile = opt$ref_plink_chr, make_bed = T, out = paste0(tmp_dir,'/ref_subset_chr'), extract = gwas$SNP, plink2 = opt$plink2, chr = CHROMS, memory = opt$memory)
   opt$ref_plink_chr_subset<-paste0(tmp_dir,'/ref_subset_chr')
 }
 
diff --git a/Scripts/pgs_methods/lassosum.R b/Scripts/pgs_methods/lassosum.R
index 99062bc7..dfcc7c51 100644
--- a/Scripts/pgs_methods/lassosum.R
+++ b/Scripts/pgs_methods/lassosum.R
@@ -18,8 +18,10 @@ option_list = list(
 	    help="Path PLINK v2 software binary [optional]"),
 	make_option("--output", action="store", default=NULL, type='character',
 			help="Path for output files [required]"),
-  make_option("--n_cores", action="store", default=1, type='numeric',
-	    help="Number of cores to use [optional]"),
+	make_option("--n_cores", action="store", default=1, type='numeric',
+      help="Number of cores to use [optional]"),
+	make_option("--pseudo_only", action="store", default=F, type='logical',
+      help="Logical indicating whether only pseudovalidated model should be output [optional]"),
 	make_option("--test", action="store", default=NA, type='character',
 	    help="Specify number of SNPs to include [optional]"),
 	make_option("--sumstats", action="store", default=NULL, type='character',
@@ -143,7 +145,7 @@ out <- lassosum.pipeline(
 # Change working directory back to the original
 setwd(orig_wd)
 
-# Write out a score file
+# Format score file
 score_file <- data.table(SNP = gwas$SNP[out$sumstats$order], out$sumstats[c('A1', 'A2')])
 
 for(i in 1:length(out$s)){
@@ -154,21 +156,6 @@ for(i in 1:length(out$s)){
   }
 }
 
-# Flip effects to match reference alleles
-ref <- read_pvar(opt$ref_plink_chr, chr = CHROMS)[, c('SNP','A1','A2'), with=F]
-score_new <- map_score(ref = ref, score = score_file)
-
-# Reduce number of significant figures to save space
-score_new[, (4:ncol(score_new)) := lapply(.SD, signif, digits = 7), .SDcols = 4:ncol(score_new)]
-
-fwrite(score_new, paste0(opt$output,'.score'), col.names=T, sep=' ', quote=F)
-
-if(file.exists(paste0(opt$output,'.score.gz'))){
-  system(paste0('rm ',opt$output,'.score.gz'))
-}
-
-system(paste0('gzip ',opt$output,'.score'))
-
 #####
 # Perform pseudovalidation
 #####
@@ -190,7 +177,26 @@ log_add(log_file = log_file, message = c(
   paste0('s = ', out2$s),
   paste0('lambda = ', out2$lambda),
   paste0('value = ', v$validation.table$value[v$validation.table$lambda == v$best.lambda & v$validation.table$s == v$best.s])
-  ))
+))
+
+if(opt$pseudo_only){
+  score_file <- score_file[, c('SNP','A1','A2',paste0('SCORE_s', out2$s, '_lambda', out2$lambda)), with=F]
+}
+
+# Flip effects to match reference alleles
+ref <- read_pvar(opt$ref_plink_chr, chr = CHROMS)[, c('SNP','A1','A2'), with=F]
+score_new <- map_score(ref = ref, score = score_file)
+
+# Reduce number of significant figures to save space
+score_new[, (4:ncol(score_new)) := lapply(.SD, signif, digits = 7), .SDcols = 4:ncol(score_new)]
+
+fwrite(score_new, paste0(opt$output,'.score'), col.names=T, sep=' ', quote=F)
+
+if(file.exists(paste0(opt$output,'.score.gz'))){
+  system(paste0('rm ',opt$output,'.score.gz'))
+}
+
+system(paste0('gzip ',opt$output,'.score'))
 
 # Record end time of test
 if(!is.na(opt$test)){
diff --git a/Scripts/pgs_methods/lassosum2.R b/Scripts/pgs_methods/lassosum2.R
new file mode 100644
index 00000000..2c036b44
--- /dev/null
+++ b/Scripts/pgs_methods/lassosum2.R
@@ -0,0 +1,249 @@
+#!/usr/bin/Rscript
+# This script was written by Oliver Pain whilst at King's College London University.
+start.time <- Sys.time()
+library("optparse")
+
+option_list = list(
+  make_option("--ref_plink_chr", action="store", default=NULL, type='character',
+      help="Path to per chromosome reference PLINK files [required]"),
+  make_option("--ref_pcs", action="store", default=NULL, type='character',
+      help="Reference PCs for continuous ancestry correction [optional]"),
+  make_option("--ldpred2_ref_dir", action="store", default=NULL, type='character',
+      help="Path to directory containing LDpred2 reference data [required]"),
+  make_option("--pop_data", action="store", default=NULL, type='character',
+      help="File containing the population code and location of the keep file [required]"),
+  make_option("--plink2", action="store", default='plink2', type='character',
+      help="Path PLINK v2 software binary [optional]"),
+  make_option("--output", action="store", default=NULL, type='character',
+      help="Path for output files [required]"),
+  make_option("--n_cores", action="store", default=1, type='numeric',
+      help="Number of cores for parallel computing [optional]"),
+  make_option("--sample_prev", action="store", default=NULL, type='numeric',
+      help="Sampling ratio in GWAS [optional]"),
+  make_option("--test", action="store", default=NA, type='character',
+      help="Specify number of SNPs to include [optional]"),
+  make_option("--binary", action="store", default=F, type='logical',
+      help="Specify T if GWAS phenotype is binary [optional]"),
+  make_option("--seed", action="store", default=1, type='numeric',
+      help="Set seed to ensure reproducibility  [optional]"),
+  make_option("--sumstats", action="store", default=NULL, type='character',
+      help="GWAS summary statistics [required]")
+)
+
+opt = parse_args(OptionParser(option_list = option_list))
+
+# Load dependencies
+library(GenoUtils)
+library(data.table)
+source('../functions/misc.R')
+source_all('../functions')
+library(bigsnpr)
+library(ggplot2)
+
+# Check required inputs
+if(is.null(opt$ref_plink_chr)){
+  stop('--ref_plink_chr must be specified.\n')
+}
+if(is.null(opt$ldpred2_ref_dir)){
+  stop('--ldpred2_ref_dir must be specified.\n')
+}
+if(is.null(opt$sumstats)){
+  stop('--sumstats must be specified.\n')
+}
+if(is.null(opt$pop_data)){
+  stop('--pop_data must be specified.\n')
+}
+if(is.null(opt$output)){
+  stop('--output must be specified.\n')
+}
+
+# Create output directory
+opt$output_dir <- paste0(dirname(opt$output),'/')
+system(paste0('mkdir -p ',opt$output_dir))
+
+# Create temp directory
+tmp_dir<-tempdir()
+
+# Initiate log file
+log_file <- paste0(opt$output,'.log')
+log_header(log_file = log_file, opt = opt, script = 'lassosum2.R', start.time = start.time)
+
+# If testing, change CHROMS to chr value
+if(!is.na(opt$test) && opt$test == 'NA'){
+  opt$test<-NA
+}
+if(!is.na(opt$test)){
+  CHROMS <- as.numeric(gsub('chr','',opt$test))
+}
+
+# Coerce --binary to a logical scalar if it did not parse as logical ('T' -> TRUE, otherwise FALSE)
+if(!is.logical(opt$binary)){
+  opt$binary <- opt$binary == 'T'
+}
+
+if(opt$binary && is.null(opt$sample_prev)){  # scalar condition, so use short-circuit &&
+  stop('--sample_prev must be specified when --binary T.\n')
+}
+
+#####
+# Read in sumstats
+#####
+
+log_add(log_file = log_file, message = 'Reading in GWAS.')
+
+# Read in, check and format GWAS summary statistics
+sumstats <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','BP','A1','A2','BETA','SE','N','P'))
+
+# Update header for bigsnpr
+names(sumstats)<-c('chr','rsid','pos','a1','a0','beta','beta_se','n_eff','p')
+
+# In binary, update N to be effective N based on opt$sample_prev
+if(opt$binary){
+  ncas<-sumstats$n_eff*opt$sample_prev
+  ncon<-sumstats$n_eff*(1-opt$sample_prev)
+  sumstats$n_eff<-4 / (1/ncas + 1/ncon)
+  log_add(log_file = log_file, message = paste0('Median effective N = ', median(sumstats$n_eff)))
+}
+
+# Record start time for test
+if(!is.na(opt$test)){
+  test_start.time <- test_start(log_file = log_file)
+}
+
+# Harmonise with the LDpred2 reference
+map<-readRDS(paste0(opt$ldpred2_ref_dir, '/map.rds'))
+names(map)[names(map) == 'af_UKBB']<-'af'
+map<-map[, c('chr', 'pos', 'a0', 'a1', 'af', 'ld')]
+info_snp <- snp_match(sumstats, map)
+
+#####
+# Perform additional suggested QC for LDpred2
+#####
+
+# Remove SDss < 0.5 * SDval or SDss > 0.1 + SDval or SDss < 0.1 or SDval < 0.05
+sd_val <- with(info_snp, sqrt(2 * af * (1 - af)))
+
+if(opt$binary == F){
+  sd_y_est = median(sd_val * info_snp$beta_se * sqrt(info_snp$n_eff))
+  sd_ss = with(info_snp, sd_y_est / sqrt(n_eff * beta_se^2))
+} else {
+  sd_ss <- with(info_snp, 2 / sqrt(n_eff * beta_se^2))
+}
+
+is_bad <-sd_ss < (0.5 * sd_val) | sd_ss > (sd_val + 0.1) | sd_ss < 0.1 | sd_val < 0.05
+
+png(paste0(opt$output_dir,'/LDpred2_sd_qc.png'), res=300, unit='px',height=2000, width=2000)
+  plot_obj <- qplot(sd_val, sd_ss, color = is_bad) +
+    theme_bigstatsr() +
+    coord_equal() +
+    scale_color_viridis_d(direction = -1) +
+    geom_abline(linetype = 2, color = "red") +
+    labs(x = "Standard deviations in the validation set",
+        y = "Standard deviations derived from the summary statistics",
+        color = "Removed?")
+  print(plot_obj)
+dev.off()
+
+sumstats<-info_snp[!is_bad, ]
+
+log_add(log_file = log_file, message = paste0('Sumstats contains ', nrow(sumstats),' variants after additional genotype SD check.'))
+
+# If more than half the variants have a discordant SD then the N is probably inaccurate.
+# Abort (rather than continue) so the sample size in the sumstats can be checked.
+if(sum(is_bad) > (length(is_bad)*0.5)){
+  log_add(log_file = log_file, message = paste0('>50% of variants had a discordant SD.'))
+  stop('>50% of variants had a discordant SD. Check the sample size in the sumstats.')
+}
+
+#####
+# Prepare LD reference data
+#####
+
+log_add(log_file = log_file, message = 'Creating genome-wide sparse matrix.')
+
+# Create genome-wide sparse LD matrix, restricted to chromosomes with variants remaining in sumstats
+chroms_present <- CHROMS[CHROMS %in% sumstats$chr]
+if (length(chroms_present) == 0) stop('No variants remain in sumstats for the requested chromosomes.')
+for (chr in chroms_present) {
+  ## indices in 'sumstats'
+  ind.chr <- which(sumstats$chr == chr)
+  ## indices in 'map', then positions within this chromosome's LD matrix ('corr_chr')
+  ind.chr2 <- sumstats$`_NUM_ID_`[ind.chr]
+  ind.chr3 <- match(ind.chr2, which(map$chr == chr))
+
+  corr0 <- readRDS(paste0(opt$ldpred2_ref_dir, '/LD_with_blocks_chr', chr, '.rds'))[ind.chr3, ind.chr3]
+  if (chr == chroms_present[1]) {
+    corr <- as_SFBM(corr0, paste0(tmp_dir, '/LD_GW_sparse'), compact = TRUE)
+  } else {
+    corr$add_columns(corr0, nrow(corr))
+  }
+}
+
+#####
+# Run lassosum2
+#####
+
+log_add(log_file = log_file, message = 'Running lassosum2.')
+
+# Set seed to ensure reproducibility
+set.seed(opt$seed)
+
+# lassosum2
+beta_df <- snp_lassosum2(
+  corr,
+  sumstats,
+  ncores = opt$n_cores
+)
+
+####
+# Create score file
+####
+
+# Convert matrix to data.table with column names
+beta_dt <- as.data.table(beta_df)
+grid <- attr(beta_df, "grid_param")
+new_names <- paste0("s", grid$delta, "_lambda", grid$lambda)
+setnames(beta_dt, new_names)
+
+betas <- data.table(SNP=sumstats$rsid, A1=sumstats$a1, A2=sumstats$a0, beta_dt)
+
+rem<-NULL
+for(i in 4:length(names(betas))){
+  if(is.infinite(sum(betas[[names(betas)[i]]])) | is.na(sum(betas[[names(betas)[i]]]))){
+    log_add(log_file = log_file, message = paste0('Skipping ',names(betas)[i],' due to presence of non-finite values.'))
+    rem<-c(rem,i)
+  }
+}
+
+if(is.null(rem) == F){
+  betas<-betas[, -rem, with=F]
+}
+
+names(betas)[-1:-3] <- paste0('SCORE_', names(betas)[-1:-3])
+
+# Flip effects to match reference alleles
+ref <- read_pvar(opt$ref_plink_chr, chr = CHROMS)[, c('SNP','A1','A2'), with=F]
+score_new <- map_score(ref = ref, score = betas)
+
+# Reduce number of significant figures to save space
+score_new[, (4:ncol(score_new)) := lapply(.SD, signif, digits = 7), .SDcols = 4:ncol(score_new)]
+
+fwrite(score_new, paste0(opt$output,'.score'), col.names=T, sep=' ', quote=F)
+
+if(file.exists(paste0(opt$output,'.score.gz'))){
+  system(paste0('rm ',opt$output,'.score.gz'))
+}
+
+system(paste0('gzip ',opt$output,'.score'))
+
+# Record end time of test
+if(!is.na(opt$test)){
+  test_finish(log_file = log_file, test_start.time = test_start.time)
+}
+
+end.time <- Sys.time()
+time.taken <- end.time - start.time
+sink(file = log_file, append = T)
+cat('Analysis finished at', as.character(end.time),'\n')
+cat('Analysis duration was', as.character(round(time.taken,2)), attr(time.taken, 'units'), '\n')
+sink()
diff --git a/Scripts/pgs_methods/ldpred2.R b/Scripts/pgs_methods/ldpred2.R
index e57ef353..e168078d 100644
--- a/Scripts/pgs_methods/ldpred2.R
+++ b/Scripts/pgs_methods/ldpred2.R
@@ -105,6 +105,7 @@ log_add(log_file = log_file, message = 'Reading in GWAS.')
 
 # Read in, check and format GWAS summary statistics
 sumstats <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','BP','A1','A2','BETA','SE','N','P'))
+GWAS_CHROMS<-unique(sumstats$CHR)
 
 # Update header for bigsnpr
 names(sumstats)<-c('chr','rsid','pos','a1','a0','beta','beta_se','n_eff','p')
@@ -183,7 +184,7 @@ if(ldsc[["h2"]] < 0.05){
 log_add(log_file = log_file, message = 'Creating genome-wide sparse matrix.')
 
 # Create genome-wide sparse LD matrix
-for (chr in CHROMS) {
+for (chr in GWAS_CHROMS) {
   ## indices in 'sumstats'
   ind.chr <- which(sumstats$chr == chr)
   ## indices in 'map'
diff --git a/Scripts/pgs_methods/megaprs.R b/Scripts/pgs_methods/megaprs.R
index 2cb84590..7539e6a6 100644
--- a/Scripts/pgs_methods/megaprs.R
+++ b/Scripts/pgs_methods/megaprs.R
@@ -30,6 +30,8 @@ option_list = list(
               help="Path to ldak tagging data [required]"),
   make_option("--ldak_highld", action="store", default=NULL, type='character',
               help="Path to ldak highld data [required]"),
+  make_option("--pseudo_only", action="store", default=F, type='logical',
+              help="Logical indicating whether only pseudovalidated model should be output [optional]"),
   make_option("--n_cores", action="store", default=1, type='numeric',
               help="Number of cores for parallel computing [optional]"),
   make_option("--prs_model", action="store", default='mega', type='character',
@@ -99,9 +101,10 @@ log_add(log_file = log_file, message = 'Reading in GWAS.')
 
 # Read in, check and format GWAS summary statistics
 gwas <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('CHR','BP','SNP','A1','A2','BETA','SE','N','FREQ','REF.FREQ'))
+GWAS_CHROMS<-unique(gwas$CHR)
 
 # Check allele frequency difference
-ref_psam<-fread(paste0(opt$ref_plink_chr, CHROMS[1],'.psam'))
+ref_psam<-fread(paste0(opt$ref_plink_chr, GWAS_CHROMS[1],'.psam'))
 names(ref_psam)<-gsub('\\#', '', names(ref_psam))
 
 if(!is.null(opt$ref_keep)){
@@ -131,7 +134,7 @@ fwrite(gwas, paste0(tmp_dir,'/GWAS_sumstats_temp.txt'), sep=' ')
 log_add(log_file = log_file, message = 'Merging per chromosome reference data.')
 
 # Save in plink1 format for MegaPRS
-plink_merge(pfile = opt$ref_plink_chr, chr = CHROMS, plink2 = opt$plink2, keep = opt$ref_keep, extract = snplist, make_bed =T, out = paste0(tmp_dir, '/ref_merge'))
+plink_merge(pfile = opt$ref_plink_chr, chr = GWAS_CHROMS, plink2 = opt$plink2, keep = opt$ref_keep, extract = snplist, make_bed =T, out = paste0(tmp_dir, '/ref_merge'))
 
 # Record start time for test
 if(!is.na(opt$test)){
@@ -168,10 +171,10 @@ system(paste0('cp ', opt$ldak_tag, '/* ', tmp_dir, '/bld/'))
 system(paste0('mv ', tmp_dir, '/sections/weights.short ', tmp_dir,'/bld/bld65'))
 
 # Calculate taggings
-if(length(CHROMS) != 1){
+if(length(GWAS_CHROMS) != 1){
   system(paste0(opt$ldak, ' --calc-tagging ', tmp_dir, '/bld.ldak --bfile ', tmp_dir, '/ref_merge --ignore-weights YES --power -.25 --annotation-number 65 --annotation-prefix ', tmp_dir, '/bld/bld --window-cm 1 --save-matrix YES --max-threads ', opt$n_cores))
 } else {
-  system(paste0(opt$ldak, ' --calc-tagging ', tmp_dir, '/bld.ldak --bfile ', tmp_dir, '/ref_merge --ignore-weights YES --power -.25 --annotation-number 65 --annotation-prefix ', tmp_dir, '/bld/bld --window-cm 1 --chr ', CHROMS, ' --save-matrix YES --max-threads ', opt$n_cores))
+  system(paste0(opt$ldak, ' --calc-tagging ', tmp_dir, '/bld.ldak --bfile ', tmp_dir, '/ref_merge --ignore-weights YES --power -.25 --annotation-number 65 --annotation-prefix ', tmp_dir, '/bld/bld --window-cm 1 --chr ', GWAS_CHROMS, ' --save-matrix YES --max-threads ', opt$n_cores))
 }
 
 # Calculate Per-Predictor Heritabilities.
@@ -192,7 +195,7 @@ log_add(log_file = log_file, message = 'Running using full reference.')
 
 # Calculate predictor-predictor correlations
 log_add(log_file = log_file, message = 'Calculating predictor-predictor correlations.')
-full_cors <- ldak_pred_cor(bfile = paste0(tmp_dir, '/ref_merge'), ldak = opt$ldak, n_cores = opt$n_cores, chr = CHROMS)
+full_cors <- ldak_pred_cor(bfile = paste0(tmp_dir, '/ref_merge'), ldak = opt$ldak, n_cores = opt$n_cores, chr = GWAS_CHROMS)
 
 # Run MegaPRS
 log_add(log_file = log_file, message = paste0('Running MegaPRS: ',opt$prs_model,' model.'))
@@ -217,7 +220,7 @@ system(paste0(opt$ldak, ' --pseudo-summaries ', tmp_dir, '/GWAS_sumstats_temp.ps
 
 # Calculate predictor-predictor correlations
 log_add(log_file = log_file, message = 'Calculating predictor-predictor correlations.')
-subset_cors <- ldak_pred_cor(bfile = paste0(tmp_dir, '/ref_merge'), keep = paste0(tmp_dir, '/keepb'), ldak = opt$ldak, n_cores = opt$n_cores, chr = CHROMS)
+subset_cors <- ldak_pred_cor(bfile = paste0(tmp_dir, '/ref_merge'), keep = paste0(tmp_dir, '/keepb'), ldak = opt$ldak, n_cores = opt$n_cores, chr = GWAS_CHROMS)
 
 # Run megaPRS
 log_add(log_file = log_file, message = paste0('Running MegaPRS: ',opt$prs_model,' model.'))
@@ -255,6 +258,13 @@ ref_pvar <- read_pvar(dat = opt$ref_plink_chr, chr = CHROMS)
 ref_pvar$Predictor<-paste0(ref_pvar$CHR,':',ref_pvar$BP)
 score<-merge(score, ref_pvar[,c('Predictor','SNP'), with=F], by='Predictor')
 score<-score[, c('SNP', 'A1', 'A2', names(score)[grepl('Model', names(score))]), with=F]
+
+# Keep only the pseudovalidated model when --pseudo_only is set.
+# NOTE(review): best_score is not defined in this hunk - presumably set during pseudovalidation earlier; confirm.
+if(opt$pseudo_only){
+  score <- score[,c('SNP','A1','A2', paste0('Model', gsub('Score_','',best_score$V1[1]))), with = F]
+}
+
 names(score)[grepl('Model', names(score))]<-paste0('SCORE_ldak_',names(score)[grepl('Model', names(score))])
 
 # Flip effects to match reference alleles
diff --git a/Scripts/pgs_methods/prscs.R b/Scripts/pgs_methods/prscs.R
index c77bccdf..62c3f44b 100644
--- a/Scripts/pgs_methods/prscs.R
+++ b/Scripts/pgs_methods/prscs.R
@@ -97,14 +97,27 @@ if(!is.na(opt$test)){
 log_add(log_file = log_file, message = 'Reading in GWAS.')
 
 # Read in, check and format GWAS summary statistics
-gwas <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','BETA','SE','N'))
+gwas <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','BETA','SE','N'))
+
+# Subset CHROMS object to chr present
+gwas_CHROMS <- unique(gwas$CHR)
+gwas$CHR <- NULL
 
 # Store average sample size
 gwas_N <- round(mean(gwas$N), 0)
 
 fwrite(gwas, paste0(tmp_dir, '/GWAS_sumstats_temp.txt'), sep=' ')
 
+# Create temporary reference bim files for PRS-CS to match to (only chromosomes present in the GWAS)
+pvar <- read_pvar(opt$ref_plink_chr, chr = gwas_CHROMS)
+pvar <- pvar[pvar$SNP %in% gwas$SNP,]
+pvar$POS<-0
+for(i in gwas_CHROMS){
+  write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F)
+}
+
 rm(gwas)
+rm(pvar)
 gc()
 
 # Record start time for test
@@ -116,19 +129,9 @@ if(!is.na(opt$test)){
 # Process sumstats using PRSsc
 #####
 
-# Create a temporary reference bim files for PRS-CS to match to
-pvar <- read_pvar(opt$ref_plink_chr, chr = CHROMS)
-pvar$POS<-0
-for(i in CHROMS){
-  write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F)
-}
-
-rm(pvar)
-gc()
-
 # Make a data.frame listing chromosome and phi combinations
 jobs<-NULL
-for(i in CHROMS){
+for(i in gwas_CHROMS){
   jobs<-rbind(jobs, data.frame(CHR=i, phi=phi_param))
 }
 
@@ -165,7 +168,7 @@ log <- foreach(i = 1:nrow(jobs), .combine = c, .options.multicore = list(presche
 score_all<-NULL
 for(phi_i in phi_param){
   score_phi<-NULL
-  for(i in CHROMS){
+  for(i in gwas_CHROMS){
     score_phi_i<-fread(paste0(tmp_dir,'/_pst_eff_a1_b0.5_phi',phi_i,'_chr',i,'.txt'))
     score_phi<-rbind(score_phi, score_phi_i)
   }
diff --git a/Scripts/pgs_methods/sbayesr.R b/Scripts/pgs_methods/sbayesr.R
index a360775e..89432e2a 100644
--- a/Scripts/pgs_methods/sbayesr.R
+++ b/Scripts/pgs_methods/sbayesr.R
@@ -91,8 +91,9 @@ if(!is.na(opt$test)){
 log_add(log_file = log_file, message = 'Reading in GWAS.')
 
 # Read in, check and format GWAS summary statistics
-gwas <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','FREQ','BETA','SE','P','N'))
-
+gwas <- read_sumstats(sumstats = opt$sumstats, chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','FREQ','BETA','SE','P','N'))
+GWAS_CHROMS<-unique(gwas$CHR)
+gwas$CHR<-NULL
 ###
 # Change to COJO format
 ###
@@ -144,7 +145,7 @@ if(per_var_N == F & opt$impute_N == T){
   sbayesr_opt <- paste0(sbayesr_opt, '--impute-n ')
 }
 
-error<-foreach(i = CHROMS, .combine = rbind, .options.multicore = list(preschedule = FALSE)) %dopar% {
+error<-foreach(i = GWAS_CHROMS, .combine = rbind, .options.multicore = list(preschedule = FALSE)) %dopar% {
   log <- system(paste0(opt$gctb, ' --sbayes R --ldm ', opt$ld_matrix_chr, i, '.ldm.sparse --pi 0.95,0.02,0.02,0.01 --gamma 0.0,0.01,0.1,1 --gwas-summary ', tmp_dir, '/GWAS_sumstats_COJO.txt --chain-length 10000 ', sbayesr_opt, '--exclude-mhc --burn-in 2000 --out-freq 1000 --out ', tmp_dir, '/GWAS_sumstats_SBayesR.chr', i),  intern = T)
 
   # Check whether the analysis converged
@@ -173,7 +174,7 @@ if(sum(grepl('Error', error$Log) == T) > 1){
 
 # Combine per chromosome snpRes files
 snpRes<-NULL
-for(i in CHROMS){
+for(i in GWAS_CHROMS){
   snpRes <- rbind(snpRes, fread(paste0(tmp_dir, '/GWAS_sumstats_SBayesR.chr', i, '.snpRes')))
 }
 
@@ -199,14 +200,14 @@ if(!is.na(opt$test)){
 
 # Combine per chromosome parRes files
 parRes_mcmc <- list()
-for(i in CHROMS){
+for(i in GWAS_CHROMS){
 	parRes_mcmc[[i]] <- fread(paste0(tmp_dir, '/GWAS_sumstats_SBayesR.chr', i, '.mcmcsamples.Par'))
 }
 
 parRes <- NULL
 for(par in names(parRes_mcmc[[i]])){
 	parRes_mcmc_par <- NULL
-	for(i in CHROMS){
+	for(i in GWAS_CHROMS){
 		parRes_mcmc_par <- cbind(parRes_mcmc_par, parRes_mcmc[[i]][[par]])
 	}
 
diff --git a/Scripts/pipeline_reports/indiv_report_creator.Rmd b/Scripts/pipeline_reports/indiv_report_creator.Rmd
index 921e640a..3ca744d2 100644
--- a/Scripts/pipeline_reports/indiv_report_creator.Rmd
+++ b/Scripts/pipeline_reports/indiv_report_creator.Rmd
@@ -1,10 +1,10 @@
 ---
 title: "GenoPred Report"
 params:
-  name: ""
-  id: ""
-  config: ""
-  cwd: ""
+  name:
+  id:
+  config:
+  cwd:
 output:
   html_document:
     toc: true
@@ -63,7 +63,11 @@ gwas_groups <- read_param(config = params$config, param = 'gwas_groups')
 score_list <- read_param(config = params$config, param = 'score_list')
 
 # Identify PGS methods to be included
-pgs_methods_list <- read_param(config = params$config, param = 'pgs_methods', return_obj = F)
+if(!is.null(gwas_list)){
+  pgs_methods_list <- read_param(config = params$config, param = 'pgs_methods', return_obj = F)
+} else {
+  pgs_methods_list <- NULL
+}
 
 # If testing, change CHROMS to chr value
 testing <- read_param(config = params$config, param = 'testing', return_obj = F)
@@ -265,10 +269,6 @@ cat0("- ", ifelse(is.null(gwas_list), 0, nrow(gwas_list)), " GWAS summary statis
 cat0("- ", ifelse(is.null(gwas_groups), 0, nrow(gwas_groups)), " GWAS groups were specified.\n")
 cat0("- ", length(pgs_methods_list), " PGS methods were applied, including ", paste0(pgs_method_labels$label[pgs_method_labels$method %in% pgs_methods_list], collapse = ', '), ".\n")
 
-if(any(gwas_list$population != 'EUR') & any(c('ldpred2','sbayesr') %in% pgs_methods_list)){
-  cat0("    - **Note.** `ldpred2` and `sbayesr` are currently only implemented for GWAS of EUR populations.\n\n")
-}
-
 if(is.null(score_list)){
   cat0("- No external score files were provided in score_list.\n\n")
 } else {
@@ -489,7 +489,7 @@ cat0("## Target Polygenic Profile {.tabset .tabset-fade} \n\n")
 # Read in PGS
 # Exclude PGS from multi-source methods as no estimate of R is available 
 single_source_methods <- pgs_methods_list[!(pgs_methods_list %in% pgs_group_methods) & !(grepl('_multi|tlprs_', pgs_methods_list))]
-pgs <- read_pgs(config = params$config, name = params$name, pop = 'TRANS', pseudo_only=T, pgs_method = single_source_methods)[[1]]
+pgs <- read_pgs(config = params$config, name = params$name, pop = 'TRANS', pseudo_only=T, pgs_methods = single_source_methods)[[1]]
 
 # Structure PGS for target individual
 pgs_dat <- NULL
diff --git a/Scripts/pipeline_reports/samp_report_creator.Rmd b/Scripts/pipeline_reports/samp_report_creator.Rmd
index 62450640..a9be7111 100644
--- a/Scripts/pipeline_reports/samp_report_creator.Rmd
+++ b/Scripts/pipeline_reports/samp_report_creator.Rmd
@@ -181,10 +181,6 @@ cat0("- ", ifelse(is.null(gwas_list), 0, nrow(gwas_list)), " GWAS summary statis
 cat0("- ", ifelse(is.null(gwas_groups), 0, nrow(gwas_groups)), " GWAS groups were specified.\n")
 cat0("- ", length(pgs_methods_list), " PGS methods were applied, including ", paste0(pgs_methods_list, collapse = ', '), ".\n")
 
-if(any(gwas_list$population != 'EUR') & any(c('ldpred2','sbayesr') %in% pgs_methods_list)){
-  cat0("    - **Note.** `ldpred2` and `sbayesr` are currently only implemented for GWAS of EUR populations.\n\n")
-}
-
 if(is.null(score_list)){
   cat0("- No external score files were provided in score_list.\n\n")
 } else {
diff --git a/Scripts/target_scoring/target_scoring_pipeline.R b/Scripts/target_scoring/target_scoring_pipeline.R
index fd22343d..63876005 100644
--- a/Scripts/target_scoring/target_scoring_pipeline.R
+++ b/Scripts/target_scoring/target_scoring_pipeline.R
@@ -27,6 +27,7 @@ opt = parse_args(OptionParser(option_list=option_list))
 # Load dependencies
 library(GenoUtils)
 library(data.table)
+library(bigstatsr)
 source('../functions/misc.R')
 source_all('../functions')
 library(foreach)
@@ -180,30 +181,40 @@ for(chr_i in CHROMS){
       score = paste0(tmp_dir,'/all_score.txt'),
       keep = opt$target_keep,
       frq = opt$ref_freq_chr,
-      threads = opt$n_cores
+      threads = opt$n_cores,
+      fbm = T
     )
 
   # Sum scores across chromosomes
   if(chr_i == CHROMS[1]){
-    scores_ids <- scores_i[, 1:2, with = F]
-    current_scores <- as.matrix(scores_i[, -1:-2, with = FALSE])
-    scores <- current_scores
-  } else {
-    current_scores <- as.matrix(scores_i[, -1:-2, with = FALSE])
-    scores <- scores + current_scores
+    scores_ids <- scores_i$ids
+    cols <- scores_i$cols
+    
+    # Initialize a FBM (backed on disk) for running PGS sum
+    unlink(paste0(tmp_dir, '/PGS_fbm.bk'))  # clear any stale backing file; unlink() does not warn if it is absent
+    scores <- FBM(
+      nrow = nrow(scores_ids),
+      ncol = length(cols),
+      backingfile = paste0(tmp_dir, '/PGS_fbm'),
+      init = 0
+    )
   }
-
-  system(paste0('rm ', tmp_dir, '/all_score.txt'))
-  system(paste0('rm ', tmp_dir, '/row_index.txt'))
-  system(paste0('rm ', tmp_dir, '/map.txt'))
+  
+  # In-place addition: for each score column
+  for (j in cols) {
+    scores[, which(cols == j)] <- scores[, which(cols == j)] + scores_i$scores[,which(scores_i$cols == j)]
+  }
+  
+  file.remove(scores_i$scores$backingfile,
+              scores_i$scores$rds)
   rm(scores_i)
-  rm(current_scores)
   gc()
 }
 
 # Combine score with IDs
-scores<-data.table(scores_ids,
-                   scores)
+scores <- as.data.table(matrix(scores[,], ncol = length(cols)))
+setnames(scores, cols)
+scores <- cbind(scores_ids, scores)
 
 ###
 # Scale the polygenic scores based on the reference
diff --git a/docs/CrossPop.Rmd b/docs/CrossPop.Rmd
index 6d4195ef..1e1a1d52 100644
--- a/docs/CrossPop.Rmd
+++ b/docs/CrossPop.Rmd
@@ -7065,6 +7065,13 @@ cp ~/oliverpainfel/Analyses/crosspop/plots_three_pop/average_r.png /scratch/prj/
 
 Here we will use GWAS sumtats that were used in the original GenoPred paper. These GWAS are from a range of sources, often large meta-analyses, which can lead to greater mispecification in the sumstats, which can impact the performance of some PGS methods. This is to provide more confidence in the performance of SBayesRC and QuickPRS relative to other methods.
 
+<div class="note-box">
+
+**Note**: I am using this opportunity to evaluate lassosum2, which is not included in other analyses in this project.
+
+</div>
+
+
 ***
 
 ### PGS calculation
@@ -7104,7 +7111,7 @@ config<-c(
   "config_file: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/crosspop/config_meta.yaml",
   "gwas_list: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/crosspop/gwas_list_meta.txt",
   "target_list: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/basic/target_list.txt",
-  "pgs_methods: ['quickprs','sbayesrc','ldpred2','ptclump','dbslmm']",
+  "pgs_methods: ['quickprs','sbayesrc','ldpred2','ptclump','dbslmm','lassosum2']",
 #  "pgs_methods: ['ptclump','quickprs','dbslmm','lassosum','megaprs','prscs','ldpred2','sbayesrc']",
   "cores_prep_pgs: 10",
   "cores_target_pgs: 50",
@@ -7382,7 +7389,7 @@ for(pheno_i in phenos){
   
   # Remove pseudo model for methods that don't really have one 
   eval_i <- eval_i[
-    !(eval_i$Method %in%  c('ptclump') & 
+    !(eval_i$Method %in%  c('ptclump','lassosum2') & 
       eval_i$Model %in% c('SumStatTune')),]
   
   res_eval[[pheno_i]]<-eval_i
@@ -7390,7 +7397,7 @@ for(pheno_i in phenos){
 }
 
 # Create vector defining or of methods in plots
-model_order <- c("DBSLMM", "lassosum", "LDpred2", "MegaPRS", "PRS-CS", "pT+clump", "QuickPRS", "SBayesRC") 
+model_order <- c("DBSLMM", "lassosum",'lassosum2', "LDpred2", "MegaPRS", "PRS-CS", "pT+clump", "QuickPRS", "SBayesRC") 
 
 res_eval_simp <- NULL
 for(pheno_i in phenos){
@@ -7407,6 +7414,7 @@ for(pheno_i in phenos){
 # Plot results for each phenotype separately
 dir.create('~/oliverpainfel/Analyses/crosspop/plots_meta')
 
+png(paste0('~/oliverpainfel/Analyses/crosspop/plots_meta/trait_r.png'), res=100, width = 1200, height = 900, units = 'px')
 ggplot(res_eval_simp, aes(x=label, y=R , fill = Model)) +
     geom_errorbar(aes(ymin = R - SE, ymax = R + SE),
                   width = 0,
@@ -7422,6 +7430,7 @@ ggplot(res_eval_simp, aes(x=label, y=R , fill = Model)) +
           legend.position = "top",
           legend.key.spacing.x = unit(1, "cm"),
           legend.justification = "center")
+dev.off()
 
 ####
 # Average results across phenotypes
@@ -7508,9 +7517,11 @@ dev.off()
 
 cp ~/oliverpainfel/Analyses/crosspop/plots_meta/average_r.png /scratch/prj/oliverpainfel/Software/MyGit/GenoPred/docs/Images/CrossPop_2025/average_r_meta.png
 
+cp ~/oliverpainfel/Analyses/crosspop/plots_meta/trait_r.png /scratch/prj/oliverpainfel/Software/MyGit/GenoPred/docs/Images/CrossPop_2025/trait_r_meta.png
+
 ```
 
-<details><summary>Show results</summary>
+<details><summary>Show average results</summary>
 
 <div class="centered-container">
 <div class="rounded-image-container" style="width: 50%;">
@@ -7520,6 +7531,16 @@ cp ~/oliverpainfel/Analyses/crosspop/plots_meta/average_r.png /scratch/prj/olive
 
 </details>
 
+<details><summary>Show trait-specific results</summary>
+
+<div class="centered-container">
+<div class="rounded-image-container" style="width: 100%;">
+<img src="Images/CrossPop_2025/trait_r_meta.png">
+</div>
+</div>
+
+</details>
+
 ***
 
 ## Using downsampled GWAS
diff --git a/docs/CrossPop.html b/docs/CrossPop.html
index 12989093..f433f96b 100644
--- a/docs/CrossPop.html
+++ b/docs/CrossPop.html
@@ -439,6 +439,9 @@
     <li>
       <a href="Absolute_Conversion.html">Translating Polygenic Scores onto the Absolute Scale</a>
     </li>
+    <li>
+      <a href="CrossPop_summary.html">Cross-Ancestry Polygenic Prediction</a>
+    </li>
   </ul>
 </li>
 <li>
@@ -11383,6 +11386,10 @@ <h2>Using external GWAS sumstats</h2>
 sumstats, which can impact the performance of some PGS methods. This is
 to provide more confidence in the performance of SBayesRC and QuickPRS
 relative to other methods.</p>
+<div class="note-box">
+<p><strong>Note</strong>: I am using this opportunity to evaluate
+lassosum2, which is not included in other analyses in this project.</p>
+</div>
 <hr />
 <div id="pgs-calculation-4" class="section level3">
 <h3>PGS calculation</h3>
@@ -11423,7 +11430,7 @@ <h4>
   &quot;config_file: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/crosspop/config_meta.yaml&quot;,
   &quot;gwas_list: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/crosspop/gwas_list_meta.txt&quot;,
   &quot;target_list: /users/k1806347/oliverpainfel/Data/ukb/GenoPred/configs/basic/target_list.txt&quot;,
-  &quot;pgs_methods: [&#39;quickprs&#39;,&#39;sbayesrc&#39;,&#39;ldpred2&#39;,&#39;ptclump&#39;,&#39;dbslmm&#39;]&quot;,
+  &quot;pgs_methods: [&#39;quickprs&#39;,&#39;sbayesrc&#39;,&#39;ldpred2&#39;,&#39;ptclump&#39;,&#39;dbslmm&#39;,&#39;lassosum2&#39;]&quot;,
 #  &quot;pgs_methods: [&#39;ptclump&#39;,&#39;quickprs&#39;,&#39;dbslmm&#39;,&#39;lassosum&#39;,&#39;megaprs&#39;,&#39;prscs&#39;,&#39;ldpred2&#39;,&#39;sbayesrc&#39;]&quot;,
   &quot;cores_prep_pgs: 10&quot;,
   &quot;cores_target_pgs: 50&quot;,
@@ -11689,7 +11696,7 @@ <h4>
   
   # Remove pseudo model for methods that don&#39;t really have one 
   eval_i &lt;- eval_i[
-    !(eval_i$Method %in%  c(&#39;ptclump&#39;) &amp; 
+    !(eval_i$Method %in%  c(&#39;ptclump&#39;,&#39;lassosum2&#39;) &amp; 
       eval_i$Model %in% c(&#39;SumStatTune&#39;)),]
   
   res_eval[[pheno_i]]&lt;-eval_i
@@ -11697,7 +11704,7 @@ <h4>
 }
 
 # Create vector defining or of methods in plots
-model_order &lt;- c(&quot;DBSLMM&quot;, &quot;lassosum&quot;, &quot;LDpred2&quot;, &quot;MegaPRS&quot;, &quot;PRS-CS&quot;, &quot;pT+clump&quot;, &quot;QuickPRS&quot;, &quot;SBayesRC&quot;) 
+model_order &lt;- c(&quot;DBSLMM&quot;, &quot;lassosum&quot;,&#39;lassosum2&#39;, &quot;LDpred2&quot;, &quot;MegaPRS&quot;, &quot;PRS-CS&quot;, &quot;pT+clump&quot;, &quot;QuickPRS&quot;, &quot;SBayesRC&quot;) 
 
 res_eval_simp &lt;- NULL
 for(pheno_i in phenos){
@@ -11714,6 +11721,7 @@ <h4>
 # Plot results for each phenotype separately
 dir.create(&#39;~/oliverpainfel/Analyses/crosspop/plots_meta&#39;)
 
+png(paste0(&#39;~/oliverpainfel/Analyses/crosspop/plots_meta/trait_r.png&#39;), res=100, width = 1200, height = 900, units = &#39;px&#39;)
 ggplot(res_eval_simp, aes(x=label, y=R , fill = Model)) +
     geom_errorbar(aes(ymin = R - SE, ymax = R + SE),
                   width = 0,
@@ -11729,6 +11737,7 @@ <h4>
           legend.position = &quot;top&quot;,
           legend.key.spacing.x = unit(1, &quot;cm&quot;),
           legend.justification = &quot;center&quot;)
+dev.off()
 
 ####
 # Average results across phenotypes
@@ -11810,7 +11819,7 @@ <h4>
 </details>
 <details>
 <summary>
-Show results
+Show average results
 </summary>
 <div class="centered-container">
 <div class="rounded-image-container" style="width: 50%;">
@@ -11818,6 +11827,16 @@ <h4>
 </div>
 </div>
 </details>
+<details>
+<summary>
+Show trait-specific results
+</summary>
+<div class="centered-container">
+<div class="rounded-image-container" style="width: 100%;">
+<p><img src="Images/CrossPop_2025/trait_r_meta.png"></p>
+</div>
+</div>
+</details>
 <hr />
 </div>
 </div>
diff --git a/docs/Images/CrossPop_2025/average_r_meta.png b/docs/Images/CrossPop_2025/average_r_meta.png
index 965ca324..9e8dbc0f 100644
Binary files a/docs/Images/CrossPop_2025/average_r_meta.png and b/docs/Images/CrossPop_2025/average_r_meta.png differ
diff --git a/docs/Images/CrossPop_2025/trait_r_meta.png b/docs/Images/CrossPop_2025/trait_r_meta.png
new file mode 100644
index 00000000..00bc9423
Binary files /dev/null and b/docs/Images/CrossPop_2025/trait_r_meta.png differ
diff --git a/docs/Images/pipeline_readme/pipeline_schematic_groups_lowdef.png b/docs/Images/pipeline_readme/pipeline_schematic_groups_lowdef.png
index 5d56ae02..4b08e619 100644
Binary files a/docs/Images/pipeline_readme/pipeline_schematic_groups_lowdef.png and b/docs/Images/pipeline_readme/pipeline_schematic_groups_lowdef.png differ
diff --git a/docs/more_index.Rmd b/docs/more_index.Rmd
index c47a1fe9..4bfdbc37 100644
--- a/docs/more_index.Rmd
+++ b/docs/more_index.Rmd
@@ -23,6 +23,7 @@ output:
 - Computational time/memory benchmark - <a href="pipeline_benchmark.html" class="inline_button">Link</a>
 - Benchmark in OpenSNP dataset - <a href="opensnp_benchmark.html" class="inline_button">Link</a>
 - Benchmark in UK Biobank dataset - <a href="ukb_benchmark.html" class="inline_button">Link</a>
+- Demonstration of multi-source methods applied to height GWAS and tested in the OpenSNP target sample - <a href="opensnp_benchmark_crosspop.html" class="inline_button">Link</a>
 
 ***
 
@@ -42,5 +43,4 @@ output:
 - Cross-Ancestry Polygenic Prediction
   - Summary - <a href="CrossPop_summary.html" class="inline_button">Link</a>
   - Code - <a href="CrossPop.html" class="inline_button">Link</a>
-  - Application of multi-source methods to height GWAS and tested in OpenSNP target sample - <a href="opensnp_benchmark_crosspop.html" class="inline_button">Link</a>
 
diff --git a/docs/more_index.html b/docs/more_index.html
index 0be2de73..9a5d162a 100644
--- a/docs/more_index.html
+++ b/docs/more_index.html
@@ -411,6 +411,9 @@ <h1>Pipeline</h1>
 <a href="opensnp_benchmark.html" class="inline_button">Link</a></li>
 <li>Benchmark in UK Biobank dataset -
 <a href="ukb_benchmark.html" class="inline_button">Link</a></li>
+<li>Demonstration of multi-source methods applied to height GWAS and
+tested in the OpenSNP target sample -
+<a href="opensnp_benchmark_crosspop.html" class="inline_button">Link</a></li>
 </ul>
 <hr />
 </div>
@@ -445,9 +448,6 @@ <h1>Research</h1>
 <li>Summary -
 <a href="CrossPop_summary.html" class="inline_button">Link</a></li>
 <li>Code - <a href="CrossPop.html" class="inline_button">Link</a></li>
-<li>Application of multi-source methods to height GWAS and tested in
-OpenSNP target sample -
-<a href="opensnp_benchmark_crosspop.html" class="inline_button">Link</a></li>
 </ul></li>
 </ul>
 </div>
diff --git a/docs/opensnp_benchmark_crosspop.Rmd b/docs/opensnp_benchmark_crosspop.Rmd
index b9b1810a..7273027f 100644
--- a/docs/opensnp_benchmark_crosspop.Rmd
+++ b/docs/opensnp_benchmark_crosspop.Rmd
@@ -125,7 +125,7 @@ config <- readLines('misc/opensnp/config.yaml')
 config[grepl('^config_file:', config)]<- 'config_file: misc/opensnp/config_cross_pop.yaml'
 config <- config[!grepl('^score_list:', config)]
 config[grepl('^outdir:', config)]<- 'outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test_cross_pop_2'
-config[grepl('^pgs_methods:', config)]<- "pgs_methods: ['quickprs']"
+config[grepl('^pgs_methods:', config)]<- "pgs_methods: ['quickprs','sbayesrc']"
 config[grepl('^gwas_list:', config)]<- "gwas_list: misc/opensnp/gwas_list_cross_pop.txt"
 config<-c(config, 'gwas_groups: misc/opensnp/gwas_groups.txt')
 config<-c(config, "leopard_methods: ['quickprs']")
diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd
index 45885cf5..a46ad6e5 100644
--- a/docs/pipeline_readme.Rmd
+++ b/docs/pipeline_readme.Rmd
@@ -271,7 +271,7 @@ config <- list(
     pgs_methods = list(
       description = 'List of polygenic scoring methods to run',
       example = "`['ptclump','dbslmm']`",
-      note = "Options are: `ptclump`, `dbslmm`, `prscs`, `sbayesr`, `lassosum`, `ldpred2`, `megaprs`. **Note.** `sbayesr` and `ldpred2` are only implemented for GWAS of EUR ancestry."
+      note = "Options are: `ptclump`, `dbslmm`, `prscs`, `sbayesr`, `lassosum`, `lassosum2`, `ldpred2`, `megaprs`. **Note.** By default, `sbayesr`, `lassosum2`, and `ldpred2` are only implemented for GWAS of EUR ancestry."
     ),
     testing = list(
       description = 'Controls testing mode',
@@ -555,7 +555,7 @@ target_list: example_input/target_list.txt
 # Specify location of score_list file
 score_list: example_input/score_list.txt
 
-# Specify pgs_methods ('ptclump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs')
+# Specify pgs_methods
 pgs_methods: ['ptclump','dbslmm']
 
 # Specify if you want test mode. Set to NA if you don't want test mode
@@ -818,7 +818,7 @@ The GenoPred pipeline has many potential outputs. Here is a detailed schematic d
 
 <div class="centered-container">
 <div class="rounded-image-container">
-<img src="Images/pipeline_readme/pipeline_schematic_lowdef.png">
+<img src="Images/pipeline_readme/pipeline_schematic_groups_lowdef.png">
 </div>
 </div>
 
@@ -1334,7 +1334,7 @@ By default, the pipeline allocates 5 cores when running the outlier_detection ru
 
 ## Multi-source PGS methods
 
-The GenoPred pipeline also implements a range of multi-source polygenic scoring methods that can combine GWAS summary statistics from multiple populations. To use these methods, an additional `gwas_groups` file must be provided, indicating which GWAS are to be jointly analyzed. Specify the location of the `gwas_groups` file in the `configfile` using the `gwas_groups` parameter. An example of a gwas_groups file can be found [here](../pipeline/example_input/gwas_groups.txt).
+The GenoPred pipeline also implements a range of multi-source polygenic scoring methods that can combine GWAS summary statistics from multiple populations. To use these methods, an additional `gwas_groups` file must be provided, indicating which GWAS are to be jointly analysed. Specify the location of the `gwas_groups` file in the `configfile` using the `gwas_groups` parameter. An example of a gwas_groups file can be found [here](../pipeline/example_input/gwas_groups.multisource.txt).
 
 <details>
 <summary>View gwas_groups format</summary>
@@ -1376,52 +1376,6 @@ for (column in names(gwas_groups$Column)) {
 kable(gwas_groups_df, 'markdown')
 ```
 
-The pipeline also implements a range of multi-source polygenic scoring methods, that can combine GWAS summary statistics from multiple populations. To use these methods an additional `gwas_groups` file must be provided, indicating which GWAS are to be jointly analysed. The `gwas_groups` file must be specified in the `configfile` using the `gwas_groups` parameter. An example of a gwas_groups file can be found [here](../pipeline/example_input/gwas_groups.txt). 
-
-<details>
-
-<summary>View gwas_groups format</summary>
-
-```{r, eval = T, echo = F, results = 'asis'}
-
-gwas_groups <- list(
-  Column = list(
-    name = list(
-      example = '`height`',
-      description = "ID for the group of GWAS. Cannot contain spaces (' ') or hyphens ('-')"
-    ),
-    gwas = list(
-      example = '`yengo_eur,yengo_eas`',
-      description = "Comma-seperated list of GWAS names, corresponding to the `gwas_list`."
-    ),
-    label = list(
-      example = '`\"Height (EUR+EAS)\"`',
-      description = "A human readable name for the group of GWAS Wrap in double quotes if multiple words."
-    )
-  )
-)
-
-gwas_groups_df <- NULL
-for (column in names(gwas_groups$Column)) {
-  description <- gwas_groups$Column[[column]]$description
-  example <- gwas_groups$Column[[column]]$example
-
-  # Append each parameter's details to the data frame
-  gwas_groups_df <- rbind(
-    gwas_groups_df,
-    data.frame(
-      Column = column,
-      Example = example,
-      Description = description,
-      stringsAsFactors = FALSE
-    )
-  )
-}
-
-kable(gwas_groups_df, 'markdown')
-
-```
-
 </details>
 
 <div class="shallow-break"></div>
@@ -1460,7 +1414,7 @@ gwas_groups: example_input/gwas_groups.multisource.txt
 # Specify location of target_list file
 target_list: example_input/target_list.txt
 
-# Specify pgs_methods ('ptclump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs')
+# Specify pgs_methods
 pgs_methods: ['lassosum']
 
 # Specify methods for which PGS should be combined using LEOPARD+QuickPRS
@@ -1575,7 +1529,7 @@ The relevant `configfile` parameters are:
 
 - `sbayesr_ldref`: for **SBayesR**
 - `sbayesrc_ldref`: for **SBayesRC**
-- `ldpred2_ldref`: for **LDpred2**
+- `ldpred2_ldref`: for **LDpred2** and **lassosum2**
 - `quickprs_ref`: for **QuickPRS**
 - `quickprs_multi_ldref`: for **LEOPARD+QuickPRS**
 
diff --git a/docs/pipeline_readme.html b/docs/pipeline_readme.html
index 13202138..2eaa9aef 100644
--- a/docs/pipeline_readme.html
+++ b/docs/pipeline_readme.html
@@ -570,44 +570,15 @@ <h2>Step 1: Download GenoPred repository</h2>
 </div>
 <div id="step-2-create-conda-environment-for-pipeline"
 class="section level2">
-<<<<<<< HEAD
-<h2>Step 2: Create software environment for pipeline</h2>
-<p>Conda is a software environment management system that simplifies
-installing and managing dependencies. We recommend using Miniforge — a
-minimal conda installer that comes with Mamba, a fast drop-in
-replacement for conda.</p>
-<p>If you don’t already have Miniforge installed, you can install it
-using the commands below:</p>
-<pre class="bash"><code># Download and install Miniforge (for Linux)
-wget https://github.com/conda-forge/miniforge/releases/download/24.11.3-0/Miniforge3-24.11.3-0-Linux-x86_64.sh
-bash Miniforge3-24.11.3-0-Linux-x86_64.sh</code></pre>
-<p>Accept the default installation options. Once installed, you may need
-to run source ~/.bashrc or restart your terminal. You should see (base)
-appear at the beginning of your terminal prompt.</p>
-<p>Now, create the GenoPred environment using Mamba:</p>
-<pre class="bash"><code>mamba env create -f GenoPred/pipeline/envs/pipeline.yaml</code></pre>
-<p>Activate the new <code>genopred</code> environment:</p>
-<pre class="bash"><code>mamba activate genopred</code></pre>
-<div class="note-box">
-<p><strong>Note:</strong> If you are working on an HPC, check whether
-mamba is an available module. Using the centrally installed version of
-mamba may avoid issues down the road.</p>
-</div>
-=======
 <h2>Step 2: Create conda environment for pipeline</h2>
 <p>Conda is a software environment management system which is great way
 for easily downloading and storing software. We will use conda to create
 an environment that the GenoPred pipeline will run in.</p>
 <p>If you don’t already have conda installed, we will install it using
 miniconda.</p>
-<<<<<<< Updated upstream
-<pre class="bash"><code>wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
-sh Miniconda3-latest-Linux-x86_64.sh</code></pre>
-=======
 <pre class="bash"><code># Download and install Miniforge (for Linux)
 wget https://github.com/conda-forge/miniforge/releases/download/24.11.3-0/Miniforge3-24.11.3-0-Linux-x86_64.sh
 bash Miniforge3-24.11.3-0-Linux-x86_64.sh</code></pre>
->>>>>>> Stashed changes
 <p>I would say <code>yes</code> to the default options. You may then
 need to refresh your workspace to initiate conda, by running
 <code>source ~/.bashrc</code>. You should see <code>(base)</code>
@@ -618,16 +589,9 @@ <h2>Step 2: Create conda environment for pipeline</h2>
 <code>GenoPred/pipeline/envs/pipeline.yaml</code> file. This will create
 an environment called <code>genopred</code> with some essential packages
 installed.</p>
-<<<<<<< Updated upstream
-<pre class="bash"><code>conda env create -f GenoPred/pipeline/envs/pipeline.yaml</code></pre>
-<p>Now activate the new <code>genopred</code> environment.</p>
-<pre class="bash"><code>conda activate genopred</code></pre>
-=======
 <pre class="bash"><code>mamba env create -f GenoPred/pipeline/envs/pipeline.yaml</code></pre>
 <p>Now activate the new <code>genopred</code> environment.</p>
 <pre class="bash"><code>mamba activate genopred</code></pre>
->>>>>>> Stashed changes
->>>>>>> 999a324 (Updated docs)
 <hr />
 </div>
 <div id="step-3-download-dependencies" class="section level2">
@@ -743,9 +707,9 @@ <h2><code>configfile</code></h2>
 <table>
 <colgroup>
 <col width="4%" />
-<col width="19%" />
-<col width="11%" />
-<col width="63%" />
+<col width="18%" />
+<col width="10%" />
+<col width="67%" />
 </colgroup>
 <thead>
 <tr class="header">
@@ -808,9 +772,10 @@ <h2><code>configfile</code></h2>
 <td align="left"><code>['ptclump','dbslmm']</code></td>
 <td align="left">Options are: <code>ptclump</code>, <code>dbslmm</code>,
 <code>prscs</code>, <code>sbayesr</code>, <code>lassosum</code>,
-<code>ldpred2</code>, <code>megaprs</code>. <strong>Note.</strong>
-<code>sbayesr</code> and <code>ldpred2</code> are only implemented for
-GWAS of EUR ancestry.</td>
+<code>lassosum2</code>, <code>ldpred2</code>, <code>megaprs</code>.
+<strong>Note.</strong> By default, <code>sbayesr</code>,
+<code>lassosum2</code>, and <code>ldpred2</code> are only implemented
+for GWAS of EUR ancestry.</td>
 </tr>
 <tr class="even">
 <td align="left"><code>testing</code></td>
@@ -1132,7 +1097,7 @@ <h2>Step 2: Run the pipeline</h2>
 # Specify location of score_list file
 score_list: example_input/score_list.txt
 
-# Specify pgs_methods (&#39;ptclump&#39;,&#39;dbslmm&#39;,&#39;prscs&#39;,&#39;sbayesr&#39;,&#39;lassosum&#39;,&#39;ldpred2&#39;,&#39;megaprs&#39;)
+# Specify pgs_methods
 pgs_methods: [&#39;ptclump&#39;,&#39;dbslmm&#39;]
 
 # Specify if you want test mode. Set to NA if you don&#39;t want test mode
@@ -1576,7 +1541,7 @@ <h1>Requesting outputs</h1>
 GenoPred pipeline.</p>
 <div class="centered-container">
 <div class="rounded-image-container">
-<p><img src="Images/pipeline_readme/pipeline_schematic_lowdef.png"></p>
+<p><img src="Images/pipeline_readme/pipeline_schematic_groups_lowdef.png"></p>
 </div>
 </div>
 <div class="note-box">
@@ -2185,11 +2150,11 @@ <h2>Multi-source PGS methods</h2>
 polygenic scoring methods that can combine GWAS summary statistics from
 multiple populations. To use these methods, an additional
 <code>gwas_groups</code> file must be provided, indicating which GWAS
-are to be jointly analyzed. Specify the location of the
+are to be jointly analysed. Specify the location of the
 <code>gwas_groups</code> file in the <code>configfile</code> using the
 <code>gwas_groups</code> parameter. An example of a gwas_groups file can
 be found <a
-href="../pipeline/example_input/gwas_groups.txt">here</a>.</p>
+href="../pipeline/example_input/gwas_groups.multisource.txt">here</a>.</p>
 <details>
 <summary>
 View gwas_groups format
@@ -2228,52 +2193,6 @@ <h2>Multi-source PGS methods</h2>
 </tr>
 </tbody>
 </table>
-<p>The pipeline also implements a range of multi-source polygenic
-scoring methods, that can combine GWAS summary statistics from multiple
-populations. To use these methods an additional <code>gwas_groups</code>
-file must be provided, indicating which GWAS are to be jointly analysed.
-The <code>gwas_groups</code> file must be specified in the
-<code>configfile</code> using the <code>gwas_groups</code> parameter. An
-example of a gwas_groups file can be found <a
-href="../pipeline/example_input/gwas_groups.txt">here</a>.</p>
-<details>
-<summary>
-View gwas_groups format
-</summary>
-<table>
-<colgroup>
-<col width="6%" />
-<col width="19%" />
-<col width="74%" />
-</colgroup>
-<thead>
-<tr class="header">
-<th align="left">Column</th>
-<th align="left">Example</th>
-<th align="left">Description</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td align="left">name</td>
-<td align="left"><code>height</code></td>
-<td align="left">ID for the group of GWAS. Cannot contain spaces (’ ‘)
-or hyphens (’-’)</td>
-</tr>
-<tr class="even">
-<td align="left">gwas</td>
-<td align="left"><code>yengo_eur,yengo_eas</code></td>
-<td align="left">Comma-seperated list of GWAS names, corresponding to
-the <code>gwas_list</code>.</td>
-</tr>
-<tr class="odd">
-<td align="left">label</td>
-<td align="left"><code>"Height (EUR+EAS)"</code></td>
-<td align="left">A human readable name for the group of GWAS Wrap in
-double quotes if multiple words.</td>
-</tr>
-</tbody>
-</table>
 </details>
 <div class="shallow-break">
 
@@ -2329,7 +2248,7 @@ <h2>Multi-source PGS methods</h2>
 # Specify location of target_list file
 target_list: example_input/target_list.txt
 
-# Specify pgs_methods (&#39;ptclump&#39;,&#39;dbslmm&#39;,&#39;prscs&#39;,&#39;sbayesr&#39;,&#39;lassosum&#39;,&#39;ldpred2&#39;,&#39;megaprs&#39;)
+# Specify pgs_methods
 pgs_methods: [&#39;lassosum&#39;]
 
 # Specify methods for which PGS should be combined using LEOPARD+QuickPRS
@@ -2523,7 +2442,8 @@ <h2>Specifying alternative reference data for PGS methods</h2>
 <ul>
 <li><code>sbayesr_ldref</code>: for <strong>SBayesR</strong></li>
 <li><code>sbayesrc_ldref</code>: for <strong>SBayesRC</strong></li>
-<li><code>ldpred2_ldref</code>: for <strong>LDpred2</strong></li>
+<li><code>ldpred2_ldref</code>: for <strong>LDpred2</strong> and
+<strong>lassosum2</strong></li>
 <li><code>quickprs_ref</code>: for <strong>QuickPRS</strong></li>
 <li><code>quickprs_multi_ldref</code>: for
 <strong>LEOPARD+QuickPRS</strong></li>
diff --git a/docs/pipeline_technical.Rmd b/docs/pipeline_technical.Rmd
index 07cd143e..32336190 100644
--- a/docs/pipeline_technical.Rmd
+++ b/docs/pipeline_technical.Rmd
@@ -24,15 +24,19 @@ library(data.table)
 
 This document provides technical details of the GenoPred pipeline. The GenoPred pipeline automates the process of calculating polygenic scores. The pipeline aims to implement the current practises for polygenic scoring. See [here](pipeline_overview.html) more general information regarding the GenoPred pipeline.
 
-Please cite our preprint when using the pipeline:
+Please cite our publication when using the pipeline:
 
-- "Pain, O. et al. "The GenoPred Pipeline: A Comprehensive and Scalable Pipeline for Polygenic Scoring." MedRxiv 2024. https://doi.org/10.1101/2024.06.12.24308843
+- Pain, O. et al. "The GenoPred Pipeline: A Comprehensive and Scalable Pipeline for Polygenic Scoring." Bioinformatics (2024). https://doi.org/10.1093/bioinformatics/btae551
+
+If using multi-source PGS methods, please also cite our study describing their evaluation and implementation within GenoPred:
+
+- Pain, O. "Leveraging Global Genetics Resources to Enhance Polygenic Prediction Across Ancestrally Diverse Populations." MedRxiv (2026). https://doi.org/10.1101/2025.03.27.25324773
 
 If relevant, please also cite our paper comparing polygenic scoring methods and describing the reference-standardised approach:
 
-- Pain, O. et al. "Evaluation of polygenic prediction methodology within a reference-standardized framework." PLoS genetics. https://doi.org/10.1371/journal.pgen.1009021
+- Pain, O. et al. "Evaluation of polygenic prediction methodology within a reference-standardized framework." PLoS genetics (2021). https://doi.org/10.1371/journal.pgen.1009021
 
-Please also cite the relevant studies for the tools and data used by the GenoPred pipeline.
+Please also remember to cite the relevant studies for the tools and data used by the GenoPred pipeline.
 
 ***
 
@@ -51,7 +55,7 @@ See [here](pipeline_readme.html#pipeline-configuration) for more information on
 ***
 <div class="centered-container">
 <div class="rounded-image-container">
-![Figure 1. GenoPred pipeline schematic. Shows input files, processes, outputs, and rules.](Images/pipeline_readme/pipeline_schematic_lowdef.png)
+![Figure 1. GenoPred pipeline schematic. Shows input files, processes, outputs, and rules.](Images/pipeline_readme/pipeline_schematic_groups_lowdef.png)
 </div>
 </div>
 
@@ -123,21 +127,22 @@ The GenoPred pipeline currently only implements PGS methods intended for GWAS ba
 ```{r, eval = T, echo = F}
 
 methods_table <- data.frame(
-  Method = c("DBSLMM", "lassosum", "LDpred2", "MegaPRS", "PRS-CS", "pT+clump", "SBayesR"),
-  Software = c("DBSLMM", "lassosum R package", "bigsnpr R package", "LDAK", "PRS-CS", "PLINK", "GCTB"),
-  PubMedID = c(32330416, "28480976", "33326037", "34234142", "30992449", "25722852", "31704910"),
-  PseudoValidationOption = c("Yes (only option)", "Yes", "Yes", "Yes", "Yes", "No", "Yes (only option)"),
+  Method = c("DBSLMM", "lassosum", "lassosum2", "LDpred2", "MegaPRS", "PRS-CS", "pT+clump", "SBayesR"),
+  Software = c("DBSLMM", "lassosum R package",  "bigsnpr R package", "bigsnpr R package", "LDAK", "PRS-CS", "PLINK", "GCTB"),
+  PubMedID = c("32330416", "28480976", "36105883", "33326037", "34234142", "30992449", "25722852", "31704910"),
+  PseudoValidationOption = c("Yes (only option)", "Yes", "No", "Yes", "Yes", "Yes", "No", "Yes (only option)"),
   Parameters = c("SNP-heritability estimated using LD Score Regression (on liability scale for binary outcomes)",
                  "s = 0.2, 0.5, 0.9, 1; lambda = exp(seq(log(0.001), log(0.1), length.out=20))",
+                 "delta = 0.001, 0.01, 0.1, 1; nlambda = 30; lambda.min.ratio = 0.01",
                  "Grid includes 126 combinations of heritability and non-zero effect fractions (p).",
                  "Fits lasso, ridge, bolt and BayesR models, with a total of 148 sets of hyperparameters",
                  "phi = 1e-6, 1e-4, 1e-2, 1, auto",
                  "p-value thresholds: 1e-8, 1e-6, 1e-4, 1e-2, 0.1, 0.2, 0.3, 0.4, 0.5, 1; Clumping: r2 = 0.1; window = 250kb",
                  "NA"),
-  MHCRegion = c("Not excluded", "Not excluded", "Not excluded", "Not excluded", "Not excluded", "Only top variant retained", "Excluded (as recommended)"),
-  LDReference = c("Population-matched 1KG+HGDP", "Population-matched 1KG+HGDP", "EUR UKB (LDpred2-provided)", "Population-matched 1KG+HGDP", "Population-matched UKB (PRS-CS provided)", "Population-matched 1KG+HGDP", "EUR UKB (GCTB-provided)"),
-  CPUUsage = c("40 seconds", "10 seconds", "3 minutes", "5 minutes", "35 minutes", "5 seconds", "3 minutes"),
-  MemoryUsage = c("450 Mb", "400 Mb", "500 Mb", "450Mb", "350 Mb", "100 Mb", "500 Mb")
+  MHCRegion = c("Not excluded", "Not excluded", "Not excluded", "Not excluded", "Not excluded", "Not excluded", "Only top variant retained", "Excluded (as recommended)"),
+  LDReference = c("Population-matched 1KG+HGDP", "Population-matched 1KG+HGDP", "EUR UKB (LDpred2-provided)", "EUR UKB (LDpred2-provided)", "Population-matched 1KG+HGDP", "Population-matched UKB (PRS-CS provided)", "Population-matched 1KG+HGDP", "EUR UKB (GCTB-provided)"),
+  CPUUsage = c("40 seconds", "10 seconds", "NA but it is quick", "3 minutes", "5 minutes", "35 minutes", "5 seconds", "3 minutes"),
+  MemoryUsage = c("450 Mb", "400 Mb", "NA but ~500Mb", "500 Mb", "450Mb", "350 Mb", "100 Mb", "500 Mb")
 )
 
 names(methods_table) <- c('Method','Software','PubMed ID', 'Pseudovalidation Option','Parameters','MHC Region','LD Reference','CPU usage*','Memory Usage*')
@@ -165,6 +170,12 @@ lassosum, an R package, applies the LASSO method for constructing PGS, is implem
 
 ***
 
+#### lassosum2
+
+lassosum2 is an updated version of the original lassosum method and is implemented in the bigsnpr R package. It performs penalised regression on GWAS summary statistics using an elastic net–like approach, balancing L1 and L2 penalties through a grid of hyperparameters: lambda (the L1 penalty, which controls sparsity) and delta (the L2 penalty, which controls shrinkage). lassosum2 can be run efficiently on sparse LD matrices derived from reference data, and is particularly suited for traits where variable selection may be beneficial. In GenoPred, lassosum2 is run using the script [pgs_methods/lassosum2.R](https://github.com/opain/GenoPred/blob/master/Scripts/pgs_methods/lassosum2.R). GenoPred uses bigsnpr v1.12.2 with the default grid of lambda and delta values, and performs GWAS quality control as recommended by the authors. LD matrices used by lassosum2 are precomputed from European individuals in the UK Biobank and applied only to GWAS based on a EUR population. Alternative LD reference panels can be specified for other populations ([see here](pipeline_readme.html#specifying-alternative-reference-data-for-pgs-methods)).
+
+***
+
 #### LDpred2
 
 LDpred2, part of the bigsnpr R package, operates in 'inf', 'grid', and 'auto' modes. The 'inf' mode is better suited for highly polygenic traits, whereas 'grid' and 'auto' adjust effect sizes using various hyperparameters, including SNP heritability. In GenoPred, LDpred2 is run using the script [pgs_methods/ldpred2](https://github.com/opain/GenoPred/blob/master/Scripts/pgs_methods/ldpred2.R).R. GenoPred uses bigsnpr v1.12.2, with the default LDpred2 grid search, and recommended GWAS quality control checks. GenoPred employs LDpred2's precomputed LD matrices based on the European individuals from the UK Biobank, and it is applied only to GWAS based on a EUR population. The user can specify alternative LD reference data to include additional populations ([see here](pipeline_readme.html#specifying-alternative-reference-data-for-pgs-methods)). If the SNP-h2 estimated using LDSC is <0.05, the SNP-heritability used by LDpred2 is set to 0.05.
diff --git a/docs/pipeline_technical.html b/docs/pipeline_technical.html
index 49ef0bb9..f678bc41 100644
--- a/docs/pipeline_technical.html
+++ b/docs/pipeline_technical.html
@@ -80,6 +80,41 @@
   gtag('config', 'G-YR18ZB3PR3');
 </script>
 
+<!-- Osano Cookie Consent -->
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.css" />
+<script src="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.js"></script>
+<script>
+window.addEventListener("load", function(){
+  window.cookieconsent.initialise({
+    palette: {
+      popup: { background: "#000" },
+      button: { background: "#f1d600" }
+    },
+    theme: "classic",
+    position: "bottom-right",
+    content: {
+      message: "We use a cookie for Google Analytics to understand how people use this site. This helps us improve GenoPred and demonstrate its impact. Please click 'Accept' to help us with this.",
+      dismiss: "Accept",
+      link: "Google's privacy info",
+      href: "https://policies.google.com/technologies/partner-sites"
+    },
+    onStatusChange: function(status) {
+      if (status === 'allow') {
+        // Now load Google Analytics only if user consents
+        var gtagScript = document.createElement('script');
+        gtagScript.setAttribute('async', '');
+        gtagScript.src = 'https://www.googletagmanager.com/gtag/js?id=G-YR18ZB3PR3';
+        document.head.appendChild(gtagScript);
+
+        window.dataLayer = window.dataLayer || [];
+        function gtag(){ dataLayer.push(arguments); }
+        gtag('js', new Date());
+        gtag('config', 'G-YR18ZB3PR3');
+      }
+    }
+  });
+});
+</script>
 
 <style type="text/css">
   code{white-space: pre-wrap;}
@@ -440,23 +475,31 @@ <h1>Introduction</h1>
 scores. The pipeline aims to implement the current practises for
 polygenic scoring. See <a href="pipeline_overview.html">here</a> more
 general information regarding the GenoPred pipeline.</p>
-<p>Please cite our preprint when using the pipeline:</p>
+<p>Please cite our publication when using the pipeline:</p>
 <ul>
 <li>“Pain, O. et al. ”The GenoPred Pipeline: A Comprehensive and
-Scalable Pipeline for Polygenic Scoring.” MedRxiv 2024. <a
-href="https://doi.org/10.1101/2024.06.12.24308843"
-class="uri">https://doi.org/10.1101/2024.06.12.24308843</a></li>
+Scalable Pipeline for Polygenic Scoring.” Bioinformatics (2024). <a
+href="https://doi.org/10.1093/bioinformatics/btae551"
+class="uri">https://doi.org/10.1093/bioinformatics/btae551</a></li>
+</ul>
+<p>If using multi-source PGS methods, please also cite our study
+describing their evaluation and implementation within GenoPred:</p>
+<ul>
+<li>Pain, O. “Leveraging Global Genetics Resources to Enhance Polygenic
+Prediction Across Ancestrally Diverse Populations.” MedRxiv (2026). <a
+href="https://doi.org/10.1101/2025.03.27.25324773"
+class="uri">https://doi.org/10.1101/2025.03.27.25324773</a></li>
 </ul>
 <p>If relevant, please also cite our paper comparing polygenic scoring
 methods and describing the reference-standardised approach:</p>
 <ul>
 <li>Pain, O. et al. “Evaluation of polygenic prediction methodology
-within a reference-standardized framework.” PLoS genetics. <a
+within a reference-standardized framework.” PLoS genetics (2021). <a
 href="https://doi.org/10.1371/journal.pgen.1009021"
 class="uri">https://doi.org/10.1371/journal.pgen.1009021</a></li>
 </ul>
-<p>Please also cite the relevant studies for the tools and data used by
-the GenoPred pipeline.</p>
+<p>Please also remember to cite the relevant studies for the tools and
+data used by the GenoPred pipeline.</p>
 <hr />
 </div>
 <div id="software" class="section level1">
@@ -486,7 +529,7 @@ <h1>Workflow</h1>
 <div class="centered-container">
 <div class="rounded-image-container">
 <div class="figure">
-<img src="Images/pipeline_readme/pipeline_schematic_lowdef.png"
+<img src="Images/pipeline_readme/pipeline_schematic_groups_lowdef.png"
 alt="" />
 <p class="caption">Figure 1. GenoPred pipeline schematic. Shows input
 files, processes, outputs, and rules.</p>
@@ -713,6 +756,35 @@ <h3>Single Ancestry Methods</h3>
 </tr>
 <tr>
 <td style="text-align:left;">
+lassosum2
+</td>
+<td style="text-align:left;">
+bigsnpr R package
+</td>
+<td style="text-align:left;">
+36105883
+</td>
+<td style="text-align:left;">
+No
+</td>
+<td style="text-align:left;">
+delta = 0.001, 0.01, 0.1, 1; nlambda = 30; lambda.min.ratio = 0.01
+</td>
+<td style="text-align:left;">
+Not excluded
+</td>
+<td style="text-align:left;">
+EUR UKB (LDpred2-provided)
+</td>
+<td style="text-align:left;">
+NA but it is quick
+</td>
+<td style="text-align:left;">
+NA but ~500Mb
+</td>
+</tr>
+<tr>
+<td style="text-align:left;">
 LDpred2
 </td>
 <td style="text-align:left;">
@@ -901,6 +973,28 @@ <h4>lassosum</h4>
 populations, EUR LD block data is used.</p>
 <hr />
 </div>
+<div id="lassosum2" class="section level4">
+<h4>lassosum2</h4>
+<p>lassosum2 is an updated version of the original lassosum method and
+is implemented in the bigsnpr R package. It performs penalised
+regression on GWAS summary statistics using an elastic net–like
+approach, balancing L1 and L2 penalties through a grid of
+hyperparameters: the sparsity (L1) parameter lambda and the shrinkage
+(L2) parameter delta. Lassosum2 can be run efficiently on sparse LD matrices
+derived from reference data, and is particularly suited for traits where
+variable selection may be beneficial. In GenoPred, lassosum2 is run
+using the script <a
+href="https://github.com/opain/GenoPred/blob/master/Scripts/pgs_methods/lassosum2.R">pgs_methods/lassosum2.R</a>.
+GenoPred uses bigsnpr v1.12.2 with the default grid of lambda and delta
+values, and performs GWAS quality control as recommended by the authors.
+LD matrices used by lassosum2 are precomputed from European individuals
+in the UK Biobank and applied only to GWAS based on a EUR population.
+Alternative LD reference panels can be specified for other populations
+(<a
+href="pipeline_readme.html#specifying-alternative-reference-data-for-pgs-methods">see
+here</a>).</p>
+<hr />
+</div>
 <div id="ldpred2" class="section level4">
 <h4>LDpred2</h4>
 <p>LDpred2, part of the bigsnpr R package, operates in ‘inf’, ‘grid’,
diff --git a/functions/constants.R b/functions/constants.R
index d04161bb..ff8899c9 100644
--- a/functions/constants.R
+++ b/functions/constants.R
@@ -37,13 +37,13 @@ ref_pop <- data.frame(
 
 # Make a data.frame giving labels to the 1KG reference populations
 pgs_method_labels <- data.frame(
-  method = c('ptclump','dbslmm','ldpred2','sbayesr','sbayesrc','lassosum','prscs','megaprs','quickprs','external','prscsx','xwing','ptclump_multi','dbslmm_multi','ldpred2_multi','sbayesr_multi','sbayesrc_multi','lassosum_multi','prscs_multi','megaprs_multi','quickprs_multi','tlprs_dbslmm','tlprs_sbayesrc','tlprs_quickprs','tlprs_ldpred2'),
-  label = c('pT+clump','DBSLMM','LDpred2','SBayesR','SBayesRC','lassosum','PRS-CS','MegaPRS','QuickPRS','External','PRS-CSx','X-Wing','pT+clump-multi','DBSLMM-multi','LDpred2-multi','SBayesR-multi','SBayesRC-multi','lassosum-multi','PRS-CS-multi','MegaPRS-multi','QuickPRS-multi','TL-DBSLMM','TL-SBayesRC','TL-QuickPRS','TL-LDpred2')
+  method = c('ptclump','dbslmm','ldpred2','sbayesr','sbayesrc','lassosum','lassosum2','prscs','megaprs','quickprs','external','prscsx','xwing','ptclump_multi','dbslmm_multi','ldpred2_multi','sbayesr_multi','sbayesrc_multi','lassosum_multi','prscs_multi','megaprs_multi','quickprs_multi','tlprs_dbslmm','tlprs_sbayesrc','tlprs_quickprs','tlprs_ldpred2'),
+  label = c('pT+clump','DBSLMM','LDpred2','SBayesR','SBayesRC','lassosum','lassosum2','PRS-CS','MegaPRS','QuickPRS','External','PRS-CSx','X-Wing','pT+clump-multi','DBSLMM-multi','LDpred2-multi','SBayesR-multi','SBayesRC-multi','lassosum-multi','PRS-CS-multi','MegaPRS-multi','QuickPRS-multi','TL-DBSLMM','TL-SBayesRC','TL-QuickPRS','TL-LDpred2')
 )
 pgs_method_labels<-pgs_method_labels[order(pgs_method_labels$method),]
 
 # Make vector indicating single source pgs_methods that can be applied to non-european GWAS
-pgs_methods_noneur <- c('ptclump','lassosum','megaprs','prscs','dbslmm','ldpred2','quickprs','sbayesrc')
+pgs_methods_noneur <- c('ptclump','lassosum','lassosum2','megaprs','prscs','dbslmm','ldpred2','quickprs','sbayesrc')
 
 # Make vector indicating pgs_methods that are to be applied to gwas_groups
 pgs_group_methods <- c('prscsx','xwing')
diff --git a/functions/pipeline.R b/functions/pipeline.R
index 06af977e..ba80b5ba 100644
--- a/functions/pipeline.R
+++ b/functions/pipeline.R
@@ -369,6 +369,10 @@ find_pseudo <- function(config, gwas, pgs_method, target_pop = NULL){
   if(pgs_method == 'ldpred2'){
     pseudo_val <- 'beta_auto'
   }
+  if(pgs_method == 'lassosum2'){
+    col_names <- names(fread(paste0(outdir,'/reference/pgs_score_files/',pgs_method,'/',gwas,'/ref-',gwas,'.score.gz'), nrows = 0))
+    pseudo_val <- gsub('SCORE_', '', col_names[length(col_names)])
+  }
   if(pgs_method == 'prscs'){
     pseudo_val <- 'phi_auto'
   }
diff --git a/functions/plink.R b/functions/plink.R
index 3e32c325..d79f349e 100644
--- a/functions/plink.R
+++ b/functions/plink.R
@@ -410,7 +410,7 @@ plink_king<-function(bfile=NULL, pfile=NULL, extract = NULL, chr = 1:22, plink2=
   system(paste0('tail -n +2 ', tmp_dir, '/merged.king.cutoff.in.id > ', out, '.unrelated.keep'))
 }
 
-plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, chr=1:22, frq=NULL, plink2=NULL, threads=1){
+plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, chr=1:22, frq=NULL, plink2=NULL, threads=1, fbm = F){
   if(is.null(bfile) & is.null(pfile)){
     stop("bfile or pfile must be specified.")
   }
@@ -462,14 +462,12 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
 
     # Add up the scores across chromosomes as they are produced
     if (file.exists(paste0(tmp_folder, '/profiles.chr', chr_i, '.sscore'))) {
-      sscore <- fread(paste0(paste0(tmp_folder,'/profiles.chr', chr_i, '.sscore')))
-
-      # Delete file to save disk space
-      system(paste0('rm ', tmp_folder, '/profiles.chr', chr_i, '.sscore'))
-
       if(is.null(scores)){
-        names(sscore)<-gsub('\\#', '', names(sscore))
-        scores_ids <- sscore[, names(sscore) %in% c('FID', 'IID'), with = F]
+        # Read sample IDs once to define rows
+        scores_ids <- fread(cmd = paste0('cut -f 1-2 ', tmp_folder,'/profiles.chr', chr_i, '.sscore'))
+        names(scores_ids)<-gsub('\\#', '', names(scores_ids))
+        scores_ids <- scores_ids[, names(scores_ids) %in% c('FID', 'IID'), with = F]
+        
         if (ncol(scores_ids) == 1) {
           scores_ids <- data.table(FID = scores_ids$IID,
                                    IID = scores_ids$IID)
@@ -477,30 +475,89 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
           scores_ids <- data.table(FID = scores_ids$FID,
                                    IID = scores_ids$IID)
         }
-
-        # Subset and transform scores as required
-        current_scores <- as.matrix(sscore[, paste0(names(score_small)[-1:-3], '_SUM'), with = FALSE])
-
-        # If scores is not initialized, copy current_scores
-        scores <- current_scores
+        
+        n_samples <- nrow(scores_ids)
+        
+        # Read in the column names to identify _SUM columns
+        scores_cols <- fread(paste0(tmp_folder,'/profiles.chr', chr_i, '.sscore'), nrows = 0)
+        sum_cols <- which(names(scores_cols) %in% paste0(names(score_small)[-1:-3], '_SUM'))
+        n_scores  <- length(sum_cols)
+        scores_cols <- paste0(names(score_small)[-1:-3], '_SUM')
+        
+        if(fbm){
+          # Initialize a FBM (backed on disk) for running PGS sum
+          file.remove(paste0(tmp_folder, '/plink_score_fbm.bk'))
+          scores <- FBM(
+            nrow = n_samples,
+            ncol = n_scores,
+            backingfile = paste0(tmp_folder, '/plink_score_fbm'),
+            init = 0
+          )
+        } else {
+          # Initialize a matrix running PGS sum
+          scores <- matrix(
+            nrow = n_samples,
+            ncol = n_scores,
+            data = 0
+          )
+        }
+      }
+      
+      if(fbm){
+        # Read in sscore file
+        file.remove(paste0(tmp_folder,'/profiles.chr', chr_i, '.bk'))
+        dt_chr <- big_read(
+          paste0(tmp_folder,'/profiles.chr', chr_i, '.sscore'),
+          header = TRUE,
+          select = sum_cols,
+          backingfile = paste0(tmp_folder,'/profiles.chr', chr_i),
+          colClasses = list(numeric = sum_cols)
+        )
+        
+        # Get PGS column names
+        scores_cols_i <- fread(paste0(tmp_folder,'/profiles.chr', chr_i, '.sscore'), nrows = 0)
+        scores_cols_i <- names(scores_cols_i[, names(scores_cols_i) %in% paste0(names(score_small)[-1:-3], '_SUM'), with = F])
+        
+        # In-place addition: for each score column
+        for (j in scores_cols) {
+          scores[, which(scores_cols == j)] <- scores[, which(scores_cols == j)] + dt_chr[,which(scores_cols_i == j)]
+        }
+        
+        rm(dt_chr)
+        gc()
+        file.remove(paste0(tmp_folder,'/profiles.chr', chr_i, ".bk"),
+                    paste0(tmp_folder,'/profiles.chr', chr_i, ".rds"),
+                    paste0(tmp_folder,'/profiles.chr', chr_i, ".sscore"))
       } else {
+        current_scores <- fread(paste0(tmp_folder,'/profiles.chr', chr_i, '.sscore'))
+        
         # Subset and transform scores as required
-        current_scores <- as.matrix(sscore[, paste0(names(score_small)[-1:-3], '_SUM'), with = FALSE])
-
+        current_scores <- as.matrix(current_scores[, paste0(names(score_small)[-1:-3], '_SUM'), with = FALSE])
+        
         # Sum the current scores with the running total
         scores <- scores + current_scores
+        
+        rm(current_scores)
+        gc()
+        file.remove(paste0(tmp_folder, '/profiles.chr', chr_i, '.sscore'))
       }
-    } else {
-      cat('No scores for chromosome ', chr_i, '. Check plink logs file for reason.\n')
     }
   }
-
-  # Combine score with IDs
-  scores<-data.table(scores_ids,
-                     scores)
-
-  # Rename columns
-  names(scores)[-1:-2]<-names(score_small)[-1:-3]
-
+  
+  if(fbm){
+    scores <- list(
+      ids = scores_ids,
+      cols = names(score_small)[-1:-3],
+      scores = scores
+    )
+  } else {
+    # Combine score with IDs
+    scores<-data.table(scores_ids,
+                       scores)
+    
+    # Rename columns
+    names(scores)[-1:-2]<-names(score_small)[-1:-3]
+  }
+  
   return(scores)
 }
diff --git a/pipeline/config.yaml b/pipeline/config.yaml
index 4e115c6e..df434240 100644
--- a/pipeline/config.yaml
+++ b/pipeline/config.yaml
@@ -22,7 +22,7 @@ score_list: NA
 # Specify location of gwas_groups file
 gwas_groups: NA
 
-# Specify pgs_methods ('ptclump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs','quickprs','sbayesrc','prscsx','xwing')
+# Specify pgs_methods
 pgs_methods: NA
 
 # Specify p-value thresholds for ptclump
@@ -49,6 +49,12 @@ quickprs_ldref: NA
 # Specify reference for QuickPRS-Multi
 quickprs_multi_ldref: NA
 
+# Restrict lassosum output to pseudovalidated score only
+lassosum_pseudo_only: false
+
+# Restrict megaprs output to pseudovalidated score only
+megaprs_pseudo_only: false
+
 # Specify reference for LDpred2
 ldpred2_ldref: NA
 
@@ -82,8 +88,11 @@ cores_outlier_detection: 5
 # Specify PGS methods that should be used by TL-PRS
 tlprs_methods: NA
 
-# Specify PGS methods that should be reweighted according to LEOPARD ('ptclump','dbslmm','prscs','lassosum','ldpred2','megaprs','quickprs','sbayesrc')
+# Specify PGS methods that should be reweighted according to LEOPARD
 leopard_methods: NA
 
 # Specify PGS scaling approach
 pgs_scaling: ['continuous']
+
+# Specify target populations for polygenic scoring. These should match reference population codes, e.g. ['EUR','EAS'].
+target_populations: NA
diff --git a/pipeline/example_input/config.multisource.yaml b/pipeline/example_input/config.multisource.yaml
index c78e070b..00126683 100644
--- a/pipeline/example_input/config.multisource.yaml
+++ b/pipeline/example_input/config.multisource.yaml
@@ -13,7 +13,7 @@ gwas_groups: example_input/gwas_groups.multisource.txt
 # Specify location of target_list file
 target_list: example_input/target_list.txt
 
-# Specify pgs_methods ('ptclump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs')
+# Specify pgs_methods
 pgs_methods: ['lassosum']
 
 # Specify methods for which PGS should be combined using LEOPARD+QuickPRS
diff --git a/pipeline/example_input/config.yaml b/pipeline/example_input/config.yaml
index 4e79d58d..9729d9c3 100644
--- a/pipeline/example_input/config.yaml
+++ b/pipeline/example_input/config.yaml
@@ -1,5 +1,5 @@
 # Specify output directory
-outdir: test_data/output/test1
+outdir: test_data/output/test_only_score
 
 # Location of this config file
 config_file: example_input/config.yaml
@@ -13,7 +13,7 @@ target_list: example_input/target_list.txt
 # Specify location of score_list file
 score_list: example_input/score_list.txt
 
-# Specify pgs_methods ('ptclump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs')
+# Specify pgs_methods
 pgs_methods: ['ptclump','dbslmm']
 
 # Specify if you want test mode. Set to NA if you don't want test mode
diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk
index d3571dff..e3c62e27 100644
--- a/pipeline/rules/dependencies.smk
+++ b/pipeline/rules/dependencies.smk
@@ -100,7 +100,8 @@ def check_config_parameters(config):
 
     missing_params = []
     for param in required_params:
-        if config.get(param) is None:
+        val = config.get(param)
+        if val is None or (param == "config_file" and val == "NA"):
             missing_params.append(param)
 
     if missing_params:
@@ -270,48 +271,70 @@ else:
 # Set ldpred2 reference path
 if config['ldpred2_ldref'] == 'NA':
   ldpred2_ldref=f"{resdir}/data/ldpred2_ref"
+    
+  if 'ldpred2' in config['pgs_methods'] or 'lassosum2' in config['pgs_methods']:
+    # lassosum2 reads the same ldpred2_ldref data as ldpred2, so with the default (EUR-only) reference both methods require every GWAS population to be EUR
+    valid_pops = {'EUR'}
+    invalid_pops = set(gwas_list_df['population'].unique()) - valid_pops
+  
+    if invalid_pops:
+      raise ValueError(
+        f"Default ldpred2 reference data is only available for EUR populations. For other populations, please provide your own ldpred2 reference data using the ldpred2_ldref parameter. Download links to LDpred2 reference data for EUR, EAS and AFR populations can be found in this section of the website: https://opain.github.io/GenoPred/pipeline_readme.html#specifying-alternative-reference-data-for-pgs-methods"
+      )
+
 else:
   ldpred2_ldref=config['ldpred2_ldref']
 
-# Check the ldpred2 ldref data is present for the required populations in the pgwas_list
-if 'ldpred2' in config['pgs_methods']:
-  for pop in gwas_list_df['population'].unique():
-    path = f"{ldpred2_ldref}/{pop}"
-    # Check if map.rds file exists
-    map_file = os.path.join(path, "map.rds")
-    if not os.path.exists(map_file):
-      print(f"File not found: {map_file}")
-      raise FileNotFoundError(f"Required file not found: {map_file}. LDpred2 reference data must include map.rds for all populations.")
-
-    # Check if LD_with_blocks_chr${chr}.rds files exist for chr 1 to 22
-    for chr in range(1, 23):
-      ld_file = os.path.join(path, f"LD_with_blocks_chr{chr}.rds")
-      if not os.path.exists(ld_file):
-        print(f"File not found: {ld_file}")
-        raise FileNotFoundError(f"Required file not found: {ld_file}. LDpred2 reference data must include files for all chromosomes.")
+  # Check the ldpred2 ldref data is present for the required populations in the gwas_list
+  if 'ldpred2' in config['pgs_methods']:
+    for pop in gwas_list_df['population'].unique():
+      path = f"{ldpred2_ldref}/{pop}"
+      # Check if map.rds file exists
+      map_file = os.path.join(path, "map.rds")
+      if not os.path.exists(map_file):
+        print(f"File not found: {map_file}")
+        raise FileNotFoundError(f"Required file not found: {map_file}. LDpred2 reference data must include map.rds for all populations.")
+  
+      # Check if LD_with_blocks_chr${chr}.rds files exist for chr 1 to 22
+      for chr in range(1, 23):
+        ld_file = os.path.join(path, f"LD_with_blocks_chr{chr}.rds")
+        if not os.path.exists(ld_file):
+          print(f"File not found: {ld_file}")
+          raise FileNotFoundError(f"Required file not found: {ld_file}. LDpred2 reference data must include files for all chromosomes.")
 
 # Set sbayesr reference path
 if config['sbayesr_ldref'] == 'NA':
   sbayesr_ldref=f"{resdir}/data/gctb_ref/ukbEURu_hm3_shrunk_sparse/ukbEURu_hm3_v3_50k_chr"
+  
+  if 'sbayesr' in config['pgs_methods']:
+    # Check if gwas_list contains invalid populations
+    valid_pops = {'EUR'}
+    invalid_pops = set(gwas_list_df['population'].unique()) - valid_pops
+  
+    if invalid_pops:
+      raise ValueError(
+        f"Default sbayesr reference data is only available for EUR populations. For other populations, please provide your own sbayesr reference data using the sbayesr_ldref parameter."
+      )
+
 else:
   sbayesr_ldref=config['sbayesr_ldref']
 
-# Check the sbayesr ldref data is present for the required populations in the gwas_list
-if 'sbayesr' in config['pgs_methods']:
-  for pop in gwas_list_df['population'].unique():
-    path = f"{sbayesr_ldref}/{pop}"
-    # Check if map.rds file exists
-    map_file = os.path.join(path, "map.rds")
-    if not os.path.exists(map_file):
-      print(f"File not found: {map_file}")
-      raise FileNotFoundError(f"Required file not found: {map_file}. SBayesR reference data must include map.rds for all populations.")
-
-    # Check if LD_with_blocks_chr${chr}.rds files exist for chr 1 to 22
-    for chr in range(1, 23):
-      ld_file = os.path.join(path, f"LD_with_blocks_chr{chr}.rds")
-      if not os.path.exists(ld_file):
-        print(f"File not found: {ld_file}")
-        raise FileNotFoundError(f"Required file not found: {ld_file}. SBayesR reference data must include files for all chromosomes.")
+  # Check the sbayesr ldref data is present for the required populations in the gwas_list
+  if 'sbayesr' in config['pgs_methods']:
+    for pop in gwas_list_df['population'].unique():
+      path = f"{sbayesr_ldref}/{pop}"
+      # Check if map.rds file exists
+      map_file = os.path.join(path, "map.rds")
+      if not os.path.exists(map_file):
+        print(f"File not found: {map_file}")
+        raise FileNotFoundError(f"Required file not found: {map_file}. SBayesR reference data must include map.rds for all populations.")
+  
+      # Check if LD_with_blocks_chr${chr}.rds files exist for chr 1 to 22
+      for chr in range(1, 23):
+        ld_file = os.path.join(path, f"LD_with_blocks_chr{chr}.rds")
+        if not os.path.exists(ld_file):
+          print(f"File not found: {ld_file}")
+          raise FileNotFoundError(f"Required file not found: {ld_file}. SBayesR reference data must include files for all chromosomes.")
 
 # Set quickprs reference path
 if (config["leopard_methods"] and config["leopard_methods"] != "NA") or "quickprs" in config["pgs_methods"]:
@@ -444,7 +467,7 @@ def check_pgs_methods(x):
         return
 
     valid_pgs_methods = {
-        "ptclump", "dbslmm", "prscs", "sbayesr","sbayesrc", "lassosum", "ldpred2", "megaprs", "quickprs", "xwing", "prscsx", "bridgeprs"
+        "ptclump", "dbslmm", "prscs", "sbayesr","sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs", "xwing", "prscsx", "bridgeprs"
     }
 
     invalid_methods = [method for method in x if method not in valid_pgs_methods]
@@ -458,7 +481,7 @@ check_pgs_methods(config['pgs_methods'])
 # Check valid tlprs_methods are specified
 def check_tlprs_methods(config):
     valid_tlprs_methods = {
-        "ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "megaprs", "quickprs"
+        "ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"
     }
 
     # Check if 'tlprs_methods' is empty
@@ -475,7 +498,7 @@ check_tlprs_methods(config)
 # Check valid leopard_methods are specified
 def check_leopard_methods(config):
     valid_leopard_methods = {
-        "ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "megaprs","quickprs"
+        "ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs","quickprs"
     }
 
     # Check if 'leopard_methods' is empty
@@ -886,17 +909,18 @@ rule download_sbayesrc_annot:
     {{
       rm -r -f {resdir}/data/sbayesrc_annot; \
       mkdir -p {resdir}/data/sbayesrc_annot; \
-      wget --no-check-certificate -O {resdir}/data/sbayesrc_annot/annot_baseline2.2.zip https://sbayes.pctgplots.cloud.edu.au/data/SBayesRC/resources/v2.0/Annotation/annot_baseline2.2.zip; \
+      gdown 1-dUPvduYB1zZewsItCNKcM7RvOAOojlP -O {resdir}/data/sbayesrc_annot/annot_baseline2.2.zip; \
       unzip {resdir}/data/sbayesrc_annot/annot_baseline2.2.zip -d {resdir}/data/sbayesrc_annot; \
       rm {resdir}/data/sbayesrc_annot/annot_baseline2.2.zip
     }} > {log} 2>&1
     """
 
 # Download SBayesRC reference data
-sbayesrc_ref_urls = {
-    'EUR': 'https://sbayes.pctgplots.cloud.edu.au/data/SBayesRC/resources/v2.0/LD/HapMap3/ukbEUR_HM3.zip',
-    'EAS': 'https://sbayes.pctgplots.cloud.edu.au/data/SBayesRC/resources/v2.0/LD/HapMap3/ukbEAS_HM3.zip',
-    'AFR': 'https://sbayes.pctgplots.cloud.edu.au/data/SBayesRC/resources/v2.0/LD/HapMap3/ukbAFR_HM3.zip'
+# Links to developer gdrive
+sbayesrc_ref_dev_urls = {
+    'EUR': '1HkxMT2UJKK__TfqNcZQoO7Q9XGnVtI28',
+    'EAS': '1qytY1H3ZracD2OoyqjsPjsTjxl9qdQ-c',
+    'AFR': '1kPOp2mc1odJBInd66sC40aKNwOhC--5A'
 }
 
 rule download_sbayesrc_ref:
@@ -907,16 +931,16 @@ rule download_sbayesrc_ref:
   log:
     f"{resdir}/data/logs/download_sbayesrc_ref-{{population}}.log"
   params:
-    url=lambda w: sbayesrc_ref_urls.get(w.population)
+    id=lambda w: sbayesrc_ref_dev_urls.get(w.population)
   shell:
     """
     {{
       mkdir -p {resdir}/data/sbayesrc_ref; \
       rm -r -f {resdir}/data/sbayesrc_ref/{wildcards.population}; \
-      wget --no-check-certificate -O {resdir}/data/sbayesrc_ref/{wildcards.population}.zip {params.url}; \
-      unzip {resdir}/data/sbayesrc_ref/{wildcards.population}.zip -d {resdir}/data/sbayesrc_ref/{wildcards.population}; \
-      rm {resdir}/data/sbayesrc_ref/{wildcards.population}.zip; \
-      mv {resdir}/data/sbayesrc_ref/{wildcards.population}/ukb{wildcards.population}_HM3/* {resdir}/data/sbayesrc_ref/{wildcards.population}/
+      gdown {params.id} -O {resdir}/data/sbayesrc_ref/{wildcards.population}.xz; \
+      tar -xJf {resdir}/data/sbayesrc_ref/{wildcards.population}.xz -C {resdir}/data/sbayesrc_ref/; \
+      mv {resdir}/data/sbayesrc_ref/ukb{wildcards.population}_HM3 {resdir}/data/sbayesrc_ref/{wildcards.population}; \
+      rm {resdir}/data/sbayesrc_ref/{wildcards.population}.xz
     }} > {log} 2>&1
     """
 
@@ -1036,9 +1060,9 @@ rule download_ldak_map:
     {{
       rm -r {resdir}/data/ldak_map; \
       mkdir -p {resdir}/data/ldak_map; \
-      wget --no-check-certificate -O {resdir}/data/ldak_map/genetic_map_b37.zip https://www.dropbox.com/s/slchsd0uyd4hii8/genetic_map_b37.zip; \
-      unzip {resdir}/data/ldak_map/genetic_map_b37.zip -d {resdir}/data/ldak_map/; \
-      rm {resdir}/data/ldak_map/genetic_map_b37.zip
+      gdown 1mtw5Mx-F-Ws7lKLFqMh6nN4OrDGZTkZG -O {resdir}/data/ldak_map.tar.gz; \
+      tar -zxvf {resdir}/data/ldak_map.tar.gz -C {resdir}/data/; \
+      rm {resdir}/data/ldak_map.tar.gz
     }} > {log} 2>&1
     """
     
@@ -1178,7 +1202,7 @@ rule download_default_ref:
     {{
       rm -r {resdir}/data/ref; \
       mkdir -p {resdir}/data/ref; \
-      gdown --id 1vYH6V-7F68Ji1vy9TaH0ysjmdYJFef-f -O resources/data/ref/genopred_1kg_hgdp.tar.gz; \
+      gdown --id 1vYH6V-7F68Ji1vy9TaH0ysjmdYJFef-f -O {resdir}/data/ref/genopred_1kg_hgdp.tar.gz; \
       tar -xzvf {resdir}/data/ref/genopred_1kg_hgdp.tar.gz -C {resdir}/data/ref/; \
       mv {resdir}/data/ref/ref/* {resdir}/data/ref/; \
       rm -r {resdir}/data/ref/ref; \
@@ -1304,19 +1328,19 @@ rule download_xwing_software:
     rules.install_xpass.output,
     rules.install_genoutils_xwing.output
   output:
-    "resources/software/xwing/block_partition.txt"
+    f"{resdir}/software/xwing/block_partition.txt"
   conda:
     "../envs/xwing.yaml"
   benchmark:
-    "resources/data/benchmarks/download_xwing_software.txt"
+    f"{resdir}/data/benchmarks/download_xwing_software.txt"
   log:
-    "resources/data/logs/download_xwing_software.log"
+    f"{resdir}/data/logs/download_xwing_software.log"
   shell:
     """
     {{
-      rm -r -f resources/software/xwing; \
-      git clone https://github.com/opain/X-Wing resources/software/xwing; \
-      cd resources/software/xwing; \
+      rm -r -f {resdir}/software/xwing; \
+      git clone https://github.com/opain/X-Wing {resdir}/software/xwing; \
+      cd {resdir}/software/xwing; \
       git reset --hard e9fcc264266e0e884323311816bfe20053fd3f7a
     }} > {log} 2>&1
     """
diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk
index 7ec1fa9a..85351b16 100644
--- a/pipeline/rules/pgs_methods.smk
+++ b/pipeline/rules/pgs_methods.smk
@@ -277,7 +277,8 @@ rule prep_pgs_lassosum_i:
     "../envs/analysis.yaml"
   params:
     population= lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'population'].iloc[0],
-    testing=config["testing"]
+    testing=config["testing"],
+    pseudo_only_flag = lambda wildcards: "T" if config.get("lassosum_pseudo_only", False) else "F"
   shell:
     "Rscript ../Scripts/pgs_methods/lassosum.R \
      --ref_plink_chr {refdir}/ref.chr \
@@ -288,6 +289,7 @@ rule prep_pgs_lassosum_i:
      --output {outdir}/reference/pgs_score_files/lassosum/{wildcards.gwas}/ref-{wildcards.gwas} \
      --n_cores {threads} \
      --pop_data {refdir}/ref.pop.txt \
+     --pseudo_only {params.pseudo_only_flag} \
      --test {params.testing} > {log} 2>&1"
 
 rule prep_pgs_lassosum:
@@ -340,6 +342,49 @@ rule prep_pgs_ldpred2_i:
 rule prep_pgs_ldpred2:
   input: expand(f"{outdir}/reference/pgs_score_files/ldpred2/{{gwas}}/ref-{{gwas}}.score.gz", gwas=gwas_list_df['name'])
 
+##
+# lassosum2
+##
+
+rule prep_pgs_lassosum2_i:  # Runs Scripts/pgs_methods/lassosum2.R for one GWAS and writes its lassosum2 score file
+  resources:
+    mem_mb=30000,
+    time_min=2800
+  threads: config['cores_prep_pgs']
+  input:
+    f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz",
+    lambda w: f"{ldpred2_ldref}/" + gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'population'].iloc[0] + "/map.rds",  # lassosum2 reuses the LDpred2 reference directory for the GWAS population
+    f"{outdir}/reference/pc_score_files/TRANS/ref-TRANS-pcs.EUR.scale"  # NOTE(review): the shell command passes ref-TRANS-pcs.profiles; presumably produced alongside this .scale file - confirm
+  output:
+    f"{outdir}/reference/pgs_score_files/lassosum2/{{gwas}}/ref-{{gwas}}.score.gz"
+  benchmark:
+    f"{outdir}/reference/benchmarks/prep_pgs_lassosum2_i-{{gwas}}.txt"
+  log:
+    f"{outdir}/reference/logs/prep_pgs_lassosum2_i-{{gwas}}.log"
+  conda:
+    "../envs/analysis.yaml"
+  params:
+    population= lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'population'].iloc[0],  # population column of the gwas_list row for this GWAS
+    sampling= lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'sampling'].iloc[0],  # sampling column of the same row; forwarded as --sample_prev
+    binary=lambda w: 'T' if not pd.isna(gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'sampling'].iloc[0]) else 'F',  # 'T' when the sampling value is non-missing, otherwise 'F'
+    testing=config["testing"]
+  shell:
+    "export OPENBLAS_NUM_THREADS=1; \
+    Rscript ../Scripts/pgs_methods/lassosum2.R \
+      --ref_plink_chr {refdir}/ref.chr \
+      --ldpred2_ref_dir {ldpred2_ldref}/{params.population} \
+      --ref_pcs {outdir}/reference/pc_score_files/TRANS/ref-TRANS-pcs.profiles \
+      --sumstats {outdir}/reference/gwas_sumstat/{wildcards.gwas}/{wildcards.gwas}-cleaned.gz \
+      --n_cores {threads} \
+      --output {outdir}/reference/pgs_score_files/lassosum2/{wildcards.gwas}/ref-{wildcards.gwas} \
+      --pop_data {refdir}/ref.pop.txt \
+      --binary {params.binary} \
+      --sample_prev {params.sampling} \
+      --test {params.testing} > {log} 2>&1"
+
+rule prep_pgs_lassosum2:
+  input: expand(f"{outdir}/reference/pgs_score_files/lassosum2/{{gwas}}/ref-{{gwas}}.score.gz", gwas=gwas_list_df['name'])
+
 ##
 # LDAK MegaPRS
 ##
@@ -366,7 +411,8 @@ rule prep_pgs_megaprs_i:
     "../envs/analysis.yaml"
   params:
     population= lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'population'].iloc[0],
-    testing=config["testing"]
+    testing=config["testing"],
+    pseudo_only_flag = lambda wildcards: "T" if config.get("megaprs_pseudo_only", False) else "F"
   shell:
     "Rscript ../Scripts/pgs_methods/megaprs.R \
       --ref_plink_chr {refdir}/ref.chr \
@@ -380,6 +426,7 @@ rule prep_pgs_megaprs_i:
       --n_cores {threads} \
       --output {outdir}/reference/pgs_score_files/megaprs/{wildcards.gwas}/ref-{wildcards.gwas} \
       --pop_data {refdir}/ref.pop.txt \
+      --pseudo_only {params.pseudo_only_flag} \
       --test {params.testing} > {log} 2>&1"
 
 rule prep_pgs_megaprs:
@@ -489,7 +536,7 @@ rule prep_pgs_sbayesrc_i:
   threads: config['cores_prep_pgs']
   input:
     f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz",
-    lambda w: f"{sbayesrc_ldref}/{gwas_list_df.loc[gwas_list_df['name'] == w.gwas, 'population'].iloc[0]}/ldm.info",
+    lambda w: f"{sbayesrc_ldref}/{gwas_list_df.loc[gwas_list_df['name'] == w.gwas, 'population'].iloc[0]}/block148.eigen.bin",
     rules.download_gctb252_software.output,
     rules.download_sbayesrc_annot.output,
     rules.install_genoutils_sbayesrc.output,
@@ -626,6 +673,7 @@ rule leopard_quickprs_i:
     lambda w: expand(f"{quickprs_ldref}/{{population}}/{{population}}.cors.bin", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{quickprs_multi_ldref}/{{population}}/{{population}}.subset_1.bed", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz", gwas=get_gwas_names(w.gwas_group)),
+    rules.download_xwing_software.output,
     rules.download_ldak_highld.output,
     rules.download_ldak5_2.output,
     rules.download_ldak_map.output,
@@ -666,7 +714,7 @@ rule leopard_quickprs:
 ####
 
 # Define the single_source methods that can be applied to non-EUR data
-single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "megaprs", "quickprs"}
+single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"}
 
 # Find which single source methods have been requested
 requested_single_source_methods = list(single_source_methods.intersection(pgs_methods_all))
@@ -961,6 +1009,8 @@ if 'lassosum' in pgs_methods_all:
   pgs_methods_input.append(rules.prep_pgs_lassosum.input)
 if 'ldpred2' in pgs_methods_all:
   pgs_methods_input.append(rules.prep_pgs_ldpred2.input)
+if 'lassosum2' in pgs_methods_all:
+  pgs_methods_input.append(rules.prep_pgs_lassosum2.input)
 if 'megaprs' in pgs_methods_all:
   pgs_methods_input.append(rules.prep_pgs_megaprs.input)
 if 'quickprs' in pgs_methods_all:
diff --git a/pipeline/rules/target_scoring.smk b/pipeline/rules/target_scoring.smk
index a27281ff..f237ff46 100644
--- a/pipeline/rules/target_scoring.smk
+++ b/pipeline/rules/target_scoring.smk
@@ -1,5 +1,16 @@
-def ancestry_munge(x, scaling='continuous'):
-    # Ensure scaling is a list
+def ancestry_munge(x, scaling=['continuous'], target_populations=None):
+    """
+    Process ancestry report and return list of populations based on scaling type and target population filter.
+
+    Parameters:
+    - x (str): Name of the ancestry checkpoint
+    - scaling (list): List containing 'continuous', 'discrete', or both; determines which population labels are returned
+    - target_populations (list or None): Optional list of population codes to keep (e.g., ['EUR', 'EAS']); None or 'NA' disables filtering
+
+    Returns:
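+    - list: ['TRANS'] for continuous-only scaling, the (filtered) populations for discrete-only, or those populations followed by 'TRANS' for both. Raises ValueError if scaling is not a list or names neither option.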
+    """
+
     if not isinstance(scaling, list):
         raise ValueError("The scaling parameter must be a list (e.g., ['discrete', 'continuous']).")
 
@@ -9,23 +20,28 @@ def ancestry_munge(x, scaling='continuous'):
 
     # Read ancestry report
     ancestry_report_df = pd.read_table(checkpoint_output, sep=' ')
-
+    available_populations = ancestry_report_df['population'].tolist()
+    
     # Extract population list
     population_list = ancestry_report_df['population'].tolist()
 
+    # Filter based on target_populations if specified
+    if target_populations not in [None, "NA"]:
+        missing = [pop for pop in target_populations if pop not in available_populations]
+        if missing:
+            print(f"[ancestry_munge] The following target populations were not found in ancestry report: {missing}")
+        population_list = [pop for pop in target_populations if pop in available_populations]
+    else:
+        population_list = available_populations
+
     # Handle scaling logic
     if 'continuous' in scaling and 'discrete' not in scaling:
-        # Only continuous scaling
         return ['TRANS']
     elif 'continuous' in scaling and 'discrete' in scaling:
-        # Both continuous and discrete scaling
-        population_list.append('TRANS')
-        return population_list
+        return population_list + ['TRANS']
     elif 'discrete' in scaling and 'continuous' not in scaling:
-        # Only discrete scaling
         return population_list
     else:
-        # Raise an error for invalid scaling input
         raise ValueError("Invalid value for scaling. Must include 'continuous', 'discrete', or both.")
 
 # Define which pgs_methods are can be applied to any GWAS population
@@ -106,7 +122,7 @@ rule target_pgs_i:
 
 rule target_pgs_all:
   input:
-    lambda w: expand(f"{outdir}/reference/target_checks/{{name}}/target_pgs-{{population}}.done", name=w.name, population = ancestry_munge(w.name, scaling = config["pgs_scaling"]))
+    lambda w: expand(f"{outdir}/reference/target_checks/{{name}}/target_pgs-{{population}}.done", name=w.name, population = ancestry_munge(w.name, scaling = config["pgs_scaling"], target_populations = config['target_populations']))
   output:
     touch(f"{outdir}/reference/target_checks/{{name}}/target_pgs.done")