### This appendix gives the code used for bioinformatic analyses associated with Bull et al. 2023, "Different rearing environments result in minor, but detectable, differences in selection at later life history stages in hatchery and wild coho salmon (Oncorhynchus kisutch) sharing a common environment."

### Section 1 - Quality control, trimming and mapping of reads for each of the three read sets. Run for each sample for each read set (read set = sequencing run).

### Section 1.1 - Quality control

## Load required modules
module purge
module load fastqc/0.11.8

## Copy data to scratch (480GB SSD per node) and navigate there
cp $inputdir/*_R1.fastq.gz $SLURM_TMPDIR
cp $inputdir/*_R2.fastq.gz $SLURM_TMPDIR
cd $SLURM_TMPDIR

## Run fastqc for each set of reads
mkdir 1_FastQC
for i in *.fastq.gz;do
fastqc $i -o $SLURM_TMPDIR/1_FastQC/
done

## Run multiQC to summarize FastQC output
cd 1_FastQC
source $multiqc_virtualenv # <- virtual environment with multiqc and umi_tools installed
multiqc .
multiqc *R1* -n multiqc_report_R1
multiqc *R2* -n multiqc_report_R2
deactivate

## Copy analyses back to storage
cp -r . $inputdir/

### Section 1.2 - UMI extraction

## Load required modules
module purge
module load python

## Load virtual environment
source $multiqc_virtualenv # <- virtual environment with multiqc and umi_tools installed

## Perform UMI_TOOLS extraction
cd $SLURM_TMPDIR
cp $inputdir/ids* $SLURM_TMPDIR
n=1
while IFS= read -r line;do
# Extract sample name
j=$(echo $line | cut -d '_' -f1)
k=$(echo $j | cut -d '.' -f5)
echo "starting sample $n id $j"
# Define input files
read1=$j\_R1.fastq.gz
read2=$j\_R2.fastq.gz
umis_full=$(ls $inputdir/UMIs.2/*$k*)
umis_short=$(echo $umis_full | cut -d '/' -f11) # <- field index depends on directory depth
cp $inputdir/*$j* $SLURM_TMPDIR
cp $umis_full $SLURM_TMPDIR
# Process read 1
umi_tools extract -I $umis_short --bc-pattern=NNNNNN --read2-in=$read1 --stdout=out.out.gz --read2-out=${read1%.*.*}.UMI.fastq.gz
cp ${read1%.*.*}.UMI.fastq.gz $inputdir
# Process read 2
umi_tools extract -I $umis_short --bc-pattern=NNNNNN --read2-in=$read2 --stdout=out.out.gz --read2-out=${read2%.*.*}.UMI.fastq.gz
cp ${read2%.*.*}.UMI.fastq.gz $inputdir
# Clean up tmpdir for next sample and advance counter
rm *$k*
rm out.out.gz
n=$(($n+1))
done < ids01 # <- plain text file with the names of the read 1 files to process
deactivate
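
## A minimal sketch (not part of the original pipeline) of how an ids01-style list could be
## generated: one read 1 file name per line, optionally split into chunks (ids00, ids01, ...)
## so several extraction jobs can run in parallel. The list and chunk names, and the chunk
## size of 20, are illustrative assumptions.
cd $inputdir
ls *_R1.fastq.gz > ids_all
split -l 20 -d ids_all ids # <- writes ids00, ids01, ... with 20 samples per file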
### Section 1.3 - Dediversification and trimming

## Load required modules
module purge
module load nixpkgs/16.09
module load python/2.7.14

## Do the trimming
mkdir $inputdir/TrimGaloreReports
cd $SLURM_TMPDIR

## Extract sample name and define read 1 and read 2 files
j=SAMPLENAME # <- define which sample to run this for
read1=$j\_R1.UMI.fastq.gz
read2=$j\_R2.UMI.fastq.gz

## Copy relevant sample to local node
cp $inputdir/*$j*UMI* $SLURM_TMPDIR

### Initial trimming
# This filters based on a 35 bp minimum length and a phred 20 quality cutoff
echo "### Step 1 - initial trim - ID $j ###"
source $cutadapt_virtualenv # <- virtual environment with cutadapt installed
perl $trimgalore --paired --length 35 --retain_unpaired --nextseq 20 -a AGATCGGAAGAGC -a2 AAATCAAAAAAAC $read1 $read2
deactivate
# Renaming; 1U/2U labelled as "group1" to separate them from unpaired reads generated later in this pipeline step
mv $j\_R1.UMI_val_1.fq.gz $j\.UMI.trimgalore.1P.fastq.gz
mv $j\_R2.UMI_val_2.fq.gz $j\.UMI.trimgalore.2P.fastq.gz
mv $j\_R1.UMI_unpaired_1.fq.gz $j\.UMI.trimgalore.1U_group1.fastq.gz
mv $j\_R2.UMI_unpaired_2.fq.gz $j\.UMI.trimgalore.2U_group1.fastq.gz

### Trim off diversity bases (needs to come after adapter trimming, as the diversity bases will also be on the 3' end of a read when read-through occurs) - loses reads with Ns in the CGG/TGG site, but <1% of reads in checked files
## Reads still paired
echo "### Step 2.1 - dediversify - paired reads - ID $j ###"
python $diversitytrimscript -1 $j.UMI.trimgalore.1P.fastq.gz -2 $j.UMI.trimgalore.2P.fastq.gz # <- dediversification script provided by NuGen
mv $j.UMI.trimgalore.1P.fastq_trimmed.fq.gz $j.UMI.trimgalore.dediversified.1P.fastq.gz
mv $j.UMI.trimgalore.2P.fastq_trimmed.fq.gz $j.UMI.trimgalore.dediversified.2P.fastq.gz

## Forward only reads
echo "### Step 2.2 - dediversify - unpaired forward reads - ID $j ###"
python $diversitytrimscript -1 $j.UMI.trimgalore.1U_group1.fastq.gz
mv $j.UMI.trimgalore.1U_group1.fastq_trimmed.fq.gz $j.UMI.trimgalore.dediversified.1U_group1.fastq.gz

## Reverse only reads - need to dummy forward reads to get the script to handle these
echo "### Step 2.3 - dediversify - unpaired reverse reads - ID $j ###"
cp $j.UMI.trimgalore.2U_group1.fastq.gz $j.UMI.trimgalore.2U_group1.2.fastq.gz
gunzip $j.UMI.trimgalore.2U_group1.2.fastq.gz
# Pull the header line of every record (grepping for "@" is fragile here, as quality strings can also contain "@")
awk 'NR % 4 == 1' $j.UMI.trimgalore.2U_group1.2.fastq > temp01 ### "temp##" NEEDS TO BE UNIQUE PER SCRIPT FOR THIS TO WORK ###
rm $j.UMI.trimgalore.2U_group1.2.fastq
while IFS= read -r line; do
echo $line >> $j.UMI.trimgalore.2U_group1.dummy.fastq
echo "CGGTTTTTTTTTTTTTTT" >> $j.UMI.trimgalore.2U_group1.dummy.fastq
echo "+" >> $j.UMI.trimgalore.2U_group1.dummy.fastq
echo "HHHHHHHHHHHHHHHHHH" >> $j.UMI.trimgalore.2U_group1.dummy.fastq
done < temp01
gzip $j.UMI.trimgalore.2U_group1.dummy.fastq
python $diversitytrimscript -1 $j.UMI.trimgalore.2U_group1.dummy.fastq.gz -2 $j.UMI.trimgalore.2U_group1.fastq.gz
mv $j.UMI.trimgalore.2U_group1.fastq_trimmed.fq.gz $j.UMI.trimgalore.dediversified.2U_group1.fastq.gz
rm *dummy*

### Secondary trimming (accounts for bases lost to dediversification) for paired and unpaired reads; also hard trims 3 bases off the end of each read
echo "### Step 3.1 - secondary trim - paired reads - ID $j ###"
source $cutadapt_virtualenv
perl $trimgalore --paired --length 35 --nextseq 20 --retain_unpaired --three_prime_clip_R1 3 --three_prime_clip_R2 3 $j.UMI.trimgalore.dediversified.1P.fastq.gz $j.UMI.trimgalore.dediversified.2P.fastq.gz
# Renaming; 1U/2U labelled as "group2" to separate them from the unpaired reads generated earlier in this pipeline step
mv $j.UMI.trimgalore.dediversified.1P_val_1.fq.gz $j.UMI.trimgalore2.dediversified.1P.fastq.gz
mv $j.UMI.trimgalore.dediversified.2P_val_2.fq.gz $j.UMI.trimgalore2.dediversified.2P.fastq.gz
mv $j.UMI.trimgalore.dediversified.1P_unpaired_1.fq.gz $j.UMI.trimgalore2.dediversified.1U_group2.fastq.gz
mv $j.UMI.trimgalore.dediversified.2P_unpaired_2.fq.gz $j.UMI.trimgalore2.dediversified.2U_group2.fastq.gz

echo "### Step 3.2 - secondary trim - unpaired forward reads - ID $j ###"
perl $trimgalore --length 35 --nextseq 20 --three_prime_clip_R1 3 --three_prime_clip_R2 3 $j.UMI.trimgalore.dediversified.1U_group1.fastq.gz
mv $j.UMI.trimgalore.dediversified.1U_group1_trimmed.fq.gz $j.UMI.trimgalore2.dediversified.1U_group1.fastq.gz

echo "### Step 3.3 - secondary trim - unpaired reverse reads - ID $j ###"
perl $trimgalore --length 35 --nextseq 20 --three_prime_clip_R1 3 --three_prime_clip_R2 3 $j.UMI.trimgalore.dediversified.2U_group1.fastq.gz
mv $j.UMI.trimgalore.dediversified.2U_group1_trimmed.fq.gz $j.UMI.trimgalore2.dediversified.2U_group1.fastq.gz
deactivate

### Copy output back
echo "### Copying output back to cluster - ID $j ###"
cp $j.UMI.trimgalore2.dediversified.1P.fastq.gz $inputdir
cp $j.UMI.trimgalore2.dediversified.2P.fastq.gz $inputdir
cp $j.UMI.trimgalore2.dediversified.1U_group2.fastq.gz $inputdir
cp $j.UMI.trimgalore2.dediversified.2U_group2.fastq.gz $inputdir
cp $j.UMI.trimgalore2.dediversified.1U_group1.fastq.gz $inputdir
cp $j.UMI.trimgalore2.dediversified.2U_group1.fastq.gz $inputdir
cp *$j*trimming_report* $inputdir/TrimGaloreReports

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 1.4 - Quality control post trimming

## Load required modules
module purge
module load fastqc/0.11.8

## Copy data to scratch (480GB SSD per node) and navigate there
cp $inputdir/*UMI.trimgalore2.dediversified.1P.fastq.gz $SLURM_TMPDIR
cp $inputdir/*UMI.trimgalore2.dediversified.2P.fastq.gz $SLURM_TMPDIR
cd $SLURM_TMPDIR

## Run fastqc for each set of reads
mkdir 1_FastQC
for i in *.fastq.gz;do
fastqc $i -o $SLURM_TMPDIR/1_FastQC/
done

## Run multiQC to summarize FastQC output
cd 1_FastQC
source $multiqc_virtualenv # <- virtual environment with multiqc and umi_tools installed
multiqc .
multiqc *1P* -n multiqc_report_1P
multiqc *2P* -n multiqc_report_2P
deactivate

## Copy analyses back to storage
cp -r . $inputdir/
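
## A minimal sketch (an assumption, not shown in the original appendix) of how the per-sample
## steps (e.g. Sections 1.3 and 1.5, which each require a sample name) might be driven as a
## SLURM array job, with each task pulling one sample name from a plain-text list. The array
## size, time limit, list file and wrapper script names are illustrative placeholders.
#!/bin/bash
#SBATCH --array=1-96
#SBATCH --time=12:00:00
sample_list=$inputdir/sample_names.txt # <- hypothetical list, one sample name per line
j=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $sample_list)
echo "array task $SLURM_ARRAY_TASK_ID -> sample $j"
bash trim_and_align_one_sample.sh $j # <- hypothetical wrapper around the Section 1.3-1.5 commands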
### Section 1.5 - Align reads to reference genome
j=???? # <- define which sample to run this for
ref_coho_dir=???? # <- define location of reference genome

## Load required modules
module purge
module load nixpkgs/16.09
module load gcc/7.3.0
module load bismark/0.22.1
module load bowtie2
module load samtools/1.9

## Copy data to local node (480GB SSD per node)
cp $inputdir/$j.UMI.trimgalore2.dediversified.1P.fastq.gz $SLURM_TMPDIR
cp $inputdir/$j.UMI.trimgalore2.dediversified.2P.fastq.gz $SLURM_TMPDIR

## Perform alignment for this sample
cd $SLURM_TMPDIR
echo "starting sample $j"
read1P=$j.UMI.trimgalore2.dediversified.1P.fastq.gz
read2P=$j.UMI.trimgalore2.dediversified.2P.fastq.gz

## Align paired reads; the output BAM is sorted (to allow merging across runs), renamed and copied back to the work directory
perl $EBROOTBISMARK/bismark --genome $ref_coho_dir -1 $read1P -2 $read2P
samtools sort $j.UMI.trimgalore2.dediversified.1P_bismark_bt2_pe.bam -o $j.UMI.trimgalore2.dediversified.1P_bismark_bt2_pe.sorted.bam
mv $j.UMI.trimgalore2.dediversified.1P_bismark_bt2_pe.sorted.bam $j.UMI.trimgalore2.dediversified.PE.bismark.bam
mv $j.UMI.trimgalore2.dediversified.1P_bismark_bt2_PE_report.txt $j.UMI.trimgalore2.dediversified.PE.bismark_report.txt
cp $j.UMI.trimgalore2.dediversified.PE.bismark.bam $inputdir
cp $j.UMI.trimgalore2.dediversified.PE.bismark_report.txt $inputdir/5_AlignmentReports

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 2 - Combine reads across sequencing runs for each sample, deduplicate reads, and score SNPs.

### Section 2.1 - Combine reads for each sample. Run once; loops over all samples.
inputdirHS=???? # <- directory containing processed reads from sequencing run 1
inputdirNS2020=???? # <- directory containing processed reads from sequencing run 2
inputdirNS2021=???? # <- directory containing processed reads from sequencing run 3
inputdirALL3=???? # <- directory to write out combined reads

## Set up environment
module purge
module load python
module load samtools

## Loop over samples and run code
n=1
cd $SLURM_TMPDIR
for i in $inputdirHS/*.UMI.trimgalore2.dediversified.PE.bismark.bam; do
j=$(echo $i | cut -d '/' -f9) # <- field index depends on directory depth
k=${j%.*.*.*.*.*.*}
l=$(echo $k | cut -d '.' -f5)
echo "starting sample $n id $k"
cp $inputdirHS/*.$l.UMI.trimgalore2.dediversified.PE.bismark.bam $SLURM_TMPDIR/HS.bam
cp $inputdirNS2020/*.$l.UMI.trimgalore2.dediversified.PE.bismark.bam $SLURM_TMPDIR/NS2020.bam
cp $inputdirNS2021/*.$l.UMI.trimgalore2.dediversified.PE.bismark.bam $SLURM_TMPDIR/NS2021.bam
date
samtools merge -fr ALL3.bam HS.bam NS2020.bam NS2021.bam
samtools sort -o $l.ALL3.sorted.bam ALL3.bam
date
cp $l.ALL3.sorted.bam $inputdirALL3
rm $l.ALL3.sorted.bam
rm ALL3.bam
rm HS.bam
rm NS2020.bam
rm NS2021.bam
n=$(($n+1))
done
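
## Optional sanity check (not part of the original pipeline): confirm each merged BAM is
## intact and carries one read group per sequencing run (samtools merge -r attaches an RG
## tag derived from each input file). The file-name pattern is taken from the loop above.
for b in $inputdirALL3/*.ALL3.sorted.bam; do
samtools quickcheck $b && echo "$b passed quickcheck"
echo "read groups: $(samtools view -H $b | grep -c '^@RG')" # <- expect 3 (HS, NS2020, NS2021)
done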
## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 2.2 - Deduplicate reads using UMIs. Run for each sample.
j=???? # <- bam file from the last step
k=???? # <- short sample ID

## Set input and output directories
inputdirALL3=???? # <- directory with combined read bam files

## Set up environment
module purge
module load python
module load samtools
module load bedtools

## Set up to run code
cd $SLURM_TMPDIR
cp $inputdirALL3/$j $SLURM_TMPDIR
source $multiqcdir/ENV/bin/activate # <- virtual environment with multiqc and umi_tools installed

# Process paired reads. First convert to sam, then use sed to remove barcode IDs from the read names (these interfere with umi_tools), then convert back to bam (using samtools sort). Index the bam and then run the deduplication.
samtools view -h -o $k.ALL3.sorted.sam $j
sed -i 's/_.:N:0:........//g' $k.ALL3.sorted.sam
samtools sort -o $j $k.ALL3.sorted.sam
rm $k.ALL3.sorted.sam
samtools index $j
umi_tools dedup -I $j --paired -S $k.ALL3.deduped.bam --output-stats=$k.ALL3.deduped.stats
samtools index $k.ALL3.deduped.bam

# Calculate coverage (deduped)
bedtools genomecov -ibam $k.ALL3.deduped.bam > $k.ALL3.deduped.depth
grep -w genome $k.ALL3.deduped.depth > $k.ALL3.deduped.depth_genome

# Copy output back
cp $k.ALL3.deduped.bam $inputdirALL3
cp $k.ALL3.deduped.bam.bai $inputdirALL3
cp $k.ALL3.deduped.stats_* $inputdirALL3/2_All3_DedupReports+Depths
cp $k.ALL3.deduped.depth $inputdirALL3/2_All3_DedupReports+Depths
cp $k.ALL3.deduped.depth_genome $inputdirALL3/2_All3_DedupReports+Depths

## Close virtual environment
deactivate

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 2.3 - Call SNPs using bssnper2. Loops over samples.
inputdirALL3=???? # <- directory with deduplicated bam files
bssnper2=???? # <- bssnper2 install location
refnozip=???? # <- unzipped reference genome location

## Load required modules
module purge

## Copy BAM files to scratch and loop over samples
n=1
cd $SLURM_TMPDIR
for i in $inputdirALL3/*.ALL3.deduped.bam; do
## Copy over files and define variables
cp $i $SLURM_TMPDIR
j=$(echo $i | cut -d '/' -f7) # <- field index depends on directory depth
k=${j%.*.*.*}
## Print counter and current sample name
echo "starting sample $n id $k"
## Run bssnper2
$bssnper2 $j --sampleName=$k --ref $refnozip --vcf $k.bssnper2.vcf --homrefInVCF --minBaseQ=15 --minMapQ=20 --minCoverage=6 --maxCoverage=200 --minHomFreq=0.85 --minHetFreq=0.10 --errorRate=0.02 --minAltCoverage=2
## Copy output back to server
cp $k.bssnper2.vcf $inputdirALL3/3_All3_bssnper2
## Remove files before next sample
rm -f *
## Increase counter
n=$(($n+1))
done

### Section 2.4 - Combine per-sample VCF files into one VCF file.
inputdirALL3=???? # <- directory with per-sample VCF files

## Load required modules
module purge
module load nixpkgs/16.09
module load intel/2018.3
module load tabix

## Navigate to the directory with the per-sample vcf files
cd $inputdirALL3/3_All3_bssnper2/

## Compress and index each per-sample VCF, then merge
for i in *.bssnper2.vcf;do
bgzip $i # ~ 3 min per sample
tabix $i.gz # ~ 0.5 min per sample
done
list=$(ls *.vcf.gz | sed -z 's/\n/ /g')
module --force purge
module load StdEnv/2020
module load vcftools/0.1.16
vcf-merge $list > Coho.ALL.bssnper2.raw.vcf

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 3 - Filter SNPs and individuals for quality. NOT run as a job, but directly on the console, as this code runs quickly.
module load vcftools/0.1.16

# Remove sites scored in less than 50 % of individuals; sites need to be biallelic
vcftools --vcf Coho.ALL.bssnper2.raw.vcf --out Coho.ALL.bssnper2.filt1 --max-missing 0.5 --minDP 10 --maxDP 250 --min-alleles 2 --max-alleles 2 --recode
# 1515453 SNPs retained

# Calculate missingness for retained SNPs, and remove those individuals with scores for < 50 % of sites
vcftools --vcf Coho.ALL.bssnper2.filt1.recode.vcf --out Coho.ALL.bssnper2.filt1 --missing-indv
# Check the output (.imiss) file manually, e.g. in Excel
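
## A hedged alternative to the manual spreadsheet check (not in the original pipeline):
## list individuals missing more than 50 % of sites directly from the .imiss file written
## by --missing-indv (F_MISS is the fifth column).
awk 'NR > 1 && $5 > 0.5 {print $1, $5}' Coho.ALL.bssnper2.filt1.imiss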
# REMOVE 5 INDIVIDUALS - G3854, G3856, G3862, G3868, G3871. NEED TO MANUALLY LIST
vcftools --vcf Coho.ALL.bssnper2.filt1.recode.vcf --out Coho.ALL.bssnper2.filt2 --remove-indv G3854 --remove-indv G3856 --remove-indv G3862 --remove-indv G3868 --remove-indv G3871 --recode

# Remove sites scored in less than 75 % of individuals or with maf < 0.05, and thin to at most one SNP per 400 bp
vcftools --vcf Coho.ALL.bssnper2.filt2.recode.vcf --out Coho.ALL.bssnper2.filt3 --max-missing 0.75 --maf 0.05 --thin 400 --recode

# Remove sites with heterozygosity 0.65 or greater (using the --missing-site function in vcftools to count the number of scored individuals)
# Count the number of hets per SNP. The input here is the filt3 file (no hwe, no nosibs)
paste <(awk -F"\t" 'BEGIN {print "CHR\tPOS\tREF\tALT"} !/^#/ {print $1"\t"$2"\t"$4"\t"$5}' Coho.ALL.bssnper2.filt3.recode.vcf) \
<(awk 'BEGIN {print "nHet"} !/^#/ {print gsub(/0\|1|1\|0|0\/1|1\/0|0\|2|2\|0|0\/2|2\/0|0\|3|3\|0|0\/3|3\/0|0\|4|4\|0|0\/4|4\/0/, "")}' Coho.ALL.bssnper2.filt3.recode.vcf) > Coho.ALL.bssnper2.filt3.recode.hetcount
# 475 SNPs to be removed
vcftools --vcf Coho.ALL.bssnper2.filt3.recode.vcf --out Coho.ALL.bssnper2.filt3.nofixed --exclude-positions SNPS_het-over-065 --recode # <- SNPS_het-over-065 lists the positions with heterozygosity >= 0.65, compiled from the hetcount output

# LD trimming in bcftools (prune function). Calculate LD first for plotting
vcftools --vcf Coho.ALL.bssnper2.filt3.nofixed.recode.vcf --out Coho.ALL.bssnper2.filt3.nofixed --geno-r2 --ld-window-bp 500000
bcftools +prune -m 0.2 -w 500000 Coho.ALL.bssnper2.filt3.nofixed.recode.vcf -Ov -o Coho.ALL.bssnper2.filt3.nofixed.LD.vcf

# Check whether individuals need filtering, i.e. < 75 % of SNPs scored
vcftools --vcf Coho.ALL.bssnper2.filt3.nofixed.LD.vcf --out Coho.ALL.bssnper2.filt3.nofixed.LD --missing-indv
# No individuals need to be removed

# ID siblings and trim to one individual per family at random
vcftools --vcf Coho.ALL.bssnper2.filt3.nofixed.LD.vcf --out Coho.ALL.bssnper2.filt3.nofixed.LD --relatedness

# Reduce to one member per family; remove 28 individuals
vcftools --vcf Coho.ALL.bssnper2.filt3.nofixed.LD.vcf --out Coho.ALL.bssnper2.filt4.nosibs --remove-indv F163 --remove-indv F35 --remove-indv F41 --remove-indv F45 --remove-indv F48 --remove-indv F50 --remove-indv G3848 --remove-indv G3849 --remove-indv G3850 --remove-indv G3851 --remove-indv G3858 --remove-indv G3859 --remove-indv G3863 --remove-indv G3865 --remove-indv H656 --remove-indv H657 --remove-indv H659 --remove-indv H663 --remove-indv H664 --remove-indv H666 --remove-indv H667 --remove-indv H670 --remove-indv H675 --remove-indv H677 --remove-indv M161 --remove-indv M162 --remove-indv M169 --remove-indv M77 --recode
### 61 individuals, 23643 SNPs

# HWE-filtered version, for comparison. 20318 SNPs retained
vcftools --vcf Coho.ALL.bssnper2.filt4.nosibs.recode.vcf --out Coho.ALL.bssnper2.filt4.nosibs.HWE --hwe 0.05 --recode

### Section 4 - Downstream analyses.

### Section 4.1 - BayPass. XtX model
inputdir=???? # <- directory with BayPass files produced using PGDSpider2 and the geste2baypass.py script. Produce one for each contrast of interest.

## Load required modules
module purge
module load baypass/2.2

## Copy BayPass files to scratch - requires a BayeScan/GESTE-format file created first and then converted using the script above
cp $inputdir/*BAYPASS* $SLURM_TMPDIR
cd $SLURM_TMPDIR

## Run
n=1
for i in *BAYPASS*;do
echo "starting combination $n of 8, file $i"
i_baypass -gfile $i -outprefix out_$i
cp out_$i* $inputdir
n=$(($n+1))
done

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *
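
## A hedged post-processing sketch (not part of the original pipeline): rank SNPs by their
## XtX estimate from one core-model run. It assumes BayPass 2.2 writes
## <outprefix>_summary_pi_xtx.out with an M_XtX column; check the output file and column
## names of your own run before use. The file name below is an illustrative assumption.
xtx=$inputdir/out_Coho.6POPS.BAYPASS_summary_pi_xtx.out # <- hypothetical output file
col=$(awk 'NR == 1 {for (i = 1; i <= NF; i++) if ($i == "M_XtX") print i}' $xtx)
tail -n +2 $xtx | sort -g -r -k "$col,$col" | head -20 # <- top 20 SNPs by XtX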
### Section 4.2 - BayPass. BF/contrast model
inputdir=???? # <- directory with BayPass files produced using PGDSpider2 and the geste2baypass.py script. Produce one for each contrast of interest.

## Load required modules
module purge
module load baypass/2.2

## Copy BayPass files to scratch - requires a BayeScan/GESTE-format file created from the VCF using PGDSpider with a population definition file
cp $inputdir/Coho.2POP.covariate $SLURM_TMPDIR # <- defines the two groups being compared as "0" and "1" for the contrast analyses
cp $inputdir/*BAYPASS* $SLURM_TMPDIR
cd $SLURM_TMPDIR

## Run
n=1
for i in *BAYPASS*;do
echo "starting combination $n of 8, file $i"
i_baypass -gfile $i -efile Coho.2POP.covariate -outprefix out_covariate_$i
cp out_covariate_$i* $inputdir
n=$(($n+1))
done

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *

### Section 4.3 - Structure. Run for K values 1-10 with n = 1-10 reps each.
k=1
n=1

## Set input and output directories
inputdir=???? # <- location of the filtered dataset, created from the combined VCF using PGDSpider2

## Load required modules
module load StdEnv/2020 gcc/9.3.0
module load structure/2.3.4

## Copy STRUCTURE file to scratch - requires a STRUCTURE-format file created from the VCF using PGDSpider with a population definition file
cp $inputdir/project_data_nosibs $SLURM_TMPDIR
cp $inputdir/mainparams $SLURM_TMPDIR
cp $inputdir/extraparams $SLURM_TMPDIR
cd $SLURM_TMPDIR
dos2unix * # needed for STRUCTURE to read the input file

## Run
structure -i project_data_nosibs -o Coho.ALL.bssnper.filt4.recode.STRUCTURE_k$k\n$n -K $k

## Copy files back to normal storage
cp *_f $inputdir

## Clean up scratch
cd $SLURM_TMPDIR
rm -r *
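
## A minimal sketch (an assumption, not shown in the original appendix) of how the full
## K = 1-10, rep = 1-10 grid could be driven rather than editing k and n by hand: loop over
## both values and submit one job per combination. "run_structure_one.sh" is a hypothetical
## wrapper around the Section 4.3 commands that reads k and n from its arguments.
for k in $(seq 1 10); do
for n in $(seq 1 10); do
sbatch run_structure_one.sh $k $n # <- hypothetical per-run job script
done
done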
### Section 4.4 - PCA in adegenet. R CODE
#adegenet PCA
#clear workspace
rm(list=ls())
#set working directory
setwd("?????")
#install required packages - run first time only
#install.packages(c("adegenet","vcfR","ggplot2","cowplot"))
#load required packages
library("adegenet")
library("vcfR")
library("ggplot2")
library("cowplot")
#import vcf file and convert to genlight format
vcf<-read.vcfR("Coho.ALL.bssnper2.filt4.nosibs.recode.vcf")
vcf_genlight<-vcfR2genlight(vcf)
#convert to tabulated data for pca
vcf_tab <- tab(vcf_genlight, freq = TRUE, NA.method = "mean")
#perform PCA and plot
vcf_pca <- dudi.pca(vcf_tab, scale = FALSE, scannf = FALSE, nf = 3)
barplot(vcf_pca$eig[1:50],main = "PCA eigenvalues", col = heat.colors(50)) #barplot of eigenvalues
memberships<-read.table("GroupMemberships_bssnper2.csv",header=TRUE,sep=",",fileEncoding="UTF-8-BOM")
Group<-memberships$GROUP
plot1<-qplot(x=vcf_pca$li[,1],y=vcf_pca$li[,2],geom="point",color=Group,xlab="PC 1 (2.44 %)",ylab="PC 2 (2.40 %)",size=2)+theme_classic()+theme(legend.position="none")+theme(axis.text=element_text(size=16),axis.title=element_text(size=20))
plot2<-qplot(x=vcf_pca$li[,1],y=vcf_pca$li[,3],geom="point",color=Group,xlab="PC 1 (2.44 %)",ylab="PC 3 (2.14 %)",size=2)+theme_classic()+theme(legend.position="none")+theme(axis.text=element_text(size=16),axis.title=element_text(size=20))
plot_grid(plot1,plot2,labels=c("A","B"))
ggsave2("combined_nolegend_PERCENTs.jpg",width=10,height=5)

### Section 4.5 - FSTs. R CODE
#clear workspace
rm(list=ls())
#set working directory
setwd("????")
#load required package
library("diveRsity")
### run analyses, all SNPs ###
#pairwise, all 6 groups ### DONE ###
out<-diffCalc(infile="Coho.ALL.bssnper2.filt4.nosibs.recode.GENEPOP.6POPS.txt",fst=TRUE,pairwise=TRUE,boots=1000,bs_pairwise=TRUE,para=TRUE)
write.csv(out$bs_pairwise$Fst,"bssnper2_diversity_FST_pairwise.csv")
write.csv(out$pw_locus,"bssnper2_diversity_FST_pairwise_perSNP.csv")
#global, all 6 groups ### DONE ###
out_global<-diffCalc(infile="Coho.ALL.bssnper2.filt4.nosibs.recode.GENEPOP.6POPS.txt",fst=TRUE,pairwise=FALSE,boots=1000,bs_locus=TRUE,para=TRUE)
write.csv(out_global$global_bs,"bssnper2_diversity_FST_global.csv")
write.csv(out_global$bs_locus,"bssnper2_diversity_FST_global_perSNP.csv")

### Section 4.6 - Randomization based p-values for "parallelism" tests. R CODE
#clear workspace
rm(list=ls())
#set working directory
setwd("?")
#load required packages
library("diveRsity")
library("adegenet")
library("vcfR")
vcf<-read.vcfR("Coho.ALL.bssnper2.filt4.nosibs.recode.BF100.HEADER.vcf") # VCF file containing the loci being tested
vcf_genlight<-vcfR2genlight(vcf)
vcf_tab <- tab(vcf_genlight, freq = TRUE,NA.method="asis")
GROUPS<-read.table("../GroupMemberships_bssnper2_6POPS.txt",header=TRUE) # Text file defining group memberships
# E = enriched (semi-natural in the manuscript), T = traditional (conventional in the manuscript), W = wild
cor_coefficients_ET<-numeric(length=1000)
cor_coefficients_EW<-numeric(length=1000)
cor_coefficients_TW<-numeric(length=1000)
#loop to shuffle rows
for(i in 1:1000){
vcf_shuffled<-vcf_tab[sample(1:61),]
vcf_shuffled_ER<-vcf_shuffled[1:12,] #ER #12
vcf_shuffled_TR<-vcf_shuffled[13:19,] #TR #7
vcf_shuffled_WR<-vcf_shuffled[20:25,] #WR #6
vcf_shuffled_ES<-vcf_shuffled[26:35,] #ES #10
vcf_shuffled_TS<-vcf_shuffled[36:44,] #TS #9
vcf_shuffled_WS<-vcf_shuffled[45:61,] #WS #17
difsE<-colMeans(vcf_shuffled_ER,na.rm=TRUE)-colMeans(vcf_shuffled_ES,na.rm=TRUE)
difsT<-colMeans(vcf_shuffled_TR,na.rm=TRUE)-colMeans(vcf_shuffled_TS,na.rm=TRUE)
difsW<-colMeans(vcf_shuffled_WR,na.rm=TRUE)-colMeans(vcf_shuffled_WS,na.rm=TRUE)
cor_coefficients_ET[i]<-cor.test(difsE,difsT)$estimate
cor_coefficients_EW[i]<-cor.test(difsE,difsW)$estimate
cor_coefficients_TW[i]<-cor.test(difsT,difsW)$estimate
}
#actual values
vcf_ER<-vcf_tab[GROUPS[,2]=="Enriched_Adult",]
vcf_TR<-vcf_tab[GROUPS[,2]=="Trad_Adult",]
vcf_WR<-vcf_tab[GROUPS[,2]=="Wild_Adult",]
vcf_ES<-vcf_tab[GROUPS[,2]=="Enriched_Smolt",]
vcf_TS<-vcf_tab[GROUPS[,2]=="Trad_Smolt",]
vcf_WS<-vcf_tab[GROUPS[,2]=="Wild_Smolt",]
difsE_actual<-colMeans(vcf_ER,na.rm=TRUE)-colMeans(vcf_ES,na.rm=TRUE)
difsT_actual<-colMeans(vcf_TR,na.rm=TRUE)-colMeans(vcf_TS,na.rm=TRUE)
difsW_actual<-colMeans(vcf_WR,na.rm=TRUE)-colMeans(vcf_WS,na.rm=TRUE)
cor_coefficients_actual_ET<-cor.test(difsE_actual,difsT_actual)$estimate #0.51
cor_coefficients_actual_EW<-cor.test(difsE_actual,difsW_actual)$estimate #0.49
cor_coefficients_actual_TW<-cor.test(difsT_actual,difsW_actual)$estimate #0.44
#compare the observed correlations to the null distributions from the shuffled data
max(cor_coefficients_ET) # 0.34
max(cor_coefficients_EW) # 0.36
max(cor_coefficients_TW) # 0.45
quantile(cor_coefficients_ET,0.95)
quantile(cor_coefficients_EW,0.95)
quantile(cor_coefficients_TW,0.95) # 0.192