Alignment and mapping workflow
1. Prepare your data
1.1 download files from your service agent
(base) qianjianghu@QJ-Ubuntu:/media/qianjianghu/Udata/DataAnaDriver/RNA_seq/Mareike$ wget -r -np -nH -R '*.html' -c --user webreader --ask-password http://xena.lechnerlab.de:3080/bertrams/NexusDNA_GSK/Bulk-RNA%20sequencing_03032023/
1.2 upload data to Pitt CRC server
# create folders in Pitt CRC server
[huqj@login0b ~]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS
[huqj@login0b ML_mouse_PCLS]$
[huqj@login0b ML_mouse_PCLS]$ mkdir Counts Data Jobs Mapping QC
[huqj@login0b ML_mouse_PCLS]$ ls
Counts Data Jobs Mapping QC
# upload the data to Pitt CRC server
# from local Ubuntu terminal
(base) qianjianghu@QJ-Ubuntu:/media/qianjianghu/Udata/DataAnaDriver/RNA_seq/Mareike/Mareike_mouse_PCLS$ scp -r ./ huqj@htc.crc.pitt.edu:/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Data/raw/
1.3 check md5sum
[huqj@login0b raw]$ grep -f Mouse_PCLS.txt md5sums.txt | md5sum -c
./Sample_23L000907/23L000907_S5_L002_R2_001.fastq.gz: OK
./Sample_23L000907/23L000907_S5_L002_R1_001.fastq.gz: OK
./Sample_23L000907/23L000907_S5_L001_R1_001.fastq.gz: OK
./Sample_23L000907/23L000907_S5_L001_R2_001.fastq.gz: OK
./Sample_23L000914/23L000914_S12_L002_R1_001.fastq.gz: OK
./Sample_23L000914/23L000914_S12_L001_R2_001.fastq.gz: OK
./Sample_23L000905/23L000905_S3_L001_R1_001.fastq.gz: OK
./Sample_23L000914/23L000914_S12_L001_R1_001.fastq.gz: OK
./Sample_23L000913/23L000913_S11_L001_R2_001.fastq.gz: OK
./Sample_23L000905/23L000905_S3_L002_R1_001.fastq.gz: OK
./Sample_23L000905/23L000905_S3_L001_R2_001.fastq.gz: OK
./Sample_23L000905/23L000905_S3_L002_R2_001.fastq.gz: OK
./Sample_23L000914/23L000914_S12_L002_R2_001.fastq.gz: OK
./Sample_23L000913/23L000913_S11_L002_R2_001.fastq.gz: OK
./Sample_23L000909/23L000909_S7_L001_R1_001.fastq.gz: OK
./Sample_23L000913/23L000913_S11_L001_R1_001.fastq.gz: OK
./Sample_23L000909/23L000909_S7_L001_R2_001.fastq.gz: OK
./Sample_23L000913/23L000913_S11_L002_R1_001.fastq.gz: OK
./Sample_23L000909/23L000909_S7_L002_R2_001.fastq.gz: OK
./Sample_23L000909/23L000909_S7_L002_R1_001.fastq.gz: OK
./Sample_23L000920/23L000920_S18_L001_R2_001.fastq.gz: OK
./Sample_23L000920/23L000920_S18_L002_R1_001.fastq.gz: OK
./Sample_23L000920/23L000920_S18_L001_R1_001.fastq.gz: OK
./Sample_23L000920/23L000920_S18_L002_R2_001.fastq.gz: OK
./Sample_23L000903/23L000903_S1_L001_R1_001.fastq.gz: OK
./Sample_23L000903/23L000903_S1_L002_R1_001.fastq.gz: OK
./Sample_23L000903/23L000903_S1_L002_R2_001.fastq.gz: OK
./Sample_23L000904/23L000904_S2_L002_R2_001.fastq.gz: OK
./Sample_23L000911/23L000911_S9_L002_R1_001.fastq.gz: OK
./Sample_23L000910/23L000910_S8_L002_R1_001.fastq.gz: OK
./Sample_23L000903/23L000903_S1_L001_R2_001.fastq.gz: OK
./Sample_23L000904/23L000904_S2_L001_R2_001.fastq.gz: OK
./Sample_23L000910/23L000910_S8_L002_R2_001.fastq.gz: OK
./Sample_23L000911/23L000911_S9_L002_R2_001.fastq.gz: OK
./Sample_23L000911/23L000911_S9_L001_R1_001.fastq.gz: OK
./Sample_23L000904/23L000904_S2_L002_R1_001.fastq.gz: OK
./Sample_23L000904/23L000904_S2_L001_R1_001.fastq.gz: OK
./Sample_23L000910/23L000910_S8_L001_R2_001.fastq.gz: OK
./Sample_23L000910/23L000910_S8_L001_R1_001.fastq.gz: OK
./Sample_23L000911/23L000911_S9_L001_R2_001.fastq.gz: OK
./Sample_23L000908/23L000908_S6_L002_R1_001.fastq.gz: OK
./Sample_23L000919/23L000919_S17_L002_R2_001.fastq.gz: OK
./Sample_23L000919/23L000919_S17_L001_R1_001.fastq.gz: OK
./Sample_23L000908/23L000908_S6_L002_R2_001.fastq.gz: OK
./Sample_23L000908/23L000908_S6_L001_R1_001.fastq.gz: OK
./Sample_23L000908/23L000908_S6_L001_R2_001.fastq.gz: OK
./Sample_23L000912/23L000912_S10_L002_R1_001.fastq.gz: OK
./Sample_23L000919/23L000919_S17_L002_R1_001.fastq.gz: OK
./Sample_23L000912/23L000912_S10_L001_R2_001.fastq.gz: OK
./Sample_23L000906/23L000906_S4_L001_R1_001.fastq.gz: OK
./Sample_23L000906/23L000906_S4_L002_R2_001.fastq.gz: OK
./Sample_23L000906/23L000906_S4_L001_R2_001.fastq.gz: OK
./Sample_23L000906/23L000906_S4_L002_R1_001.fastq.gz: OK
./Sample_23L000919/23L000919_S17_L001_R2_001.fastq.gz: OK
./Sample_23L000912/23L000912_S10_L001_R1_001.fastq.gz: OK
./Sample_23L000912/23L000912_S10_L002_R2_001.fastq.gz: OK
2. Know more about your samples
How the library preparation was done, including the library preparation kit, adapters, and purification;
the sequencing platform, read length, depth, targeted number of reads, paired-end or single-end reads, and strand-specific or non-strand-specific protocol
3. QC
3.1 Run FastQC
a. Move to FastQC folder under /Jobs and open fastqc.job using vim editor.
[huqj@login0b]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/FastQC
[huqj@login0b FastQC]$ vim fastqc.job
#!/bin/bash
#SBATCH -J fastqc
#SBATCH -c 12
#SBATCH -t 2:00:00
#SBATCH -o OUT/fastqc-%A_%a.out
#SBATCH --array=0-13 # job array index
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=huqj@pitt.edu
###########
# fastqc.job -- run FastQC on the raw paired-end FASTQ files of one sample.
# Each array task picks the sample whose 0-based position in Mouse_PCLS.txt
# equals SLURM_ARRAY_TASK_ID and writes its reports to QC/FastQC/Raw.
# NOTE(review): the relative -o path presumably requires an OUT/ directory in
# the submission directory -- confirm it exists before running sbatch.
####### set-up fastqc
module load fastqc/0.11.7
# Strict mode is enabled only after the module command has run, so that the
# module machinery itself is not subject to -u.
set -euo pipefail
set -x
################
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS
sample_list="$project/Data/raw/Mouse_PCLS.txt"
# One sample name per line.
mapfile -t names < "$sample_list"
task_id=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
sample=${names[$task_id]:-}
# An index past the end of the list would otherwise produce the bogus
# input names "_1.fastq.gz" / "_2.fastq.gz".
if [[ -z "$sample" ]]; then
  echo "No sample at index $task_id in $sample_list" >&2
  exit 1
fi
echo "$sample"
fastq="$project/Data/merged_fastq"
out="$project/QC/FastQC/Raw"
#################
mkdir -p "$out"
########
# FastQC parallelises per input file, so both mates are analysed
# concurrently in a single invocation.
fastqc -t 2 -o "$out" \
  "$fastq/${sample}_1.fastq.gz" \
  "$fastq/${sample}_2.fastq.gz"
check report
[huqj@login0b OUT]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/FastQC/OUT
[huqj@login0b OUT]$ grep "Analysis complete for" *.out
fastqc-1938004_0.out:Analysis complete for 23L000903_1.fastq.gz
fastqc-1938004_0.out:Analysis complete for 23L000903_2.fastq.gz
fastqc-1938004_10.out:Analysis complete for 23L000913_1.fastq.gz
fastqc-1938004_10.out:Analysis complete for 23L000913_2.fastq.gz
fastqc-1938004_11.out:Analysis complete for 23L000914_1.fastq.gz
fastqc-1938004_11.out:Analysis complete for 23L000914_2.fastq.gz
fastqc-1938004_12.out:Analysis complete for 23L000919_1.fastq.gz
fastqc-1938004_12.out:Analysis complete for 23L000919_2.fastq.gz
fastqc-1938004_13.out:Analysis complete for 23L000920_1.fastq.gz
fastqc-1938004_13.out:Analysis complete for 23L000920_2.fastq.gz
fastqc-1938004_1.out:Analysis complete for 23L000904_1.fastq.gz
fastqc-1938004_1.out:Analysis complete for 23L000904_2.fastq.gz
fastqc-1938004_2.out:Analysis complete for 23L000905_1.fastq.gz
fastqc-1938004_2.out:Analysis complete for 23L000905_2.fastq.gz
fastqc-1938004_3.out:Analysis complete for 23L000906_1.fastq.gz
fastqc-1938004_3.out:Analysis complete for 23L000906_2.fastq.gz
fastqc-1938004_4.out:Analysis complete for 23L000907_1.fastq.gz
fastqc-1938004_4.out:Analysis complete for 23L000907_2.fastq.gz
fastqc-1938004_5.out:Analysis complete for 23L000908_1.fastq.gz
fastqc-1938004_5.out:Analysis complete for 23L000908_2.fastq.gz
fastqc-1938004_6.out:Analysis complete for 23L000909_1.fastq.gz
fastqc-1938004_6.out:Analysis complete for 23L000909_2.fastq.gz
fastqc-1938004_7.out:Analysis complete for 23L000910_1.fastq.gz
fastqc-1938004_7.out:Analysis complete for 23L000910_2.fastq.gz
fastqc-1938004_8.out:Analysis complete for 23L000911_1.fastq.gz
fastqc-1938004_8.out:Analysis complete for 23L000911_2.fastq.gz
fastqc-1938004_9.out:Analysis complete for 23L000912_1.fastq.gz
fastqc-1938004_9.out:Analysis complete for 23L000912_2.fastq.gz
3.2 Run MultiQC
3.2.1 Move to the FastQC results folder
[huqj@login0b OUT]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/
[huqj@login0b Raw]$ ls
23L000903_1_fastqc.html 23L000906_1_fastqc.html 23L000909_1_fastqc.html 23L000912_1_fastqc.html 23L000919_1_fastqc.html
23L000903_1_fastqc.zip 23L000906_1_fastqc.zip 23L000909_1_fastqc.zip 23L000912_1_fastqc.zip 23L000919_1_fastqc.zip
23L000903_2_fastqc.html 23L000906_2_fastqc.html 23L000909_2_fastqc.html 23L000912_2_fastqc.html 23L000919_2_fastqc.html
23L000903_2_fastqc.zip 23L000906_2_fastqc.zip 23L000909_2_fastqc.zip 23L000912_2_fastqc.zip 23L000919_2_fastqc.zip
23L000904_1_fastqc.html 23L000907_1_fastqc.html 23L000910_1_fastqc.html 23L000913_1_fastqc.html 23L000920_1_fastqc.html
23L000904_1_fastqc.zip 23L000907_1_fastqc.zip 23L000910_1_fastqc.zip 23L000913_1_fastqc.zip 23L000920_1_fastqc.zip
23L000904_2_fastqc.html 23L000907_2_fastqc.html 23L000910_2_fastqc.html 23L000913_2_fastqc.html 23L000920_2_fastqc.html
23L000904_2_fastqc.zip 23L000907_2_fastqc.zip 23L000910_2_fastqc.zip 23L000913_2_fastqc.zip 23L000920_2_fastqc.zip
23L000905_1_fastqc.html 23L000908_1_fastqc.html 23L000911_1_fastqc.html 23L000914_1_fastqc.html
23L000905_1_fastqc.zip 23L000908_1_fastqc.zip 23L000911_1_fastqc.zip 23L000914_1_fastqc.zip
23L000905_2_fastqc.html 23L000908_2_fastqc.html 23L000911_2_fastqc.html 23L000914_2_fastqc.html
23L000905_2_fastqc.zip 23L000908_2_fastqc.zip 23L000911_2_fastqc.zip 23L000914_2_fastqc.zip
3.2.2 load the MultiQC module on HTC
[huqj@login0b Raw]$ module spider multiqc
----------------------------------------------------------------------------
multiqc:
----------------------------------------------------------------------------
Description:
Aggregate results from bioinformatics analyses across many samples
into a single report.
Versions:
multiqc/1.7
multiqc/1.8
multiqc/1.10.1
multiqc/1.12
multiqc/1.13
----------------------------------------------------------------------------
For detailed information about a specific "multiqc" module (including how to load the modules) use the module's full name.
For example:
$ module spider multiqc/1.8
----------------------------------------------------------------------------
[huqj@login0b Raw]$ module load multiqc/1.12
[huqj@login0b Raw]$
3.2.3 Run multiqc on the above FastQC files to summarize the results.
[huqj@login0b Raw]$ multiqc *.zip
/// MultiQC 🔍 | v1.12
| multiqc | MultiQC Version v1.14 now available!
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000903_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000903_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000904_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000904_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000905_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000905_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000906_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000906_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000907_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000907_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000908_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000908_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000909_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000909_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000910_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000910_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000911_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000911_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000912_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000912_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000913_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000913_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000914_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000914_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000919_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000919_2_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000920_1_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Raw/23L000920_2_fastqc.zip
| searching | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 28/28
| fastqc | Found 28 reports
| multiqc | Compressing plot data
| multiqc | Report : multiqc_report.html
| multiqc | Data : multiqc_data
| multiqc | MultiQC complete
[huqj@login0b Raw]$ ^C
[huqj@login0b Raw]$ ls
23L000903_1_fastqc.html 23L000906_1_fastqc.html 23L000909_1_fastqc.html 23L000912_1_fastqc.html 23L000919_1_fastqc.html
23L000903_1_fastqc.zip 23L000906_1_fastqc.zip 23L000909_1_fastqc.zip 23L000912_1_fastqc.zip 23L000919_1_fastqc.zip
23L000903_2_fastqc.html 23L000906_2_fastqc.html 23L000909_2_fastqc.html 23L000912_2_fastqc.html 23L000919_2_fastqc.html
23L000903_2_fastqc.zip 23L000906_2_fastqc.zip 23L000909_2_fastqc.zip 23L000912_2_fastqc.zip 23L000919_2_fastqc.zip
23L000904_1_fastqc.html 23L000907_1_fastqc.html 23L000910_1_fastqc.html 23L000913_1_fastqc.html 23L000920_1_fastqc.html
23L000904_1_fastqc.zip 23L000907_1_fastqc.zip 23L000910_1_fastqc.zip 23L000913_1_fastqc.zip 23L000920_1_fastqc.zip
23L000904_2_fastqc.html 23L000907_2_fastqc.html 23L000910_2_fastqc.html 23L000913_2_fastqc.html 23L000920_2_fastqc.html
23L000904_2_fastqc.zip 23L000907_2_fastqc.zip 23L000910_2_fastqc.zip 23L000913_2_fastqc.zip 23L000920_2_fastqc.zip
23L000905_1_fastqc.html 23L000908_1_fastqc.html 23L000911_1_fastqc.html 23L000914_1_fastqc.html multiqc_data
23L000905_1_fastqc.zip 23L000908_1_fastqc.zip 23L000911_1_fastqc.zip 23L000914_1_fastqc.zip multiqc_report.html
23L000905_2_fastqc.html 23L000908_2_fastqc.html 23L000911_2_fastqc.html 23L000914_2_fastqc.html
23L000905_2_fastqc.zip 23L000908_2_fastqc.zip 23L000911_2_fastqc.zip 23L000914_2_fastqc.zip
4. Run Cutadapt
[huqj@login0b ~]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/Cutadapt
[huqj@login0b Cutadapt]$ ls
cutadapt.job OUT
[huqj@login0b Cutadapt]$ vim cutadapt.job
[huqj@login0b Cutadapt]$ sbatch cutadapt.job
#!/bin/bash
#SBATCH -N 1
#SBATCH -J cutadapt
#SBATCH -c 2
#SBATCH -t 2:00:00
#SBATCH -o OUT/cutadapt-%A_%a.out
#SBATCH --array=0-13 # job array index
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu
########################################
# cutadapt.job -- adapter/quality trimming of one paired-end sample.
# Each array task picks the sample whose 0-based position in Mouse_PCLS.txt
# equals SLURM_ARRAY_TASK_ID, reads its mates from Data/merged_fastq and
# writes <sample>_1.cutadapt.fastq.gz / <sample>_2.cutadapt.fastq.gz to
# Data/Cutadapt.
## Cutadapt set-up
module purge
module load cutadapt/2.10
# Strict mode is enabled only after the module commands have run, so that the
# module machinery itself is not subject to -u.
set -euo pipefail
set -x
#########################
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS
sample_list="$project/Data/raw/Mouse_PCLS.txt"
# One sample name per line.
mapfile -t names < "$sample_list"
task_id=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
sample=${names[$task_id]:-}
# An index past the end of the list would otherwise trim the non-existent
# files "_1.fastq.gz" / "_2.fastq.gz".
if [[ -z "$sample" ]]; then
  echo "No sample at index $task_id in $sample_list" >&2
  exit 1
fi
echo "$sample"
fastq="$project/Data/merged_fastq"
out="$project/Data/Cutadapt"
##########################
mkdir -p "$out"
# -m 50 / -q 15     : minimum read length and 3' quality cutoff
# -a / -A           : 3' adapter for read 1 / read 2
# -u 5 / -U 5       : remove the first 5 bases of read 1 / read 2
# --pair-filter=any : drop the pair if either mate fails a filter
cutadapt -m 50 -q 15 \
  -a CTGTCTCTTATA \
  -A CTGTCTCTTATA \
  -u 5 \
  -U 5 \
  --pair-filter=any \
  -o "$out/${sample}_1.cutadapt.fastq.gz" \
  -p "$out/${sample}_2.cutadapt.fastq.gz" \
  "$fastq/${sample}_1.fastq.gz" "$fastq/${sample}_2.fastq.gz"
check report
[huqj@login0b OUT]$ grep 'Total written (filtered)' *
cutadapt-1938038_0.out:Total written (filtered): 5,737,433,977 bp (92.6%)
cutadapt-1938038_10.out:Total written (filtered): 6,503,310,479 bp (91.7%)
cutadapt-1938038_11.out:Total written (filtered): 5,726,695,218 bp (91.6%)
cutadapt-1938038_12.out:Total written (filtered): 5,273,527,593 bp (92.5%)
cutadapt-1938038_13.out:Total written (filtered): 5,565,266,902 bp (92.6%)
cutadapt-1938038_1.out:Total written (filtered): 5,531,526,492 bp (91.8%)
cutadapt-1938038_2.out:Total written (filtered): 5,597,951,508 bp (92.3%)
cutadapt-1938038_3.out:Total written (filtered): 5,887,894,091 bp (91.9%)
cutadapt-1938038_4.out:Total written (filtered): 4,839,106,610 bp (92.6%)
cutadapt-1938038_5.out:Total written (filtered): 5,631,820,340 bp (92.7%)
cutadapt-1938038_6.out:Total written (filtered): 6,610,218,044 bp (92.7%)
cutadapt-1938038_7.out:Total written (filtered): 5,979,813,844 bp (92.8%)
cutadapt-1938038_8.out:Total written (filtered): 6,135,924,682 bp (92.5%)
cutadapt-1938038_9.out:Total written (filtered): 5,743,278,028 bp (92.7%)
5. QC after cutadapt
5.1 Run fastq_screen
5.1.1 edit fastq_screen.job
[huqj@login0b fastq_screen]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/fastq_screen
[huqj@login0b fastq_screen]$ vim fastq_screen.job
#!/bin/bash
#
#SBATCH --job-name=fastq_screen
#SBATCH -N 1
#SBATCH --cpus-per-task=2 # Request that ncpus be allocated per process.
#SBATCH -t 1-00:00 # Runtime in D-HH:MM
#SBATCH -o OUT/fastq_screen-%j.out
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu
# fastq_screen.job -- screen all trimmed FASTQ files in Data/Cutadapt against
# the databases listed in fastq_screen.conf; reports go to QC/fastq_screen.
# This is a single (non-array) job, so the log name uses %j (job ID) rather
# than the array placeholders %A_%a.
module load bowtie2/2.4.5
module load fastq_screen/0.13.0
# Strict mode is enabled only after the module commands have run, so that the
# module machinery itself is not subject to -u.
set -euo pipefail
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS
files="$project/Data/Cutadapt"
outfile="$project/QC/fastq_screen"
confile="$project/Jobs/fastq_screen/fastq_screen.conf"
# Expand the input glob up front so an empty input directory is reported
# instead of passing the literal pattern to fastq_screen.
inputs=("$files"/*cutadapt.fastq.gz)
if [[ ! -e "${inputs[0]}" ]]; then
  echo "No *cutadapt.fastq.gz files found in $files" >&2
  exit 1
fi
mkdir -p "$outfile"
# --threads overrides THREADS from the configuration file so the aligner
# uses exactly the CPUs Slurm allocated to this job.
fastq_screen --conf "$confile" \
  --threads "${SLURM_CPUS_PER_TASK:-2}" \
  --outdir "$outfile" \
  "${inputs[@]}"
5.1.2 edit fastq_screen.conf to add aligner and database
[huqj@login0b fastq_screen]$ vim fastq_screen.conf
# This is an example configuration file for FastQ Screen
############################
## Bowtie, Bowtie 2 or BWA #
############################
## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set
## this value to tell the program where to find your chosen aligner. Uncomment
## the relevant line below and set the appropriate location. Please note,
## this path should INCLUDE the executable filename.
#BOWTIE /usr/local/bin/bowtie/bowtie
BOWTIE2 /ihome/crc/install/bowtie2/bowtie2-2.4.5-linux-x86_64/bowtie2
#BWA /usr/local/bwa/bwa
############################################
## Bismark (for bisulfite sequencing only) #
############################################
## If the Bismark binary is not in your PATH then you can set this value to
## tell the program where to find it. Uncomment the line below and set the
## appropriate location. Please note, this path should INCLUDE the executable
## filename.
#BISMARK /usr/local/bin/bismark/bismark
############
## Threads #
############
## Genome aligners can be made to run across multiple CPU cores to speed up
## searches. Set this value to the number of cores you want for mapping reads.
## NOTE(review): this value is larger than the --cpus-per-task=2 requested in
## fastq_screen.job -- TODO confirm which of the two is intended.
THREADS 8
##############
## DATABASES #
##############
## This section enables you to configure multiple genomes databases (aligner index
## files) to search against in your screen. For each genome you need to provide a
## database name (which can't contain spaces) and the location of the aligner index
## files.
##
## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.)
## are found in a folder named 'GRCh37'.
##
## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in
## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices. The index
## used will be the one compatible with the chosen aligner (as specified using the
## --aligner flag).
##
## The entries shown below are only suggested examples, you can add as many DATABASE
## sections as required, and you can comment out or remove as many of the existing
## entries as desired. We suggest including genomes and sequences that may be sources
## of contamination either because they were run on your sequencer previously, or may
## have contaminated your sample during the library preparation step.
##
## Human - sequences available from
## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
DATABASE Human /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/fastq_screen_database/Human/GRCh38_noalt_as/GRCh38_noalt_as
##
## Mouse - sequence available from
## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
DATABASE Mouse /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/fastq_screen_database/Mouse/GRCm39/GRCm39
##
## Ecoli- sequence available from EMBL accession U00096.2
#DATABASE Ecoli /data/public/Genomes/Ecoli/Ecoli
##
## PhiX - sequence available from Refseq accession NC_001422.1
#DATABASE PhiX /data/public/Genomes/PhiX/phi_plus_SNPs
##
## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
#DATABASE Adapters /data/public/Genomes/Contaminants/Contaminants
##
## Vector - Sequence taken from the UniVec database
## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
#DATABASE Vectors /data/public/Genomes/Vectors/Vectors
5.1.3 submit job
[huqj@login0b fastq_screen]$ sbatch fastq_screen.job
5.1.4 download the report from HTC cluster to local
qianjianghu@Qianjiangs-MacBook-Pro: ~
$ scp -r huqj@htc.crc.pitt.edu:/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_human_PCLS/QC/fastq_screen/ ./Desktop/ML_PCLS
huqj@htc.crc.pitt.edu's password:
23L000915_1.cutadapt_screen.txt 100% 476 8.4KB/s 00:00
23L000918_2.cutadapt_screen.txt 100% 474 8.4KB/s 00:00
23L000922_1.cutadapt_screen.txt 100% 474 6.5KB/s 00:00
23L000919_1.cutadapt_screen.txt 100% 475 10.0KB/s 00:00
23L000922_1.cutadapt_screen.html 100% 3281KB 1.4MB/s 00:02
23L000915_2.cutadapt_screen.html 100% 3281KB 2.6MB/s 00:01
23L000919_2.cutadapt_screen.txt 100% 475 6.4KB/s 00:00
23L000917_2.cutadapt_screen.html 100% 3281KB 3.2MB/s 00:01
23L000919_1.cutadapt_screen.html 100% 3281KB 2.2MB/s 00:01
23L000915_2.cutadapt_screen.txt 100% 476 10.0KB/s 00:00
23L000920_1.cutadapt_screen.html 100% 3281KB 1.8MB/s 00:01
23L000918_1.cutadapt_screen.txt 100% 474 6.3KB/s 00:00
23L000922_2.cutadapt_screen.txt 100% 474 4.2KB/s 00:00
23L000921_1.cutadapt_screen.html 100% 3281KB 2.0MB/s 00:01
23L000918_1.cutadapt_screen.html 100% 3281KB 2.1MB/s 00:01
23L000916_2.cutadapt_screen.html 100% 3281KB 1.8MB/s 00:01
23L000917_1.cutadapt_screen.txt 100% 476 7.1KB/s 00:00
23L000920_2.cutadapt_screen.html 100% 3281KB 2.3MB/s 00:01
23L000920_1.cutadapt_screen.txt 100% 473 5.6KB/s 00:00
23L000921_2.cutadapt_screen.txt 100% 473 3.8KB/s 00:00
23L000919_2.cutadapt_screen.html 100% 3281KB 2.3MB/s 00:01
23L000917_1.cutadapt_screen.html 100% 3281KB 2.3MB/s 00:01
23L000916_2.cutadapt_screen.txt 100% 473 3.3KB/s 00:00
23L000916_1.cutadapt_screen.html 100% 3281KB 2.3MB/s 00:01
23L000918_2.cutadapt_screen.html 100% 3281KB 2.1MB/s 00:01
23L000921_2.cutadapt_screen.html 100% 3281KB 1.9MB/s 00:01
23L000921_1.cutadapt_screen.txt 100% 473 2.3KB/s 00:00
23L000916_1.cutadapt_screen.txt 100% 474 4.2KB/s 00:00
23L000917_2.cutadapt_screen.txt 100% 476 6.7KB/s 00:00
23L000920_2.cutadapt_screen.txt 100% 474 4.1KB/s 00:00
23L000915_1.cutadapt_screen.html 100% 3281KB 2.2MB/s 00:01
23L000922_2.cutadapt_screen.html 100% 3281KB 2.0MB/s 00:01
(base)
5.2 Run FastQC
[huqj@login0b fastq_screen]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/FastQC/
[huqj@login0b FastQC]$ vim fastqc_cutadapt.job
[huqj@login0b FastQC]$ sbatch fastqc_cutadapt.job
#!/bin/bash
#SBATCH -J fastqc_cutadapt
#SBATCH -c 12
#SBATCH -t 2:00:00
#SBATCH -o OUT_cutadapt/fastqc-%A_%a.out
#SBATCH --array=0-13 # job array index
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu
###########
# fastqc_cutadapt.job -- run FastQC on the trimmed paired-end FASTQ files of
# one sample. Each array task picks the sample whose 0-based position in
# Mouse_PCLS.txt equals SLURM_ARRAY_TASK_ID and writes its reports to
# QC/FastQC/Cutadapt.
# NOTE(review): the relative -o path presumably requires an OUT_cutadapt/
# directory in the submission directory -- confirm it exists before sbatch.
####### set-up fastqc
module load fastqc/0.11.7
# Strict mode is enabled only after the module command has run, so that the
# module machinery itself is not subject to -u.
set -euo pipefail
set -x
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS
sample_list="$project/Data/raw/Mouse_PCLS.txt"
# One sample name per line.
mapfile -t names < "$sample_list"
task_id=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
sample=${names[$task_id]:-}
# An index past the end of the list would otherwise produce bogus input names.
if [[ -z "$sample" ]]; then
  echo "No sample at index $task_id in $sample_list" >&2
  exit 1
fi
echo "$sample"
fastq="$project/Data/Cutadapt"
out="$project/QC/FastQC/Cutadapt/"
################
mkdir -p "$out"
# The trimmed files are named <sample>_1.cutadapt.fastq.gz and
# <sample>_2.cutadapt.fastq.gz, so they are addressed directly; a
# "<sample>_*_1..." glob would need an extra "_<field>" in the name and
# matches nothing.
# FastQC parallelises per input file, so both mates are analysed
# concurrently in a single invocation.
fastqc -t 2 -o "$out" \
  "$fastq/${sample}_1.cutadapt.fastq.gz" \
  "$fastq/${sample}_2.cutadapt.fastq.gz"
check report
[huqj@login0b OUT_cutadapt]$ grep 'Analysis complete for' *.out
fastqc-1938131_0.out:Analysis complete for 23L000903_1.cutadapt.fastq.gz
fastqc-1938131_0.out:Analysis complete for 23L000903_2.cutadapt.fastq.gz
fastqc-1938131_10.out:Analysis complete for 23L000913_1.cutadapt.fastq.gz
fastqc-1938131_10.out:Analysis complete for 23L000913_2.cutadapt.fastq.gz
fastqc-1938131_11.out:Analysis complete for 23L000914_1.cutadapt.fastq.gz
fastqc-1938131_11.out:Analysis complete for 23L000914_2.cutadapt.fastq.gz
fastqc-1938131_12.out:Analysis complete for 23L000919_1.cutadapt.fastq.gz
fastqc-1938131_12.out:Analysis complete for 23L000919_2.cutadapt.fastq.gz
fastqc-1938131_13.out:Analysis complete for 23L000920_1.cutadapt.fastq.gz
fastqc-1938131_13.out:Analysis complete for 23L000920_2.cutadapt.fastq.gz
fastqc-1938131_1.out:Analysis complete for 23L000904_1.cutadapt.fastq.gz
fastqc-1938131_1.out:Analysis complete for 23L000904_2.cutadapt.fastq.gz
fastqc-1938131_2.out:Analysis complete for 23L000905_1.cutadapt.fastq.gz
fastqc-1938131_2.out:Analysis complete for 23L000905_2.cutadapt.fastq.gz
fastqc-1938131_3.out:Analysis complete for 23L000906_1.cutadapt.fastq.gz
fastqc-1938131_3.out:Analysis complete for 23L000906_2.cutadapt.fastq.gz
fastqc-1938131_4.out:Analysis complete for 23L000907_1.cutadapt.fastq.gz
fastqc-1938131_4.out:Analysis complete for 23L000907_2.cutadapt.fastq.gz
fastqc-1938131_5.out:Analysis complete for 23L000908_1.cutadapt.fastq.gz
fastqc-1938131_5.out:Analysis complete for 23L000908_2.cutadapt.fastq.gz
fastqc-1938131_6.out:Analysis complete for 23L000909_1.cutadapt.fastq.gz
fastqc-1938131_6.out:Analysis complete for 23L000909_2.cutadapt.fastq.gz
fastqc-1938131_7.out:Analysis complete for 23L000910_1.cutadapt.fastq.gz
fastqc-1938131_7.out:Analysis complete for 23L000910_2.cutadapt.fastq.gz
fastqc-1938131_8.out:Analysis complete for 23L000911_1.cutadapt.fastq.gz
fastqc-1938131_8.out:Analysis complete for 23L000911_2.cutadapt.fastq.gz
fastqc-1938131_9.out:Analysis complete for 23L000912_1.cutadapt.fastq.gz
fastqc-1938131_9.out:Analysis complete for 23L000912_2.cutadapt.fastq.gz
5.3 Run MultiQC
[huqj@login0b]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt
[huqj@login0b Cutadapt]$ module load multiqc/1.12
[huqj@login0b Cutadapt]$ multiqc *.zip
/// MultiQC 🔍 | v1.12
| multiqc | MultiQC Version v1.14 now available!
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000903_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000903_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000904_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000904_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000905_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000905_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000906_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000906_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000907_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000907_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000908_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000908_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000909_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000909_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000910_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000910_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000911_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000911_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000912_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000912_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000913_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000913_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000914_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000914_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000919_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000919_2.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000920_1.cutadapt_fastqc.zip
| multiqc | Search path : /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/FastQC/Cutadapt/23L000920_2.cutadapt_fastqc.zip
| searching | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 28/28
| fastqc | Found 28 reports
| multiqc | Compressing plot data
| multiqc | Report : multiqc_report.html
| multiqc | Data : multiqc_data
| multiqc | MultiQC complete
5.4 Download the QC reports to the local machine
qianjianghu@Qianjiangs-MacBook-Pro: ~
$ scp -r huqj@htc.crc.pitt.edu:/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/QC/ ./Desktop/ML_mouse_PCLS/
huqj@htc.crc.pitt.edu's password:
23L000910_2.cutadapt_screen.html 100% 3281KB 1.6MB/s 00:02
23L000912_1.cutadapt_screen.txt 100% 472 6.9KB/s 00:00
23L000909_1.cutadapt_screen.html 100% 3281KB 2.9MB/s 00:01
23L000919_2.cutadapt_screen.txt 100% 475 7.7KB/s 00:00
23L000907_2.cutadapt_screen.html 100% 3281KB 3.0MB/s 00:01
23L000914_1.cutadapt_screen.txt 100% 473 6.8KB/s 00:00
23L000913_2.cutadapt_screen.txt 100% 474 9.3KB/s 00:00
23L000903_2.cutadapt_screen.txt 100% 472 7.4KB/s 00:00
23L000905_2.cutadapt_screen.txt 100% 476 9.0KB/s 00:00
23L000908_1.cutadapt_screen.txt 100% 472 7.5KB/s 00:00
23L000906_2.cutadapt_screen.html 100% 3281KB 3.7MB/s 00:00
23L000903_1.cutadapt_screen.html 100% 3281KB 4.0MB/s 00:00
23L000909_2.cutadapt_screen.txt 100% 474 9.2KB/s 00:00
23L000908_1.cutadapt_screen.html 100% 3281KB 3.6MB/s 00:00
23L000904_1.cutadapt_screen.txt 100% 472 8.2KB/s 00:00
23L000911_2.cutadapt_screen.html 100% 3281KB 3.5MB/s 00:00
23L000914_1.cutadapt_screen.html 100% 3281KB 4.0MB/s 00:00
23L000913_1.cutadapt_screen.txt 100% 474 9.8KB/s 00:00
23L000904_2.cutadapt_screen.html 100% 3281KB 4.3MB/s 00:00
23L000913_2.cutadapt_screen.html 100% 3281KB 4.5MB/s 00:00
23L000919_1.cutadapt_screen.txt 100% 475 10.1KB/s 00:00
23L000914_2.cutadapt_screen.txt 100% 475 6.2KB/s 00:00
23L000912_2.cutadapt_screen.txt 100% 472 6.4KB/s 00:00
23L000909_1.cutadapt_screen.txt 100% 474 9.3KB/s 00:00
23L000904_2.cutadapt_screen.txt 100% 472 7.2KB/s 00:00
23L000920_2.cutadapt_screen.html 100% 3281KB 5.8MB/s 00:00
23L000905_1.cutadapt_screen.txt 100% 476 12.0KB/s 00:00
23L000919_2.cutadapt_screen.html 100% 3281KB 4.4MB/s 00:00
23L000912_2.cutadapt_screen.html 100% 3281KB 5.0MB/s 00:00
23L000908_2.cutadapt_screen.txt 100% 472 4.7KB/s 00:00
23L000905_2.cutadapt_screen.html 100% 3281KB 5.7MB/s 00:00
23L000903_1.cutadapt_screen.txt 100% 472 10.4KB/s 00:00
23L000910_1.cutadapt_screen.txt 100% 474 6.1KB/s 00:00
23L000904_1.cutadapt_screen.html 100% 3281KB 6.6MB/s 00:00
23L000911_2.cutadapt_screen.txt 100% 477 4.3KB/s 00:00
23L000913_1.cutadapt_screen.html 100% 3281KB 5.6MB/s 00:00
23L000920_2.cutadapt_screen.txt 100% 474 6.5KB/s 00:00
23L000919_1.cutadapt_screen.html 100% 3281KB 5.0MB/s 00:00
23L000912_1.cutadapt_screen.html 100% 3281KB 4.0MB/s 00:00
23L000907_2.cutadapt_screen.txt 100% 473 9.5KB/s 00:00
23L000905_1.cutadapt_screen.html 100% 3281KB 4.4MB/s 00:00
23L000920_1.cutadapt_screen.html 100% 3281KB 4.6MB/s 00:00
23L000906_1.cutadapt_screen.txt 100% 471 8.0KB/s 00:00
23L000920_1.cutadapt_screen.txt 100% 473 3.9KB/s 00:00
23L000911_1.cutadapt_screen.txt 100% 474 7.8KB/s 00:00
23L000910_2.cutadapt_screen.txt 100% 475 3.7KB/s 00:00
23L000910_1.cutadapt_screen.html 100% 3281KB 4.9MB/s 00:00
23L000907_1.cutadapt_screen.html 100% 3281KB 5.2MB/s 00:00
23L000909_2.cutadapt_screen.html 100% 3281KB 4.3MB/s 00:00
23L000903_2.cutadapt_screen.html 100% 3281KB 3.4MB/s 00:00
23L000908_2.cutadapt_screen.html 100% 3281KB 4.0MB/s 00:00
23L000906_2.cutadapt_screen.txt 100% 472 9.3KB/s 00:00
23L000906_1.cutadapt_screen.html 100% 3281KB 3.3MB/s 00:00
23L000914_2.cutadapt_screen.html 100% 3281KB 2.8MB/s 00:01
23L000911_1.cutadapt_screen.html 100% 3281KB 2.7MB/s 00:01
23L000907_1.cutadapt_screen.txt 100% 472 3.9KB/s 00:00
23L000919_1.cutadapt_fastqc.zip 100% 707KB 2.6MB/s 00:00
23L000914_2.cutadapt_fastqc.zip 100% 703KB 2.4MB/s 00:00
23L000912_2.cutadapt_fastqc.zip 100% 706KB 2.7MB/s 00:00
23L000913_1.cutadapt_fastqc.zip 100% 704KB 3.4MB/s 00:00
23L000910_1.cutadapt_fastqc.html 100% 238KB 1.2MB/s 00:00
23L000909_2.cutadapt_fastqc.html 100% 239KB 1.2MB/s 00:00
23L000907_1.cutadapt_fastqc.html 100% 244KB 1.0MB/s 00:00
23L000905_1.cutadapt_fastqc.zip 100% 705KB 2.4MB/s 00:00
23L000906_1.cutadapt_fastqc.html 100% 238KB 1.7MB/s 00:00
23L000908_2.cutadapt_fastqc.zip 100% 704KB 1.9MB/s 00:00
23L000903_2.cutadapt_fastqc.html 100% 241KB 1.3MB/s 00:00
23L000908_2.cutadapt_fastqc.html 100% 238KB 1.7MB/s 00:00
23L000903_1.cutadapt_fastqc.zip 100% 711KB 1.9MB/s 00:00
23L000911_1.cutadapt_fastqc.html 100% 244KB 1.4MB/s 00:00
23L000914_2.cutadapt_fastqc.html 100% 238KB 1.4MB/s 00:00
23L000909_1.cutadapt_fastqc.zip 100% 707KB 2.0MB/s 00:00
23L000904_2.cutadapt_fastqc.zip 100% 717KB 2.5MB/s 00:00
23L000913_2.cutadapt_fastqc.zip 100% 705KB 2.6MB/s 00:00
23L000912_1.cutadapt_fastqc.zip 100% 712KB 2.2MB/s 00:00
23L000904_1.cutadapt_fastqc.html 100% 246KB 2.0MB/s 00:00
23L000919_2.cutadapt_fastqc.zip 100% 707KB 2.1MB/s 00:00
23L000913_1.cutadapt_fastqc.html 100% 238KB 1.5MB/s 00:00
23L000914_1.cutadapt_fastqc.zip 100% 704KB 2.5MB/s 00:00
23L000919_1.cutadapt_fastqc.html 100% 240KB 1.7MB/s 00:00
23L000912_1.cutadapt_fastqc.html 100% 245KB 1.7MB/s 00:00
23L000909_2.cutadapt_fastqc.zip 100% 704KB 2.8MB/s 00:00
23L000904_1.cutadapt_fastqc.zip 100% 717KB 3.1MB/s 00:00
23L000905_1.cutadapt_fastqc.html 100% 238KB 1.7MB/s 00:00
23L000903_2.cutadapt_fastqc.zip 100% 707KB 2.3MB/s 00:00
23L000905_2.cutadapt_fastqc.zip 100% 706KB 2.5MB/s 00:00
23L000920_1.cutadapt_fastqc.html 100% 245KB 1.6MB/s 00:00
23L000908_1.cutadapt_fastqc.zip 100% 705KB 2.6MB/s 00:00
23L000910_2.cutadapt_fastqc.zip 100% 705KB 2.2MB/s 00:00
23L000904_2.cutadapt_fastqc.html 100% 247KB 1.7MB/s 00:00
23L000913_2.cutadapt_fastqc.html 100% 239KB 1.4MB/s 00:00
23L000920_1.cutadapt_fastqc.zip 100% 712KB 2.2MB/s 00:00
23L000911_1.cutadapt_fastqc.zip 100% 711KB 2.4MB/s 00:00
23L000907_1.cutadapt_fastqc.zip 100% 712KB 2.4MB/s 00:00
23L000920_2.cutadapt_fastqc.html 100% 239KB 1.8MB/s 00:00
23L000919_2.cutadapt_fastqc.html 100% 241KB 1.6MB/s 00:00
23L000906_2.cutadapt_fastqc.zip 100% 702KB 2.4MB/s 00:00
23L000912_2.cutadapt_fastqc.html 100% 240KB 1.5MB/s 00:00
multiqc_report.html 100% 1466KB 3.0MB/s 00:00
23L000905_2.cutadapt_fastqc.html 100% 240KB 1.6MB/s 00:00
23L000910_2.cutadapt_fastqc.html 100% 240KB 1.6MB/s 00:00
23L000911_2.cutadapt_fastqc.zip 100% 710KB 2.2MB/s 00:00
multiqc_citations.txt 100% 62 1.3KB/s 00:00
multiqc_data.json 100% 1672KB 3.1MB/s 00:00
multiqc_general_stats.txt 100% 2545 47.1KB/s 00:00
multiqc_fastqc.txt 100% 6159 101.2KB/s 00:00
multiqc_sources.txt 100% 4010 81.6KB/s 00:00
multiqc.log 100% 11KB 191.7KB/s 00:00
23L000907_2.cutadapt_fastqc.html 100% 243KB 2.2MB/s 00:00
23L000909_1.cutadapt_fastqc.html 100% 241KB 1.5MB/s 00:00
23L000920_2.cutadapt_fastqc.zip 100% 705KB 2.2MB/s 00:00
23L000910_1.cutadapt_fastqc.zip 100% 704KB 2.7MB/s 00:00
23L000906_1.cutadapt_fastqc.zip 100% 701KB 2.8MB/s 00:00
23L000903_1.cutadapt_fastqc.html 100% 243KB 1.7MB/s 00:00
23L000908_1.cutadapt_fastqc.html 100% 239KB 1.6MB/s 00:00
23L000906_2.cutadapt_fastqc.html 100% 238KB 1.4MB/s 00:00
23L000907_2.cutadapt_fastqc.zip 100% 710KB 2.9MB/s 00:00
23L000914_1.cutadapt_fastqc.html 100% 238KB 2.0MB/s 00:00
23L000911_2.cutadapt_fastqc.html 100% 243KB 1.6MB/s 00:00
23L000907_1_fastqc.zip 100% 756KB 2.8MB/s 00:00
23L000920_2_fastqc.html 100% 253KB 1.4MB/s 00:00
23L000909_1_fastqc.html 100% 259KB 1.8MB/s 00:00
23L000906_1_fastqc.zip 100% 752KB 2.5MB/s 00:00
23L000908_2_fastqc.html 100% 253KB 2.3MB/s 00:00
23L000919_2_fastqc.zip 100% 745KB 2.9MB/s 00:00
23L000904_1_fastqc.zip 100% 760KB 3.2MB/s 00:00
23L000905_1_fastqc.zip 100% 752KB 2.3MB/s 00:00
23L000919_2_fastqc.html 100% 252KB 1.9MB/s 00:00
23L000903_1_fastqc.html 100% 260KB 2.4MB/s 00:00
23L000907_2_fastqc.html 100% 253KB 2.1MB/s 00:00
23L000912_1_fastqc.html 100% 261KB 1.9MB/s 00:00
23L000903_1_fastqc.zip 100% 757KB 3.6MB/s 00:00
23L000913_2_fastqc.html 100% 251KB 1.6MB/s 00:00
23L000906_1_fastqc.html 100% 255KB 1.9MB/s 00:00
23L000905_2_fastqc.html 100% 250KB 1.2MB/s 00:00
23L000910_1_fastqc.html 100% 257KB 2.0MB/s 00:00
23L000914_2_fastqc.zip 100% 742KB 2.6MB/s 00:00
23L000909_1_fastqc.zip 100% 754KB 3.0MB/s 00:00
23L000911_2_fastqc.html 100% 256KB 1.6MB/s 00:00
23L000904_1_fastqc.html 100% 264KB 2.3MB/s 00:00
23L000908_1_fastqc.zip 100% 754KB 2.9MB/s 00:00
23L000914_2_fastqc.html 100% 250KB 1.8MB/s 00:00
23L000920_1_fastqc.zip 100% 757KB 2.5MB/s 00:00
23L000913_2_fastqc.zip 100% 743KB 2.7MB/s 00:00
23L000912_2_fastqc.zip 100% 748KB 2.7MB/s 00:00
23L000910_2_fastqc.zip 100% 749KB 2.7MB/s 00:00
multiqc_sources.txt 100% 3618 62.7KB/s 00:00
multiqc_general_stats.txt 100% 2213 32.6KB/s 00:00
multiqc_citations.txt 100% 62 1.4KB/s 00:00
multiqc.log 100% 10KB 174.1KB/s 00:00
multiqc_data.json 100% 1710KB 3.2MB/s 00:00
multiqc_fastqc.txt 100% 5578 105.2KB/s 00:00
23L000911_2_fastqc.zip 100% 753KB 3.2MB/s 00:00
multiqc_report.html 100% 1484KB 3.6MB/s 00:00
23L000903_2_fastqc.zip 100% 748KB 2.7MB/s 00:00
23L000914_1_fastqc.html 100% 257KB 1.7MB/s 00:00
23L000907_2_fastqc.zip 100% 748KB 2.8MB/s 00:00
23L000906_2_fastqc.zip 100% 742KB 3.0MB/s 00:00
23L000911_1_fastqc.html 100% 261KB 1.7MB/s 00:00
23L000904_2_fastqc.html 100% 258KB 2.0MB/s 00:00
23L000904_2_fastqc.zip 100% 754KB 3.1MB/s 00:00
23L000919_1_fastqc.zip 100% 755KB 3.3MB/s 00:00
23L000905_1_fastqc.html 100% 257KB 1.8MB/s 00:00
23L000910_2_fastqc.html 100% 255KB 1.5MB/s 00:00
23L000905_2_fastqc.zip 100% 743KB 2.5MB/s 00:00
23L000913_1_fastqc.html 100% 258KB 2.1MB/s 00:00
23L000906_2_fastqc.html 100% 251KB 2.0MB/s 00:00
23L000913_1_fastqc.zip 100% 755KB 2.5MB/s 00:00
23L000907_1_fastqc.html 100% 261KB 1.9MB/s 00:00
23L000912_2_fastqc.html 100% 254KB 1.8MB/s 00:00
23L000912_1_fastqc.zip 100% 757KB 3.1MB/s 00:00
23L000910_1_fastqc.zip 100% 752KB 2.5MB/s 00:00
23L000903_2_fastqc.html 100% 254KB 1.4MB/s 00:00
23L000911_1_fastqc.zip 100% 759KB 2.9MB/s 00:00
23L000909_2_fastqc.zip 100% 745KB 3.1MB/s 00:00
23L000914_1_fastqc.zip 100% 754KB 3.0MB/s 00:00
23L000908_2_fastqc.zip 100% 744KB 3.1MB/s 00:00
23L000919_1_fastqc.html 100% 259KB 1.8MB/s 00:00
23L000920_1_fastqc.html 100% 260KB 1.8MB/s 00:00
23L000920_2_fastqc.zip 100% 748KB 2.9MB/s 00:00
23L000908_1_fastqc.html 100% 257KB 2.1MB/s 00:00
23L000909_2_fastqc.html 100% 251KB 1.6MB/s 00:00
(base)
qianjianghu@Qianjiangs-MacBook-Pro: ~
$
6. Run Hisat2
6.1 Build Index
Download the genome into ./Refs/Genome/Mus_musculus/GRCm39_release_108
[huqj@login0b GRCm39_release_108]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Genome/Mus_musculus/GRCm39_release_108
[huqj@login0b GRCm39_release_108]$ wget ftp://ftp.ensembl.org/pub/release-108/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz
--2023-04-22 18:26:03-- https://ftp.ensembl.org/pub/release-108/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz
Resolving ftp.ensembl.org (ftp.ensembl.org)... 193.62.193.139
Connecting to ftp.ensembl.org (ftp.ensembl.org)|193.62.193.139|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 881211416 (840M) [application/x-gzip]
Saving to: ‘Mus_musculus.GRCm39.dna.primary_assembly.fa.gz’
100%[========================================================>] 881,211,416 664KB/s in 21m 41s
2023-04-22 18:47:45 (662 KB/s) - ‘Mus_musculus.GRCm39.dna.primary_assembly.fa.gz’ saved [881211416/881211416]
Download the Annotation file into ./Refs/Annotation/Mus_musculus/GRCm39_release_108
[huqj@login0b GRCm39_release_108]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Annotation/Mus_musculus/GRCm39_release_108
[huqj@login0b GRCm39_release_108]$ wget ftp://ftp.ensembl.org/pub/release-108/gtf/mus_musculus/Mus_musculus.GRCm39.108.gtf.gz
--2023-04-22 18:31:51-- ftp://ftp.ensembl.org/pub/release-108/gtf/mus_musculus/Mus_musculus.GRCm39.108.gtf.gz
=> ‘Mus_musculus.GRCm39.108.gtf.gz’
Resolving ftp.ensembl.org (ftp.ensembl.org)... 193.62.193.139
Connecting to ftp.ensembl.org (ftp.ensembl.org)|193.62.193.139|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done. ==> PWD ... done.
==> TYPE I ... done. ==> CWD (1) /pub/release-108/gtf/mus_musculus ... done.
==> SIZE Mus_musculus.GRCm39.108.gtf.gz ... 32316315
==> PASV ... done. ==> RETR Mus_musculus.GRCm39.108.gtf.gz ... done.
Length: 32316315 (31M) (unauthoritative)
100%[================================================================================================================>] 32,316,315 1.78MB/s in 22s
2023-01-31 01:04:46 (1.40 MB/s) - ‘Mus_musculus.GRCm39.108.gtf.gz’ saved [32316315]
Extract the exon (.exon) and splice-site (.ss) files from the GTF annotation
[huqj@login0b GRCm39_release_108]$ module load hisat2/2.2.1
[huqj@login0b GRCm39_release_108]$ gzip -d Mus_musculus.GRCm39.108.gtf.gz
[huqj@login0b GRCm39_release_108]$ ls
Mus_musculus.GRCm39.108.gtf
[huqj@login0b GRCm39_release_108]$ extract_splice_sites.py Mus_musculus.GRCm39.108.gtf > /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus/splice_sites/grcm39.ss
[huqj@login0b GRCm39_release_108]$ extract_exons.py Mus_musculus.GRCm39.108.gtf > /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus/exon/grcm39.exon
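Build the index (the build script reads the uncompressed Mus_musculus.GRCm39.dna.primary_assembly.fa, so decompress the downloaded .fa.gz with gzip -d first)
[huqj@login0b]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus
[huqj@login0b Mus_musculus]$ vim hisat2_build.sbatch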
#!/bin/bash
#
# Build the splice-aware HISAT2 index for mouse GRCm39 (Ensembl release 108).
#
# Inputs : splice_sites/grcm39.ss and exon/grcm39.exon under $input, plus the
#          uncompressed primary-assembly FASTA under $ref.
# Output : index files grcm39_tran.*.ht2 inside $out.
#
# Submit with `sbatch hisat2_build.sbatch` from the directory that holds this
# script. The OUT/ log directory must exist before submission because sbatch
# does not create it.
#
#SBATCH --job-name=hisat2_index_mus
#SBATCH -N 1                  # Ensure that all cores are on one machine
#SBATCH -t 3-00:00            # Runtime in D-HH:MM
#SBATCH --cpus-per-task=16    # Request that ncpus be allocated per process
#SBATCH --mem=230g            # Memory pool for all cores (see also --mem-per-cpu)
#SBATCH -o OUT/hisat2_build-%j.out
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu

module load hisat2/2.2.1

# Enabled after `module load` so the module machinery is unaffected; from
# here on any failing step aborts the job instead of continuing silently.
set -eo pipefail

input=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus
# The index lives in its own grcm39_tran/ sub-directory; the alignment job
# addresses it as .../GRCm39_release_108/grcm39_tran/grcm39_tran.
out=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus/index/GRCm39_release_108/grcm39_tran
ref=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Genome/Mus_musculus/GRCm39_release_108
genome="$ref/Mus_musculus.GRCm39.dna.primary_assembly.fa"

# The genome is downloaded as .fa.gz; stop early with a clear message if it
# has not been decompressed yet.
if [[ ! -s "$genome" ]]; then
  echo "Genome FASTA not found or empty: $genome (run gzip -d on the downloaded .fa.gz first)" >&2
  exit 1
fi

mkdir -p "$out"

# Use the allocated core count; fall back to 16 when run outside Slurm.
hisat2-build -p "${SLURM_CPUS_PER_TASK:-16}" \
  --ss "$input/splice_sites/grcm39.ss" \
  --exon "$input/exon/grcm39.exon" \
  "$genome" \
  "$out/grcm39_tran"
[huqj@login0b Mus_musculus]$ sbatch hisat2_build.sbatch
Check the index files
[huqj@login0b grcm39_tran]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus/index/GRCm39_release_108/grcm39_tran
[huqj@login0b grcm39_tran]$ ll
total 3758388
-rw-r--r-- 1 huqj mkoenigshoff 1641097441 Apr 22 22:24 grcm39_tran.1.ht2
-rw-r--r-- 1 huqj mkoenigshoff 664918148 Apr 22 22:24 grcm39_tran.2.ht2
-rw-r--r-- 1 huqj mkoenigshoff 3374 Apr 22 21:28 grcm39_tran.3.ht2
-rw-r--r-- 1 huqj mkoenigshoff 663655446 Apr 22 21:28 grcm39_tran.4.ht2
-rw-r--r-- 1 huqj mkoenigshoff 1527534119 Apr 22 22:32 grcm39_tran.5.ht2
-rw-r--r-- 1 huqj mkoenigshoff 676124278 Apr 22 22:32 grcm39_tran.6.ht2
-rw-r--r-- 1 huqj mkoenigshoff 11437272 Apr 22 21:29 grcm39_tran.7.ht2
-rw-r--r-- 1 huqj mkoenigshoff 2279891 Apr 22 21:29 grcm39_tran.8.ht2
6.2 run Hisat2
[huqj@login0b]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/Hisat2
[huqj@login0b Hisat2]$ vim hisat2.job
[huqj@login0b Hisat2]$ sbatch hisat2.job
#!/bin/bash
#
# HISAT2 alignment job array: one array task per sample.
#
# For the sample selected by the array index this script
#   1. aligns the cutadapt-trimmed read pair with HISAT2 (writes SAM),
#   2. converts the SAM to BAM,
#   3. name-sorts the BAM for downstream counting.
#
# The OUT/ log directory must exist before submission because sbatch does
# not create it.
#
#SBATCH -N 1
#SBATCH -J HISAT2
#SBATCH -t 2:00:00
#SBATCH -c 8
#SBATCH -o OUT/hisat2-%A_%a.out
#SBATCH --array=0-13 # job array index
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu
#############################
## HISAT2 set-up
module load gcc/8.2.0
module load hisat2/2.2.1
module load samtools/1.9
set -x
# Enabled after the module loads: stop at the first failing step so samtools
# never runs on a missing or truncated alignment.
set -eo pipefail
################################
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS

# Sample list: expected to hold one sample name per line.
mapfile -t names < "$project/Data/raw/Mouse_PCLS.txt"

task_id=${SLURM_ARRAY_TASK_ID:?this script must be submitted as a Slurm job array}
sample=${names[task_id]:-}
if [[ -z "$sample" ]]; then
  echo "No sample for array index $task_id in $project/Data/raw/Mouse_PCLS.txt" >&2
  exit 1
fi
echo "$sample"

trimfastq=$project/Data/Cutadapt
out=$project/Mapping/HISAT2
out_sort=$project/Mapping/HISAT2_sort
ref=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Index/Hisat2_Index/Mus_musculus/index/GRCm39_release_108/grcm39_tran/grcm39_tran
####################################
mkdir -p "$out"
mkdir -p "$out_sort"
###################
# Align with as many threads as Slurm allocated (8 when run outside Slurm).
hisat2 -x "$ref" \
  -S "$out/${sample}.sam" \
  -p "${SLURM_CPUS_PER_TASK:-8}" \
  --dta \
  -1 "$trimfastq/${sample}_1.cutadapt.fastq.gz" \
  -2 "$trimfastq/${sample}_2.cutadapt.fastq.gz"

# -b is required to get real BAM: samtools view writes SAM text by default,
# whatever the output file is called.
samtools view -@ 3 -b -h -o "$out/${sample}.bam" "$out/${sample}.sam"

# Name-sort (-n); options are placed before the input file, and extra
# threads put some of the allocated cores to use.
samtools sort -@ 3 -n -o "$out_sort/${sample}.sorted.query.bam" "$out/${sample}.bam"
check alignment results
[huqj@login0b Hisat2]$ cd OUT/
[huqj@login0b OUT]$ ls
hisat2-1938175_0.out hisat2-1938175_12.out hisat2-1938175_2.out hisat2-1938175_5.out hisat2-1938175_8.out
hisat2-1938175_10.out hisat2-1938175_13.out hisat2-1938175_3.out hisat2-1938175_6.out hisat2-1938175_9.out
hisat2-1938175_11.out hisat2-1938175_1.out hisat2-1938175_4.out hisat2-1938175_7.out
[huqj@login0b OUT]$ grep "overall alignment rate" *.out
hisat2-1938175_0.out:97.05% overall alignment rate
hisat2-1938175_10.out:97.25% overall alignment rate
hisat2-1938175_11.out:97.52% overall alignment rate
hisat2-1938175_12.out:97.04% overall alignment rate
hisat2-1938175_13.out:97.01% overall alignment rate
hisat2-1938175_1.out:92.22% overall alignment rate
hisat2-1938175_2.out:97.09% overall alignment rate
hisat2-1938175_3.out:97.77% overall alignment rate
hisat2-1938175_4.out:95.67% overall alignment rate
hisat2-1938175_5.out:97.53% overall alignment rate
hisat2-1938175_6.out:97.62% overall alignment rate
hisat2-1938175_7.out:97.23% overall alignment rate
hisat2-1938175_8.out:94.78% overall alignment rate
hisat2-1938175_9.out:97.31% overall alignment rate
7. Run HTSeq
7.1 Submit the HTSeq job
[huqj@login0b]$ cd /bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Jobs/HT-Seq
[huqj@login0b HT-Seq]$ vim htseq.job
#!/bin/bash
#
# htseq-count job array: one array task per sample.
#
# Counts reads per gene for the name-sorted BAM of the sample selected by the
# array index and writes <sample>.counts.txt into $out.
#
# The OUT/ log directory must exist before submission because sbatch does
# not create it.
#
#SBATCH -N 1
#SBATCH -t 1:00:00
#SBATCH -J htseq
#SBATCH -c 6
#SBATCH -o OUT/htseq-%A_%a.out
#SBATCH --array=0-13 # job array index
#SBATCH --mail-type=ALL
#SBATCH --mail-user=huqj@pitt.edu
######################################
############ htseq set-up
module load htseq/0.13.5
set -x
# Enabled after the module load: abort on the first failing command.
set -eo pipefail
###########################
project=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS

# Sample list: expected to hold one sample name per line.
mapfile -t names < "$project/Data/raw/Mouse_PCLS.txt"

task_id=${SLURM_ARRAY_TASK_ID:?this script must be submitted as a Slurm job array}
sample=${names[task_id]:-}
if [[ -z "$sample" ]]; then
  echo "No sample for array index $task_id in $project/Data/raw/Mouse_PCLS.txt" >&2
  exit 1
fi
echo "$sample"
#########################################
BAM=$project/Mapping/HISAT2_sort
gtf=/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/Refs/Annotation/Mus_musculus/GRCm39_release_108/Mus_musculus.GRCm39.108.gtf
out=$project/Counts/HT-Seq
#############################
mkdir -p "$out"

# -r name   : the input BAM is sorted by read name.
# -s reverse: this is the correct strandedness setting for this seq data.
# Comments must stay out of the command itself: anything after a trailing
# backslash breaks the line continuation and cuts the command short.
htseq-count -f bam \
  -r name \
  -s reverse \
  -t exon \
  -m union \
  -i gene_id \
  "$BAM/$sample.sorted.query.bam" \
  "$gtf" > "$out/$sample.counts.txt"
7.2 check results
[huqj@login0b HT-Seq]$ cat 23L000903.counts.txt | head
ENSMUSG00000000001 3551
ENSMUSG00000000003 0
ENSMUSG00000000028 110
ENSMUSG00000000031 24
ENSMUSG00000000037 15
ENSMUSG00000000049 1
ENSMUSG00000000056 997
ENSMUSG00000000058 2324
ENSMUSG00000000078 7886
ENSMUSG00000000085 921
8. Download the count files to the local MacBook Pro
qianjianghu@Qianjiangs-MacBook-Pro: ~
$ scp -r huqj@htc.crc.pitt.edu:/bgfs/mkoenigshoff/huqj/DataAnalysis/rna_seq/ML_mouse_PCLS/Counts/ /Users/qianjianghu/Desktop/ML_mouse_PCLS/
huqj@htc.crc.pitt.edu's password:
23L000910.counts.txt 100% 1202KB 3.9MB/s 00:00
23L000906.counts.txt 100% 1202KB 2.7MB/s 00:00
23L000919.counts.txt 100% 1201KB 3.3MB/s 00:00
23L000904.counts.txt 100% 1201KB 3.4MB/s 00:00
23L000912.counts.txt 100% 1201KB 4.1MB/s 00:00
23L000903.counts.txt 100% 1202KB 3.0MB/s 00:00
23L000908.counts.txt 100% 1202KB 4.3MB/s 00:00
23L000914.counts.txt 100% 1201KB 4.7MB/s 00:00
23L000909.counts.txt 100% 1203KB 4.8MB/s 00:00
23L000905.counts.txt 100% 1202KB 4.7MB/s 00:00
23L000913.counts.txt 100% 1203KB 6.2MB/s 00:00
23L000920.counts.txt 100% 1202KB 4.7MB/s 00:00
23L000911.counts.txt 100% 1202KB 3.8MB/s 00:00
23L000907.counts.txt 100% 1200KB 4.2MB/s 00:00
23L000903.txt 100% 20MB 14.3MB/s 00:01
23L000914.txt.summary 100% 472 5.3KB/s 00:00
23L000904.txt 100% 20MB 20.3MB/s 00:01
23L000904.txt.summary 100% 473 5.3KB/s 00:00
23L000909.txt.summary 100% 473 7.8KB/s 00:00
23L000906.txt.summary 100% 472 7.2KB/s 00:00
23L000920.txt 100% 20MB 14.7MB/s 00:01
23L000919.txt.summary 100% 472 6.5KB/s 00:00
23L000912.txt 100% 20MB 19.1MB/s 00:01
23L000920.txt.summary 100% 472 5.3KB/s 00:00
23L000910.txt.summary 100% 472 3.8KB/s 00:00
23L000905.txt 100% 20MB 20.3MB/s 00:01
23L000913.txt 100% 20MB 20.4MB/s 00:01
23L000914.txt 100% 20MB 18.4MB/s 00:01
23L000912.txt.summary 100% 472 7.4KB/s 00:00
23L000910.txt 100% 20MB 16.0MB/s 00:01
23L000919.txt 100% 20MB 9.9MB/s 00:02
23L000911.txt.summary 100% 474 3.4KB/s 00:00
23L000913.txt.summary 100% 473 4.1KB/s 00:00
23L000903.txt.summary 100% 472 3.2KB/s 00:00
23L000908.txt 100% 20MB 11.1MB/s 00:01
23L000906.txt 100% 20MB 12.3MB/s 00:01
23L000905.txt.summary 100% 472 8.3KB/s 00:00
23L000911.txt 100% 20MB 12.6MB/s 00:01
23L000907.txt 100% 20MB 10.5MB/s 00:01
23L000909.txt 100% 20MB 13.1MB/s 00:01
23L000907.txt.summary 100% 472 4.0KB/s 00:00
23L000908.txt.summary 100% 472 3.8KB/s 00:00
(base)