# List every read's name and length, one tab-separated row per read, for each
# mate file (works for FASTA and FASTQ input alike).
# --name / --length are the long spellings of -n / -l.
seqkit fx2tab --name --length reads_R1.fastq.gz
seqkit fx2tab --name --length reads_R2.fastq.gz
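#   fx2tab  convert FASTA/Q records to a tab-separated table
#   -n      print the sequence name only, without sequence/quality columns
#           (full header line; add -i to print just the ID)
#   -l      print the sequence length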
# Summarize read-length statistics (count, total, min/avg/max) per input file.
seqkit stats \
  reads_R1.fastq.gz \
  reads_R2.fastq.gz
# Example output:
#   file               format  type  num_seqs   sum_len      min_len  avg_len  max_len
#   reads_R1.fastq.gz  FASTQ   DNA   1,000,000  120,000,000  80       120.0    150
#   reads_R2.fastq.gz  FASTQ   DNA   1,000,000  120,000,000  80       120.0    150
# Retain only reads whose length lies in the closed range [100, 500] bp.
# --min-len / --max-len are the long spellings of -m / -M.
seqkit seq --min-len 100 --max-len 500 reads.fastq > filtered.fastq
#   -m 100  minimum read length to keep (inclusive)
#   -M 500  maximum read length to keep (inclusive)
# Keep only read pairs in which BOTH mates are >= 100 bp.
#
# `seqkit pair` has no per-mate output options: -O/--out-dir names an output
# DIRECTORY, and the paired files are written into it under the input files'
# own names; -o does not select the read-1 output. Inputs given as <(...)
# process substitutions have no usable file name, so the length-filtered mates
# are written to real files first and then re-paired.
#
# Step 1 length-filters each mate on its own (this may orphan some mates);
# step 2 keeps only reads whose mate also survived. Results:
#   paired/R1.filtered.fastq  paired/R2.filtered.fastq
# The `&&` chain stops at the first failing step. If `paired/` already exists
# and is not empty, rerun with -f/--force (this overwrites that directory).
mkdir -p len_filtered &&
  seqkit seq -m 100 -o len_filtered/R1.filtered.fastq reads_R1.fastq.gz &&
  seqkit seq -m 100 -o len_filtered/R2.filtered.fastq reads_R2.fastq.gz &&
  seqkit pair \
    -1 len_filtered/R1.filtered.fastq \
    -2 len_filtered/R2.filtered.fastq \
    -O paired