#!/bin/bash
#
# Prefix the chromosome column of a bgzip-compressed VCF with "chr".
#
# Usage: <script> [input.vcf.gz [output.vcf.gz [header.vcf.gz]]]
#
# Produces:
#   output.vcf.gz      - all header lines unchanged, every data line prefixed
#                        with "chr"; bgzip-compressed
#   output.vcf.gz.tbi  - tabix index for the output
#   header.vcf.gz      - the header lines ("#"-prefixed) only, bgzip-compressed
#
# NOTE(review): "##contig=<ID=...>" header lines, if the input has any, are
# passed through unchanged - confirm whether downstream tools need them
# renamed as well.

# Abort on the first failing command, on unset variables, and when any stage
# of a pipeline fails, so a truncated output is never indexed.
set -euo pipefail

input_file="${1:-All_20180418.vcf.gz}"
output_file="${2:-All_20180418_chr.vcf.gz}"
header_file="${3:-All_20180418_chr.header.vcf.gz}"

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

for tool in gzip awk bgzip tabix mktemp; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done
[[ -r "$input_file" ]] || die "cannot read input file: $input_file"

# Collect the header in a private temp file (not a fixed name in the current
# directory) and remove it on every exit path.
header_tmp=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "$header_tmp"' EXIT

# Stream the VCF through awk and recompress it.
# - Header lines go to both the temp file and the main output.
# - Data lines get "chr" prepended to the whole line. This is equivalent to
#   prefixing column 1, but avoids assigning to $1, which would make awk
#   re-split the record on whitespace and turn spaces inside fields into tabs.
# The temp path is passed through the environment rather than -v so awk does
# not interpret backslash escapes in it.
gzip -dc -- "$input_file" \
  | HEADER_TMP="$header_tmp" awk '
      BEGIN { header_path = ENVIRON["HEADER_TMP"] }
      /^#/  { print > header_path; print; next }
            { print "chr" $0 }
    ' \
  | bgzip -c > "$output_file"

# Compress the collected header lines (options before the operand so this
# also works without GNU-style argument permutation).
bgzip -c "$header_tmp" > "$header_file"

# Index the freshly written output; -f replaces a stale index left by an
# earlier run instead of failing.
tabix -f -p vcf "$output_file"
