#!/usr/bin/env bash
set -euo pipefail

# Positional arguments; each one falls back to its built-in default when it is
# absent or empty:
#   $1  input VCF to read
#   $2  rewritten VCF to write
#   $3  file that receives a compressed copy of the header lines
input_file=${1:-}
output_file=${2:-}
header_file=${3:-}
: "${input_file:=clinvar_20251019.vcf.gz}"
: "${output_file:=clinvar_20251019_chr.vcf.gz}"
: "${header_file:=clinvar_20251019_chr.header.vcf.gz}"

# Decompress-to-stdout command: start from "gunzip -c" and switch to gzcat
# whenever gzcat is available on PATH. Kept as a plain string; it is expanded
# unquoted at the call site so that "gunzip -c" splits into command + option.
DECOMP="gunzip -c"
if command -v gzcat >/dev/null 2>&1; then
  DECOMP="gzcat"
fi

# Preflight checks: abort with exit status 1 before doing any work if a
# required tool is not on PATH or the input file is missing/empty.
# Diagnostics go to stderr so they are not mistaken for normal output.
for tool in bgzip tabix; do
  if ! command -v "$tool" >/dev/null; then
    printf '%s not found (brew install htslib)\n' "$tool" >&2
    exit 1
  fi
done
if [[ ! -s "$input_file" ]]; then
  printf 'Input VCF %s missing or empty\n' "$input_file" >&2
  exit 1
fi

#######################################
# Rewrite VCF text from stdin so that every record's first column (CHROM)
# carries a "chr" prefix.
#   - Lines starting with "#" are passed through unchanged and are also
#     appended to the header copy file.
#   - On data lines "MT" becomes "chrM"; names that already start with "chr"
#     are left untouched; everything else gets "chr" prepended.
# NOTE(review): header lines are copied verbatim, so any ##contig=<ID=...>
# entries keep their original names - confirm whether downstream tools need
# them renamed as well.
# Arguments: $1 - file that receives a copy of the header lines
# Outputs:   the rewritten VCF on stdout
#######################################
add_chr_prefix() {
  local header_copy=$1
  # FS must be a tab: assigning to $1 makes awk rebuild $0 from its fields
  # joined by OFS. With the default whitespace FS, a space inside any column
  # would be turned into a tab and empty columns would collapse, corrupting
  # the tab-delimited record.
  awk -v HDR="$header_copy" '
    BEGIN { FS = OFS = "\t" }
    /^#/ { print >> HDR; print; next }
    {
      if ($1 == "MT") $1 = "M"
      if ($1 !~ /^chr/) $1 = "chr" $1
      print
    }
  '
}

# Scratch file collecting the uncompressed header; removed on any exit path.
hdr_tmp="$(mktemp)"
trap 'rm -f "$hdr_tmp"' EXIT

# DECOMP may hold a command plus an option ("gunzip -c"), so it is expanded
# unquoted on purpose to let it word-split.
# shellcheck disable=SC2086
$DECOMP "$input_file" | add_chr_prefix "$hdr_tmp" | bgzip -c > "$output_file"

# Compress the captured header separately, then index the rewritten VCF
# (-f overwrites an existing index).
bgzip -c "$hdr_tmp" > "$header_file"
tabix -f -p vcf "$output_file"
echo "Wrote $output_file (and .tbi) and $header_file"
