Skip to content

merge_vcfs

Merge a minos VCF with a GVCF at certain positions (driven by the catalogue). Will only include null calls from the GVCF.

check_gvcf_row(row, min_dp)

Check if a GVCF row is just a null call (and so should be included).

Parameters:

Name Type Description Default
row str

The VCF row.

required
min_dp int

Minimum DP to consider a call valid.

required

Returns: bool: True if the row is just null calls, False otherwise.

Source code in gnomonicus/merge_vcfs.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def check_gvcf_row(row: str, min_dp: int) -> bool:
    """Check if a GVCF row is just a null call (and so should be included).

    The row is written to a scratch VCF file (``.temp_gvcf_row.vcf`` in the
    current working directory) behind a minimal header so that grumpy can
    parse it. The scratch file is removed afterwards, even if parsing fails.

    Args:
        row (str): The VCF row.
        min_dp (int): Minimum DP to consider a call valid.
    Returns:
        bool: True if the row is _just_ null calls (this includes a row from
            which no calls are parsed at all), False otherwise.
    """
    temp_path = Path(".temp_gvcf_row.vcf")
    try:
        with open(temp_path, "w") as f:
            f.write("##fileformat=VCFv4.2\n")
            f.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample\n")
            f.write(row + "\n")
        vcf = grumpy.VCFFile(temp_path.as_posix(), False, min_dp)
        # A single non-null call is enough to reject the row.
        return not any(
            call.call_type != grumpy.AltType.NULL
            for _position, calls in vcf.calls.items()
            for call in calls
        )
    finally:
        # Always clean up, so a row that fails to parse does not leave the
        # scratch file behind; missing_ok avoids masking the original error
        # if the file was never created.
        temp_path.unlink(missing_ok=True)

fetch_minos_positions(minos_path, min_dp)

Given a minos VCF, return the positions to exclude from the gvcf.

Parameters:

Name Type Description Default
minos_path Path

Path to the minos VCF file.

required
min_dp int

Minimum DP to consider a call valid.

required

Returns: set[int]: The positions to exclude.

Source code in gnomonicus/merge_vcfs.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def fetch_minos_positions(minos_path: Path, min_dp: int) -> set[int]:
    """Collect the positions covered by calls in a minos VCF.

    These are the positions to exclude from the gvcf.

    Args:
        minos_path (Path): Path to the minos VCF file.
        min_dp (int): Minimum DP to consider a call valid.
    Returns:
        set[int]: The positions to exclude.
    """
    parsed = grumpy.VCFFile(minos_path.as_posix(), False, min_dp)
    excluded: set[int] = set()
    for start, calls_here in parsed.calls.items():
        for entry in calls_here:
            if entry.call_type != grumpy.AltType.DEL:
                # Anything other than a deletion only occupies its own position.
                excluded.add(start)
                continue
            # Deletion: exclude one position per element of `alt`, counting
            # forward from the call position.
            span_end = start + len(entry.alt)
            excluded.update(range(start, span_end))
    return excluded