Source code for drive.models.generate_indices

from dataclasses import dataclass
from typing import Protocol

from pandas import DataFrame


# general protocol that defines that every class needs the get_haplotype_id method
[docs]class FileIndices(Protocol):
[docs] def get_haplotype_id( self, data: DataFrame, ind_id_indx: int, phase_col_indx: int, col_name: str ) -> None: ...
[docs]@dataclass class HapIBD(FileIndices): id1_indx: int = 0 hap1_indx: int = 1 id2_indx: int = 2 hap2_indx: int = 3 chr_indx: int = 4 str_indx: int = 5 end_indx: int = 6 cM_indx: int = 7
[docs] def get_haplotype_id( self, data: DataFrame, ind_id_indx: int, phase_col_indx: int, col_name: str ) -> None: data.loc[:, col_name] = ( data[ind_id_indx] + "." + data[phase_col_indx].astype(str) )
def __str__(self): """Custom string message used for debugging""" return f"HapIBD: id1_index={self.id1_indx}, id2_index={self.id2_indx}, haplotype_1_index={self.hap1_indx}, haplotype_2_index={self.hap2_indx}, chromosome_index={self.chr_indx}, start_position_index={self.str_indx}, end_position_index={self.end_indx}, centimorgan_index={self.cM_indx}" # noqa: E501
[docs]@dataclass class Germline(FileIndices): id1_indx: int = 0 hap1_indx: int = 1 id2_indx: int = 2 hap2_indx: int = 3 chr_indx: int = 4 str_indx: int = 5 end_indx: int = 6 cM_indx: int = 10 unit: int = 11
[docs] def get_haplotype_id( self, data: DataFrame, ind_id_indx: int, phase_col_indx: int, col_name: str ) -> None: data.loc[:, col_name] = data[phase_col_indx]
def __str__(self): """Custom string message used for debugging""" return f"Germline: id1_index={self.id1_indx}, id2_index={self.id2_indx}, haplotype_1_index={self.hap1_indx}, haplotype_2_index={self.hap2_indx}, chromosome_index={self.chr_indx}, start_position_index={self.str_indx}, end_position_index={self.end_indx}, centimorgan_index={self.cM_indx}, unit_index={self.unit}" # noqa: E501
[docs]@dataclass class iLASH(FileIndices): id1_indx: int = 0 hap1_indx: int = 1 id2_indx: int = 2 hap2_indx: int = 3 chr_indx: int = 4 str_indx: int = 5 end_indx: int = 6 cM_indx: int = 9
[docs] def get_haplotype_id( self, data: DataFrame, ind_id_indx: int, phase_col_indx: int, col_name: str ) -> None: data.loc[:, col_name] = data[phase_col_indx]
def __str__(self): """Custom string message used for debugging""" return f"iLASH: id1_index={self.id1_indx}, id2_index={self.id2_indx}, haplotype_1_index={self.hap1_indx}, haplotype_2_index={self.hap2_indx}, chromosome_index={self.chr_indx}, start_position_index={self.str_indx}, end_position_index={self.end_indx}, centimorgan_index={self.cM_indx}" # noqa: E501
[docs]@dataclass class Rapid(FileIndices): id1_indx: int = 1 hap1_indx: int = 3 id2_indx: int = 2 hap2_indx: int = 4 chr_indx: int = 0 cM_indx: int = 7 str_indx: int = 5 end_indx: int = 6
[docs] def get_haplotype_id( self, data: DataFrame, ind_id_indx: int, phase_col_indx: int, col_name: str ) -> None: data.loc[:, col_name] = ( data[ind_id_indx] + "." + data[phase_col_indx].astype(str) )
def __str__(self): """Custom string message used for debugging""" return f"Rapid: id1_index={self.id1_indx}, id2_index={self.id2_indx}, haplotype_1_index={self.hap1_indx}, haplotype_2_index={self.hap2_indx}, chromosome_index={self.chr_indx}, start_position_index={self.str_indx}, end_position_index={self.end_indx}, centimorgan_index={self.cM_indx}" # noqa: E501
[docs]def create_indices(ibd_file_format: str) -> FileIndices: """Factory method to generate the proper file indice object based on the ibd program Parameters ---------- ibd_file_format: str string indicating what ibd program was used identify IBD segments. EX: hapibd, ilash, rapid, and germline. expects this value to be lower case Returns ------- FileIndices returns an object that conforms to the FileIndices protocol. It will have the method getHAPID. It will also have the correct indices for the ibd program Raises ------ ValueError Raises a value error if the user passes an ibd_file_format that is not hapibd, hap-ibd, germline, ilash, rapid """ format_selector = { "germline": Germline(), "ilash": iLASH(), "hapibd": HapIBD(), "rapid": Rapid(), } return format_selector.get(ibd_file_format)