# Source code for drive.helper_funcs.split_region_str

import re

from drive.models import Genes


# Exactly one ":" followed by exactly one "-". None of the three fields may
# contain either separator, so strings with the separators in the wrong order
# (e.g. "10-1234:5678") do not match.
_REGION_PATTERN = re.compile(r"([^:-]*):([^:-]*)-([^:-]*)")


def split_target_string(chromo_pos_str: str) -> Genes:
    """Split the user-provided target region string into its components.

    Parameters
    ----------
    chromo_pos_str : str
        String that has the region of interest in base pairs. This string
        will look like 10:1234-1234 where the first number is the chromosome
        number, then the start position, and then the end position of the
        region of interest.

    Returns
    -------
    Genes
        ``Genes`` built positionally from the chromosome number, the start
        position, and the end position (all converted with ``int``).

    Raises
    ------
    ValueError
        If the string is not formatted as
        chromosome:start_position-end_position. This includes separators
        that are missing, repeated, or in the wrong order, and fields that
        are empty or not integers. Also raised if the start position is
        larger than the end position.
    """
    format_error_msg = f"Expected the gene position string to be formatted like chromosome:start_position-end_position. Instead it was formatted as {chromo_pos_str}"  # noqa: E501

    match = _REGION_PATTERN.fullmatch(chromo_pos_str)
    if match is None:
        raise ValueError(format_error_msg)

    try:
        chromosome, start, end = (int(value) for value in match.groups())
    except ValueError as err:
        # An empty or non-numeric field is a formatting problem too; report
        # it with the format message and keep the int() failure as the cause.
        raise ValueError(format_error_msg) from err

    if start > end:
        raise ValueError(
            f"expected the start position of the target string to be <= the end position. Instead the start position was {start} and the end position was {end}"  # noqa: E501
        )

    return Genes(chromosome, start, end)