Source code for drive.helper_funcs.split_region_str
import re
from drive.models import Genes
def split_target_string(chromo_pos_str: str) -> Genes:
    """Parse a ``chromosome:start-end`` region string into a ``Genes`` record.

    Parameters
    ----------
    chromo_pos_str : str
        Region of interest in base pairs, formatted like ``10:1234-5678``:
        the chromosome number, a colon, the start position, a hyphen, and
        then the end position. Each of the three fields must be parseable
        by ``int``.

    Returns
    -------
    Genes
        ``Genes(chromosome, start, end)``, built positionally from the
        three integer fields.

    Raises
    ------
    ValueError
        If the string is not laid out as
        ``chromosome:start_position-end_position`` (this includes the
        delimiters appearing in the wrong order, e.g. ``10-1234:5678``),
        if any of the three fields is not an integer, or if the start
        position is larger than the end position.
    """
    # Anchor the delimiters in order. Splitting on ":|-" alone treats both
    # delimiters as interchangeable and would accept malformed inputs such
    # as "10-100:200" or "10:100:200".
    region_match = re.fullmatch(r"([^:-]+):([^:-]+)-([^:-]+)", chromo_pos_str)
    if region_match is None:
        error_msg = f"Expected the gene position string to be formatted like chromosome:start_position-end_position. Instead it was formatted as {chromo_pos_str}"  # noqa: E501
        raise ValueError(error_msg)

    try:
        chromosome, start, end = (int(field) for field in region_match.groups())
    except ValueError as err:
        # Re-raise with the offending region string so the user can see
        # which input was rejected, instead of int()'s generic message.
        raise ValueError(
            f"Expected the chromosome, start position, and end position to be integers. Instead the gene position string was {chromo_pos_str}"  # noqa: E501
        ) from err

    if start > end:
        raise ValueError(
            f"expected the start position of the target string to be <= the end position. Instead the start position was {start} and the end position was {end}"  # noqa: E501
        )

    return Genes(chromosome, start, end)