refactor: string

use-py · Aug 10, 2024 · 78d95e1 · 78d95e1
1 parent f909fd1
commit 78d95e1
Show file tree

Hide file tree

Showing 13 changed files with 412 additions and 0 deletions.
diff --git a/src/usepy/string/__init__.py b/src/usepy/string/__init__.py
@@ -0,0 +1,23 @@
+from .camel_case import camel_case
+from .capitalize import capitalize
+from .kebab_case import kebab_case
+from .left import left
+from .lower_case import lower_case
+from .middle import middle
+from .middle_batch import middle_batch
+from .pascal_case import pascal_case
+from .right import right
+from .snake_case import snake_case
+
+# Names re-exported by this package: these are exactly the helpers imported
+# above, and the only names a ``from usepy.string import *`` will bind.
+__all__ = [
+    "camel_case",
+    "capitalize",
+    "kebab_case",
+    "left",
+    "lower_case",
+    "middle",
+    "middle_batch",
+    "pascal_case",
+    "right",
+    "snake_case",
+]
diff --git a/src/usepy/string/_get_section.py b/src/usepy/string/_get_section.py
@@ -0,0 +1,48 @@
+from typing import Optional, Tuple
+
+
+def get_section(
+        original_str: str, start_str: Optional[str] = None, end_str: Optional[str] = None
+) -> Tuple[Optional[str], Optional[int], Optional[int]]:
+    """
+    Get the substring between two given substrings in a string.
+
+    The first occurrence of ``start_str`` is located, then the first occurrence
+    of ``end_str`` at or after the end of that match. Neither delimiter is
+    included in the returned section.
+
+    Args:
+        original_str (str): The original string to search in.
+        start_str (Optional[str], optional): The substring to start the search from.
+            If not provided, the search starts from the beginning of the string.
+        end_str (Optional[str], optional): The substring to end the search at.
+            If not provided, the search ends at the end of the string.
+
+    Returns:
+        Tuple[Optional[str], Optional[int], Optional[int]]:
+            ``(section, start_index, end_index)`` such that
+            ``section == original_str[start_index:end_index]``.
+            If either delimiter cannot be found, all three items are None.
+
+    Examples:
+        >>> get_section('abc123def', 'abc', 'def')
+        ('123', 3, 6)
+        >>> get_section('abc123def', 'abc')
+        ('123def', 3, 9)
+        >>> get_section('abc123def', end_str='def')[0]
+        'abc123'
+        >>> get_section('abc123def', 'xyz')
+        (None, None, None)
+    """
+    not_found = (None, None, None)
+
+    if start_str is None:
+        start_index = 0
+    else:
+        found_at = original_str.find(start_str)
+        if found_at < 0:
+            # Report "missing" uniformly as None instead of leaking find()'s -1.
+            return not_found
+        # The section begins right after the start delimiter.
+        start_index = found_at + len(start_str)
+
+    if end_str is None:
+        end_index = len(original_str)
+    else:
+        # Only look for the end delimiter after the start delimiter.
+        end_index = original_str.find(end_str, start_index)
+        if end_index < 0:
+            return not_found
+
+    return original_str[start_index:end_index], start_index, end_index
diff --git a/src/usepy/string/_get_words.py b/src/usepy/string/_get_words.py
@@ -0,0 +1,25 @@
+from typing import TypeVar
+import re
+
+T = TypeVar('T', bound=str)
+
+# One word is, in order of preference:
+#   - an optional capital followed by lowercase letters ("hello", "Request"),
+#   - a run of digits ("42"),
+#   - a run of capitals that is not the start of a capitalised word ("HTTP").
+# Only ASCII letters and digits are matched; everything else is a separator.
+CASE_SPLIT_PATTERN = re.compile(r'[A-Z]?[a-z]+|[0-9]+|[A-Z]+(?![a-z])')
+
+
+def get_words(string: T) -> list[T]:
+    """
+    Splits a string into words.
+
+    Words are runs of ASCII letters or digits. Besides whitespace and other
+    non-alphanumeric characters, a word boundary is also placed at case changes
+    (``camelCase``, ``HTTPRequest``) and between letters and digits.
+
+    Args:
+        string (str): The input string to be split into words.
+
+    Returns:
+        list[str]: The words found in the input, in order. Empty if the input
+            contains no ASCII letters or digits.
+
+    Example:
+        >>> get_words('hello world')
+        ['hello', 'world']
+        >>> get_words('hello-world/foo_bar')
+        ['hello', 'world', 'foo', 'bar']
+        >>> get_words('HTTPRequest')
+        ['HTTP', 'Request']
+        >>> get_words('camelCase2Go')
+        ['camel', 'Case', '2', 'Go']
+    """
+    return CASE_SPLIT_PATTERN.findall(string)
diff --git a/src/usepy/string/camel_case.py b/src/usepy/string/camel_case.py
@@ -0,0 +1,41 @@
+from typing import TypeVar
+
+from usepy.string.capitalize import capitalize
+
+T = TypeVar('T', bound=str)
+
+
+def camel_case(string: T) -> T:
+    """
+    Converts a string to camel case.
+
+    The input is split into words; the first word is lowercased, every following
+    word is capitalized, and the words are concatenated without a separator.
+
+    Args:
+        string (T): The input string to be converted to camel case.
+
+    Returns:
+        str: The converted string in camel case, or an empty string when the
+            input contains no words.
+
+    Examples:
+        >>> camel_case('camelCase')
+        'camelCase'
+        >>> camel_case('some whitespace')
+        'someWhitespace'
+        >>> camel_case('hyphen-text')
+        'hyphenText'
+        >>> camel_case('HTTPRequest')
+        'httpRequest'
+    """
+    from usepy.string._get_words import get_words
+
+    parts = get_words(string)
+    if len(parts) == 0:
+        return ""
+
+    # Only the leading word stays fully lowercase.
+    head = parts[0].lower()
+    tail = ''.join(capitalize(part) for part in parts[1:])
+    return head + tail
diff --git a/src/usepy/string/capitalize.py b/src/usepy/string/capitalize.py
@@ -0,0 +1,22 @@
+from typing import TypeVar
+
+T = TypeVar('T', bound=str)
+
+
+def capitalize(string: T) -> T:
+    """
+    Converts the first character of a string to uppercase and the remaining characters to lowercase.
+
+    Only the very first character is uppercased; later words and letters that
+    follow apostrophes or digits are lowercased like everything else.
+
+    Args:
+        string (T): The string to be capitalized.
+
+    Returns:
+        str: The capitalized string.
+
+    Examples:
+        >>> capitalize('fred')
+        'Fred'
+        >>> capitalize('FRED')
+        'Fred'
+        >>> capitalize('hello WORLD')
+        'Hello world'
+    """
+    # str.title() would capitalize every word ("Hello World", "They'Re");
+    # str.capitalize() implements the documented contract.
+    return string.capitalize()
diff --git a/src/usepy/string/kebab_case.py b/src/usepy/string/kebab_case.py
@@ -0,0 +1,33 @@
+from typing import Union
+
+
+def kebab_case(string: Union[str, bytes]) -> str:
+    """
+    Converts a string to kebab case.
+
+    The input is split into words, each word is lowercased, and the words are
+    joined with a dash (-) character.
+
+    Args:
+        string (Union[str, bytes]): The input to convert. ``bytes`` input is
+            decoded as UTF-8 before splitting.
+
+    Returns:
+        str: The converted string in kebab case.
+
+    Examples:
+        >>> kebab_case('camelCase')
+        'camel-case'
+        >>> kebab_case('some whitespace')
+        'some-whitespace'
+        >>> kebab_case('hyphen-text')
+        'hyphen-text'
+        >>> kebab_case('HTTPRequest')
+        'http-request'
+    """
+    from usepy.string._get_words import get_words
+
+    text = string.decode('utf-8') if isinstance(string, bytes) else string
+    lowered = [word.lower() for word in get_words(text)]
+    return '-'.join(lowered)
diff --git a/src/usepy/string/left.py b/src/usepy/string/left.py
@@ -0,0 +1,24 @@
+from typing import Optional
+
+
+
+
+def left(original_str: str, end_str: str) -> Optional[str]:
+    """
+    Get the part of a string that precedes a given substring.
+
+    Args:
+        original_str (str): The original string to search in.
+        end_str (str): The substring that marks where the result stops.
+
+    Returns:
+        Optional[str]: Everything before the first occurrence of ``end_str``,
+            or None if ``end_str`` does not occur.
+
+    Examples:
+        >>> left('abc123def', 'def')
+        'abc123'
+    """
+    from usepy.string._get_section import get_section
+
+    # Only the extracted text is needed; the two indices are ignored.
+    section = get_section(original_str, end_str=end_str)
+    return section[0]
diff --git a/src/usepy/string/lower_case.py b/src/usepy/string/lower_case.py
@@ -0,0 +1,33 @@
+from typing import Union
+
+
+def lower_case(string: Union[str, bytes]) -> str:
+    """
+    Converts a string to lower case.
+
+    The input is split into words, each word is lowercased, and the words are
+    joined with a single space character.
+
+    Args:
+        string (Union[str, bytes]): The input to convert. ``bytes`` input is
+            decoded as UTF-8 before splitting.
+
+    Returns:
+        str: The converted string in lower case.
+
+    Examples:
+        >>> lower_case('camelCase')
+        'camel case'
+        >>> lower_case('some whitespace')
+        'some whitespace'
+        >>> lower_case('hyphen-text')
+        'hyphen text'
+        >>> lower_case('HTTPRequest')
+        'http request'
+    """
+    from usepy.string._get_words import get_words
+
+    text = string.decode('utf-8') if isinstance(string, bytes) else string
+    lowered = [word.lower() for word in get_words(text)]
+    return ' '.join(lowered)
diff --git a/src/usepy/string/middle.py b/src/usepy/string/middle.py
@@ -0,0 +1,27 @@
+from typing import Optional
+
+
+def middle(
+        original_str: str, start_str: Optional[str] = None, end_str: Optional[str] = None
+) -> Optional[str]:
+    """
+    Get the text enclosed by two given substrings in a string.
+
+    Args:
+        original_str (str): The original string to search in.
+        start_str (Optional[str], optional): The substring after which the result begins.
+            If not provided, the result begins at the start of the string.
+        end_str (Optional[str], optional): The substring before which the result stops.
+            If not provided, the result runs to the end of the string.
+
+    Returns:
+        Optional[str]: The text between the two substrings, or None if not found.
+
+    Examples:
+        >>> middle('abc123def', 'abc', 'def')
+        '123'
+    """
+    from usepy.string._get_section import get_section
+
+    # Only the extracted text is needed; the two indices are ignored.
+    section = get_section(original_str, start_str, end_str)
+    return section[0]
diff --git a/src/usepy/string/middle_batch.py b/src/usepy/string/middle_batch.py
@@ -0,0 +1,47 @@
+from typing import Optional, List
+
+
+def middle_batch(
+        original_str: str,
+        start_str: Optional[str] = None,
+        end_str: Optional[str] = None,
+        max_count: Optional[int] = None,
+) -> List[str]:
+    """
+    Get a list of substrings between two given substrings in a string.
+
+    The string is scanned left to right; after each match the search resumes
+    just past that match's end delimiter.
+
+    Args:
+        original_str (str): The original string to search in.
+        start_str (Optional[str], optional): The substring to start the search from.
+            If not provided, the search starts from the beginning of the string.
+        end_str (Optional[str], optional): The substring to end the search at.
+            If not provided, the search ends at the end of the string, so at
+            most one substring is returned.
+        max_count (Optional[int], optional): The maximum number of substrings to return.
+            If not provided or set to None, all substrings will be returned.
+
+    Returns:
+        List[str]: A list of substrings between the start and end substrings.
+
+    Examples:
+        >>> middle_batch('abc123def456abc789def', 'abc', 'def')
+        ['123', '789']
+        >>> middle_batch('abc123def456abc789def', 'abc', 'def', 1)
+        ['123']
+        >>> middle_batch('abc123def')
+        ['abc123def']
+    """
+    from usepy.string._get_section import get_section
+
+    matches: List[str] = []
+    remaining = original_str
+    end_len = len(end_str or "")
+
+    # A non-negative max_count lets the scan stop as soon as enough matches are
+    # collected. Negative values keep their slice semantics via the final
+    # ``matches[:max_count]``, which needs the full list.
+    limit = max_count if max_count is not None and max_count >= 0 else None
+
+    while limit is None or len(matches) < limit:
+        substring, _, end_index = get_section(remaining, start_str, end_str)
+        if substring is None:
+            break
+
+        matches.append(substring)
+
+        consumed = end_index + end_len
+        # Without an end delimiter the match already runs to the end of the
+        # string, and a zero-width step makes no progress; scanning again in
+        # either case would repeat the same empty match forever.
+        if end_str is None or consumed == 0:
+            break
+        remaining = remaining[consumed:]
+
+    if max_count is not None:
+        return matches[:max_count]
+    return matches
diff --git a/src/usepy/string/pascal_case.py b/src/usepy/string/pascal_case.py
@@ -0,0 +1,34 @@
+from typing import Union
+
+from usepy.string.capitalize import capitalize
+
+
+def pascal_case(string: Union[str, bytes]) -> str:
+    """
+    Converts a string to Pascal case.
+
+    The input is split into words, each word is capitalized, and the words are
+    concatenated without any separator characters.
+
+    Args:
+        string (Union[str, bytes]): The input to convert. ``bytes`` input is
+            decoded as UTF-8 before splitting.
+
+    Returns:
+        str: The converted string in Pascal case.
+
+    Examples:
+        >>> pascal_case('pascalCase')
+        'PascalCase'
+        >>> pascal_case('some whitespace')
+        'SomeWhitespace'
+        >>> pascal_case('hyphen-text')
+        'HyphenText'
+        >>> pascal_case('HTTPRequest')
+        'HttpRequest'
+    """
+    from usepy.string._get_words import get_words
+
+    text = string.decode('utf-8') if isinstance(string, bytes) else string
+    capitalized = [capitalize(word) for word in get_words(text)]
+    return ''.join(capitalized)
diff --git a/src/usepy/string/right.py b/src/usepy/string/right.py
@@ -0,0 +1,22 @@
+from typing import Optional
+
+
+def right(original_str: str, start_str: str) -> Optional[str]:
+    """
+    Get the part of a string that follows a given substring.
+
+    Args:
+        original_str (str): The original string to search in.
+        start_str (str): The substring after which the result begins.
+
+    Returns:
+        Optional[str]: Everything after the first occurrence of ``start_str``,
+            or None if ``start_str`` does not occur.
+
+    Examples:
+        >>> right('abc123def', 'abc')
+        '123def'
+    """
+    from usepy.string._get_section import get_section
+
+    # Only the extracted text is needed; the two indices are ignored.
+    section = get_section(original_str, start_str=start_str)
+    return section[0]