# Source code for simplebench.utils.filenames
"""Utility functions for file names."""
import re
from simplebench.exceptions import SimpleBenchTypeError, SimpleBenchValueError
from .exceptions import _UtilsErrorTag
# Matches each run of one or more characters that are not a-z, A-Z, 0-9,
# _ (underline), or - (dash).
_SANITIZE_FILENAME_RE = re.compile(r'[^-a-zA-Z0-9_]+')
# Matches each run of one or more sequential _ (underline) characters.
_COLLAPSE_UNDERSCORES_RE = re.compile(r'_+')


def sanitize_filename(name: str) -> str:
    """Sanitize a filename by replacing invalid characters with _ (underline).

    Only a-z, A-Z, 0-9, _ (underline), and - (dash) characters are allowed
    (ASCII only; accented or other non-ASCII characters are treated as
    invalid). Each run of other characters is replaced with ``_``, multiple
    sequential ``_`` characters are then collapsed to a single ``_``, and
    finally leading and trailing ``_`` and ``-`` characters are removed.

    Examples:

    .. code-block:: python3

        sanitize_filename("My File-Name.txt")  # returns "My_File-Name_txt"
        sanitize_filename("Invalid/Chars\\In:Name*?")  # returns "Invalid_Chars_In_Name"
        sanitize_filename(" Leading and Trailing ")  # returns "Leading_and_Trailing"
        sanitize_filename("!!!")  # returns "_"

    .. note::

        This function does not check for reserved filenames on any operating
        system. It is the caller's responsibility to ensure the sanitized
        filename is valid for the target filesystem.

    If a filename becomes completely empty after sanitization, the function
    returns a single underscore (``'_'``) character. This is the one exception
    to the rule that leading and trailing ``_`` and ``-`` characters are
    removed.

    :param name: The filename to sanitize.
    :type name: str
    :return: The sanitized filename (never an empty string).
    :rtype: str
    :raises SimpleBenchTypeError: If the ``name`` arg is not a str.
    :raises SimpleBenchValueError: If the ``name`` arg is an empty string.
    """
    if not isinstance(name, str):
        raise SimpleBenchTypeError(
            "name arg must be a str",
            tag=_UtilsErrorTag.SANITIZE_FILENAME_INVALID_NAME_ARG_TYPE)
    if name == '':
        raise SimpleBenchValueError(
            "name arg must not be an empty string",
            tag=_UtilsErrorTag.SANITIZE_FILENAME_EMPTY_NAME_ARG)

    # Step 1: every run of disallowed characters becomes a single underscore.
    replaced = _SANITIZE_FILENAME_RE.sub('_', name)
    # Step 2: an underscore produced by step 1 may sit next to underscores
    # that were already in the input, so collapse those runs as well.
    collapsed = _COLLAPSE_UNDERSCORES_RE.sub('_', replaced)
    # Step 3: drop separator characters from both ends.
    stripped = collapsed.strip('_-')

    # A name made up solely of invalid/separator characters strips down to
    # nothing; fall back to a single underscore so the result is never empty.
    return stripped or '_'