import os
import re
import glob
from warnings import warn
def parse_directory(directory, names, extensions=('tif', 'tiff', 'TIF', 'TIFF')):
    """
    Collect the files in a directory that correspond to the requested names.

    A file matches a name when its base name is either exactly
    ``<name>.<ext>`` (e.g. ``RED.tif``) or ends with ``_<name>.<ext>``
    (e.g. ``dse_channel_RED.tif``), where ``<ext>`` is one of `extensions`.
    Names and extensions are treated as literal text, not as regular
    expressions.

    Args:
        directory: str, directory whose direct children are examined
        names: tuple of str, band or file names, e.g. ['RED', '101']
        extensions: tuple of str, allowable file extensions (without the dot)

    Returns:
        list of matched paths, grouped in the order of `names`. A name with
        no matching file contributes nothing; a name with several matching
        files contributes all of them and triggers a RuntimeWarning.
    """
    paths = glob.glob(os.path.join(directory, '*'))
    # Escape every extension so characters such as '.' or '+' are literal.
    extension_group = '|'.join(re.escape(ext) for ext in extensions)
    res = []
    for name in names:
        # The channel name must be either the full file name (./RED.tif) or
        # the part after the last '_' (./dse_channel_RED.tif). Matching is
        # done on the base name only, so the platform path separator never
        # has to appear inside the regular expression (a bare backslash from
        # os.sep would otherwise corrupt the pattern on Windows).
        # str() keeps non-string names (e.g. 101) working as they did with
        # str.format; re.escape makes the name literal.
        pattern = re.compile(
            r'(?:.*_)?{}\.(?:{})$'.format(re.escape(str(name)), extension_group))
        band_path = [
            path for path in paths
            if pattern.match(os.path.basename(path)) is not None
        ]
        # Normally with our datasets it will never be the case, and may
        # indicate wrong file naming.
        if len(band_path) > 1:
            warn(RuntimeWarning(
                "There are multiple files matching the channel {}. "
                "It can cause ambiguous behavior later.".format(name)))
        res += band_path
    return res