33  FS

Python functions that mirror the file-system helpers in R’s {fs} package.

import pandas as pd
from siuba import _, group_by, mutate, select, filter
import fs

33.1 List Directory

from pathlib import Path

def dir_ls(path, recurse=False):
    """List the entries of a directory.

    Parameters
    ----------
    path : str or Path
        Directory whose contents are listed.
    recurse : bool, default False
        When true, also descend into every subdirectory and list its
        entries. Symbolic links to directories are listed but not followed,
        so a link cycle cannot cause endless recursion.

    Returns
    -------
    list of Path
        One ``Path`` per entry, each prefixed with ``path``. With
        ``recurse=True`` a directory always appears before its own contents.
        The order of siblings is whatever the file system reports; sort the
        result if a stable order is needed.

    Raises
    ------
    OSError
        Propagated from ``Path.iterdir()``, e.g. ``FileNotFoundError`` when
        ``path`` does not exist.
    """
    entries = []
    for entry in Path(path).iterdir():
        entries.append(entry)
        if recurse and entry.is_dir() and not entry.is_symlink():
            entries.extend(dir_ls(entry, recurse=True))
    return entries


dir_ls("d")
[PosixPath('d/test1.txt'), PosixPath('d/e2'), PosixPath('d/e1')]

33.1.1 Custom Implementation of fs.dir_ls()

d_paths = fs.dir_ls("d", recurse=True)
d_paths
[PosixPath('d/test1.txt'),
 PosixPath('d/e2'),
 PosixPath('d/e2/test3.txt'),
 PosixPath('d/e2/test3-cp.txt'),
 PosixPath('d/e1'),
 PosixPath('d/e1/test2.txt'),
 PosixPath('d/e1/test2-cp.txt'),
 PosixPath('d/e1/test2-copy.txt')]
d_paths_df = pd.DataFrame({"paths": d_paths})
d_paths_df
paths
0 d/test1.txt
1 d/e2
2 d/e2/test3.txt
3 d/e2/test3-cp.txt
4 d/e1
5 d/e1/test2.txt
6 d/e1/test2-cp.txt
7 d/e1/test2-copy.txt

33.2 File Name

from pathlib import Path
from typing import Union, List

def path_file(path: Union[str, Path, List[str], List[Path]]) -> Union[Path, List[Path]]:
    """Strip the directory part from one path or from each path in a collection.

    Parameters
    ----------
    path : str, Path, or list of str/Path
        A single path, or an iterable of paths.

    Returns
    -------
    Path or list of Path
        For a single ``str``/``Path`` input, a ``Path`` holding only the
        final component. For any other input, a list with one such ``Path``
        per element, in the same order.
    """
    def _basename(entry):
        # Re-wrap the bare name so callers get a Path back, not a str.
        return Path(Path(entry).name)

    if not isinstance(path, (str, Path)):
        return list(map(_basename, path))
    return _basename(path)
print(d_paths_df['paths'][0])
path_file(d_paths_df['paths'][0])
d/test1.txt
PosixPath('test1.txt')
d_paths_df.assign(file_name = path_file(d_paths_df["paths"]))
paths file_name
0 d/test1.txt test1.txt
1 d/e2 e2
2 d/e2/test3.txt test3.txt
3 d/e2/test3-cp.txt test3-cp.txt
4 d/e1 e1
5 d/e1/test2.txt test2.txt
6 d/e1/test2-cp.txt test2-cp.txt
7 d/e1/test2-copy.txt test2-copy.txt

33.3 Copy file

import shutil
from pathlib import Path

def file_copy(src, dst):
    """Copy a single file with ``shutil.copy2`` and report where it landed.

    Parameters
    ----------
    src : str or Path
        File to copy.
    dst : str or Path
        Target file name, or an existing directory. When ``dst`` is a
        directory the copy keeps the base name of ``src`` inside it.

    Returns
    -------
    Path
        Location of the newly written file. This is the value returned by
        ``shutil.copy2``, so it is the actual file path even when ``dst``
        named a directory.

    Raises
    ------
    OSError
        Propagated from ``shutil.copy2``, e.g. ``FileNotFoundError`` when
        ``src`` does not exist.
    """
    # copy2 already computes the final destination; hand it back instead of
    # discarding it so callers can chain on the new path.
    return Path(shutil.copy2(src, dst))
# Example usage:
file_copy("d/e1/test2.txt", "d/e1/test2-copy.txt")

33.3.1 Custom fs.file_copy()

fs.file_copy(path = ["d/e1/test2.txt", "d/e2/test3.txt"],
             new_path = ["d/e1/test2-cp.txt", "d/e2/test3-cp.txt"],
             overwrite = True)
fs.dir_ls("d", recurse=True)
[PosixPath('d/test1.txt'),
 PosixPath('d/e2'),
 PosixPath('d/e2/test3.txt'),
 PosixPath('d/e2/test3-cp.txt'),
 PosixPath('d/e1'),
 PosixPath('d/e1/test2.txt'),
 PosixPath('d/e1/test2-cp.txt'),
 PosixPath('d/e1/test2-copy.txt')]

33.4 Copy Folder

import shutil
from pathlib import Path

def dir_copy(src, dst):
    """Recursively copy a directory tree with ``shutil.copytree``.

    Parameters
    ----------
    src : str or Path
        Directory to copy.
    dst : str or Path
        Directory to create. It must not exist yet.

    Returns
    -------
    Path
        The newly created destination directory.

    Raises
    ------
    FileExistsError
        Raised by ``shutil.copytree`` when ``dst`` already exists.
    OSError
        Other errors propagated from ``shutil.copytree``, e.g. a missing
        ``src``.
    """
    # Return the destination so callers can keep working with the new tree.
    return Path(shutil.copytree(src, dst))
# Flag the rows whose path ends in ".txt". endswith() anchors the match at the
# end of the path, so a ".txt" that appears mid-path (e.g. a directory named
# "notes.txt.d") is not mistaken for a text file.
d_paths_df2 = d_paths_df.assign(is_file=d_paths_df["paths"].map(str).str.endswith(".txt"))
d_paths_df2
paths is_file
0 d/test1.txt True
1 d/e2 False
2 d/e2/test3.txt True
3 d/e2/test3-cp.txt True
4 d/e1 False
5 d/e1/test2.txt True
6 d/e1/test2-cp.txt True
7 d/e1/test2-copy.txt True
# Keep only the rows whose is_file flag is False and give each one a
# "<path>_cp" target name in a new column.
# `filter` and `mutate` are the siuba verbs imported at the top of the
# chapter; that import shadows Python's builtin `filter`.
d_paths_df3 = (d_paths_df2
               >> filter(_.is_file == False)
               >> mutate(new_paths=_.paths.map(str) + "_cp"))

d_paths_df3
paths is_file new_paths
1 d/e2 False d/e2_cp
4 d/e1 False d/e1_cp

33.4.1 Custom fs.dir_copy()

fs.dir_copy(d_paths_df3["paths"], d_paths_df3["new_paths"])