FIXED:
* paths: Async searching works correctly now, and is consolidated to a
  single struct for searching options for async and synchronous
  searches.
This commit is contained in:
brent saner 2024-11-18 17:36:14 -05:00
parent eefe02afaf
commit c6efc2d83c
Signed by: bts
GPG Key ID: 8C004C2F93481F6B
6 changed files with 383 additions and 187 deletions

11
fsutils/errs.go Normal file
View File

@ -0,0 +1,11 @@
package fsutils

import (
`syscall`
)

var (
// ErrFsAttrsUnsupported is syscall.ENOTTY under a more descriptive name.
// If you see "inappropriate ioctl for device", it's this'un.
// NOTE(review): presumably returned when a filesystem does not support the attribute ioctls -- confirm against callers.
// (Yes, I know. "Why ENOTTY?" I don't know, ask Linus.)
ErrFsAttrsUnsupported error = syscall.ENOTTY
)

View File

@ -17,6 +17,17 @@ const (
modeAnyExceptRegular pathMode = modeDir | modeSymlink | modePipe | modeSocket | modeDev | modeCharDev | modeIrregular
)

// Miss reasons; these populate FsSearchResult.MissReason.
const (
// MissNoMiss is the zero value; the result is not a miss.
MissNoMiss missReason = ""
// MissNoMeta indicates that no fs.DirEntry and/or fs.FileInfo could be obtained for the path.
MissNoMeta missReason = "Could not determine metadata"
// MissBadBase indicates that the base name of the path did not match FsSearchCriteria.BasePtrn.
MissBadBase missReason = "Base name does not match BasePtrn"
// MissBadPath indicates that the full path did not match FsSearchCriteria.PathPtrn.
MissBadPath missReason = "Path does not match PathPtrn"
// MissBadTime indicates that the timestamp comparison for FsSearchCriteria.Age failed.
MissBadTime missReason = "Time(s) does not/do not match Age"
// MissFile indicates a regular file (no type bits set) while FsSearchCriteria.NoFiles is true.
MissFile missReason = "Object is a file and NoFiles is set"
// MissType indicates a non-regular object whose type bits are not in FsSearchCriteria.TargetType.
MissType missReason = "Object does not match TargetType"
)

// Times
const TimeAny pathTimeType = 0
const (

12
paths/errs.go Normal file
View File

@ -0,0 +1,12 @@
package paths

import (
`errors`
)

// Errors raised by SearchFsPathsAsync when its FsSearchCriteriaAsync is missing a required field.
var (
// ErrNilErrChan is used as the panic value when FsSearchCriteriaAsync.ErrChan is nil (there is nowhere to send an error).
ErrNilErrChan error = errors.New("an initialized error channel is required")
// ErrNilMatchChan is sent to ErrChan when ResChan is nil but NoMatch is false.
ErrNilMatchChan error = errors.New("an initialized matches channel is required")
// ErrNilMismatchChan is sent to ErrChan when MismatchChan is nil but NoMismatch is false.
ErrNilMismatchChan error = errors.New("an initialized mismatches channel is required")
// ErrNilWg is sent to ErrChan when FsSearchCriteriaAsync.WG is nil.
ErrNilWg error = errors.New("a non-nil sync.WaitGroup is required")
)

View File

@ -26,8 +26,7 @@ import (
"os"
"os/user"
"path/filepath"
`regexp`
`slices`
`sort`
"strings"
`sync`
`time`
@ -35,7 +34,6 @@ import (
// "syscall"

`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
)

@ -277,86 +275,33 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error)
return
}

/*
SearchPaths gets a file/directory path list based on the provided criteria.
// SearchFsPaths gets a file/directory/etc. path list based on the provided criteria.
func SearchFsPaths(matcher FsSearchCriteria) (found, miss []*FsSearchResult, err error) {

targetType defines what should be included in the path list.
It can consist of one or more (io/)fs.FileMode types OR'd together
(ensure they are part of (io/)fs.ModeType).
(You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.)
var matched *FsSearchResult
var missed *FsSearchResult

noFiles, if true, will explicitly filter out regular files from the path results.
(Normally they are *always* included regardless of targetType.)

basePtrn may be nil; if it isn't, it will be applied to *base names*
(that is, quux.txt rather than /foo/bar/baz/quux.txt).

pathPtrn is like basePtrn except it applies to the *entire* path,
not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt,
not just quux.txt).

If age is not nil, it will be applied to the path object.
It will match older files/directories/etc. if olderThan is true,
otherwise it will match newer files/directories/etc.
(olderThan is not used otherwise.)

ageType is one or more Time* constants OR'd together to describe which timestamp type to check.
(Note that TimeCreated may not match if specified as it is only available on certain OSes,
kernel versions, and filesystems. This may lead to files being excluded that may have otherwise
been included.)
(You can use TimeAny to specify any supported time.)
*Any* matching timestamp of all specified (and supported) timestamp types matches,
so be judicious with your selection. They are processed in order of:

* btime (birth/creation time) (if supported)
* mtime (modification time -- contents have changed)
* ctime (OS-specific behavior; generally disk metadata has changed) (if supported)
* atime (access time)

olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.

now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
*/
func SearchFsPaths(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (foundPaths []string, err error) {

if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}

if err = RealPath(&root); err != nil {
if err = RealPath(&matcher.Root); err != nil {
return
}

if err = filepath.WalkDir(
root,
matcher.Root,
func(path string, d fs.DirEntry, inErr error) (outErr error) {

var include bool

if inErr != nil {
outErr = inErr
return
}

if include, outErr = filterPath(
path, d,
targetType, noFiles,
basePtrn, pathPtrn,
age, ageType, olderThan, now,
); outErr != nil {
if matched, missed, outErr = matcher.Match(path, d, nil); outErr != nil {
return
}

if include {
foundPaths = append(foundPaths, path)
if matched != nil && !matcher.NoMatch {
found = append(found, matched)
}
if missed != nil && !matcher.NoMismatch {
miss = append(miss, missed)
}

return
@ -365,8 +310,18 @@ func SearchFsPaths(
return
}

if found == nil || len(found) == 0 {
return
}

// And sort them.
slices.Sort(foundPaths)
sort.Slice(
found,
func(i, j int) (isLess bool) {
isLess = found[i].Path < found[j].Path
return
},
)

return
}
@ -375,162 +330,107 @@ func SearchFsPaths(
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
workers for the filtering logic instead of performing iteratively/recursively.
It may, in some cases, be *slightly more* performant and *slightly less* in others.
Additional options are documented below.
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
guaranteed to be in any predictable order.
Note that unlike SearchFsPaths, the results written to the
FsSearchCriteriaAsync.ResChan are not guaranteed to be in any predictable order.

All channels are expected to have been initialized by the caller ahead of time,
and all provided channels will be closed upon completion (so they are only safe
to READ from after invoking SearchFsPathsAsync).

foundPathsChan is a channel to which matched filepaths will be written.

sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
(semCtx is the context.Context used for sem when acquiring. It may be nil;
one will be locally created if so.)
The default will be to spawn all filtering logic concurrently.
For very large directories, you almost assuredly do not want that -- it
can cause a significant amount of I/O and CPU wait.
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)

wg *must not* be nil, and must be managed by the caller.
SearchFsPathsAsync will exit with no errors but no-op if wg is nil.

errChan will receive any/all encountered errors.
All channels are expected to have already been initialized by the caller.
They will not be closed by this function.
*/
func SearchFsPathsAsync(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
foundPathsChan chan string,
sem *semaphore.Weighted, semCtx context.Context,
wg *sync.WaitGroup,
errChan chan error,
) {
func SearchFsPathsAsync(matcher FsSearchCriteriaAsync) {

var err error
var localWg sync.WaitGroup
var wgLocal sync.WaitGroup
var doneChan chan bool = make(chan bool, 1)

if wg == nil {
if matcher.ErrChan == nil {
panic(ErrNilErrChan)
return
}

if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
if matcher.WG == nil {
matcher.ErrChan <- ErrNilWg
return
}

if sem != nil && semCtx == nil {
semCtx = context.Background()
defer matcher.WG.Done()

if matcher.ResChan == nil && !matcher.NoMatch {
matcher.ErrChan <- ErrNilMatchChan
return
}
if matcher.MismatchChan == nil && !matcher.NoMismatch {
matcher.ErrChan <- ErrNilMismatchChan
return
}

if err = RealPath(&matcher.Root); err != nil {
matcher.ErrChan <- err
return
}

if matcher.Semaphore != nil && matcher.SemaphoreCtx == nil {
matcher.SemaphoreCtx = context.Background()
}

if err = filepath.WalkDir(
root,
matcher.Root,
func(path string, de fs.DirEntry, inErr error) (outErr error) {
localWg.Add(1)
wg.Add(1)
if sem != nil {
if err = sem.Acquire(semCtx, 1); err != nil {

if inErr != nil {
inErr = filterNoFileDir(inErr)
if inErr != nil {
outErr = inErr
return
}
}

wgLocal.Add(1)
if matcher.Semaphore != nil {
if err = matcher.Semaphore.Acquire(matcher.SemaphoreCtx, 1); err != nil {
return
}
}

go func(p string, d fs.DirEntry) {
var pErr error
var pInclude bool
var pResMatch *FsSearchResult
var pResMiss *FsSearchResult

defer localWg.Done()
defer wg.Done()
defer wgLocal.Done()

if sem != nil {
defer sem.Release(1)
if matcher.Semaphore != nil {
defer matcher.Semaphore.Release(1)
}

if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
errChan <- pErr
if pResMatch, pResMiss, pErr = matcher.Match(p, d, nil); pErr != nil {
matcher.ErrChan <- pErr
return
}

if pInclude {
foundPathsChan <- p
if pResMatch != nil && !matcher.NoMatch {
matcher.ResChan <- pResMatch
}
if pResMiss != nil && !matcher.NoMismatch {
matcher.MismatchChan <- pResMiss
}
}(path, de)

return
},
); err != nil {
errChan <- err
return
err = filterNoFileDir(err)
if err != nil {
matcher.ErrChan <- err
return
}
}

go func() {
localWg.Wait()
close(foundPathsChan)
close(errChan)
wgLocal.Wait()
doneChan <- true
}()

return
}

// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync.
func filterPath(
path string, d fs.DirEntry,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (include bool, err error) {

var typeMode fs.FileMode
var fi fs.FileInfo
var tspec times.Timespec
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))

if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}

// patterns
if pathPtrn != nil {
if !pathPtrn.MatchString(path) {
return
}
}
if basePtrn != nil {
if !basePtrn.MatchString(filepath.Base(path)) {
return
}
}

// age
if age != nil {
if tspec, err = times.Stat(path); err != nil {
return
}
if !filterTimes(tspec, age, &ageType, olderThan, now) {
return
}
}

// fs object type (file, dir, etc.)
if fi, err = d.Info(); err != nil {
return
}
typeMode = fi.Mode().Type()
if typeMode == 0 && noFiles {
return
} else if typeMode != 0 {
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
return
}
}

include = true
<-doneChan

return
}
@ -597,3 +497,13 @@ func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType

return
}

// filterNoFileDir drops "does not exist" errors: it returns nil if err is (or wraps)
// fs.ErrNotExist, and returns err unchanged (nil included) otherwise.
func filterNoFileDir(err error) (filtered error) {

	if !errors.Is(err, fs.ErrNotExist) {
		filtered = err
	}

	return
}

View File

@ -0,0 +1,125 @@
package paths

import (
`io/fs`
`os`
`path/filepath`
`time`

`github.com/djherbis/times`
`r00t2.io/goutils/bitmask`
)

/*
Match evaluates path against the criteria in f and reports the outcome as either match or miss.

d and fi are optional, pre-fetched metadata for path; either or both may be nil:

  - If fi is nil and d is not, the fs.FileInfo is taken from d.Info().
  - If both are nil, path is stat'd directly: os.Stat if f.FollowSymlinks is true, otherwise os.Lstat.
  - If d is nil, it is derived from the fs.FileInfo via fs.FileInfoToDirEntry.

Checks are applied in the following order; the first one that fails sets miss.MissReason:

 1. an fs.DirEntry and fs.FileInfo could be obtained (MissNoMeta)
 2. f.PathPtrn (if set) against the full path (MissBadPath)
 3. f.BasePtrn (if set) against filepath.Base(path) (MissBadBase)
 4. f.Age (if set) against the object's timestamps via filterTimes (MissBadTime, or MissNoMeta if no timestamps are available)
 5. f.NoFiles for regular files (MissFile), f.TargetType for everything else (MissType)

"Does not exist" errors (fs.ErrNotExist) encountered while gathering metadata are swallowed rather than returned;
a path whose fs.FileInfo could not be obtained because of one is reported as a miss with MissNoMeta.

If err is not nil, it represents an unexpected error and both match and miss are nil.
If f is nil, match, miss, and err are all nil.

If f.Age is set and f.Now is nil, f.Now is populated with time.Now() as a side effect.
NOTE(review): that write is not synchronized; callers invoking Match concurrently on a shared
FsSearchCriteria should set Now beforehand.
*/
func (f *FsSearchCriteria) Match(path string, d fs.DirEntry, fi fs.FileInfo) (match, miss *FsSearchResult, err error) {

	var typeMode fs.FileMode
	var m FsSearchResult
	var typeFilter *bitmask.MaskBit

	// This must happen before anything dereferences f (including building typeFilter below).
	if f == nil {
		return
	}

	typeFilter = bitmask.NewMaskBitExplicit(uint(f.TargetType))

	m = FsSearchResult{
		Path:     path,
		DirEntry: d,
		FileInfo: fi,
		Criteria: f,
	}

	// A DirEntry can be created from a FileInfo but not vice versa.
	if m.FileInfo == nil {
		switch {
		case m.DirEntry != nil:
			m.FileInfo, err = m.DirEntry.Info()
		case f.FollowSymlinks:
			m.FileInfo, err = os.Stat(path)
		default:
			m.FileInfo, err = os.Lstat(path)
		}
		// A vanished path is not an error; it falls through to the MissNoMeta check below.
		if err = filterNoFileDir(err); err != nil {
			return
		}
	}
	if m.DirEntry == nil {
		// fs.FileInfoToDirEntry returns nil for a nil FileInfo.
		m.DirEntry = fs.FileInfoToDirEntry(m.FileInfo)
	}
	if m.DirEntry == nil || m.FileInfo == nil {
		m.MissReason = MissNoMeta
		miss = &m
		return
	}

	// m.Times is left nil if times.Stat reports that the path does not exist.
	if m.Times, err = times.Stat(path); err != nil {
		if err = filterNoFileDir(err); err != nil {
			return
		}
	}

	if f.PathPtrn != nil && !f.PathPtrn.MatchString(path) {
		m.MissReason = MissBadPath
		miss = &m
		return
	}
	if f.BasePtrn != nil && !f.BasePtrn.MatchString(filepath.Base(path)) {
		m.MissReason = MissBadBase
		miss = &m
		return
	}

	// age
	if f.Age != nil {
		if m.Times == nil {
			// No timestamps to compare (see times.Stat above); don't hand filterTimes a nil Timespec.
			m.MissReason = MissNoMeta
			miss = &m
			return
		}
		if f.Now == nil {
			f.Now = new(time.Time)
			*f.Now = time.Now()
		}
		if !filterTimes(m.Times, f.Age, &f.AgeType, f.OlderThan, f.Now) {
			m.MissReason = MissBadTime
			miss = &m
			return
		}
	}

	// fs object type (file, dir, etc.)
	// A type of 0 is a regular file; those are only ever excluded via NoFiles, never via TargetType.
	typeMode = m.FileInfo.Mode().Type()
	if typeMode == 0 && f.NoFiles {
		m.MissReason = MissFile
		miss = &m
		return
	}
	if typeMode != 0 && !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
		m.MissReason = MissType
		miss = &m
		return
	}

	// If it gets to here, it matches.
	match = &m

	return
}

View File

@ -1,9 +1,136 @@
package paths

import (
`context`
`io/fs`
`regexp`
`sync`
`time`

`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
)

// FsSearchCriteria contains filter criteria for SearchFsPaths* functions.
type FsSearchCriteria struct {
// Root indicates the root to search.
Root string `json:"root" toml:"RootPath" yaml:"Root Path" xml:"root,attr" validate:"dir"`
// NoMatch, if true, will not return matches. If NoMatch and NoMismatch are both true, no results will be returned.
NoMatch bool `json:"no_match" toml:"NoMatch" yaml:"No Matches" xml:"noMatch,attr"`
// NoMismatch, if true, will not return mismatches. If NoMatch and NoMismatch are both true, no results will be returned.
NoMismatch bool `json:"no_miss" toml:"NoMismatch" yaml:"No Mismatches" xml:"noMiss,attr"`
/*
TargetType defines what types of filesystem objects should be matched.
It can consist of one or more (io/)fs.FileMode types OR'd together
(ensure they are part of (io/)fs.ModeType).
Regular files (which have no type bits set) are never rejected by TargetType;
use NoFiles = true to exclude them.
*/
TargetType fs.FileMode `json:"type_tgt" toml:"TargetType" yaml:"Target Type" xml:"typeTgt,attr"`
// NoFiles excludes regular files from matching (as there isn't a way to explicitly exclude files otherwise if a non-zero mode is given).
NoFiles bool `json:"no_file" toml:"ExcludeFiles" yaml:"Exclude Files" xml:"noFile,attr"`
/*
FollowSymlinks controls how Match stats a path when it is given neither an fs.DirEntry nor an fs.FileInfo:
if true, os.Stat is used (a symlink resolves to its target's metadata); if false, os.Lstat is used
(the metadata is that of the symlink itself).
It is not consulted when a DirEntry or FileInfo is provided.
*/
FollowSymlinks bool `json:"follow_sym" toml:"FollowSymlinks" yaml:"Follow Symlinks" xml:"followSym,attr"`
// BasePtrn, if specified, will apply to the *base name* (that is, quux.txt rather than /foo/bar/baz/quux.txt). See also PathPtrn.
BasePtrn *regexp.Regexp `json:"ptrn_base,omitempty" toml:"BaseNamePattern,omitempty" yaml:"Base Name Pattern,omitempty" xml:"ptrnBase,attr,omitempty"`
// PathPtrn, if specified, will apply to the *full path* (e.g. /foo/bar/baz/quux.txt, not just quux.txt). See also BasePtrn.
PathPtrn *regexp.Regexp `json:"ptrn_path,omitempty" toml:"PathPattern,omitempty" yaml:"Path Pattern,omitempty" xml:"ptrnPath,attr,omitempty"`
/*
Age, if specified, indicates the comparison of Now against the AgeType of filesystem objects.
Use OlderThan to indicate if it should be older or newer.
*/
Age *time.Duration `json:"age,omitempty" toml:"Age,omitempty" yaml:"Age,omitempty" xml:"age,attr,omitempty"`
/*
AgeType can be one (or more, OR'd together) of the Time* constants in this package (TimeAny, TimeAccessed, TimeCreated,
TimeChanged, TimeModified) to indicate what timestamp(s) to use for comparing Age.

The zero-value is TimeAny.

The first matching timestamp will pass all time comparisons.
Be mindful of timestamp type support/limitations per OS/filesystem of Root.

Completely unused if Age is nil.
*/
AgeType pathTimeType `json:"type_age" toml:"AgeType" yaml:"Age Type" xml:"typeAge,attr"`
/*
OlderThan, if true (and Age is not nil), indicates that matching filesystem objects should have their
AgeType older than Now. If false, their AgeType should be *newer* than Now.

Completely unused if Age is nil.
*/
OlderThan bool `json:"older" toml:"OlderThan" yaml:"Older Than" xml:"older,attr"`
/*
Now expresses a time to compare to Age via AgeType and OlderThan.
Note that it may be any valid time, not necessarily "now".
If Age is specified but Now is nil, it will be populated with time.Now() the first time Match needs it
(and that value is then stored in this field and reused).

Completely unused if Age is nil.
*/
Now *time.Time `json:"now,omitempty" toml:"Now,omitempty" yaml:"Now,omitempty" xml:"now,attr,omitempty"`
}

// FsSearchCriteriaAsync extends FsSearchCriteria for use in an asynchronous (goroutine) manner.
type FsSearchCriteriaAsync struct {
FsSearchCriteria
/*
WG should be a non-nil pointer to a sync.WaitGroup.
This is used to signal search completion to the caller.

.Done() will be called once within the search function, but no .Add() will be called;
.Add() should be done by the caller beforehand.

If nil, ErrNilWg is sent to ErrChan and the search does not run.
*/
WG *sync.WaitGroup
/*
ResChan is the channel (positive) match results are sent to.
It must be non-nil unless NoMatch is true; otherwise ErrNilMatchChan is sent to ErrChan and the search does not run.
*/
ResChan chan *FsSearchResult
/*
MismatchChan is the channel negative matches/"misses" are sent to.
It must be non-nil unless NoMismatch is true; otherwise ErrNilMismatchChan is sent to ErrChan and the search does not run.
*/
MismatchChan chan *FsSearchResult
/*
ErrChan should be a non-nil error channel for any unexpected errors encountered.

If nil, a panic will be raised (with ErrNilErrChan).
*/
ErrChan chan error
/*
Semaphore is completely optional, but if non-nil
it will be used to limit concurrent filesystem
object processing.

It is generally a Very Good Idea(TM) to use this,
as the default is to dispatch all processing concurrently.
This can lead to some heavy I/O and CPU wait.

(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
*/
Semaphore *semaphore.Weighted
/*
SemaphoreCtx is the context.Context to use for Semaphore.
If nil (but Semaphore is not), context.Background() will be used.
*/
SemaphoreCtx context.Context
}

// FsSearchResult contains a match/miss result for FsSearchCriteria and FsSearchCriteriaAsync.
type FsSearchResult struct {
/*
Path is the path to the object on the filesystem.
It may or may not exist at the time of return,
but will not be an empty string.
*/
Path string `json:"path" toml:"Path" yaml:"Path" xml:"path,attr"`
// DirEntry is the fs.DirEntry for the Path; note that .Name() is the base name only. TODO: serialization?
DirEntry fs.DirEntry `json:"-" toml:"-" yaml:"-" xml:"-"`
// FileInfo is the fs.FileInfo for the Path; note that .Name() is the base name only. TODO: serialization?
FileInfo fs.FileInfo `json:"-" toml:"-" yaml:"-" xml:"-"`
// Criteria is the criteria this FsSearchResult was evaluated against (for matches and misses alike).
Criteria *FsSearchCriteria `json:"criteria" toml:"Criteria" yaml:"Criteria" xml:"criteria"`
// Times holds the mtime, ctime, etc. of the filesystem object (where supported). TODO: serialization?
Times times.Timespec `json:"-" toml:"-" yaml:"-" xml:"-"`
// MissReason contains the reason the result is a miss (MissNoMiss if a match); see the Miss* constants.
MissReason missReason `json:"miss_reason" toml:"MissReason" yaml:"Miss Reason" xml:"miss,attr"`
}

// missReason is a human-readable explanation of why a path did not match; see the Miss* constants.
type missReason string

// pathMode is the bitmask type of this package's mode* constants.
type pathMode bitmask.MaskBit

// pathTimeType is the bitmask type of the Time* constants (e.g. TimeAny); it selects which timestamp(s) to compare.
type pathTimeType bitmask.MaskBit