From 834395c050dbce6cee00fe5b98fc3c7493a0ded7 Mon Sep 17 00:00:00 2001 From: brent saner Date: Tue, 6 Jan 2026 02:53:21 -0500 Subject: [PATCH] v1.15.2 ADDED: * Better docs for remap * Added returner convenience funcs for remap FIXED: * Proper resliced remap.ReMap.MapString --- remap/doc.go | 10 +- remap/funcs.go | 113 +++++++++++++++ remap/funcs_remap.go | 36 +++-- remap/funcs_remap_test.go | 279 ++++++++++++++++++++++++++++++++++++++ remap/types.go | 2 +- 5 files changed, 424 insertions(+), 16 deletions(-) create mode 100644 remap/funcs.go create mode 100644 remap/funcs_remap_test.go diff --git a/remap/doc.go b/remap/doc.go index c8dcfb1..90b7335 100644 --- a/remap/doc.go +++ b/remap/doc.go @@ -1,4 +1,12 @@ /* -Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups. +Package remap provides convenience functions around regular expressions, +primarily offering maps for named capture groups. + +It offers convenience equivalents of the following: + + * [regexp.Compile] ([Compile]) + * [regexp.CompilePOSIX] ([CompilePOSIX]) + * [regexp.MustCompile] ([MustCompile]) + * [regexp.MustCompilePOSIX] ([MustCompilePOSIX]) */ package remap diff --git a/remap/funcs.go b/remap/funcs.go new file mode 100644 index 0000000..86e93b4 --- /dev/null +++ b/remap/funcs.go @@ -0,0 +1,113 @@ +package remap + +import ( + "regexp" +) + +/* +Compile is a convenience shorthand for: + + var err error + var r *remap.ReMap = new(remap.ReMap) + + if r.Regexp, err = regexp.Compile(expr); err != nil { + // ... + } + +It corresponds to [regexp.Compile]. +*/ +func Compile(expr string) (r *ReMap, err error) { + + var p *regexp.Regexp + + if p, err = regexp.Compile(expr); err != nil { + return + } + + r = &ReMap{ + Regexp: p, + } + + return +} + +/* +CompilePOSIX is a convenience shorthand for: + + var err error + var r *remap.ReMap = new(remap.ReMap) + + if r.Regexp, err = regexp.CompilePOSIX(expr); err != nil { + // ... 
+ } + +It corresponds to [regexp.CompilePOSIX]. +*/ +func CompilePOSIX(expr string) (r *ReMap, err error) { + + var p *regexp.Regexp + + if p, err = regexp.CompilePOSIX(expr); err != nil { + return + } + + r = &ReMap{ + Regexp: p, + } + + return +} + +/* +MustCompile is a convenience shorthand for: + + var r *remap.ReMap = &remap.ReMap{ + Regexp: regexp.MustCompile(expr), + } + +It corresponds to [regexp.MustCompile]. +*/ +func MustCompile(expr string) (r *ReMap) { + + var err error + var p *regexp.Regexp + + // We panic ourselves instead of wrapping regexp.MustCompile. + // Makes debuggers a little more explicit. + if p, err = regexp.Compile(expr); err != nil { + panic(err) + } + + r = &ReMap{ + Regexp: p, + } + + return +} + +/* +MustCompilePOSIX is a convenience shorthand for: + + var r *remap.ReMap = &remap.ReMap{ + Regexp: regexp.MustCompilePOSIX(expr), + } + +It corresponds to [regexp.MustCompilePOSIX]. +*/ +func MustCompilePOSIX(expr string) (r *ReMap) { + + var err error + var p *regexp.Regexp + + // We panic ourselves instead of wrapping regexp.MustCompilePOSIX. + // Makes debuggers a little more explicit. + if p, err = regexp.CompilePOSIX(expr); err != nil { + panic(err) + } + + r = &ReMap{ + Regexp: p, + } + + return +} diff --git a/remap/funcs_remap.go b/remap/funcs_remap.go index 51fddea..f1749cf 100644 --- a/remap/funcs_remap.go +++ b/remap/funcs_remap.go @@ -5,6 +5,8 @@ Map returns a map[string][] for regexes with named capture groups m Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups using the same group name (though your IDE might complain; I know GoLand does). +It will panic if the embedded [regexp.Regexp] is nil. + Each match for each group is in a slice keyed under that group name, with that slice ordered by the indexing done by the regex match itself. 
@@ -87,7 +89,7 @@ In detail, matches and/or its values may be nil or empty under the following con IF inclNoMatch is true IF inclNoMatchStrict is true THEN matches[] is defined and non-nil, but populated with placeholder nils - (matches[] == [][]byte{nil[, nil...]}) + (matches[] == [][]byte{nil[, nil, ...]}) ELSE THEN matches[] is guaranteed defined but may be nil (_, ok = matches[]; ok == true) ELSE @@ -109,7 +111,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m return } - names = r.Regexp.SubexpNames() + names = r.Regexp.SubexpNames()[:] matchBytes = r.Regexp.FindSubmatch(b) if matchBytes == nil { @@ -204,13 +206,15 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m } /* -MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead. +MapString is exactly like [ReMap.Map], but operates on (and returns) strings instead. (matches will always be nil if s == “.) -A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified). +It will panic if the embedded [regexp.Regexp] is nil. + +A small deviation and caveat, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified). This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either -*not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after. +*not* use inclNoMatchStrict or to use [ReMap.Map] instead and convert any non-nil values to strings after. 
Particularly: @@ -233,7 +237,7 @@ is provided but s does not match then matches will be: # inclNoMatchStrict If true (and inclNoMatch is true), instead of a single nil the group's values will be -a slice of eempty string values explicitly matching the number of times the group name is specified +a slice of empty string values explicitly matching the number of times the group name is specified in the pattern. For example, if a pattern: @@ -290,8 +294,8 @@ In detail, matches and/or its values may be nil or empty under the following con IF does not have a match IF inclNoMatch is true IF inclNoMatchStrict is true - THEN matches[] is defined and non-nil, but populated with placeholder nils - (matches[] == []string{""[, ""...]}) + THEN matches[] is defined and non-nil, but populated with placeholder strings + (matches[] == []string{""[, "", ...]}) ELSE THEN matches[] is guaranteed defined but may be nil (_, ok = matches[]; ok == true) ELSE @@ -334,7 +338,8 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo return } /* - I'm not entirely sure how serious they are about "the slice should not be modified"... + I'm not entirely sure how serious they are about + "the slice should not be modified"... DO NOT sort or dedupe `names`! 
If the same name for groups is duplicated, it will be duplicated here in proper order and the ordering is tied to @@ -351,7 +356,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo return } - if names == nil || len(names) <= 1 { + if names == nil || len(names) == 0 || len(names) == 1 { /* No named capture groups; technically only the last condition would be the case, @@ -363,6 +368,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo } return } + names = names[1:] if len(matchIndices) == 0 || len(matchIndices) == 1 { /* @@ -385,15 +391,15 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo } return } + matchIndices = matchIndices[2:] /* - A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:]) + A reslice of `matchIndices` starts at 2 (as long as `names` is sliced [1:]) because they're in pairs: []int{, , , , ...} and the first pair is the entire pattern match (un-resliced names[0]). - Thus the len(matchIndices) == 2*len(names), *even* if you + Thus the len(matchIndices) == 2*len(names), *even* if you reslice. Keep in mind that since the first element of names is removed, - the first pair here is skipped. - This provides a bit more consistent readability, though. + we reslice matchIndices as well (above). */ idxChunks = make([][]int, len(names)) chunkIdx = 0 @@ -411,6 +417,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo // group did not match chunkIndices = nil } else { + // single character if chunkIndices[0] == chunkIndices[1] { chunkIndices = []int{chunkIndices[0]} } else { @@ -432,6 +439,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo (which is either an *unnamed* capture group OR the first element in `names`, which is always the entire match). + (We reslice out the latter.) 
*/ if grpNm == "" { continue diff --git a/remap/funcs_remap_test.go b/remap/funcs_remap_test.go new file mode 100644 index 0000000..9a25fbf --- /dev/null +++ b/remap/funcs_remap_test.go @@ -0,0 +1,279 @@ +package remap + +import ( + `fmt` + `reflect` + `regexp` + "testing" +) + +type ( + testMatcher struct { + Nm string + S string + M *ReMap + Expected map[string][][]byte + ExpectedStr map[string][]string + ParamInclNoMatch bool + ParamInclNoMatchStrict bool + ParamInclMustMatch bool + } +) + +func TestRemap(t *testing.T) { + + var matches map[string][][]byte + + for midx, m := range []testMatcher{ + testMatcher{ + Nm: "No matches", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: nil, + }, + testMatcher{ + Nm: "Single mid match", + S: "This contains a single match in the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match)\s+`)}, + Expected: map[string][][]byte{ + "g1": [][]byte{[]byte("match")}, + }, + }, + testMatcher{ + Nm: "multi mid match", + S: "This contains a single match and another match in the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another (?P<g1>match)\s+`)}, + Expected: map[string][][]byte{ + "g1": [][]byte{ + []byte("match"), + []byte("match"), + }, + }, + }, + testMatcher{ + Nm: "line match", + S: "This\ncontains a\nsingle\nmatch\non a dedicated line", + M: &ReMap{regexp.MustCompile(`(?m)^(?P<g1>match)$`)}, + Expected: map[string][][]byte{ + "g1": [][]byte{ + []byte("match"), + }, + }, + }, + testMatcher{ + Nm: "multiline match", + S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)}, + Expected: map[string][][]byte{ + "g1": [][]byte{ + []byte("match"), + []byte("match"), + }, + }, + }, + } { + matches = m.M.Map([]byte(m.S), false, false, false) + t.Logf( + "#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n", + midx+1, + m.S, + m.M.Regexp.String(), + testBmapToStrMap(matches), + ) + if 
!reflect.DeepEqual(matches, m.Expected) { + t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.Expected, matches) + } + } + +} + +func TestRemapParams(t *testing.T) { + + var matches map[string][][]byte + + for midx, m := range []testMatcher{ + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: nil, + ParamInclNoMatch: false, + ParamInclNoMatchStrict: false, + ParamInclMustMatch: false, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: nil, + ParamInclNoMatch: false, + ParamInclNoMatchStrict: true, + ParamInclMustMatch: false, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: nil, + ParamInclNoMatch: false, + ParamInclNoMatchStrict: true, + ParamInclMustMatch: true, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: nil, + ParamInclNoMatch: false, + ParamInclNoMatchStrict: false, + ParamInclMustMatch: true, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: make(map[string][][]byte), + ParamInclNoMatch: true, + ParamInclNoMatchStrict: false, + ParamInclMustMatch: false, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: make(map[string][][]byte), + ParamInclNoMatch: true, + ParamInclNoMatchStrict: true, + ParamInclMustMatch: false, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: make(map[string][][]byte), + ParamInclNoMatch: true, + ParamInclNoMatchStrict: true, + ParamInclMustMatch: true, + }, + testMatcher{ + Nm: "", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + Expected: make(map[string][][]byte), + ParamInclNoMatch: true, + ParamInclNoMatchStrict: false, + ParamInclMustMatch: true, + }, + } { + matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, 
m.ParamInclMustMatch) + t.Logf( + "%d: %v/%v/%v: %#v\n", + midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch, matches, + ) + if !reflect.DeepEqual(matches, m.Expected) { + t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.Expected, matches) + } + } + +} + +func TestRemapString(t *testing.T) { + + var matches map[string][]string + + for midx, m := range []testMatcher{ + testMatcher{ + Nm: "No matches", + S: "this is a test", + M: &ReMap{regexp.MustCompile(``)}, + ExpectedStr: nil, + }, + testMatcher{ + Nm: "Single mid match", + S: "This contains a single match in the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match)\s+`)}, + ExpectedStr: map[string][]string{ + "g1": []string{"match"}, + }, + }, + testMatcher{ + Nm: "multi mid match", + S: "This contains a single match and another match in the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another (?P<g1>match)\s+`)}, + ExpectedStr: map[string][]string{ + "g1": []string{ + "match", + "match", + }, + }, + }, + testMatcher{ + Nm: "line match", + S: "This\ncontains a\nsingle\nmatch\non a dedicated line", + M: &ReMap{regexp.MustCompile(`(?m)^(?P<g1>match)$`)}, + ExpectedStr: map[string][]string{ + "g1": []string{ + "match", + }, + }, + }, + testMatcher{ + Nm: "multiline match", + S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)}, + ExpectedStr: map[string][]string{ + "g1": []string{ + "match", + "match", + }, + }, + }, + } { + matches = m.M.MapString(m.S, false, false, false) + t.Logf( + "#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n", + midx+1, + m.S, + m.M.Regexp.String(), + testSmapToStrMap(matches), + ) + if !reflect.DeepEqual(matches, m.ExpectedStr) { + t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.ExpectedStr, matches) + } + } + +} + +func testBmapToStrMap(bmap map[string][][]byte) (s string) { + + if bmap == nil { + 
return + } + + s = "\n" + for k, v := range bmap { + s += fmt.Sprintf("\t%s\n", k) + for _, i := range v { + s += fmt.Sprintf("\t\t%s\n", string(i)) + } + } + + return +} + +func testSmapToStrMap(smap map[string][]string) (s string) { + + if smap == nil { + return + } + + s = "\n" + for k, v := range smap { + s += fmt.Sprintf("\t%s\n", k) + for _, i := range v { + s += fmt.Sprintf("\t\t%s\n", i) + } + } + + return +} diff --git a/remap/types.go b/remap/types.go index dba0f0e..c54fc37 100644 --- a/remap/types.go +++ b/remap/types.go @@ -5,7 +5,7 @@ import ( ) type ( - // ReMap provides some map-related functions around a regexp.Regexp. + // ReMap provides some map-related functions around a [regexp.Regexp]. ReMap struct { *regexp.Regexp }