ADDED:
* Better docs for remap
* Added returner convenience funcs for remap

FIXED:
* Proper resliced remap.ReMap.MapString
This commit is contained in:
brent saner
2026-01-06 02:53:21 -05:00
parent ef56898d6b
commit 834395c050
5 changed files with 424 additions and 16 deletions

View File

@@ -1,4 +1,12 @@
/*
Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups.
Package remap provides convenience functions around regular expressions,
primarily offering maps for named capture groups.
It offers convenience equivalents of the following:
* [regexp.Compile] ([Compile])
* [regexp.CompilePOSIX] ([CompilePOSIX])
* [regexp.MustCompile] ([MustCompile])
* [regexp.MustCompilePOSIX] ([MustCompilePOSIX])
*/
package remap

113
remap/funcs.go Normal file
View File

@@ -0,0 +1,113 @@
package remap
import (
"regexp"
)
/*
Compile compiles expr with [regexp.Compile] and wraps the result in a [ReMap].

It is a convenience shorthand for:

	var err error
	var r *remap.ReMap = new(remap.ReMap)

	if r.Regexp, err = regexp.Compile(expr); err != nil {
		// ...
	}

If expr cannot be compiled, the returned r is nil and err is the
error reported by [regexp.Compile].

It corresponds to [regexp.Compile].
*/
func Compile(expr string) (r *ReMap, err error) {
	compiled, compileErr := regexp.Compile(expr)
	if compileErr != nil {
		return nil, compileErr
	}

	return &ReMap{Regexp: compiled}, nil
}
/*
CompilePOSIX compiles expr with [regexp.CompilePOSIX] and wraps the result in a [ReMap].

It is a convenience shorthand for:

	var err error
	var r *remap.ReMap = new(remap.ReMap)

	if r.Regexp, err = regexp.CompilePOSIX(expr); err != nil {
		// ...
	}

If expr cannot be compiled, the returned r is nil and err is the
error reported by [regexp.CompilePOSIX].

It corresponds to [regexp.CompilePOSIX].
*/
func CompilePOSIX(expr string) (r *ReMap, err error) {
	compiled, compileErr := regexp.CompilePOSIX(expr)
	if compileErr != nil {
		return nil, compileErr
	}

	return &ReMap{Regexp: compiled}, nil
}
/*
MustCompile compiles expr and wraps the result in a [ReMap], panicking if
expr cannot be compiled.

It is a convenience shorthand for:

	var r *remap.ReMap = &remap.ReMap{
		Regexp: regexp.MustCompile(expr),
	}

The panic value is the error returned by [regexp.Compile].

It corresponds to [regexp.MustCompile].
*/
func MustCompile(expr string) (r *ReMap) {
	// The panic is raised here, rather than delegating to regexp.MustCompile,
	// so that it originates from this package; that makes debuggers a little
	// more explicit about where things went wrong.
	compiled, err := regexp.Compile(expr)
	if err != nil {
		panic(err)
	}

	return &ReMap{Regexp: compiled}
}
/*
MustCompilePOSIX compiles expr with POSIX semantics and wraps the result in
a [ReMap], panicking if expr cannot be compiled.

It is a convenience shorthand for:

	var r *remap.ReMap = &remap.ReMap{
		Regexp: regexp.MustCompilePOSIX(expr),
	}

The panic value is the error returned by [regexp.CompilePOSIX].

It corresponds to [regexp.MustCompilePOSIX].
*/
func MustCompilePOSIX(expr string) (r *ReMap) {
	// The panic is raised here, rather than delegating to
	// regexp.MustCompilePOSIX, so that it originates from this package;
	// that makes debuggers a little more explicit about where things went wrong.
	compiled, err := regexp.CompilePOSIX(expr)
	if err != nil {
		panic(err)
	}

	return &ReMap{Regexp: compiled}
}

View File

@@ -5,6 +5,8 @@ Map returns a map[string][]<match bytes> for regexes with named capture groups m
Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups
using the same group name (though your IDE might complain; I know GoLand does).
It will panic if the embedded [regexp.Regexp] is nil.
Each match for each group is in a slice keyed under that group name, with that slice
ordered by the indexing done by the regex match itself.
@@ -87,7 +89,7 @@ In detail, matches and/or its values may be nil or empty under the following con
IF inclNoMatch is true
IF inclNoMatchStrict is true
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
(matches[<group name>] == [][]byte{nil[, nil...]})
(matches[<group name>] == [][]byte{nil[, nil, ...]})
ELSE
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
ELSE
@@ -109,7 +111,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
return
}
names = r.Regexp.SubexpNames()
names = r.Regexp.SubexpNames()[:]
matchBytes = r.Regexp.FindSubmatch(b)
if matchBytes == nil {
@@ -204,13 +206,15 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
}
/*
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead.
MapString is exactly like [ReMap.Map], but operates on (and returns) strings instead.
(matches will always be nil if s == "".)
A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
It will panic if the embedded [regexp.Regexp] is nil.
A small deviation and caveat, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
*not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after.
*not* use inclNoMatchStrict or to use [ReMap.Map] instead and convert any non-nil values to strings after.
Particularly:
@@ -233,7 +237,7 @@ is provided but s does not match then matches will be:
# inclNoMatchStrict
If true (and inclNoMatch is true), instead of a single nil the group's values will be
a slice of eempty string values explicitly matching the number of times the group name is specified
a slice of empty string values explicitly matching the number of times the group name is specified
in the pattern.
For example, if a pattern:
@@ -290,8 +294,8 @@ In detail, matches and/or its values may be nil or empty under the following con
IF <group name> does not have a match
IF inclNoMatch is true
IF inclNoMatchStrict is true
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
(matches[<group name>] == []string{""[, ""...]})
THEN matches[<group name>] is defined and non-nil, but populated with placeholder strings
(matches[<group name>] == []string{""[, "", ...]})
ELSE
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
ELSE
@@ -334,7 +338,8 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
return
}
/*
I'm not entirely sure how serious they are about "the slice should not be modified"...
I'm not entirely sure how serious they are about
"the slice should not be modified"...
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
it will be duplicated here in proper order and the ordering is tied to
@@ -351,7 +356,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
return
}
if names == nil || len(names) <= 1 {
if names == nil || len(names) == 0 || len(names) == 1 {
/*
No named capture groups;
technically only the last condition would be the case,
@@ -363,6 +368,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
}
return
}
names = names[1:]
if len(matchIndices) == 0 || len(matchIndices) == 1 {
/*
@@ -385,15 +391,15 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
}
return
}
matchIndices = matchIndices[2:]
/*
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
A reslice of `matchIndices` starts at 2 (as long as `names` is sliced [1:])
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0]).
Thus the len(matchIndices) == 2*len(names), *even* if you
Thus the len(matchIndices) == 2*len(names), *even* if you reslice.
Keep in mind that since the first element of names is removed,
the first pair here is skipped.
This provides a bit more consistent readability, though.
we reslice matchIndices as well (above).
*/
idxChunks = make([][]int, len(names))
chunkIdx = 0
@@ -411,6 +417,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
// group did not match
chunkIndices = nil
} else {
// single character
if chunkIndices[0] == chunkIndices[1] {
chunkIndices = []int{chunkIndices[0]}
} else {
@@ -432,6 +439,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
(which is either an *unnamed* capture group
OR the first element in `names`, which is always
the entire match).
(We reslice out the latter.)
*/
if grpNm == "" {
continue

279
remap/funcs_remap_test.go Normal file
View File

@@ -0,0 +1,279 @@
package remap
import (
`fmt`
`reflect`
`regexp`
"testing"
)
// testMatcher describes one table-driven case for the ReMap mapping tests.
type testMatcher struct {
	// Nm is a human-readable case name; it is printed in failure messages.
	Nm string
	// S is the input handed to the matcher (converted to bytes for ReMap.Map).
	S string
	// M is the compiled pattern under test.
	M *ReMap
	// Expected is the result ReMap.Map is required to return.
	Expected map[string][][]byte
	// ExpectedStr is the result ReMap.MapString is required to return.
	ExpectedStr map[string][]string
	// ParamInclNoMatch is passed as the inclNoMatch argument.
	ParamInclNoMatch bool
	// ParamInclNoMatchStrict is passed as the inclNoMatchStrict argument.
	ParamInclNoMatchStrict bool
	// ParamInclMustMatch is passed as the mustMatch argument.
	ParamInclMustMatch bool
}
// TestRemap runs ReMap.Map, with every option disabled, against a table of
// patterns and inputs and requires the returned map to equal the expected
// one exactly (per reflect.DeepEqual). The first mismatch aborts the test.
func TestRemap(t *testing.T) {
	cases := []testMatcher{
		{
			Nm:       "No matches",
			S:        "this is a test",
			M:        &ReMap{Regexp: regexp.MustCompile(``)},
			Expected: nil,
		},
		{
			Nm: "Single mid match",
			S:  "This contains a single match in the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match)\s+`)},
			Expected: map[string][][]byte{
				"g1": {[]byte("match")},
			},
		},
		{
			// The same group name is used twice; both hits are expected, in order.
			Nm: "multi mid match",
			S:  "This contains a single match and another match in the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match) and another (?P<g1>match)\s+`)},
			Expected: map[string][][]byte{
				"g1": {
					[]byte("match"),
					[]byte("match"),
				},
			},
		},
		{
			Nm: "line match",
			S:  "This\ncontains a\nsingle\nmatch\non a dedicated line",
			M:  &ReMap{Regexp: regexp.MustCompile(`(?m)^(?P<g1>match)$`)},
			Expected: map[string][][]byte{
				"g1": {
					[]byte("match"),
				},
			},
		},
		{
			Nm: "multiline match",
			S:  "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
			Expected: map[string][][]byte{
				"g1": {
					[]byte("match"),
					[]byte("match"),
				},
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		caseNum := i + 1

		got := tc.M.Map([]byte(tc.S), false, false, false)

		t.Logf(
			"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
			caseNum,
			tc.S,
			tc.M.Regexp.String(),
			testBmapToStrMap(got),
		)

		if reflect.DeepEqual(got, tc.Expected) {
			continue
		}
		t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", caseNum, tc.Nm, tc.Expected, got)
	}
}
// TestRemapParams calls ReMap.Map with every combination of its three boolean
// options (inclNoMatch, inclNoMatchStrict, mustMatch) on a pattern that has
// no named capture groups.
//
// Per the table below, the result is expected to be nil whenever inclNoMatch
// is false and an empty, non-nil map whenever it is true, regardless of the
// other two options. The first mismatch aborts the test.
func TestRemapParams(t *testing.T) {
	var matches map[string][][]byte

	for midx, m := range []testMatcher{
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               nil,
			ParamInclNoMatch:       false,
			ParamInclNoMatchStrict: false,
			ParamInclMustMatch:     false,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               nil,
			ParamInclNoMatch:       false,
			ParamInclNoMatchStrict: true,
			ParamInclMustMatch:     false,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               nil,
			ParamInclNoMatch:       false,
			ParamInclNoMatchStrict: true,
			ParamInclMustMatch:     true,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               nil,
			ParamInclNoMatch:       false,
			ParamInclNoMatchStrict: false,
			ParamInclMustMatch:     true,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               make(map[string][][]byte),
			ParamInclNoMatch:       true,
			ParamInclNoMatchStrict: false,
			ParamInclMustMatch:     false,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               make(map[string][][]byte),
			ParamInclNoMatch:       true,
			ParamInclNoMatchStrict: true,
			ParamInclMustMatch:     false,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               make(map[string][][]byte),
			ParamInclNoMatch:       true,
			ParamInclNoMatchStrict: true,
			ParamInclMustMatch:     true,
		},
		{
			S:                      "this is a test",
			M:                      &ReMap{regexp.MustCompile(``)},
			Expected:               make(map[string][][]byte),
			ParamInclNoMatch:       true,
			ParamInclNoMatchStrict: false,
			ParamInclMustMatch:     true,
		},
	} {
		matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
		t.Logf(
			"%d: %v/%v/%v: %#v\n",
			midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch, matches,
		)
		if !reflect.DeepEqual(matches, m.Expected) {
			// Report the byte-map expectation that was actually compared
			// (not ExpectedStr, which is never set in this table), and
			// identify the case by its parameters since the cases are unnamed.
			t.Fatalf(
				"Case #%d (inclNoMatch=%v, inclNoMatchStrict=%v, mustMatch=%v): '%#v' != '%#v'",
				midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch,
				m.Expected, matches,
			)
		}
	}
}
// TestRemapString runs ReMap.MapString, with every option disabled, against a
// table of patterns and inputs and requires the returned map to equal the
// expected one exactly (per reflect.DeepEqual). The first mismatch aborts the test.
func TestRemapString(t *testing.T) {
	cases := []testMatcher{
		{
			Nm:          "No matches",
			S:           "this is a test",
			M:           &ReMap{Regexp: regexp.MustCompile(``)},
			ExpectedStr: nil,
		},
		{
			Nm: "Single mid match",
			S:  "This contains a single match in the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match)\s+`)},
			ExpectedStr: map[string][]string{
				"g1": {"match"},
			},
		},
		{
			// The same group name is used twice; both hits are expected, in order.
			Nm: "multi mid match",
			S:  "This contains a single match and another match in the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match) and another (?P<g1>match)\s+`)},
			ExpectedStr: map[string][]string{
				"g1": {
					"match",
					"match",
				},
			},
		},
		{
			Nm: "line match",
			S:  "This\ncontains a\nsingle\nmatch\non a dedicated line",
			M:  &ReMap{Regexp: regexp.MustCompile(`(?m)^(?P<g1>match)$`)},
			ExpectedStr: map[string][]string{
				"g1": {
					"match",
				},
			},
		},
		{
			Nm: "multiline match",
			S:  "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
			M:  &ReMap{Regexp: regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
			ExpectedStr: map[string][]string{
				"g1": {
					"match",
					"match",
				},
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		caseNum := i + 1

		got := tc.M.MapString(tc.S, false, false, false)

		t.Logf(
			"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
			caseNum,
			tc.S,
			tc.M.Regexp.String(),
			testSmapToStrMap(got),
		)

		if reflect.DeepEqual(got, tc.ExpectedStr) {
			continue
		}
		t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", caseNum, tc.Nm, tc.ExpectedStr, got)
	}
}
// testBmapToStrMap renders a byte-slice match map as indented text for test logs.
//
// A nil map renders as the empty string. Otherwise the output starts with a
// newline, followed by each group name on its own line (one leading tab) and
// each of that group's values on its own line (two leading tabs).
// Groups appear in map iteration order, which is not stable across calls.
func testBmapToStrMap(bmap map[string][][]byte) (s string) {
	if bmap == nil {
		return ""
	}

	s = "\n"
	for group, vals := range bmap {
		s = fmt.Sprintf("%s\t%s\n", s, group)
		for idx := range vals {
			// %s formats a []byte as its raw bytes, same as a string conversion.
			s = fmt.Sprintf("%s\t\t%s\n", s, vals[idx])
		}
	}

	return s
}
// testSmapToStrMap renders a string match map as indented text for test logs.
//
// A nil map renders as the empty string. Otherwise the output starts with a
// newline, followed by each group name on its own line (one leading tab) and
// each of that group's values on its own line (two leading tabs).
// Groups appear in map iteration order, which is not stable across calls.
func testSmapToStrMap(smap map[string][]string) (s string) {
	if smap == nil {
		return ""
	}

	s = "\n"
	for group, vals := range smap {
		s = fmt.Sprintf("%s\t%s\n", s, group)
		for idx := range vals {
			s = fmt.Sprintf("%s\t\t%s\n", s, vals[idx])
		}
	}

	return s
}

View File

@@ -5,7 +5,7 @@ import (
)
type (
// ReMap provides some map-related functions around a regexp.Regexp.
// ReMap provides some map-related functions around a [regexp.Regexp].
ReMap struct {
*regexp.Regexp
}