v1.15.3
FIXED: * Properly parse into map, add *All* variants
This commit is contained in:
11
remap/errs.go
Normal file
11
remap/errs.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package remap
|
||||
|
||||
import (
|
||||
`errors`
|
||||
)
|
||||
|
||||
// Sentinel errors used as panic values by the string index slicing helpers.
var (
	// ErrInvalidIdxPair signals an index pair whose start ([0]) is greater than its end ([1]).
	ErrInvalidIdxPair = errors.New("invalid index pair; [1] must be >= [0]")
	// ErrNoStr signals that no string was available to slice.
	ErrNoStr = errors.New("no string to slice/reslice/subslice")
	// ErrShortStr signals that an index pair reaches beyond the string being sliced.
	ErrShortStr = errors.New("string too short to slice/reslice/subslice")
)
|
||||
@@ -111,3 +111,60 @@ func MustCompilePOSIX(expr string) (r *ReMap) {
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
strIdxSlicer takes string s, and returns the substring marked by idxPair,
|
||||
where:
|
||||
|
||||
idxPair = [2]int{
|
||||
<substring START POSITION>,
|
||||
<substring END BOUNDARY>,
|
||||
}
|
||||
|
||||
That is, to get `oo` from `foobar`,
|
||||
|
||||
idxPair = [2]int{1, 3}
|
||||
# NOT:
|
||||
#idxPair = [2]int{1, 2}
|
||||
|
||||
subStr will be empty and matched will be false if:
|
||||
|
||||
* idxPair[0] < 0
|
||||
* idxPair[1] < 0
|
||||
|
||||
It will panic with [ErrShortStr] if:
|
||||
|
||||
* idxPair[0] > len(s)-1
|
||||
* idxPair[1] > len(s)
|
||||
|
||||
It will panic with [ErrInvalidIdxPair] if:
|
||||
|
||||
* idxPair[0] > idxPair[1]
|
||||
|
||||
It will properly handle single-character addresses (i.e. idxPair[0] == idxPair[1]).
|
||||
*/
|
||||
func strIdxSlicer(s string, idxPair [2]int) (subStr string, matched bool) {
|
||||
|
||||
if idxPair[0] < 0 || idxPair[1] < 0 {
|
||||
return
|
||||
}
|
||||
matched = true
|
||||
|
||||
if (idxPair[0] > (len(s) - 1)) ||
|
||||
(idxPair[1] > len(s)) {
|
||||
panic(ErrShortStr)
|
||||
}
|
||||
if idxPair[0] > idxPair[1] {
|
||||
panic(ErrInvalidIdxPair)
|
||||
}
|
||||
|
||||
if idxPair[0] == idxPair[1] {
|
||||
// single character
|
||||
subStr = string(s[idxPair[0]])
|
||||
} else {
|
||||
// multiple characters
|
||||
subStr = s[idxPair[0]:idxPair[1]]
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -10,6 +10,9 @@ It will panic if the embedded [regexp.Regexp] is nil.
|
||||
Each match for each group is in a slice keyed under that group name, with that slice
|
||||
ordered by the indexing done by the regex match itself.
|
||||
|
||||
This operates on only the first found match (like [regexp.Regexp.FindSubmatch]).
|
||||
To operate on *all* matches, use [ReMap.MapAll].
|
||||
|
||||
In summary, the parameters are as follows:
|
||||
|
||||
# inclNoMatch
|
||||
@@ -33,6 +36,7 @@ is provided but b does not match then matches will be:
|
||||
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||
a slice of nil values explicitly matching the number of times the group name is specified
|
||||
in the pattern.
|
||||
May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns.
|
||||
|
||||
For example, if a pattern:
|
||||
|
||||
@@ -144,6 +148,9 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
|
||||
if inclNoMatch {
|
||||
if len(names) >= 1 {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
matches[grpNm] = nil
|
||||
}
|
||||
}
|
||||
@@ -156,7 +163,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
|
||||
grpNm = names[mIdx]
|
||||
/*
|
||||
Thankfully, it's actually a build error if a pattern specifies a named
|
||||
capture group with an empty name.
|
||||
capture group with an empty name.
|
||||
So we don't need to worry about accounting for that,
|
||||
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
||||
*/
|
||||
@@ -192,6 +199,138 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
|
||||
// This *technically* should be completely handled above.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(tmpMap) > 0 {
|
||||
matches = tmpMap
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
MapAll behaves exactly like [ReMap.Map] but will "squash"/consolidate *all* found matches, not just the first occurrence,
|
||||
into the group name.
|
||||
|
||||
You likely want to use this instead of [ReMap.Map] for multiline patterns.
|
||||
*/
|
||||
func (r *ReMap) MapAll(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
|
||||
|
||||
var ok bool
|
||||
var mIdx int
|
||||
var isEmpty bool
|
||||
var match []byte
|
||||
var grpNm string
|
||||
var names []string
|
||||
var mbGrp [][]byte
|
||||
var ptrnNms []string
|
||||
var matchBytes [][][]byte
|
||||
var tmpMap map[string][][]byte = make(map[string][][]byte)
|
||||
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
|
||||
names = r.Regexp.SubexpNames()[:]
|
||||
matchBytes = r.Regexp.FindAllSubmatch(b, -1)
|
||||
|
||||
if matchBytes == nil {
|
||||
// b does not match pattern
|
||||
if !mustMatch {
|
||||
matches = make(map[string][][]byte)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||
/*
|
||||
no named capture groups;
|
||||
technically only the last condition would be the case.
|
||||
*/
|
||||
if inclNoMatch {
|
||||
matches = make(map[string][][]byte)
|
||||
}
|
||||
return
|
||||
}
|
||||
names = names[1:]
|
||||
|
||||
tmpMap = make(map[string][][]byte)
|
||||
|
||||
// From here, it behaves (sort of) like ReMap.Map
|
||||
// except mbGrp is like matchBytes in Map.
|
||||
for _, mbGrp = range matchBytes {
|
||||
|
||||
// Unlike ReMap.Map, we have to do a little additional logic.
|
||||
isEmpty = false
|
||||
ptrnNms = make([]string, 0, len(names))
|
||||
|
||||
if mbGrp == nil {
|
||||
isEmpty = true
|
||||
}
|
||||
|
||||
if !isEmpty {
|
||||
if len(mbGrp) == 0 || len(mbGrp) == 1 {
|
||||
/*
|
||||
no submatches whatsoever.
|
||||
*/
|
||||
isEmpty = true
|
||||
} else {
|
||||
mbGrp = mbGrp[1:]
|
||||
|
||||
for mIdx, match = range mbGrp {
|
||||
if mIdx > len(names) {
|
||||
break
|
||||
}
|
||||
grpNm = names[mIdx]
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
ptrnNms = append(ptrnNms, grpNm)
|
||||
|
||||
if match == nil {
|
||||
// This specific group didn't match, but it matched the whole pattern.
|
||||
if !inclNoMatch {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
if !inclNoMatchStrict {
|
||||
tmpMap[grpNm] = nil
|
||||
} else {
|
||||
tmpMap[grpNm] = [][]byte{nil}
|
||||
}
|
||||
} else {
|
||||
if inclNoMatchStrict {
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = make([][]byte, 0)
|
||||
}
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I can't recall why I capture this.
|
||||
_ = ptrnNms
|
||||
}
|
||||
|
||||
// *Theoretically* all of these should be populated with at least a nil.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
@@ -207,10 +346,14 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
|
||||
|
||||
/*
|
||||
MapString is exactly like [ReMap.Map], but operates on (and returns) strings instead.
|
||||
(matches will always be nil if s == “.)
|
||||
(matches will always be nil if s == "".)
|
||||
|
||||
It will panic if the embedded [regexp.Regexp] is nil.
|
||||
|
||||
This operates on only the first found match (like [regexp.Regexp.FindStringSubmatch]).
|
||||
To operate on *all* matches, use [ReMap.MapStringAll].
|
||||
To operate on *all* matches with retained grouping, use [ReMap.MapStringAllSplit].
|
||||
|
||||
A small deviation and caveat, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
|
||||
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
|
||||
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
|
||||
@@ -239,6 +382,7 @@ is provided but s does not match then matches will be:
|
||||
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||
a slice of empty string values explicitly matching the number of times the group name is specified
|
||||
in the pattern.
|
||||
May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns.
|
||||
|
||||
For example, if a pattern:
|
||||
|
||||
@@ -308,27 +452,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
||||
var ok bool
|
||||
var endIdx int
|
||||
var startIdx int
|
||||
var chunkIdx int
|
||||
var grpIdx int
|
||||
var grpNm string
|
||||
var names []string
|
||||
var matchStr string
|
||||
/*
|
||||
A slice of indices or index pairs.
|
||||
For each element `e` in idxChunks,
|
||||
* if `e` is nil, no group match.
|
||||
* if len(e) == 1, only a single character was matched.
|
||||
* otherwise len(e) == 2, the start and end of the match.
|
||||
*/
|
||||
var idxChunks [][]int
|
||||
var si stringIndexer
|
||||
var matchIndices []int
|
||||
var chunkIndices []int // always 2 elements; start pos and end pos
|
||||
var tmpMap map[string][]string = make(map[string][]string)
|
||||
|
||||
/*
|
||||
OK so this is a bit of a deviation.
|
||||
|
||||
It's not as straightforward as above, because there isn't an explicit way
|
||||
like above to determine if a pattern was *matched as an empty string* vs.
|
||||
like above to determine if a pattern was *matched as an empty string* vs.
|
||||
*not matched*.
|
||||
|
||||
So instead do roundabout index-y things.
|
||||
@@ -384,26 +520,34 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
||||
matches = make(map[string][]string)
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm != "" {
|
||||
matches[grpNm] = nil
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
matches[grpNm] = nil
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
matchIndices = matchIndices[2:]
|
||||
|
||||
/*
|
||||
A reslice of `matchIndices` starts at 2 (as long as `names` is sliced [1:])
|
||||
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
||||
and the first pair is the entire pattern match (un-resliced names[0]).
|
||||
Thus the len(matchIndices) == 2*len(names), *even* if you reslice.
|
||||
The reslice of `matchIndices` starts at 2 because they're in pairs:
|
||||
|
||||
[]int{<start>, <end>, <start>, <end>, ...}
|
||||
|
||||
and the first pair is the entire pattern match (un-resliced names[0],
|
||||
un-resliced matchIndices[0]).
|
||||
|
||||
Thus the len(matchIndices) == 2*len(names) (*should*, that is), *even* if you reslice.
|
||||
Keep in mind that since the first element of names is removed,
|
||||
we reslices matchIndices as well (above).
|
||||
we reslice matchIndices as well.
|
||||
*/
|
||||
idxChunks = make([][]int, len(names))
|
||||
chunkIdx = 0
|
||||
endIdx = 0
|
||||
matchIndices = matchIndices[2:]
|
||||
|
||||
tmpMap = make(map[string][]string)
|
||||
|
||||
// Note that the second index is the *upper boundary*, not a *position in the string*
|
||||
// so these indices are perfectly usable as-is as returned from the regexp methods.
|
||||
// http://golang.org/ref/spec#Slice_expressions
|
||||
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
|
||||
endIdx = startIdx + 2
|
||||
// This technically should never happen.
|
||||
@@ -411,77 +555,253 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
||||
endIdx = len(matchIndices)
|
||||
}
|
||||
|
||||
chunkIndices = matchIndices[startIdx:endIdx]
|
||||
|
||||
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
||||
// group did not match
|
||||
chunkIndices = nil
|
||||
} else {
|
||||
// single character
|
||||
if chunkIndices[0] == chunkIndices[1] {
|
||||
chunkIndices = []int{chunkIndices[0]}
|
||||
} else {
|
||||
chunkIndices = matchIndices[startIdx:endIdx]
|
||||
}
|
||||
if grpIdx >= len(names) {
|
||||
break
|
||||
}
|
||||
idxChunks[chunkIdx] = chunkIndices
|
||||
chunkIdx++
|
||||
}
|
||||
|
||||
// Now associate with names and pull the string sequence.
|
||||
for chunkIdx, chunkIndices = range idxChunks {
|
||||
grpNm = names[chunkIdx]
|
||||
/*
|
||||
Thankfully, it's actually a build error if a pattern specifies a named
|
||||
capture group with an empty name.
|
||||
So we don't need to worry about accounting for that,
|
||||
and can just skip over grpNm == ""
|
||||
(which is either an *unnamed* capture group
|
||||
OR the first element in `names`, which is always
|
||||
the entire match).
|
||||
(We reslice out the latter.)
|
||||
*/
|
||||
if grpNm == "" {
|
||||
si = stringIndexer{
|
||||
group: grpIdx,
|
||||
start: matchIndices[startIdx],
|
||||
end: matchIndices[endIdx-1],
|
||||
matched: true,
|
||||
nm: names[grpIdx],
|
||||
grpS: "",
|
||||
s: &matchStr,
|
||||
ptrn: r.Regexp,
|
||||
}
|
||||
grpIdx++
|
||||
|
||||
if si.nm == "" {
|
||||
// unnamed capture group
|
||||
continue
|
||||
}
|
||||
|
||||
if chunkIndices == nil || len(chunkIndices) == 0 {
|
||||
// group did not match
|
||||
// sets si.matched and si.grpS
|
||||
si.idxSlice(&s)
|
||||
|
||||
if !si.matched {
|
||||
if !inclNoMatch {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
if _, ok = tmpMap[si.nm]; !ok {
|
||||
if !inclNoMatchStrict {
|
||||
tmpMap[grpNm] = nil
|
||||
tmpMap[si.nm] = nil
|
||||
} else {
|
||||
tmpMap[grpNm] = []string{""}
|
||||
tmpMap[si.nm] = []string{""}
|
||||
}
|
||||
} else {
|
||||
if inclNoMatchStrict {
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], "")
|
||||
tmpMap[si.nm] = append(tmpMap[si.nm], "")
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
switch len(chunkIndices) {
|
||||
case 1:
|
||||
// Single character
|
||||
matchStr = string(s[chunkIndices[0]])
|
||||
case 2:
|
||||
// Multiple characters
|
||||
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
||||
if _, ok = tmpMap[si.nm]; !ok {
|
||||
tmpMap[si.nm] = make([]string, 0)
|
||||
}
|
||||
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = make([]string, 0)
|
||||
}
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
||||
tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS)
|
||||
}
|
||||
|
||||
// This *technically* should be completely handled above.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(tmpMap) > 0 {
|
||||
matches = tmpMap
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
MapStringAll behaves exactly like [ReMap.MapString] but will "squash"/consolidate *all* found matches, not just the first occurrence,
|
||||
into the group name.
|
||||
|
||||
You likely want to use this instead of [ReMap.MapString] for multiline patterns.
|
||||
*/
|
||||
func (r *ReMap) MapStringAll(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
|
||||
|
||||
var ok bool
|
||||
var endIdx int
|
||||
var startIdx int
|
||||
var grpIdx int
|
||||
var grpNm string
|
||||
var names []string
|
||||
var matchStr string
|
||||
var si stringIndexer
|
||||
var matchIndices []int
|
||||
var allMatchIndices [][]int
|
||||
var tmpMap map[string][]string = make(map[string][]string)
|
||||
|
||||
if s == "" {
|
||||
return
|
||||
}
|
||||
|
||||
names = r.Regexp.SubexpNames()[:]
|
||||
allMatchIndices = r.Regexp.FindAllStringSubmatchIndex(s, -1)
|
||||
|
||||
if allMatchIndices == nil {
|
||||
// s does not match pattern at all.
|
||||
if !mustMatch {
|
||||
matches = make(map[string][]string)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||
/*
|
||||
No named capture groups;
|
||||
technically only the last condition would be the case,
|
||||
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
|
||||
return a `[]string{""}`.
|
||||
*/
|
||||
if inclNoMatch {
|
||||
matches = make(map[string][]string)
|
||||
}
|
||||
return
|
||||
}
|
||||
names = names[1:]
|
||||
|
||||
if len(allMatchIndices) == 0 {
|
||||
// No matches (and thus submatches) whatsoever.
|
||||
// I think this is actually covered by the `if allMatchIndices == nil { ... }` above,
|
||||
// but this is still here for safety and efficiency - early return on no matches to iterate.
|
||||
matches = make(map[string][]string)
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
matches[grpNm] = nil
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
// Do *NOT* trim/reslice allMatchIndices!
|
||||
// The reslicing is done below, *inside* each matchIndices iteration!
|
||||
|
||||
tmpMap = make(map[string][]string)
|
||||
|
||||
// From here, it behaves (sort of) like ReMap.MapString.
|
||||
|
||||
// Build the strictly-paired chunk indexes and populate them.
|
||||
// We are iterating over *match sets*; matchIndices here should be analogous
|
||||
// to matchIndices in ReMap.MapString.
|
||||
for _, matchIndices = range allMatchIndices {
|
||||
|
||||
if matchIndices == nil {
|
||||
// I *think* the exception with the *All* variant here
|
||||
// is the *entire* return (allMatchIndices) is nil if there
|
||||
// aren't any matches; I can't imagine there'd be any feasible
|
||||
// way it'd insert a nil *element* for an index mapping group.
|
||||
// So just continuing here should be fine;
|
||||
// this continue SHOULD be unreachable.
|
||||
continue
|
||||
}
|
||||
|
||||
// Reslice *here*, on the particular match index group.
|
||||
// Grab the matchStr first; it's not currently *used* by anything but may in the future.
|
||||
matchStr, ok = strIdxSlicer(
|
||||
s,
|
||||
*(*[2]int)(matchIndices[0:2]),
|
||||
)
|
||||
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
||||
// No *sub*matches (capture groups) in this match, but it still matched the pattern.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
// We don't immediately return, though; we just stage out group names just in case.
|
||||
// That's why we use tmpMap and not matches.
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
matchIndices = matchIndices[2:]
|
||||
|
||||
// Reset from previous loop
|
||||
endIdx = 0
|
||||
grpIdx = 0
|
||||
|
||||
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
|
||||
endIdx = startIdx + 2
|
||||
if endIdx > len(matchIndices) {
|
||||
endIdx = len(matchIndices)
|
||||
}
|
||||
|
||||
if grpIdx >= len(names) {
|
||||
break
|
||||
}
|
||||
|
||||
si = stringIndexer{
|
||||
group: grpIdx,
|
||||
start: matchIndices[startIdx],
|
||||
end: matchIndices[endIdx-1],
|
||||
matched: true,
|
||||
nm: names[grpIdx],
|
||||
grpS: "",
|
||||
ptrn: r.Regexp,
|
||||
}
|
||||
grpIdx++
|
||||
// We do not include the entire match string here;
|
||||
// we don't need it for this. Waste of memory.
|
||||
_ = matchStr
|
||||
/*
|
||||
si.s = new(string)
|
||||
*si.s = matchStr
|
||||
*/
|
||||
|
||||
if si.nm == "" {
|
||||
// unnamed capture group
|
||||
continue
|
||||
}
|
||||
|
||||
// sets si.matched and si.grpS
|
||||
si.idxSlice(&s)
|
||||
|
||||
if !si.matched {
|
||||
if !inclNoMatch {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[si.nm]; !ok {
|
||||
if !inclNoMatchStrict {
|
||||
tmpMap[si.nm] = nil
|
||||
} else {
|
||||
tmpMap[si.nm] = []string{""}
|
||||
}
|
||||
} else {
|
||||
if inclNoMatchStrict {
|
||||
tmpMap[si.nm] = append(tmpMap[si.nm], "")
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok = tmpMap[si.nm]; !ok {
|
||||
tmpMap[si.nm] = make([]string, 0)
|
||||
}
|
||||
tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS)
|
||||
}
|
||||
}
|
||||
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package remap
|
||||
|
||||
import (
|
||||
`fmt`
|
||||
`reflect`
|
||||
`regexp`
|
||||
"fmt"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -12,6 +12,7 @@ type (
|
||||
Nm string
|
||||
S string
|
||||
M *ReMap
|
||||
All bool
|
||||
Expected map[string][][]byte
|
||||
ExpectedStr map[string][]string
|
||||
ParamInclNoMatch bool
|
||||
@@ -25,12 +26,14 @@ func TestRemap(t *testing.T) {
|
||||
var matches map[string][][]byte
|
||||
|
||||
for midx, m := range []testMatcher{
|
||||
// 1
|
||||
testMatcher{
|
||||
Nm: "No matches",
|
||||
S: "this is a test",
|
||||
M: &ReMap{regexp.MustCompile(``)},
|
||||
Expected: nil,
|
||||
},
|
||||
// 2
|
||||
testMatcher{
|
||||
Nm: "Single mid match",
|
||||
S: "This contains a single match in the middle of a string",
|
||||
@@ -39,6 +42,7 @@ func TestRemap(t *testing.T) {
|
||||
"g1": [][]byte{[]byte("match")},
|
||||
},
|
||||
},
|
||||
// 3
|
||||
testMatcher{
|
||||
Nm: "multi mid match",
|
||||
S: "This contains a single match and another match in the middle of a string",
|
||||
@@ -50,6 +54,7 @@ func TestRemap(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 4
|
||||
testMatcher{
|
||||
Nm: "line match",
|
||||
S: "This\ncontains a\nsingle\nmatch\non a dedicated line",
|
||||
@@ -60,10 +65,12 @@ func TestRemap(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 5
|
||||
testMatcher{
|
||||
Nm: "multiline match",
|
||||
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
|
||||
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
|
||||
Nm: "multiline match",
|
||||
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
|
||||
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
|
||||
All: true,
|
||||
Expected: map[string][][]byte{
|
||||
"g1": [][]byte{
|
||||
[]byte("match"),
|
||||
@@ -71,8 +78,32 @@ func TestRemap(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 6
|
||||
// More closely mirrors something closer to real-life
|
||||
testMatcher{
|
||||
Nm: "mixed match",
|
||||
S: " # No longer log hits/reqs/resps to file.\n" +
|
||||
" #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" +
|
||||
" #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" +
|
||||
" access_log off;\n" +
|
||||
" error_log /dev/null;\n\n" +
|
||||
" ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" +
|
||||
" ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n",
|
||||
M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P<logpath>.+);\s*$`)},
|
||||
All: true,
|
||||
Expected: map[string][][]byte{
|
||||
"logpath": [][]byte{
|
||||
[]byte("off"),
|
||||
[]byte("/dev/null"),
|
||||
},
|
||||
},
|
||||
},
|
||||
} {
|
||||
matches = m.M.Map([]byte(m.S), false, false, false)
|
||||
if m.All {
|
||||
matches = m.M.MapAll([]byte(m.S), false, false, false)
|
||||
} else {
|
||||
matches = m.M.Map([]byte(m.S), false, false, false)
|
||||
}
|
||||
t.Logf(
|
||||
"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
|
||||
midx+1,
|
||||
@@ -81,7 +112,7 @@ func TestRemap(t *testing.T) {
|
||||
testBmapToStrMap(matches),
|
||||
)
|
||||
if !reflect.DeepEqual(matches, m.Expected) {
|
||||
t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.Expected, matches)
|
||||
t.Fatalf("Case #%d (\"%s\"): expected '%#v' != received '%#v'", midx+1, m.Nm, m.Expected, matches)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -165,7 +196,11 @@ func TestRemapParams(t *testing.T) {
|
||||
ParamInclMustMatch: true,
|
||||
},
|
||||
} {
|
||||
matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
|
||||
if m.All {
|
||||
matches = m.M.MapAll([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
|
||||
} else {
|
||||
matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
|
||||
}
|
||||
t.Logf(
|
||||
"%d: %v/%v/%v: %#v\n",
|
||||
midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch, matches,
|
||||
@@ -182,12 +217,14 @@ func TestRemapString(t *testing.T) {
|
||||
var matches map[string][]string
|
||||
|
||||
for midx, m := range []testMatcher{
|
||||
// 1
|
||||
testMatcher{
|
||||
Nm: "No matches",
|
||||
S: "this is a test",
|
||||
M: &ReMap{regexp.MustCompile(``)},
|
||||
ExpectedStr: nil,
|
||||
},
|
||||
// 2
|
||||
testMatcher{
|
||||
Nm: "Single mid match",
|
||||
S: "This contains a single match in the middle of a string",
|
||||
@@ -196,6 +233,7 @@ func TestRemapString(t *testing.T) {
|
||||
"g1": []string{"match"},
|
||||
},
|
||||
},
|
||||
// 3
|
||||
testMatcher{
|
||||
Nm: "multi mid match",
|
||||
S: "This contains a single match and another match in the middle of a string",
|
||||
@@ -207,6 +245,7 @@ func TestRemapString(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 4
|
||||
testMatcher{
|
||||
Nm: "line match",
|
||||
S: "This\ncontains a\nsingle\nmatch\non a dedicated line",
|
||||
@@ -217,10 +256,12 @@ func TestRemapString(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 5
|
||||
testMatcher{
|
||||
Nm: "multiline match",
|
||||
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
|
||||
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
|
||||
Nm: "multiline match",
|
||||
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
|
||||
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
|
||||
All: true,
|
||||
ExpectedStr: map[string][]string{
|
||||
"g1": []string{
|
||||
"match",
|
||||
@@ -228,8 +269,32 @@ func TestRemapString(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
// 6
|
||||
// More closely mirrors something closer to real-life
|
||||
testMatcher{
|
||||
Nm: "mixed match",
|
||||
S: " # No longer log hits/reqs/resps to file.\n" +
|
||||
" #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" +
|
||||
" #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" +
|
||||
" access_log off;\n" +
|
||||
" error_log /dev/null;\n\n" +
|
||||
" ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" +
|
||||
" ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n",
|
||||
M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P<logpath>.+);\s*$`)},
|
||||
All: true,
|
||||
ExpectedStr: map[string][]string{
|
||||
"logpath": []string{
|
||||
"off",
|
||||
"/dev/null",
|
||||
},
|
||||
},
|
||||
},
|
||||
} {
|
||||
matches = m.M.MapString(m.S, false, false, false)
|
||||
if m.All {
|
||||
matches = m.M.MapStringAll(m.S, false, false, false)
|
||||
} else {
|
||||
matches = m.M.MapString(m.S, false, false, false)
|
||||
}
|
||||
t.Logf(
|
||||
"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
|
||||
midx+1,
|
||||
|
||||
34
remap/funcs_stringindexer.go
Normal file
34
remap/funcs_stringindexer.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package remap
|
||||
|
||||
// idx returns []int{s.start, s.end}.
|
||||
func (s *stringIndexer) idx() (i []int) {
|
||||
return []int{s.start, s.end}
|
||||
}
|
||||
|
||||
// idxStrict returns [2]int{s.start, s.end}.
|
||||
func (s *stringIndexer) idxStrict() (i [2]int) {
|
||||
return [2]int{s.start, s.end}
|
||||
}
|
||||
|
||||
/*
|
||||
idxSlice populates s.grpS using s.start and s.end.
|
||||
|
||||
If str is nil, it will use s.s.
|
||||
If str is nil and s.s is nil, it will panic with [ErrNoStr].
|
||||
|
||||
If the pattern does not match (s.start < 0 or s.end < 0),
|
||||
s.matched will be set to false (otherwise true).
|
||||
*/
|
||||
func (s *stringIndexer) idxSlice(str *string) {
|
||||
|
||||
if str == nil {
|
||||
if s.s == nil {
|
||||
panic(ErrNoStr)
|
||||
}
|
||||
str = s.s
|
||||
}
|
||||
|
||||
s.grpS, s.matched = strIdxSlicer(*str, s.idxStrict())
|
||||
|
||||
return
|
||||
}
|
||||
@@ -24,4 +24,45 @@ type (
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
stringIndexer struct {
	// group is the capture group index for this match.
	group int
	// start is the index (into the original string) at which the matched group begins.
	start int
	// end is the index at which the matched group ends.
	// It is handed to strIdxSlicer as the second element of the index pair.
	end int
	/*
		matched reports whether this group matched at all.

		Plain string regex returns cannot tell these apart, since e.g.
		`(?P<mygrp>\s*)`, `(?P<mygrp>(?:somestring)?)`, etc. can all be a *matched* "".

			* grpS == "" and matched == true:  the group DID match, as an empty string.
			* grpS == "" and matched == false: the group DID NOT MATCH the pattern.
			* grpS != "":                      matched can be disregarded entirely.
	*/
	matched bool
	// nm is the match group name.
	nm string
	/*
		grpS is the actual group-matched *substring*.

		It is ALWAYS one of:

			* the entirety of s
			* a substring of s
			* an empty string

		It never is, and cannot be, a SUPERset of s.
		To save memory it may be left unpopulated.
	*/
	grpS string
	/*
		s is the *entire* MATCHED (sub)string.
		To save memory it may be left unpopulated when it isn't needed.

		idxSlice falls back to s when it is not handed an explicit string.
		NOTE(review): start/end are documented as indexes into the original string,
		so that fallback presumably expects s to hold that same string - confirm.
	*/
	s *string
	// ptrn is the pattern applied to s.
	ptrn *regexp.Regexp
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user