diff options
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- | jim-regexp.c | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/jim-regexp.c b/jim-regexp.c index 8eb457d..b0411f8 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -55,13 +55,18 @@ #include <regex.h> #endif #include "jim.h" +#include "utf8.h" static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr) { - regfree(objPtr->internalRep.regexpValue.compre); - Jim_Free(objPtr->internalRep.regexpValue.compre); + regfree(objPtr->internalRep.ptrIntValue.ptr); + Jim_Free(objPtr->internalRep.ptrIntValue.ptr); } +/* internal rep is stored in ptrIntvalue + * ptr = compiled regex + * int1 = flags + */ static const Jim_ObjType regexpObjType = { "regexp", FreeRegexpInternalRep, @@ -78,9 +83,9 @@ static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned f /* Check if the object is already an uptodate variable */ if (objPtr->typePtr == &regexpObjType && - objPtr->internalRep.regexpValue.compre && objPtr->internalRep.regexpValue.flags == flags) { + objPtr->internalRep.ptrIntValue.ptr && objPtr->internalRep.ptrIntValue.int1 == flags) { /* nothing to do */ - return objPtr->internalRep.regexpValue.compre; + return objPtr->internalRep.ptrIntValue.ptr; } /* Not a regexp or the flags do not match */ @@ -102,8 +107,8 @@ static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned f Jim_FreeIntRep(interp, objPtr); objPtr->typePtr = &regexpObjType; - objPtr->internalRep.regexpValue.flags = flags; - objPtr->internalRep.regexpValue.compre = compre; + objPtr->internalRep.ptrIntValue.int1 = flags; + objPtr->internalRep.ptrIntValue.ptr = compre; return compre; } @@ -222,7 +227,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) source_str += source_len; } else if (offset > 0) { - source_str += offset; + source_str += utf8_index(source_str, offset); } eflags |= REG_NOTBOL; } @@ -276,16 +281,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) } } else { - int len = pmatch[j].rm_eo - pmatch[j].rm_so; - if (opt_indices) { - Jim_ListAppendElement(interp, 
resultObj, Jim_NewIntObj(interp, - offset + pmatch[j].rm_so)); - Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, - offset + pmatch[j].rm_so + len - 1)); + /* rm_so and rm_eo are byte offsets. We need char offsets */ + int so = utf8_strlen(source_str, pmatch[j].rm_so); + int eo = utf8_strlen(source_str, pmatch[j].rm_eo); + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + so)); + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + eo - 1)); } else { - Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len); + Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, pmatch[j].rm_eo - pmatch[j].rm_so); } } @@ -306,7 +310,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) try_next_match: if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) { if (pmatch[0].rm_eo) { - offset += pmatch[0].rm_eo; + offset += utf8_strlen(source_str, pmatch[0].rm_eo); source_str += pmatch[0].rm_eo; } else { @@ -437,6 +441,8 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) offset = 0; } } + /* Convert from character offset to byte offset */ + offset = utf8_index(source_str, offset); /* Copy the part before -start */ Jim_AppendString(interp, resultObj, source_str, offset); |