summary refs log tree commit diff
path: root/jim-regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- jim-regexp.c 36
1 files changed, 21 insertions, 15 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 8eb457d..b0411f8 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -55,13 +55,18 @@
#include <regex.h>
#endif
#include "jim.h"
+#include "utf8.h"
static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr)
{
- regfree(objPtr->internalRep.regexpValue.compre);
- Jim_Free(objPtr->internalRep.regexpValue.compre);
+ regfree(objPtr->internalRep.ptrIntValue.ptr);
+ Jim_Free(objPtr->internalRep.ptrIntValue.ptr);
}
+/* internal rep is stored in ptrIntValue
+ * ptr = compiled regex
+ * int1 = flags
+ */
static const Jim_ObjType regexpObjType = {
"regexp",
FreeRegexpInternalRep,
@@ -78,9 +83,9 @@ static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned f
/* Check if the object is already an uptodate variable */
if (objPtr->typePtr == &regexpObjType &&
- objPtr->internalRep.regexpValue.compre && objPtr->internalRep.regexpValue.flags == flags) {
+ objPtr->internalRep.ptrIntValue.ptr && objPtr->internalRep.ptrIntValue.int1 == flags) {
/* nothing to do */
- return objPtr->internalRep.regexpValue.compre;
+ return objPtr->internalRep.ptrIntValue.ptr;
}
/* Not a regexp or the flags do not match */
@@ -102,8 +107,8 @@ static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned f
Jim_FreeIntRep(interp, objPtr);
objPtr->typePtr = &regexpObjType;
- objPtr->internalRep.regexpValue.flags = flags;
- objPtr->internalRep.regexpValue.compre = compre;
+ objPtr->internalRep.ptrIntValue.int1 = flags;
+ objPtr->internalRep.ptrIntValue.ptr = compre;
return compre;
}
@@ -222,7 +227,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
source_str += source_len;
}
else if (offset > 0) {
- source_str += offset;
+ source_str += utf8_index(source_str, offset);
}
eflags |= REG_NOTBOL;
}
@@ -276,16 +281,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
}
}
else {
- int len = pmatch[j].rm_eo - pmatch[j].rm_so;
-
if (opt_indices) {
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
- offset + pmatch[j].rm_so));
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
- offset + pmatch[j].rm_so + len - 1));
+ /* rm_so and rm_eo are byte offsets. We need char offsets */
+ int so = utf8_strlen(source_str, pmatch[j].rm_so);
+ int eo = utf8_strlen(source_str, pmatch[j].rm_eo);
+ Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + so));
+ Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + eo - 1));
}
else {
- Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
+ Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, pmatch[j].rm_eo - pmatch[j].rm_so);
}
}
@@ -306,7 +310,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
try_next_match:
if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
if (pmatch[0].rm_eo) {
- offset += pmatch[0].rm_eo;
+ offset += utf8_strlen(source_str, pmatch[0].rm_eo);
source_str += pmatch[0].rm_eo;
}
else {
@@ -437,6 +441,8 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
offset = 0;
}
}
+ /* Convert from character offset to byte offset */
+ offset = utf8_index(source_str, offset);
/* Copy the part before -start */
Jim_AppendString(interp, resultObj, source_str, offset);