Extract helper function to only add regex anchors if necessary

This avoids double-anchoring, for example if users put their own anchors
in mappings. It also reduces clutter in the RegexMatch/RegexReplace
functions now that both need to do the same anchoring.
This commit is contained in:
Kim Gräsman 2022-10-09 21:05:01 +02:00
parent fb4093be77
commit 6d416d5f90
1 changed files with 28 additions and 9 deletions

View File

@ -13,9 +13,31 @@
#include "llvm/Support/Regex.h"
#include "iwyu_port.h"
#include "iwyu_string_util.h"
namespace include_what_you_use {
namespace {
// Add ^...$ start/end anchoring unless the pattern already carries it.
// This is useful to transform from search-inside-string semantics to match-
// whole-string semantics for regex implementations that don't support the
// latter.
//
// A leading '^' always counts as a start anchor. A trailing '$' only counts
// as an end anchor when it is not escaped: in "foo\$" the dollar is a literal
// character, so an anchor still has to be appended, whereas in "foo\\$" the
// backslash itself is escaped and the dollar is a real anchor.
//
// Returns a copy of `pattern` with the missing anchors added; an empty
// pattern yields "^$".
std::string Anchored(const std::string& pattern) {
  const bool has_start_anchor = !pattern.empty() && pattern.front() == '^';

  bool has_end_anchor = false;
  if (!pattern.empty() && pattern.back() == '$') {
    // Count the run of backslashes immediately preceding the final '$'.
    // An odd count means the last backslash escapes the dollar sign.
    std::string::size_type pos = pattern.size() - 1;
    std::string::size_type backslashes = 0;
    while (pos > 0 && pattern[pos - 1] == '\\') {
      ++backslashes;
      --pos;
    }
    has_end_anchor = (backslashes % 2 == 0);
  }

  std::string result;
  result.reserve(pattern.size() + 2);
  if (!has_start_anchor) {
    result += '^';
  }
  result += pattern;
  if (!has_end_anchor) {
    result += '$';
  }
  return result;
}
} // anonymous namespace
bool ParseRegexDialect(const char* str, RegexDialect* dialect) {
if (strcmp(str, "llvm") == 0) {
*dialect = RegexDialect::LLVM;
@ -31,9 +53,8 @@ bool RegexMatch(RegexDialect dialect, const std::string& str,
const std::string& pattern) {
switch (dialect) {
case RegexDialect::LLVM: {
// llvm::Regex::match has search semantics. Enclose the pattern in ^...$
// for start/end anchoring.
llvm::Regex r("^" + pattern + "$");
// llvm::Regex::match has search semantics; ensure anchored.
llvm::Regex r(Anchored(pattern));
return r.match(str);
}
@ -50,16 +71,14 @@ std::string RegexReplace(RegexDialect dialect, const std::string& str,
const std::string& replacement) {
switch (dialect) {
case RegexDialect::LLVM: {
// llvm::Regex::sub has search semantics. Enclose the pattern in ^...$
// for start/end anchoring.
llvm::Regex r("^" + pattern + "$");
// llvm::Regex::sub has search semantics; ensure anchored.
llvm::Regex r(Anchored(pattern));
return r.sub(replacement, str);
}
case RegexDialect::ECMAScript: {
// std::regex_replace has search semantics. Enclose the pattern in ^...$
// for start/end anchoring.
std::regex r("^" + pattern + "$", std::regex_constants::ECMAScript);
// std::regex_replace has search semantics; ensure anchored.
std::regex r(Anchored(pattern), std::regex_constants::ECMAScript);
return std::regex_replace(str, r, replacement,
std::regex_constants::format_first_only);
}