Extract helper function to only add regex anchors if necessary
This avoids double-anchoring, for example if users put their own anchors in mappings. It also reduces clutter in the RegexMatch/RegexReplace functions now that both need to do the same anchoring.
This commit is contained in:
parent
fb4093be77
commit
6d416d5f90
|
@ -13,9 +13,31 @@
|
|||
#include "llvm/Support/Regex.h"
|
||||
|
||||
#include "iwyu_port.h"
|
||||
#include "iwyu_string_util.h"
|
||||
|
||||
namespace include_what_you_use {
|
||||
|
||||
namespace {
|
||||
|
||||
// Add ^...$ start/end anchoring if they don't already exist.
// This is useful to transform from search-inside-string semantics to match-
// whole-string semantics for regex implementations that don't support the
// latter.
//
// A leading '^' is taken as an existing start anchor. A trailing '$' is only
// taken as an existing end anchor when it is not escaped: in a pattern such
// as "foo\$" the dollar is a literal character, so an end anchor is still
// appended ("^foo\$$"). An even number of backslashes before the final '$'
// (e.g. "foo\\$") means the backslashes escape each other and the '$' is a
// real anchor.
//
// Returns a new string; the input pattern is not modified.
std::string Anchored(const std::string& pattern) {
  const bool has_start_anchor = !pattern.empty() && pattern.front() == '^';

  bool has_end_anchor = false;
  if (!pattern.empty() && pattern.back() == '$') {
    // Count the run of backslashes immediately preceding the final '$'.
    // An odd count means the '$' itself is escaped and is not an anchor.
    std::string::size_type backslashes = 0;
    for (std::string::size_type i = pattern.size() - 1;
         i > 0 && pattern[i - 1] == '\\'; --i) {
      ++backslashes;
    }
    has_end_anchor = (backslashes % 2 == 0);
  }

  std::string anchored;
  anchored.reserve(pattern.size() + 2);
  if (!has_start_anchor) {
    anchored += '^';
  }
  anchored += pattern;
  if (!has_end_anchor) {
    anchored += '$';
  }
  return anchored;
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
bool ParseRegexDialect(const char* str, RegexDialect* dialect) {
|
||||
if (strcmp(str, "llvm") == 0) {
|
||||
*dialect = RegexDialect::LLVM;
|
||||
|
@ -31,9 +53,8 @@ bool RegexMatch(RegexDialect dialect, const std::string& str,
|
|||
const std::string& pattern) {
|
||||
switch (dialect) {
|
||||
case RegexDialect::LLVM: {
|
||||
// llvm::Regex::match has search semantics. Enclose the pattern in ^...$
|
||||
// for start/end anchoring.
|
||||
llvm::Regex r("^" + pattern + "$");
|
||||
// llvm::Regex::match has search semantics; ensure anchored.
|
||||
llvm::Regex r(Anchored(pattern));
|
||||
return r.match(str);
|
||||
}
|
||||
|
||||
|
@ -50,16 +71,14 @@ std::string RegexReplace(RegexDialect dialect, const std::string& str,
|
|||
const std::string& replacement) {
|
||||
switch (dialect) {
|
||||
case RegexDialect::LLVM: {
|
||||
// llvm::Regex::sub has search semantics. Enclose the pattern in ^...$
|
||||
// for start/end anchoring.
|
||||
llvm::Regex r("^" + pattern + "$");
|
||||
// llvm::Regex::sub has search semantics; ensure anchored.
|
||||
llvm::Regex r(Anchored(pattern));
|
||||
return r.sub(replacement, str);
|
||||
}
|
||||
|
||||
case RegexDialect::ECMAScript: {
|
||||
// std::regex_replace has search semantics. Enclose the pattern in ^...$
|
||||
// for start/end anchoring.
|
||||
std::regex r("^" + pattern + "$", std::regex_constants::ECMAScript);
|
||||
// std::regex_replace has search semantics; ensure anchored.
|
||||
std::regex r(Anchored(pattern), std::regex_constants::ECMAScript);
|
||||
return std::regex_replace(str, r, replacement,
|
||||
std::regex_constants::format_first_only);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue