2022-04-24 19:13:59 +01:00
|
|
|
//===--- iwyu_regex.cc - iwyu regex implementation ------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "iwyu_regex.h"
|
|
|
|
|
|
|
|
#include <regex>
|
Add --regex option
As reported in issue #981, using std::regex in IWYU has caused a
tremendous performance regression for large mapping files containing
regex mappings.
$ cat t.cc
#include <string>
# with llvm::Regex
$ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc
...
real 0m0,529s
user 0m0,509s
sys 0m0,020s
# with std::regex
$ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc
...
real 0m29,870s
user 0m29,717s
sys 0m0,012s
qt5_11.imp contains 2300+ regex mappings, and <string> has a bunch of
includes, so this is a good testbed for regular expression engines, but
over 50x slower is not the result we were hoping for.
The reason we switched to std::regex was to get support for negative
lookaround (llvm::Regex does not have it), but exotic regexes in
mappings are pretty rare, and this is a significant performance hit.
Introduce a --regex option to select regex dialect, with documented
tradeoffs. Put the default back to LLVM's fast implementation.
This fixes issue #981.
2022-09-02 19:55:48 +01:00
|
|
|
#include "llvm/Support/Regex.h"
|
|
|
|
|
|
|
|
#include "iwyu_port.h"
|
2022-10-09 20:05:01 +01:00
|
|
|
#include "iwyu_string_util.h"
|
2022-04-24 19:13:59 +01:00
|
|
|
|
|
|
|
namespace include_what_you_use {
|
|
|
|
|
2022-10-09 20:05:01 +01:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Add ^...$ start/end anchoring if they don't already exist.
// This is useful to transform from search-inside-string semantics to match-
// whole-string semantics for regex implementations that don't support the
// latter.
//
// A trailing '$' only counts as an existing end anchor when it is not
// escaped: in "foo\$" the dollar sign is a literal character, so an anchor
// still has to be appended ("^foo\$$"). A '$' preceded by an even number of
// backslashes (e.g. "foo\\$") is a real anchor following literal backslashes.
//
// NOTE(review): a pattern with top-level alternation ("a|b") becomes "^a|b$",
// where each anchor binds to one alternative only. Wrapping the pattern in a
// group would fix that but renumbers capture groups, so it is not done here.
std::string Anchored(const std::string& pattern) {
  const bool has_start_anchor = !pattern.empty() && pattern.front() == '^';

  bool has_end_anchor = false;
  if (!pattern.empty() && pattern.back() == '$') {
    // Count the run of backslashes immediately preceding the final '$'; an
    // odd count means the '$' itself is escaped.
    std::string::size_type pos = pattern.size() - 1;
    std::string::size_type backslashes = 0;
    while (pos > 0 && pattern[pos - 1] == '\\') {
      ++backslashes;
      --pos;
    }
    has_end_anchor = (backslashes % 2 == 0);
  }

  std::string anchored;
  anchored.reserve(pattern.size() + 2);
  if (!has_start_anchor) {
    anchored += '^';
  }
  anchored += pattern;
  if (!has_end_anchor) {
    anchored += '$';
  }
  return anchored;
}
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
Add --regex option
As reported in issue #981, using std::regex in IWYU has caused a
tremendous performance regression for large mapping files containing
regex mappings.
$ cat t.cc
#include <string>
# with llvm::Regex
$ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc
...
real 0m0,529s
user 0m0,509s
sys 0m0,020s
# with std::regex
$ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc
...
real 0m29,870s
user 0m29,717s
sys 0m0,012s
qt5_11.imp contains 2300+ regex mappings, and <string> has a bunch of
includes, so this is a good testbed for regular expression engines, but
over 50x slower is not the result we were hoping for.
The reason we switched to std::regex was to get support for negative
lookaround (llvm::Regex does not have it), but exotic regexes in
mappings are pretty rare, and this is a significant performance hit.
Introduce a --regex option to select regex dialect, with documented
tradeoffs. Put the default back to LLVM's fast implementation.
This fixes issue #981.
2022-09-02 19:55:48 +01:00
|
|
|
bool ParseRegexDialect(const char* str, RegexDialect* dialect) {
|
|
|
|
if (strcmp(str, "llvm") == 0) {
|
|
|
|
*dialect = RegexDialect::LLVM;
|
|
|
|
return true;
|
|
|
|
} else if (strcmp(str, "ecmascript") == 0) {
|
|
|
|
*dialect = RegexDialect::ECMAScript;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if the whole of str matches pattern, interpreted according to
// the selected regex dialect.
bool RegexMatch(RegexDialect dialect, const std::string& str,
                const std::string& pattern) {
  switch (dialect) {
    case RegexDialect::ECMAScript: {
      // std::regex_match already requires the entire string to match.
      const std::regex compiled(pattern, std::regex_constants::ECMAScript);
      return std::regex_match(str, compiled);
    }

    case RegexDialect::LLVM: {
      // llvm::Regex::match has search semantics; ensure anchored.
      const std::string whole_string_pattern = Anchored(pattern);
      llvm::Regex compiled(whole_string_pattern);
      return compiled.match(str);
    }
  }
  CHECK_UNREACHABLE_("Unexpected regex dialect");
}
|
|
|
|
|
2022-10-07 15:41:07 +01:00
|
|
|
// Returns the result of substituting replacement for the match of pattern in
// str, using the selected regex dialect. The pattern is anchored first in both
// dialects, because both underlying replace operations search inside the
// string rather than matching it as a whole.
std::string RegexReplace(RegexDialect dialect, const std::string& str,
                         const std::string& pattern,
                         const std::string& replacement) {
  switch (dialect) {
    case RegexDialect::ECMAScript: {
      // std::regex_replace has search semantics; ensure anchored.
      const std::string whole_string_pattern = Anchored(pattern);
      const std::regex compiled(whole_string_pattern,
                                std::regex_constants::ECMAScript);
      // Only the first match is replaced.
      return std::regex_replace(str, compiled, replacement,
                                std::regex_constants::format_first_only);
    }

    case RegexDialect::LLVM: {
      // llvm::Regex::sub has search semantics; ensure anchored.
      const std::string whole_string_pattern = Anchored(pattern);
      llvm::Regex compiled(whole_string_pattern);
      return compiled.sub(replacement, str);
    }
  }
  CHECK_UNREACHABLE_("Unexpected regex dialect");
}
|
|
|
|
|
2022-04-24 19:13:59 +01:00
|
|
|
} // namespace include_what_you_use
|