Add --regex option
As reported in issue #981, using std::regex in IWYU has caused a tremendous performance regression for large mapping files containing regex mappings. $ cat t.cc #include <string> # with llvm::Regex $ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc ... real 0m0,529s user 0m0,509s sys 0m0,020s # with std::regex $ time include-what-you-use -Xiwyu --mapping_file=qt5_11.imp t.cc ... real 0m29,870s user 0m29,717s sys 0m0,012s qt5_11.imp contains 2300+ regex mappings, and <string> has a bunch of includes, so this is a good testbed for regular expression engines, but over 50x slower is not the result we were hoping for. The reason we switched to std::regex was to get support for negative lookaround (llvm::Regex does not have it), but exotic regexes in mappings are pretty rare, and this is a significant performance hit. Introduce a --regex option to select regex dialect, with documented tradeoffs. Put the default back to LLVM's fast implementation. This fixes issue #981.
This commit is contained in:
parent
0a714265c0
commit
cf53880822
|
@ -134,6 +134,20 @@ No new includes are added, existing ones are removed.
|
|||
.B \-\-quoted_includes_first
|
||||
When sorting includes, place quoted includes first.
|
||||
.TP
|
||||
.BI \-\-regex= dialect
|
||||
Use the specified regex
|
||||
.IR dialect
|
||||
for matching in IWYU:
|
||||
.RS
|
||||
.TP
|
||||
.B llvm
|
||||
Fast and simple extended regular expressions. This is the default.
|
||||
.TP
|
||||
.B ecmascript
|
||||
Slower but more full-featured regular expressions with support for lookaround
|
||||
assertions, etc.
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-transitive_includes_only
|
||||
Do not suggest that a file should add
|
||||
.IR foo.h " unless " foo.h
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "iwyu_location_util.h"
|
||||
#include "iwyu_path_util.h"
|
||||
#include "iwyu_port.h" // for CHECK_, etc
|
||||
#include "iwyu_regex.h"
|
||||
#include "iwyu_stl_util.h"
|
||||
#include "iwyu_string_util.h"
|
||||
#include "iwyu_verrs.h"
|
||||
|
@ -108,6 +109,9 @@ static void PrintHelp(const char* extra_msg) {
|
|||
" --error_always[=N]: always exit with N (default: 1) (for use\n"
|
||||
" with 'make -k')\n"
|
||||
" --debug=flag[,flag...]: debug flags (undocumented)\n"
|
||||
" --regex=<dialect>: use specified regex dialect in IWYU:\n"
|
||||
" llvm: fast and simple (default)\n"
|
||||
" ecmascript: slower, but more feature-complete\n"
|
||||
"\n"
|
||||
"In addition to IWYU-specific options you can specify the following\n"
|
||||
"options without -Xiwyu prefix:\n"
|
||||
|
@ -197,7 +201,8 @@ CommandlineFlags::CommandlineFlags()
|
|||
quoted_includes_first(false),
|
||||
cxx17ns(false),
|
||||
exit_code_error(EXIT_SUCCESS),
|
||||
exit_code_always(EXIT_SUCCESS) {
|
||||
exit_code_always(EXIT_SUCCESS),
|
||||
regex_dialect(RegexDialect::LLVM) {
|
||||
// Always keep Qt .moc includes; its moc compiler does its own IWYU analysis.
|
||||
keep.emplace("*.moc");
|
||||
}
|
||||
|
@ -222,9 +227,10 @@ int CommandlineFlags::ParseArgv(int argc, char** argv) {
|
|||
{"error", optional_argument, nullptr, 'e'},
|
||||
{"error_always", optional_argument, nullptr, 'a'},
|
||||
{"debug", required_argument, nullptr, 'd'},
|
||||
{"regex", required_argument, nullptr, 'r'},
|
||||
{nullptr, 0, nullptr, 0}
|
||||
};
|
||||
static const char shortopts[] = "v:c:m:d:n";
|
||||
static const char shortopts[] = "v:c:m:d:nr";
|
||||
while (true) {
|
||||
switch (getopt_long(argc, argv, shortopts, longopts, nullptr)) {
|
||||
case 'c': AddGlobToReportIWYUViolationsFor(optarg); break;
|
||||
|
@ -296,6 +302,12 @@ int CommandlineFlags::ParseArgv(int argc, char** argv) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case 'r':
|
||||
if (!ParseRegexDialect(optarg, ®ex_dialect)) {
|
||||
PrintHelp("FATAL ERROR: unsupported regex dialect.");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
case -1: return optind; // means 'no more input'
|
||||
default:
|
||||
PrintHelp("FATAL ERROR: unknown flag.");
|
||||
|
@ -401,7 +413,8 @@ void InitGlobals(clang::SourceManager* sm, clang::HeaderSearch* header_search) {
|
|||
vector<HeaderSearchPath> search_paths =
|
||||
ComputeHeaderSearchPaths(header_search);
|
||||
SetHeaderSearchPaths(search_paths);
|
||||
include_picker = new IncludePicker(GlobalFlags().no_default_mappings);
|
||||
include_picker = new IncludePicker(GlobalFlags().no_default_mappings,
|
||||
GlobalFlags().regex_dialect);
|
||||
function_calls_full_use_cache = new FullUseCache;
|
||||
class_members_full_use_cache = new FullUseCache;
|
||||
|
||||
|
@ -494,7 +507,8 @@ void InitGlobalsAndFlagsForTesting() {
|
|||
commandline_flags = new CommandlineFlags;
|
||||
source_manager = nullptr;
|
||||
data_getter = nullptr;
|
||||
include_picker = new IncludePicker(GlobalFlags().no_default_mappings);
|
||||
include_picker = new IncludePicker(GlobalFlags().no_default_mappings,
|
||||
GlobalFlags().regex_dialect);
|
||||
function_calls_full_use_cache = new FullUseCache;
|
||||
class_members_full_use_cache = new FullUseCache;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ using std::set;
|
|||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
enum class RegexDialect;
|
||||
class FullUseCache;
|
||||
class IncludePicker;
|
||||
class SourceManagerCharacterDataGetter;
|
||||
|
@ -98,6 +99,7 @@ struct CommandlineFlags {
|
|||
int exit_code_error; // Exit with this code for iwyu violations.
|
||||
int exit_code_always; // Always exit with this exit code.
|
||||
set<string> dbg_flags; // Debug flags.
|
||||
RegexDialect regex_dialect; // Dialect for regular expression processing.
|
||||
};
|
||||
|
||||
const CommandlineFlags& GlobalFlags();
|
||||
|
|
|
@ -1231,8 +1231,10 @@ bool MappedInclude::HasAbsoluteQuotedInclude() const {
|
|||
return IsAbsolutePath(path);
|
||||
}
|
||||
|
||||
IncludePicker::IncludePicker(bool no_default_mappings)
|
||||
: has_called_finalize_added_include_lines_(false) {
|
||||
IncludePicker::IncludePicker(bool no_default_mappings,
|
||||
RegexDialect regex_dialect)
|
||||
: has_called_finalize_added_include_lines_(false),
|
||||
regex_dialect(regex_dialect) {
|
||||
if (!no_default_mappings) {
|
||||
AddDefaultMappings();
|
||||
}
|
||||
|
@ -1437,7 +1439,8 @@ void IncludePicker::ExpandRegexes() {
|
|||
const vector<MappedInclude>& map_to = filepath_include_map_[regex_key];
|
||||
// Enclose the regex in ^(...)$ for full match.
|
||||
std::string regex("^(" + regex_key.substr(1) + ")$");
|
||||
if (RegexMatch(hdr, regex) && !ContainsQuotedInclude(map_to, hdr)) {
|
||||
if (RegexMatch(regex_dialect, hdr, regex) &&
|
||||
!ContainsQuotedInclude(map_to, hdr)) {
|
||||
Extend(&filepath_include_map_[hdr], filepath_include_map_[regex_key]);
|
||||
MarkVisibility(&include_visibility_map_, hdr,
|
||||
include_visibility_map_[regex_key]);
|
||||
|
@ -1445,7 +1448,7 @@ void IncludePicker::ExpandRegexes() {
|
|||
}
|
||||
for (const string& regex_key : friend_to_headers_map_regex_keys) {
|
||||
std::string regex("^(" + regex_key.substr(1) + ")$");
|
||||
if (RegexMatch(hdr, regex)) {
|
||||
if (RegexMatch(regex_dialect, hdr, regex)) {
|
||||
InsertAllInto(friend_to_headers_map_[regex_key],
|
||||
&friend_to_headers_map_[hdr]);
|
||||
}
|
||||
|
|
|
@ -65,6 +65,7 @@ using std::vector;
|
|||
|
||||
struct IncludeMapEntry;
|
||||
|
||||
enum class RegexDialect;
|
||||
enum IncludeVisibility { kUnusedVisibility, kPublic, kPrivate };
|
||||
|
||||
// When a symbol or file is mapped to an include, that include is represented
|
||||
|
@ -91,7 +92,7 @@ class IncludePicker {
|
|||
// visibility of the respective files.
|
||||
typedef map<string, IncludeVisibility> VisibilityMap;
|
||||
|
||||
explicit IncludePicker(bool no_default_mappings);
|
||||
IncludePicker(bool no_default_mappings, RegexDialect regex_dialect);
|
||||
|
||||
// ----- Routines to dynamically modify the include-picker
|
||||
|
||||
|
@ -291,6 +292,9 @@ class IncludePicker {
|
|||
|
||||
// Make sure we don't do any non-const operations after finalizing.
|
||||
bool has_called_finalize_added_include_lines_;
|
||||
|
||||
// Controls regex dialect to use for mappings.
|
||||
RegexDialect regex_dialect;
|
||||
}; // class IncludePicker
|
||||
|
||||
} // namespace include_what_you_use
|
||||
|
|
|
@ -10,12 +10,38 @@
|
|||
#include "iwyu_regex.h"
|
||||
|
||||
#include <regex>
|
||||
#include "llvm/Support/Regex.h"
|
||||
|
||||
#include "iwyu_port.h"
|
||||
|
||||
namespace include_what_you_use {
|
||||
|
||||
bool RegexMatch(const std::string& str, const std::string& pattern) {
|
||||
std::regex r(pattern);
|
||||
return std::regex_match(str, r);
|
||||
bool ParseRegexDialect(const char* str, RegexDialect* dialect) {
|
||||
if (strcmp(str, "llvm") == 0) {
|
||||
*dialect = RegexDialect::LLVM;
|
||||
return true;
|
||||
} else if (strcmp(str, "ecmascript") == 0) {
|
||||
*dialect = RegexDialect::ECMAScript;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if str matches regular expression pattern for the given dialect.
|
||||
bool RegexMatch(RegexDialect dialect, const std::string& str,
|
||||
const std::string& pattern) {
|
||||
switch (dialect) {
|
||||
case RegexDialect::LLVM: {
|
||||
llvm::Regex r(pattern);
|
||||
return r.match(str);
|
||||
}
|
||||
|
||||
case RegexDialect::ECMAScript: {
|
||||
std::regex r(pattern, std::regex_constants::ECMAScript);
|
||||
return std::regex_match(str, r);
|
||||
}
|
||||
}
|
||||
CHECK_UNREACHABLE_("Unexpected regex dialect");
|
||||
}
|
||||
|
||||
} // namespace include_what_you_use
|
||||
|
|
10
iwyu_regex.h
10
iwyu_regex.h
|
@ -14,8 +14,14 @@
|
|||
|
||||
namespace include_what_you_use {
|
||||
|
||||
// Returns true if str matches regular expression pattern.
|
||||
bool RegexMatch(const std::string& str, const std::string& pattern);
|
||||
enum class RegexDialect { LLVM = 0, ECMAScript = 1 };
|
||||
|
||||
// Parse dialect string to enum.
|
||||
bool ParseRegexDialect(const char* str, RegexDialect* dialect);
|
||||
|
||||
// Returns true if str matches regular expression pattern for the given dialect.
|
||||
bool RegexMatch(RegexDialect dialect, const std::string& str,
|
||||
const std::string& pattern);
|
||||
|
||||
} // namespace include_what_you_use
|
||||
|
||||
|
|
Loading…
Reference in New Issue