| diff --git a/src/operators/rx.cc b/src/operators/rx.cc |
| index 0ba983d73..b4fc6ff4d 100644 |
| --- a/src/operators/rx.cc |
| +++ b/src/operators/rx.cc |
| @@ -38,7 +38,6 @@ |
| |
| bool Rx::evaluate(Transaction *transaction, Rule *rule, |
| const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) { |
| - std::list<SMatch> matches; |
| Regex *re; |
| |
| if (m_param.empty() && !m_string->m_containsMacro) { |
| @@ -52,29 +51,29 @@ |
| re = m_re; |
| } |
| |
| - matches = re->searchAll(input); |
| + std::vector<Utils::SMatchCapture> captures; |
| + re->searchOneMatch(input, captures); |
| + |
| if (rule && rule->m_containsCaptureAction && transaction) { |
| - int i = 0; |
| - matches.reverse(); |
| - for (const SMatch& a : matches) { |
| + for (const Utils::SMatchCapture& capture : captures) { |
| + const std::string capture_substring(input.substr(capture.m_offset,capture.m_length)); |
| transaction->m_collections.m_tx_collection->storeOrUpdateFirst( |
| - std::to_string(i), a.str()); |
| + std::to_string(capture.m_group), capture_substring); |
| ms_dbg_a(transaction, 7, "Added regex subexpression TX." + |
| - std::to_string(i) + ": " + a.str()); |
| - transaction->m_matched.push_back(a.str()); |
| - i++; |
| + std::to_string(capture.m_group) + ": " + capture_substring); |
| + transaction->m_matched.push_back(capture_substring); |
| } |
| } |
| |
| - for (const auto & i : matches) { |
| - logOffset(ruleMessage, i.offset(), i.str().size()); |
| + for (const auto & capture : captures) { |
| + logOffset(ruleMessage, capture.m_offset, capture.m_length); |
| } |
| |
| if (m_string->m_containsMacro) { |
| delete re; |
| } |
| |
| - if (matches.size() > 0) { |
| + if (captures.size() > 0) { |
| return true; |
| } |
| |
| diff --git a/src/utils/regex.cc b/src/utils/regex.cc |
| index be56e378a..0feb256cc 100644 |
| --- a/src/utils/regex.cc |
| +++ b/src/utils/regex.cc |
| @@ -16,10 +16,6 @@ |
| #include "src/utils/regex.h" |
| |
| #include <pcre.h> |
| -#include <sys/socket.h> |
| -#include <sys/types.h> |
| -#include <netinet/in.h> |
| -#include <arpa/inet.h> |
| #include <string> |
| #include <list> |
| |
| @@ -99,6 +95,26 @@ std::list<SMatch> Regex::searchAll(const std::string& s) const { |
| return retList; |
| } |
| |
| +bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const { |
| + const char *subject = s.c_str(); |
| + int ovector[OVECCOUNT]; |
| + |
| + int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT); |
| + |
| + for (int i = 0; i < rc; i++) { |
| + size_t start = ovector[2*i]; |
| + size_t end = ovector[2*i+1]; |
| + size_t len = end - start; |
| + if (end > s.size()) { |
| + continue; |
| + } |
| + SMatchCapture capture(i, start, len); |
| + captures.push_back(capture); |
| + } |
| + |
| + return (rc > 0); |
| +} |
| + |
| int Regex::search(const std::string& s, SMatch *match) const { |
| int ovector[OVECCOUNT]; |
| int ret = pcre_exec(m_pc, m_pce, s.c_str(), |
| diff --git a/src/utils/regex.h b/src/utils/regex.h |
| index 7dcc4dbf6..46dab6b83 100644 |
| --- a/src/utils/regex.h |
| +++ b/src/utils/regex.h |
| @@ -19,6 +19,7 @@ |
| #include <fstream> |
| #include <string> |
| #include <list> |
| +#include <vector> |
| |
| #ifndef SRC_UTILS_REGEX_H_ |
| #define SRC_UTILS_REGEX_H_ |
| @@ -47,6 +48,16 @@ |
| size_t m_offset; |
| }; |
| |
| +struct SMatchCapture { |
| + SMatchCapture(size_t group, size_t offset, size_t length) : |
| + m_group(group), |
| + m_offset(offset), |
| + m_length(length) { } |
| + |
| + size_t m_group; // E.g. 0 = full match; 6 = capture group 6 |
| + size_t m_offset; // offset of match within the analyzed string |
| + size_t m_length; |
| +}; |
| |
| class Regex { |
| public: |
| @@ -58,6 +69,7 @@ |
| Regex& operator=(const Regex&) = delete; |
| |
| std::list<SMatch> searchAll(const std::string& s) const; |
| + bool searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const; |
| int search(const std::string &s, SMatch *m) const; |
| int search(const std::string &s) const; |
| |
| diff --git a/test/test-cases/regression/variable-TX.json b/test/test-cases/regression/variable-TX.json |
| index 0cd45381b..904628e9b 100644 |
| --- a/test/test-cases/regression/variable-TX.json |
| +++ b/test/test-cases/regression/variable-TX.json |
| @@ -80,5 +80,143 @@ |
| "SecRule REQUEST_HEADERS \"@rx ([A-z]+)\" \"id:1,log,pass,capture,id:14\"", |
| "SecRule TX:0 \"@rx ([A-z]+)\" \"id:15\"" |
| ] |
| + }, |
| + { |
| + "enabled":1, |
| + "version_min":300000, |
| + "title":"Testing Variables :: capture group match after unused group", |
| + "client":{ |
| + "ip":"200.249.12.31", |
| + "port":123 |
| + }, |
| + "server":{ |
| + "ip":"200.249.12.31", |
| + "port":80 |
| + }, |
| + "request":{ |
| + "uri":"/?key=aadd", |
| + "method":"GET" |
| + }, |
| + "response":{ |
| + "headers":{ |
| + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", |
| + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", |
| + "Content-Type":"text/html" |
| + }, |
| + "body":[ |
| + "no need." |
| + ] |
| + }, |
| + "expected":{ |
| + "debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1" |
| + }, |
| + "rules":[ |
| + "SecRuleEngine On", |
| + "SecRule ARGS \"@rx (aa)(bb|cc)?(dd)\" \"id:1,log,pass,capture,id:16\"", |
| + "SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\"" |
| + ] |
| + }, |
| + { |
| + "enabled":1, |
| + "version_min":300000, |
| + "title":"Testing Variables :: empty capture group match followed by nonempty capture group", |
| + "client":{ |
| + "ip":"200.249.12.31", |
| + "port":123 |
| + }, |
| + "server":{ |
| + "ip":"200.249.12.31", |
| + "port":80 |
| + }, |
| + "request":{ |
| + "uri":"/?key=aadd", |
| + "method":"GET" |
| + }, |
| + "response":{ |
| + "headers":{ |
| + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", |
| + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", |
| + "Content-Type":"text/html" |
| + }, |
| + "body":[ |
| + "no need." |
| + ] |
| + }, |
| + "expected":{ |
| + "debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1" |
| + }, |
| + "rules":[ |
| + "SecRuleEngine On", |
| + "SecRule ARGS \"@rx (aa)(bb|cc|)(dd)\" \"id:18,phase:1,log,pass,capture\"", |
| + "SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\"" |
| + ] |
| + }, |
| + { |
| + "enabled":1, |
| + "version_min":300000, |
| + "title":"Testing Variables :: repeating capture group -- alternates", |
| + "client":{ |
| + "ip":"200.249.12.31", |
| + "port":123 |
| + }, |
| + "server":{ |
| + "ip":"200.249.12.31", |
| + "port":80 |
| + }, |
| + "request":{ |
| + "uri":"/?key=_abc123_", |
| + "method":"GET" |
| + }, |
| + "response":{ |
| + "headers":{ |
| + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", |
| + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", |
| + "Content-Type":"text/html" |
| + }, |
| + "body":[ |
| + "no need." |
| + ] |
| + }, |
| + "expected":{ |
| + "debug_log":"Added regex subexpression TX\\.2: abc[\\s\\S]*Added regex subexpression TX\\.3: 123" |
| + }, |
| + "rules":[ |
| + "SecRuleEngine On", |
| + "SecRule ARGS \"@rx _((?:(abc)|(123))+)_\" \"id:18,phase:1,log,pass,capture\"" |
| + ] |
| + }, |
| + { |
| + "enabled":1, |
| + "version_min":300000, |
| + "title":"Testing Variables :: repeating capture group -- same (nested)", |
| + "client":{ |
| + "ip":"200.249.12.31", |
| + "port":123 |
| + }, |
| + "server":{ |
| + "ip":"200.249.12.31", |
| + "port":80 |
| + }, |
| + "request":{ |
| + "uri":"/?key=a:5a:8a:9", |
| + "method":"GET" |
| + }, |
| + "response":{ |
| + "headers":{ |
| + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", |
| + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", |
| + "Content-Type":"text/html" |
| + }, |
| + "body":[ |
| + "no need." |
| + ] |
| + }, |
| + "expected":{ |
| + "debug_log":"Added regex subexpression TX\\.1: 5[\\s\\S]*Added regex subexpression TX\\.2: 8[\\s\\S]*Added regex subexpression TX\\.3: 9" |
| + }, |
| + "rules":[ |
| + "SecRuleEngine On", |
| + "SecRule ARGS \"@rx a:([0-9])(?:a:([0-9])(?:a:([0-9]))*)*\" \"id:18,phase:1,log,pass,capture\"" |
| + ] |
| } |
| ] |