From 272786908cf130b7d84324af141dbad6b7695a58 Mon Sep 17 00:00:00 2001 From: Aaron Bungay Date: Sun, 19 Apr 2020 18:22:18 -0400 Subject: [PATCH] smtp/mime: configurable url scheme extraction Parse extract-urls-schemes from the mime config, e.g. 'extract-urls-schemes: [http, https, ftp, mailto]'. Update the MimeDecConfig struct with the new URL extraction fields. Change app-layer-smtp.c & util-decode-mime.c to initialize the new struct fields for MimeDecConfig. Set the default value for extract-urls-schemes, if not found in the config, to 'extract-urls-schemes: [http]' for backwards compatibility. Use the schemes defined in the mime config value for extract-urls-schemes to search for URLs starting with those scheme names followed by "://". Log the URLs with the scheme + '://' at the start if log-url-scheme is set in the mime config; otherwise the old behaviour is kept and the URLs are logged with the schemes stripped. Remove the unused constant URL_STR now that URLs are searched for using the extract-urls-schemes mime config values instead of only URLs starting with 'http://'. Add commented-out new options for extract-urls-schemes and log-url-scheme to suricata.yaml.in. Update FindUrlStrings comments. Remove outdated comments/commented-out code from FindUrlStrings. Update the mime test case, which now needs the schemes list to be set. Add test cases for the FindUrlStrings() function. 
Feature: #2054 --- src/app-layer-smtp.c | 66 +++++- src/util-decode-mime.c | 442 ++++++++++++++++++++++++++++++++++------- src/util-decode-mime.h | 4 + suricata.yaml.in | 6 + 4 files changed, 443 insertions(+), 75 deletions(-) diff --git a/src/app-layer-smtp.c b/src/app-layer-smtp.c index 54eed744b2..8389d1812c 100644 --- a/src/app-layer-smtp.c +++ b/src/app-layer-smtp.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2020 Open Information Security Foundation +/* Copyright (C) 2007-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -237,7 +237,8 @@ SCEnumCharMap smtp_reply_map[ ] = { }; /* Create SMTP config structure */ -SMTPConfig smtp_config = { 0, { 0, 0, 0, 0, 0 }, 0, 0, 0, 0, STREAMING_BUFFER_CONFIG_INITIALIZER}; +SMTPConfig smtp_config = { 0, { 0, 0, 0, NULL, false, 0, 0 }, 0, 0, 0, 0, + STREAMING_BUFFER_CONFIG_INITIALIZER }; static SMTPString *SMTPStringAlloc(void); @@ -258,6 +259,7 @@ static void SMTPConfigure(void) { ConfNode *config = ConfGetNode("app-layer.protocols.smtp.mime"); if (config != NULL) { + ConfNode *extract_urls_schemes = NULL; ret = ConfGetChildValueBool(config, "decode-mime", &val); if (ret) { @@ -284,6 +286,66 @@ static void SMTPConfigure(void) { smtp_config.mime_config.extract_urls = val; } + /* Parse extract-urls-schemes from mime config, add '://' suffix to found schemes, + * and provide a default value of 'http' for the schemes to be extracted + * if no schemes are found in the config */ + extract_urls_schemes = ConfNodeLookupChild(config, "extract-urls-schemes"); + if (extract_urls_schemes) { + ConfNode *scheme = NULL; + + TAILQ_FOREACH (scheme, &extract_urls_schemes->head, next) { + /* new_val_len: scheme value from config e.g. 
'http' + '://' + null terminator */ + size_t new_val_len = strlen(scheme->val) + 3 + 1; + char *new_val = SCMalloc(new_val_len); + if (unlikely(new_val == NULL)) { + FatalError(SC_ERR_FATAL, "SCMalloc failure."); + } + + int r = snprintf(new_val, new_val_len, "%s://", scheme->val); + if (r < 0 || r >= (int)new_val_len) { + FatalError(SC_ERR_FATAL, "snprintf failure."); + } + + /* replace existing scheme value stored on the linked list with new value including + * '://' suffix */ + SCFree(scheme->val); + scheme->val = new_val; + } + + smtp_config.mime_config.extract_urls_schemes = extract_urls_schemes; + } else { + /* Add default extract url scheme 'http' since + * extract-urls-schemes wasn't found in the config */ + ConfNode *seq_node = ConfNodeNew(); + if (unlikely(seq_node == NULL)) { + FatalError(SC_ERR_FATAL, "ConfNodeNew failure."); + } + ConfNode *scheme = ConfNodeNew(); + if (unlikely(scheme == NULL)) { + FatalError(SC_ERR_FATAL, "ConfNodeNew failure."); + } + + seq_node->name = SCStrdup("extract-urls-schemes"); + if (unlikely(seq_node->name == NULL)) { + FatalError(SC_ERR_FATAL, "SCStrdup failure."); + } + scheme->val = SCStrdup("http"); + if (unlikely(scheme->val == NULL)) { + FatalError(SC_ERR_FATAL, "SCStrdup failure."); + } + + seq_node->is_seq = 1; + TAILQ_INSERT_TAIL(&seq_node->head, scheme, next); + TAILQ_INSERT_TAIL(&config->head, seq_node, next); + + smtp_config.mime_config.extract_urls_schemes = seq_node; + } + + ret = ConfGetChildValueBool(config, "log-url-scheme", &val); + if (ret) { + smtp_config.mime_config.log_url_scheme = val; + } + ret = ConfGetChildValueBool(config, "body-md5", &val); if (ret) { smtp_config.mime_config.body_md5 = val; diff --git a/src/util-decode-mime.c b/src/util-decode-mime.c index 437335d903..bb3d6d5814 100644 --- a/src/util-decode-mime.c +++ b/src/util-decode-mime.c @@ -1,5 +1,5 @@ /* Copyright (C) 2012 BAE Systems - * Copyright (C) 2020 Open Information Security Foundation + * Copyright (C) 2020-2021 Open Information 
Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -70,7 +70,6 @@ #define QP_STR "quoted-printable" #define TXT_STR "text/plain" #define HTML_STR "text/html" -#define URL_STR "http://" /* Memory Usage Constants */ #define STACK_FREE_NODES 10 @@ -80,7 +79,7 @@ #define MAX_IP6_CHARS 39 /* Globally hold configuration data */ -static MimeDecConfig mime_dec_config = { 1, 1, 1, 0, MAX_HEADER_VALUE }; +static MimeDecConfig mime_dec_config = { 1, 1, 1, NULL, false, 0, MAX_HEADER_VALUE }; /* Mime Parser String translation */ static const char *StateFlags[] = { "NONE", @@ -1003,12 +1002,11 @@ static MimeDecUrl *FindExistingUrl(MimeDecEntity *entity, uint8_t *url, uint32_t /** * \brief This function searches a text or html line for a URL string * - * URLS are generally truncated to the 'host.domain' format because - * some email messages contain dozens or even hundreds of URLs with - * the same host, but with only small variations in path. + * The URL strings are searched for using the URL schemes defined in the global + * MIME config e.g. "http", "https". * - * The exception is that URLs with executable file extensions are stored - * with the full path. They are stored in lowercase. + * The found URL strings are stored in lowercase and with their schemes + * stripped unless the MIME config flag for log_url_scheme is set. * * Numeric IPs, malformed numeric IPs, and URLs pointing to executables are * also flagged as URLs of interest. 
@@ -1024,88 +1022,101 @@ static int FindUrlStrings(const uint8_t *line, uint32_t len, { int ret = MIME_DEC_OK; MimeDecEntity *entity = (MimeDecEntity *) state->stack->top->data; - uint8_t *fptr, *remptr, *tok = NULL, *tempUrl; - uint32_t tokLen = 0, i, tempUrlLen; - uint8_t urlStrLen = 0, flags = 0; + MimeDecConfig *mdcfg = MimeDecGetConfig(); + uint8_t *fptr, *remptr, *tok = NULL, *tempUrl, *urlHost; + uint32_t tokLen = 0, i, tempUrlLen, urlHostLen; + uint8_t schemeStrLen = 0, flags = 0; + ConfNode *scheme = NULL; + char *schemeStr = NULL; + + if (mdcfg != NULL && mdcfg->extract_urls_schemes == NULL) { + SCLogDebug("Error: MIME config extract_urls_schemes was NULL."); + return MIME_DEC_ERR_DATA; + } - remptr = (uint8_t *)line; - do { - SCLogDebug("Looking for URL String starting with: %s", URL_STR); - - /* Check for token definition */ - fptr = FindBuffer(remptr, len - (remptr - line), (uint8_t *)URL_STR, strlen(URL_STR)); - if (fptr != NULL) { - - urlStrLen = strlen(URL_STR); - fptr += urlStrLen; /* Start at end of start string */ - tok = GetToken(fptr, len - (fptr - line), " \"\'<>]\t", &remptr, - &tokLen); - if (tok == fptr) { - SCLogDebug("Found url string"); - - /* First copy to temp URL string */ - tempUrl = SCMalloc(urlStrLen + tokLen); - if (unlikely(tempUrl == NULL)) { - SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); - return MIME_DEC_ERR_MEM; - } + TAILQ_FOREACH (scheme, &mdcfg->extract_urls_schemes->head, next) { + schemeStr = scheme->val; + schemeStrLen = strlen(schemeStr); - PrintChars(SC_LOG_DEBUG, "RAW URL", tok, tokLen); + remptr = (uint8_t *)line; + do { + SCLogDebug("Looking for URL String starting with: %s", schemeStr); + + /* Check for token definition */ + fptr = FindBuffer(remptr, len - (remptr - line), (uint8_t *)schemeStr, schemeStrLen); + if (fptr != NULL) { + if (!mdcfg->log_url_scheme) { + fptr += schemeStrLen; /* Strip scheme from stored URL */ + } + tok = GetToken(fptr, len - (fptr - line), " \"\'<>]\t", &remptr, &tokLen); + 
if (tok == fptr) { + SCLogDebug("Found url string"); + + /* First copy to temp URL string */ + tempUrl = SCMalloc(tokLen); + if (unlikely(tempUrl == NULL)) { + SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); + return MIME_DEC_ERR_MEM; + } - /* Copy over to temp URL while decoding */ - tempUrlLen = 0; - for (i = 0; i < tokLen && tok[i] != 0; i++) { + PrintChars(SC_LOG_DEBUG, "RAW URL", tok, tokLen); - // URL decoding would probably go here + /* Copy over to temp URL while decoding */ + tempUrlLen = 0; + for (i = 0; i < tokLen && tok[i] != 0; i++) { + /* url is all lowercase */ + tempUrl[tempUrlLen] = tolower(tok[i]); + tempUrlLen++; + } - /* url is all lowercase */ - tempUrl[tempUrlLen] = tolower(tok[i]); - tempUrlLen++; - } + urlHost = tempUrl; + urlHostLen = tempUrlLen; + if (mdcfg->log_url_scheme) { + /* tempUrl contains the scheme in the string but + * IsIpv4Host & IsPv6Host methods below require + * an input URL string with scheme stripped. Get a + * reference sub-string urlHost which starts with + * the host instead of the scheme. */ + urlHost += schemeStrLen; + urlHostLen -= schemeStrLen; + } - /* Determine if URL points to an EXE */ - if (IsExeUrl(tempUrl, tempUrlLen)) { - flags |= URL_IS_EXE; + /* Determine if URL points to an EXE */ + if (IsExeUrl(tempUrl, tempUrlLen)) { + flags |= URL_IS_EXE; - PrintChars(SC_LOG_DEBUG, "EXE URL", tempUrl, tempUrlLen); - } else { - /* Not an EXE URL */ - /* Cut off length at first '/' */ - /* If seems that BAESystems had done the following - in support of PEScan. We don't want it for logging. - Therefore its been removed. 
- tok = FindString(tempUrl, tempUrlLen, "/"); - if (tok != NULL) { - tempUrlLen = tok - tempUrl; + PrintChars(SC_LOG_DEBUG, "EXE URL", tempUrl, tempUrlLen); } - */ - } - /* Make sure remaining URL exists */ - if (tempUrlLen > 0) { - if (!(FindExistingUrl(entity, tempUrl, tempUrlLen))) { - /* Now look for numeric IP */ - if (IsIpv4Host(tempUrl, tempUrlLen)) { - flags |= URL_IS_IP4; + /* Make sure remaining URL exists */ + if (tempUrlLen > 0) { + if (!(FindExistingUrl(entity, tempUrl, tempUrlLen))) { + /* Now look for numeric IP */ + if (IsIpv4Host(urlHost, urlHostLen)) { + flags |= URL_IS_IP4; - PrintChars(SC_LOG_DEBUG, "IP URL4", tempUrl, tempUrlLen); - } else if (IsIpv6Host(tempUrl, tempUrlLen)) { - flags |= URL_IS_IP6; + PrintChars(SC_LOG_DEBUG, "IP URL4", tempUrl, tempUrlLen); + } else if (IsIpv6Host(urlHost, urlHostLen)) { + flags |= URL_IS_IP6; - PrintChars(SC_LOG_DEBUG, "IP URL6", tempUrl, tempUrlLen); - } + PrintChars(SC_LOG_DEBUG, "IP URL6", tempUrl, tempUrlLen); + } - /* Add URL list item */ - MimeDecAddUrl(entity, tempUrl, tempUrlLen, flags); + /* Add URL list item */ + MimeDecAddUrl(entity, tempUrl, tempUrlLen, flags); + } else { + SCFree(tempUrl); + } } else { SCFree(tempUrl); } - } else { - SCFree(tempUrl); + + /* Reset flags for next URL */ + flags = 0; } } - } - } while (fptr != NULL); + } while (fptr != NULL); + } return ret; } @@ -2737,9 +2748,20 @@ static int MimeDecParseLineTest02(void) uint32_t expected_count = 2; uint32_t line_count = 0; + ConfNode *url_schemes = ConfNodeNew(); + ConfNode *scheme = ConfNodeNew(); + FAIL_IF_NULL(url_schemes); + FAIL_IF_NULL(scheme); + + url_schemes->is_seq = 1; + scheme->val = SCStrdup("http://"); + FAIL_IF_NULL(scheme->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme, next); + MimeDecGetConfig()->decode_base64 = 1; MimeDecGetConfig()->decode_quoted_printable = 1; MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = url_schemes; /* Init parser */ MimeDecParseState *state = 
MimeDecInitParser(&line_count, @@ -2785,6 +2807,9 @@ static int MimeDecParseLineTest02(void) /* De Init parser */ MimeDecDeInitParser(state); + ConfNodeFree(url_schemes); + MimeDecGetConfig()->extract_urls_schemes = NULL; + SCLogInfo("LINE COUNT FINISHED: %d", line_count); if (expected_count != line_count) { @@ -2796,6 +2821,272 @@ static int MimeDecParseLineTest02(void) return 1; } +/* Test error case where no url schemes set in config */ +static int MimeFindUrlStringsTest01(void) +{ + int ret = MIME_DEC_OK; + uint32_t line_count = 0; + + MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = NULL; + MimeDecGetConfig()->log_url_scheme = false; + + /* Init parser */ + MimeDecParseState *state = MimeDecInitParser(&line_count, TestDataChunkCallback); + + const char *str = "test"; + ret = FindUrlStrings((uint8_t *)str, strlen(str), state); + /* Expected error since extract_url_schemes is NULL */ + FAIL_IF_NOT(ret == MIME_DEC_ERR_DATA); + + /* Completed */ + ret = MimeDecParseComplete(state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + MimeDecEntity *msg = state->msg; + MimeDecFreeEntity(msg); + + /* De Init parser */ + MimeDecDeInitParser(state); + + PASS; +} + +/* Test simple case of URL extraction */ +static int MimeFindUrlStringsTest02(void) +{ + int ret = MIME_DEC_OK; + uint32_t line_count = 0; + ConfNode *url_schemes = ConfNodeNew(); + ConfNode *scheme = ConfNodeNew(); + FAIL_IF_NULL(url_schemes); + FAIL_IF_NULL(scheme); + + url_schemes->is_seq = 1; + scheme->val = SCStrdup("http://"); + FAIL_IF_NULL(scheme->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme, next); + + MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = url_schemes; + MimeDecGetConfig()->log_url_scheme = false; + + /* Init parser */ + MimeDecParseState *state = MimeDecInitParser(&line_count, TestDataChunkCallback); + + const char *str = "A simple message click on " + "http://www.test.com/malware.exe? 
" + "hahah hopefully you click this link"; + ret = FindUrlStrings((uint8_t *)str, strlen(str), state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + /* Completed */ + ret = MimeDecParseComplete(state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + MimeDecEntity *msg = state->msg; + + FAIL_IF(msg->url_list == NULL); + + FAIL_IF_NOT(msg->url_list->url_flags & URL_IS_EXE); + FAIL_IF_NOT( + memcmp("www.test.com/malware.exe?", msg->url_list->url, msg->url_list->url_len) == 0); + + MimeDecFreeEntity(msg); + + /* De Init parser */ + MimeDecDeInitParser(state); + + ConfNodeFree(url_schemes); + MimeDecGetConfig()->extract_urls_schemes = NULL; + + PASS; +} + +/* Test URL extraction with multiple schemes and URLs */ +static int MimeFindUrlStringsTest03(void) +{ + int ret = MIME_DEC_OK; + uint32_t line_count = 0; + ConfNode *url_schemes = ConfNodeNew(); + ConfNode *scheme1 = ConfNodeNew(); + ConfNode *scheme2 = ConfNodeNew(); + FAIL_IF_NULL(url_schemes); + FAIL_IF_NULL(scheme1); + FAIL_IF_NULL(scheme2); + + url_schemes->is_seq = 1; + scheme1->val = SCStrdup("http://"); + FAIL_IF_NULL(scheme1->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme1, next); + scheme2->val = SCStrdup("https://"); + FAIL_IF_NULL(scheme2->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme2, next); + + MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = url_schemes; + MimeDecGetConfig()->log_url_scheme = false; + + /* Init parser */ + MimeDecParseState *state = MimeDecInitParser(&line_count, TestDataChunkCallback); + + const char *str = "A simple message click on " + "http://www.test.com/malware.exe? 
" + "hahah hopefully you click this link, or " + "you can go to http://www.test.com/test/01.html and " + "https://www.test.com/test/02.php"; + ret = FindUrlStrings((uint8_t *)str, strlen(str), state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + /* Completed */ + ret = MimeDecParseComplete(state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + MimeDecEntity *msg = state->msg; + + FAIL_IF(msg->url_list == NULL); + + MimeDecUrl *url = msg->url_list; + FAIL_IF_NOT(memcmp("www.test.com/test/02.php", url->url, url->url_len) == 0); + + url = url->next; + FAIL_IF_NOT(memcmp("www.test.com/test/01.html", url->url, url->url_len) == 0); + + url = url->next; + FAIL_IF_NOT(memcmp("www.test.com/malware.exe?", url->url, url->url_len) == 0); + + MimeDecFreeEntity(msg); + + /* De Init parser */ + MimeDecDeInitParser(state); + + ConfNodeFree(url_schemes); + MimeDecGetConfig()->extract_urls_schemes = NULL; + + PASS; +} + +/* Test URL extraction with multiple schemes and URLs with + * log_url_scheme enabled in the MIME config */ +static int MimeFindUrlStringsTest04(void) +{ + int ret = MIME_DEC_OK; + uint32_t line_count = 0; + ConfNode *url_schemes = ConfNodeNew(); + ConfNode *scheme1 = ConfNodeNew(); + ConfNode *scheme2 = ConfNodeNew(); + FAIL_IF_NULL(url_schemes); + FAIL_IF_NULL(scheme1); + FAIL_IF_NULL(scheme2); + + url_schemes->is_seq = 1; + scheme1->val = SCStrdup("http://"); + FAIL_IF_NULL(scheme1->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme1, next); + scheme2->val = SCStrdup("https://"); + FAIL_IF_NULL(scheme2->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme2, next); + + MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = url_schemes; + MimeDecGetConfig()->log_url_scheme = true; + + /* Init parser */ + MimeDecParseState *state = MimeDecInitParser(&line_count, TestDataChunkCallback); + + const char *str = "A simple message click on " + "http://www.test.com/malware.exe? 
" + "hahah hopefully you click this link, or " + "you can go to http://www.test.com/test/01.html and " + "https://www.test.com/test/02.php"; + ret = FindUrlStrings((uint8_t *)str, strlen(str), state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + /* Completed */ + ret = MimeDecParseComplete(state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + MimeDecEntity *msg = state->msg; + + FAIL_IF(msg->url_list == NULL); + + MimeDecUrl *url = msg->url_list; + FAIL_IF_NOT(memcmp("https://www.test.com/test/02.php", url->url, url->url_len) == 0); + + url = url->next; + FAIL_IF_NOT(memcmp("http://www.test.com/test/01.html", url->url, url->url_len) == 0); + + url = url->next; + FAIL_IF_NOT(memcmp("http://www.test.com/malware.exe?", url->url, url->url_len) == 0); + + MimeDecFreeEntity(msg); + + /* De Init parser */ + MimeDecDeInitParser(state); + + ConfNodeFree(url_schemes); + MimeDecGetConfig()->extract_urls_schemes = NULL; + + PASS; +} + +/* Test URL extraction of IPV4 and IPV6 URLs with log_url_scheme + * enabled in the MIME config */ +static int MimeFindUrlStringsTest05(void) +{ + int ret = MIME_DEC_OK; + uint32_t line_count = 0; + ConfNode *url_schemes = ConfNodeNew(); + ConfNode *scheme = ConfNodeNew(); + FAIL_IF_NULL(url_schemes); + FAIL_IF_NULL(scheme); + + url_schemes->is_seq = 1; + scheme->val = SCStrdup("http://"); + FAIL_IF_NULL(scheme->val); + TAILQ_INSERT_TAIL(&url_schemes->head, scheme, next); + + MimeDecGetConfig()->extract_urls = 1; + MimeDecGetConfig()->extract_urls_schemes = url_schemes; + MimeDecGetConfig()->log_url_scheme = true; + + /* Init parser */ + MimeDecParseState *state = MimeDecInitParser(&line_count, TestDataChunkCallback); + + const char *str = "A simple message click on " + "http://192.168.1.1/test/01.html " + "hahah hopefully you click this link or this one " + "http://0:0:0:0:0:0:0:0/test/02.php"; + ret = FindUrlStrings((uint8_t *)str, strlen(str), state); + FAIL_IF_NOT(ret == MIME_DEC_OK); + + /* Completed */ + ret = MimeDecParseComplete(state); + 
FAIL_IF_NOT(ret == MIME_DEC_OK); + + MimeDecEntity *msg = state->msg; + + FAIL_IF(msg->url_list == NULL); + + MimeDecUrl *url = msg->url_list; + FAIL_IF_NOT(url->url_flags & URL_IS_IP6); + FAIL_IF_NOT(memcmp("http://0:0:0:0:0:0:0:0/test/02.php", url->url, url->url_len) == 0); + + url = url->next; + FAIL_IF_NOT(url->url_flags & URL_IS_IP4); + FAIL_IF_NOT(memcmp("http://192.168.1.1/test/01.html", url->url, url->url_len) == 0); + + MimeDecFreeEntity(msg); + + /* De Init parser */ + MimeDecDeInitParser(state); + + ConfNodeFree(url_schemes); + MimeDecGetConfig()->extract_urls_schemes = NULL; + + PASS; +} + /* Test full message with linebreaks */ static int MimeDecParseFullMsgTest01(void) { @@ -3093,6 +3384,11 @@ void MimeDecRegisterTests(void) #ifdef UNITTESTS UtRegisterTest("MimeDecParseLineTest01", MimeDecParseLineTest01); UtRegisterTest("MimeDecParseLineTest02", MimeDecParseLineTest02); + UtRegisterTest("MimeFindUrlStringsTest01", MimeFindUrlStringsTest01); + UtRegisterTest("MimeFindUrlStringsTest02", MimeFindUrlStringsTest02); + UtRegisterTest("MimeFindUrlStringsTest03", MimeFindUrlStringsTest03); + UtRegisterTest("MimeFindUrlStringsTest04", MimeFindUrlStringsTest04); + UtRegisterTest("MimeFindUrlStringsTest05", MimeFindUrlStringsTest05); UtRegisterTest("MimeDecParseFullMsgTest01", MimeDecParseFullMsgTest01); UtRegisterTest("MimeDecParseFullMsgTest02", MimeDecParseFullMsgTest02); UtRegisterTest("MimeBase64DecodeTest01", MimeBase64DecodeTest01); diff --git a/src/util-decode-mime.h b/src/util-decode-mime.h index 0baed68bf1..99b5b404b8 100644 --- a/src/util-decode-mime.h +++ b/src/util-decode-mime.h @@ -1,4 +1,5 @@ /* Copyright (C) 2012 BAE Systems + * Copyright (C) 2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -96,6 +97,9 @@ typedef struct MimeDecConfig { int decode_base64; /**< Decode base64 bodies */ int decode_quoted_printable; 
/**< Decode quoted-printable bodies */ int extract_urls; /**< Extract and store URLs in data structure */ + ConfNode *extract_urls_schemes; /**< List of schemes of which to + extract urls */ + bool log_url_scheme; /**< Log the scheme of extracted URLs */ int body_md5; /**< Compute md5 sum of body */ uint32_t header_value_depth; /**< Depth of which to store header values (Default is 2000) */ diff --git a/suricata.yaml.in b/suricata.yaml.in index b674a40090..d2a1f9f1e2 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -854,6 +854,12 @@ app-layer: # Extract URLs and save in state data structure extract-urls: yes + # Scheme of URLs to extract + # (default is [http]) + #extract-urls-schemes: [http, https, ftp, mailto] + # Log the scheme of URLs that are extracted + # (default is no) + #log-url-scheme: yes # Set to yes to compute the md5 of the mail body. You will then # be able to journalize it. body-md5: no