http: use rust for mime parsing

Ticket: #3487
pull/11227/head
Philippe Antoine 3 years ago committed by Victor Julien
parent ddb3a0c9de
commit 5f75b9a6e3

@ -19,9 +19,9 @@
use crate::common::nom7::take_until_and_consume;
use nom7::branch::alt;
use nom7::bytes::complete::{take_till, take_until, take_while};
use nom7::bytes::complete::{tag, take, take_till, take_until, take_while};
use nom7::character::complete::char;
use nom7::combinator::{complete, opt, rest};
use nom7::combinator::{complete, opt, rest, value};
use nom7::error::{make_error, ErrorKind};
use nom7::{Err, IResult};
use std;
@ -42,7 +42,7 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> {
if input[i] == b'"' && !escaping {
return Ok((&input[i + 1..], &input[..i]));
}
//TODOmime unescape later
// unescape can be processed later
escaping = false;
}
}
@ -55,8 +55,8 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> {
fn mime_parse_value_until(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, value) = alt((take_till(|ch: u8| ch == b';'), rest))(input)?;
for i in 0..value.len() {
if !is_mime_space(value[value.len()-i-1]) {
return Ok((input, &value[..value.len()-i]));
if !is_mime_space(value[value.len() - i - 1]) {
return Ok((input, &value[..value.len() - i]));
}
}
return Ok((input, value));
@ -176,6 +176,360 @@ pub unsafe extern "C" fn rs_mime_find_header_token(
return false;
}
/// Internal states of the multipart body parser; `mime_process` dispatches on
/// the current state for every piece of input it handles.
#[derive(Debug)]
enum MimeParserState {
    /// Expecting a boundary line (or skipping a line that is not one).
    MimeStart = 0,
    /// Reading the header lines of a part.
    MimeHeader = 1,
    /// The blank line ending the headers was consumed.
    MimeHeaderEnd = 2,
    /// Reading the body data of a part.
    MimeChunk = 3,
    /// Discarding data until the next end of line is found.
    MimeBoundaryWaitingForEol = 4,
}

impl Default for MimeParserState {
    /// A newly created parser begins in `MimeStart`.
    fn default() -> Self {
        Self::MimeStart
    }
}
/// Parser context kept across calls while one multipart HTTP body is processed.
#[derive(Debug, Default)]
pub struct MimeStateHTTP {
/// Boundary to search for; `mime_state_init` stores it with a leading `--`.
boundary: Vec<u8>,
/// Filename taken from the last Content-Disposition header; cleared when a file is closed.
filename: Vec<u8>,
/// Current position in the parsing state machine.
state: MimeParserState,
}
/// Outcome of one `mime_process` / `rs_mime_parse` call, telling the caller
/// what to do with the bytes reported as consumed.
#[repr(u8)]
#[derive(Copy, Clone, PartialOrd, PartialEq)]
pub enum MimeParserResult {
/// Nothing more can be decided with the data available so far.
MimeNeedsMore = 0,
/// Headers announcing a file (non-empty filename) were parsed.
MimeFileOpen = 1,
/// File data was consumed and the file is not finished yet.
MimeFileChunk = 2,
/// The boundary ending the current part was reached.
MimeFileClose = 3,
}
/// Consumes one line, up to and including its terminating `\n`.
///
/// Fails when `input` contains no `\n`. On success the reported state is
/// always `MimeParserState::MimeStart`.
fn mime_parse_skip_line(input: &[u8]) -> IResult<&[u8], MimeParserState> {
    let (at_eol, _) = take_till(|byte: u8| byte == b'\n')(input)?;
    let (after_eol, _) = char('\n')(at_eol)?;
    Ok((after_eol, MimeParserState::MimeStart))
}
/// Matches `boundary` at the very start of `input`, then drops the rest of
/// that line including its `\n`.
///
/// On success the next state is `MimeParserState::MimeHeader`. Fails when the
/// input does not start with the boundary or when the line has no `\n` yet.
fn mime_parse_boundary_regular<'a, 'b>(
    boundary: &'b [u8], input: &'a [u8],
) -> IResult<&'a [u8], MimeParserState> {
    let (after_boundary, _) = tag(boundary)(input)?;
    let (at_eol, _) = take_till(|byte: u8| byte == b'\n')(after_boundary)?;
    let (next_line, _) = char('\n')(at_eol)?;
    Ok((next_line, MimeParserState::MimeHeader))
}
// Number of characters after boundary, without end of line, before changing state to streaming
const MIME_BOUNDARY_MAX_BEFORE_EOL: usize = 128;
// Amount of pending header data without a "\r\n" above which the header parser
// stops waiting for the line to complete and switches to waiting for an end of line
const MIME_HEADER_MAX_LINE: usize = 4096;
/// Matches `boundary` at the start of `input` followed by exactly
/// `MIME_BOUNDARY_MAX_BEFORE_EOL` further bytes, whatever they are.
///
/// Used when the boundary line has not shown an end of line yet: on success
/// the next state is `MimeParserState::MimeBoundaryWaitingForEol`. Fails when
/// the boundary does not match or fewer bytes than that follow it.
fn mime_parse_boundary_missing_eol<'a, 'b>(
    boundary: &'b [u8], input: &'a [u8],
) -> IResult<&'a [u8], MimeParserState> {
    let (after_boundary, _) = tag(boundary)(input)?;
    let (remaining, _) = take(MIME_BOUNDARY_MAX_BEFORE_EOL)(after_boundary)?;
    Ok((remaining, MimeParserState::MimeBoundaryWaitingForEol))
}
/// Handles the data found while looking for a boundary line.
///
/// Three attempts are made in order, the first success wins:
/// 1. a complete boundary line (`mime_parse_boundary_regular`),
/// 2. any complete line, which is skipped (`mime_parse_skip_line`),
/// 3. a boundary followed by a long run of bytes without end of line
///    (`mime_parse_boundary_missing_eol`).
///
/// The error of the last attempt is returned when all three fail.
fn mime_parse_boundary<'a, 'b>(
    boundary: &'b [u8], input: &'a [u8],
) -> IResult<&'a [u8], MimeParserState> {
    mime_parse_boundary_regular(boundary, input)
        .or_else(|_| mime_parse_skip_line(input))
        .or_else(|_| mime_parse_boundary_missing_eol(boundary, input))
}
/// Discards input up to and including the next `\n`.
///
/// Yields `true` when a full line was skipped, or `false` when no `\n` was
/// present and the whole input was swallowed instead.
fn mime_consume_until_eol(input: &[u8]) -> IResult<&[u8], bool> {
    let skip_full_line = value(true, mime_parse_skip_line);
    let swallow_everything = value(false, rest);
    alt((skip_full_line, swallow_everything))(input)
}
/// Splits a header line at its first `:`.
///
/// On success the "remaining input" is the header value (everything after
/// the colon) and the output is the header name (everything before it).
/// Fails when the line contains no `:`.
fn mime_parse_header_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let (at_colon, name) = take_till(|byte: u8| byte == b':')(input)?;
    let (header_value, _) = char(':')(at_colon)?;
    Ok((header_value, name))
}
/// Compares `s1` against `s2`, lowercasing the ASCII letters of `s1` first.
///
/// `s2` is used as is, so the caller must pass it already in lower case.
/// Slices of different lengths never compare equal.
fn rs_equals_lowercase(s1: &[u8], s2: &[u8]) -> bool {
    s1.len() == s2.len()
        && s1
            .iter()
            .zip(s2)
            .all(|(left, right)| left.to_ascii_lowercase() == *right)
}
/// Parses the header lines of a MIME part, one "\r\n"-terminated line at a time.
///
/// Returns the unconsumed input and a tuple `(next_state, fileopen, errored)`:
/// - `fileopen` is true when, during this call, a Content-Disposition header
///   carried a non-empty `filename` token; the name (backslashes removed) is
///   then stored in `ctx.filename`.
/// - `errored` is true when a non-empty line had no ':' or a header had
///   nothing after its ':'.
///
/// `next_state` is `MimeHeaderEnd` once the blank line ending the headers was
/// consumed, `MimeBoundaryWaitingForEol` when the pending data without end of
/// line exceeds `MIME_HEADER_MAX_LINE`, and `MimeHeader` otherwise.
/// An `Eof` error is returned when not even one complete line was available.
fn mime_parse_headers<'a, 'b>(
ctx: &'b mut MimeStateHTTP, i: &'a [u8],
) -> IResult<&'a [u8], (MimeParserState, bool, bool)> {
let mut fileopen = false;
let mut errored = false;
let mut input = i;
while input.len() > 0 {
// isolate the next line; input2 starts at its "\r\n"
match take_until::<_, &[u8], nom7::error::Error<&[u8]>>("\r\n")(input) {
Ok((input2, line)) => {
match mime_parse_header_line(line) {
Ok((value, name)) => {
if rs_equals_lowercase(name, "content-disposition".as_bytes()) {
let mut sections_values = Vec::new();
if let Ok(filename) = mime_find_header_token(
value,
"filename".as_bytes(),
&mut sections_values,
) {
if filename.len() > 0 {
ctx.filename = Vec::with_capacity(filename.len());
fileopen = true;
for c in filename {
// unescape: every backslash is dropped
if *c != b'\\' {
ctx.filename.push(*c);
}
}
}
}
}
// a header name followed by nothing after the colon is invalid
if value.len() == 0 {
errored = true;
}
}
_ => {
// no colon: only acceptable for the empty line ending the headers
if line.len() > 0 {
errored = true;
}
}
}
// consume the end of line itself
let (input3, _) = tag("\r\n")(input2)?;
input = input3;
// an empty line (or a lone '\r') ends the header section
if line.len() == 0 || (line.len() == 1 && line[0] == b'\r') {
return Ok((input, (MimeParserState::MimeHeaderEnd, fileopen, errored)));
}
}
_ => {
// no complete line left in the input
// guard against too long header lines
if input.len() > MIME_HEADER_MAX_LINE {
return Ok((
input,
(
MimeParserState::MimeBoundaryWaitingForEol,
fileopen,
errored,
),
));
}
// at least one line was consumed in this call: report progress
if input.len() < i.len() {
return Ok((input, (MimeParserState::MimeHeader, fileopen, errored)));
} // else only an incomplete line, ask for more
return Err(Err::Error(make_error(input, ErrorKind::Eof)));
}
}
}
// all input consumed without reaching the end of the headers
return Ok((input, (MimeParserState::MimeHeader, fileopen, errored)));
}
fn mime_consume_chunk<'a, 'b>(boundary: &'b [u8], input: &'a [u8]) -> IResult<&'a [u8], bool> {
let r: Result<(&[u8], &[u8]), Err<nom7::error::Error<&[u8]>>> = take_until("\r\n")(input);
match r {
Ok((input, line)) => {
let (input2, _) = tag("\r\n")(input)?;
if input2.len() < boundary.len() {
if input2 == &boundary[..input2.len()] {
if line.len() > 0 {
// consume as chunk up to eol (not consuming eol)
return Ok((input, false));
}
// new line beignning like boundary, with nothin to consume as chunk : request more
return Err(Err::Error(make_error(input, ErrorKind::Eof)));
}
// not like boundary : consume everything as chunk
return Ok((&input[input.len()..], false));
} // else
if &input2[..boundary.len()] == boundary {
// end of file with boundary, consume eol but do not consume boundary
return Ok((input2, true));
}
// not like boundary : consume everything as chunk
return Ok((input2, false));
}
_ => {
return Ok((&input[input.len()..], false));
}
}
}
// Bit flags reported in the `warnings` value of `mime_process` / `rs_mime_parse`.
// Set when the header parser flagged a malformed header line.
pub const MIME_EVENT_FLAG_INVALID_HEADER: u32 = 0x01;
// Set when a part with a filename is directly followed by the boundary (no data).
pub const MIME_EVENT_FLAG_NO_FILEDATA: u32 = 0x02;
/// Runs the multipart state machine over `i`, starting from `ctx.state`.
///
/// Returns `(result, consumed, warnings)`:
/// - `result` tells the caller which event occurred (see `MimeParserResult`),
/// - `consumed` is the number of bytes of `i` handled by this call,
/// - `warnings` is a combination of the `MIME_EVENT_FLAG_*` bits.
///
/// The function returns as soon as an event must be reported or more data is
/// needed; the caller is expected to call again with the unconsumed bytes.
fn mime_process(ctx: &mut MimeStateHTTP, i: &[u8]) -> (MimeParserResult, u32, u32) {
let mut input = i;
let mut consumed = 0;
let mut warnings = 0;
while input.len() > 0 {
match ctx.state {
MimeParserState::MimeStart => {
// look for a boundary line, skipping other lines
if let Ok((rem, next)) = mime_parse_boundary(&ctx.boundary, input) {
ctx.state = next;
consumed += (input.len() - rem.len()) as u32;
input = rem;
} else {
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
}
MimeParserState::MimeBoundaryWaitingForEol => {
// drop data until an end of line shows up, then read headers
if let Ok((rem, found)) = mime_consume_until_eol(input) {
if found {
ctx.state = MimeParserState::MimeHeader;
}
consumed += (input.len() - rem.len()) as u32;
input = rem;
} else {
// should never happen
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
}
MimeParserState::MimeHeader => {
if let Ok((rem, (next, fileopen, err))) = mime_parse_headers(ctx, input) {
ctx.state = next;
consumed += (input.len() - rem.len()) as u32;
input = rem;
if err {
warnings |= MIME_EVENT_FLAG_INVALID_HEADER;
}
// a filename was found: report it right away
if fileopen {
return (MimeParserResult::MimeFileOpen, consumed, warnings);
}
} else {
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
}
MimeParserState::MimeHeaderEnd => {
// check if we start with the boundary
// and transition to chunk, or empty file and back to start
if input.len() < ctx.boundary.len() {
// too short to decide: wait if it could still be the boundary
if input == &ctx.boundary[..input.len()] {
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
ctx.state = MimeParserState::MimeChunk;
} else {
if &input[..ctx.boundary.len()] == ctx.boundary {
// boundary right after the headers: the part has no data
ctx.state = MimeParserState::MimeStart;
if ctx.filename.len() > 0 {
warnings |= MIME_EVENT_FLAG_NO_FILEDATA;
}
ctx.filename.clear();
return (MimeParserResult::MimeFileClose, consumed, warnings);
} else {
ctx.state = MimeParserState::MimeChunk;
}
}
}
MimeParserState::MimeChunk => {
if let Ok((rem, eof)) = mime_consume_chunk(&ctx.boundary, input) {
consumed += (input.len() - rem.len()) as u32;
if eof {
// boundary reached: the current part is complete
ctx.state = MimeParserState::MimeStart;
ctx.filename.clear();
return (MimeParserResult::MimeFileClose, consumed, warnings);
} else {
// what remains is too short to hold "\r\n" and a full boundary:
// report the data consumed so far
// + 2 for \r\n
if rem.len() < ctx.boundary.len() + 2 {
return (MimeParserResult::MimeFileChunk, consumed, warnings);
}
input = rem;
}
} else {
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
}
}
}
return (MimeParserResult::MimeNeedsMore, consumed, warnings);
}
pub fn mime_state_init(i: &[u8]) -> Option<MimeStateHTTP> {
let mut sections_values = Vec::new();
match mime_find_header_token(i, "boundary".as_bytes(), &mut sections_values) {
Ok(value) => {
if value.len() <= RS_MIME_MAX_TOKEN_LEN {
let mut r = MimeStateHTTP::default();
r.boundary = Vec::with_capacity(2 + value.len());
// start wih 2 additional hyphens
r.boundary.push(b'-');
r.boundary.push(b'-');
for c in value {
// unescape
if *c != b'\\' {
r.boundary.push(*c);
}
}
return Some(r);
}
}
_ => {}
}
return None;
}
/// C entry point: builds a heap-allocated `MimeStateHTTP` from a Content-Type
/// header value.
///
/// Returns a null pointer when `mime_state_init` rejects the header. A
/// non-null result comes from `Box::into_raw` and must be released with
/// `rs_mime_state_free`.
///
/// # Safety
///
/// `input` and `input_len` are handed to `build_slice!`; presumably they must
/// describe readable memory of that length (confirm against the macro).
#[no_mangle]
pub unsafe extern "C" fn rs_mime_state_init(
    input: *const u8, input_len: u32,
) -> *mut MimeStateHTTP {
    let header = build_slice!(input, input_len as usize);
    match mime_state_init(header) {
        Some(state) => Box::into_raw(Box::new(state)),
        None => std::ptr::null_mut(),
    }
}
/// C entry point: feeds `input_len` bytes at `input` to `mime_process`.
///
/// The number of bytes handled is written to `*consumed`, the
/// `MIME_EVENT_FLAG_*` bits to `*warnings`, and the parser event is returned.
///
/// # Safety
///
/// `consumed` and `warnings` are written through unconditionally, so both
/// must be valid for writes. `input`/`input_len` are handed to `build_slice!`.
#[no_mangle]
pub unsafe extern "C" fn rs_mime_parse(
    ctx: &mut MimeStateHTTP, input: *const u8, input_len: u32, consumed: *mut u32,
    warnings: *mut u32,
) -> MimeParserResult {
    let data = build_slice!(input, input_len as usize);
    let (event, used, flags) = mime_process(ctx, data);
    *consumed = used;
    *warnings = flags;
    event
}
/// C entry point: exposes the filename currently stored in `ctx`.
///
/// With a non-empty filename, `*buffer` points into memory owned by `ctx`
/// and `*filename_len` holds its length, capped at `u16::MAX`. Otherwise
/// `*buffer` is null and `*filename_len` is 0.
///
/// # Safety
///
/// `buffer` and `filename_len` are written through unconditionally, so both
/// must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn rs_mime_state_get_filename(
    ctx: &mut MimeStateHTTP, buffer: *mut *const u8, filename_len: *mut u16,
) {
    if ctx.filename.is_empty() {
        *buffer = std::ptr::null();
        *filename_len = 0;
        return;
    }
    *buffer = ctx.filename.as_ptr();
    // the length is reported as u16: saturate instead of truncating
    *filename_len = std::cmp::min(ctx.filename.len(), usize::from(u16::MAX)) as u16;
}
/// C entry point: releases a parser context.
///
/// # Safety
///
/// `ctx` is turned back into a `Box`, so it must be a pointer obtained from
/// `rs_mime_state_init` (which uses `Box::into_raw`) and must not be used or
/// freed again afterwards.
#[no_mangle]
pub unsafe extern "C" fn rs_mime_state_free(ctx: &mut MimeStateHTTP) {
    // Take ownership back; the allocation is released when the box is dropped.
    let owned = Box::from_raw(ctx);
    drop(owned);
}
#[cfg(test)]
mod test {
use super::*;

@ -99,6 +99,10 @@ StreamingBufferConfig htp_sbcfg = STREAMING_BUFFER_CONFIG_INITIALIZER;
/** Limit to the number of libhtp messages that can be handled */
#define HTP_MAX_MESSAGES 512
/** a boundary should be smaller in size */
// RFC 2046 states that max boundary size is 70
#define HTP_BOUNDARY_MAX 200U
SC_ATOMIC_DECLARE(uint32_t, htp_config_flags);
#ifdef DEBUG
@ -368,8 +372,8 @@ static void HtpTxUserDataFree(HtpState *state, HtpTxUserData *htud)
if (htud->response_headers_raw)
HTPFree(htud->response_headers_raw, htud->response_headers_raw_len);
AppLayerDecoderEventsFreeEvents(&htud->tx_data.events);
if (htud->boundary)
HTPFree(htud->boundary, htud->boundary_len);
if (htud->mime_state)
rs_mime_state_free(htud->mime_state);
if (htud->tx_data.de_state != NULL) {
DetectEngineStateFree(htud->tx_data.de_state);
}
@ -1114,92 +1118,6 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len,
return 0;
}
/**
 * \brief Extract a token value from a ';'-separated header value.
 *
 * Leading whitespace is skipped, then the data is split into tokens at
 * every ';' that is neither preceded by a backslash nor inside double
 * quotes (the end of the data also ends a token). The first token longer
 * than name_len whose first name_len bytes match name is selected; what
 * follows the name is returned with one leading and one trailing double
 * quote removed.
 *
 * \param name /Lowercase/ version of the variable name, or NULL to accept
 *             the first token
 * \param name_len length of name
 * \param data header value to parse
 * \param len length of data
 * \param retptr set to the start of the value (pointing into data)
 * \param retlen set to the length of the value
 *
 * \retval 1 value found, retptr and retlen are set
 * \retval 0 nothing found, retptr and retlen are untouched
 */
static int HTTPParseContentTypeHeader(uint8_t *name, size_t name_len,
        uint8_t *data, size_t len, uint8_t **retptr, size_t *retlen)
{
    SCEnter();
#ifdef PRINT
    printf("DATA START: \n");
    PrintRawDataFp(stdout, data, len);
    printf("DATA END: \n");
#endif
    size_t x;
    int quote = 0;

    /* skip leading whitespace */
    for (x = 0; x < len; x++) {
        if (!(isspace(data[x])))
            break;
    }

    if (x >= len) {
        SCReturnInt(0);
    }

    uint8_t *line = data+x;
    size_t line_len = len-x;
    size_t offset = 0;
#ifdef PRINT
    printf("LINE START: \n");
    PrintRawDataFp(stdout, line, line_len);
    printf("LINE END: \n");
#endif
    for (x = 0 ; x < line_len; x++) {
        if (x > 0) {
            /* count unescaped quotes to know whether we are inside a quoted string */
            if (line[x - 1] != '\\' && line[x] == '\"') {
                quote++;
            }

            /* a token ends at an unescaped ';' or at the end of the line,
             * but only outside of a quoted string */
            if (((line[x - 1] != '\\' && line[x] == ';') || ((x + 1) == line_len)) && (quote == 0 || quote % 2 == 0)) {
                uint8_t *token = line + offset;
                size_t token_len = x - offset;

                /* at the end of the line the current char belongs to the token */
                if ((x + 1) == line_len) {
                    token_len++;
                }

                offset = x + 1;

                /* skip whitespace in front of the next token */
                while (offset < line_len && isspace(line[offset])) {
                    x++;
                    offset++;
                }
#ifdef PRINT
                printf("TOKEN START: \n");
                PrintRawDataFp(stdout, token, token_len);
                printf("TOKEN END: \n");
#endif
                if (token_len > name_len) {
                    if (name == NULL || SCMemcmpLowercase(name, token, name_len) == 0) {
                        uint8_t *value = token + name_len;
                        size_t value_len = token_len - name_len;

                        if (value[0] == '\"') {
                            value++;
                            value_len--;
                        }
                        /* value_len may be 0 here (value was a lone quote):
                         * do not read before the buffer nor wrap the length */
                        if (value_len > 0 && value[value_len-1] == '\"') {
                            value_len--;
                        }
#ifdef PRINT
                        printf("VALUE START: \n");
                        PrintRawDataFp(stdout, value, value_len);
                        printf("VALUE END: \n");
#endif
                        *retptr = value;
                        *retlen = value_len;
                        SCReturnInt(1);
                    }
                }
            }
        }
    }

    SCReturnInt(0);
}
/**
* \brief setup multipart parsing: extract boundary and store it
*
@ -1218,123 +1136,15 @@ static int HtpRequestBodySetupMultipart(htp_tx_t *tx, HtpTxUserData *htud)
htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers,
"Content-Type");
if (h != NULL && bstr_len(h->value) > 0) {
uint8_t *boundary = NULL;
size_t boundary_len = 0;
int r = HTTPParseContentTypeHeader((uint8_t *)"boundary=", 9,
(uint8_t *) bstr_ptr(h->value), bstr_len(h->value),
&boundary, &boundary_len);
if (r == 1) {
#ifdef PRINT
printf("BOUNDARY START: \n");
PrintRawDataFp(stdout, boundary, boundary_len);
printf("BOUNDARY END: \n");
#endif
if (boundary_len < HTP_BOUNDARY_MAX) {
htud->boundary = HTPMalloc(boundary_len);
if (htud->boundary == NULL) {
return -1;
}
htud->boundary_len = (uint8_t)boundary_len;
memcpy(htud->boundary, boundary, boundary_len);
htud->tsflags |= HTP_BOUNDARY_SET;
} else {
SCLogDebug("invalid boundary");
return -1;
}
htud->mime_state = rs_mime_state_init(bstr_ptr(h->value), bstr_len(h->value));
if (htud->mime_state) {
htud->tsflags |= HTP_BOUNDARY_SET;
SCReturnInt(1);
}
//SCReturnInt(1);
}
SCReturnInt(0);
}
#define C_D_HDR "content-disposition:"
#define C_D_HDR_LEN 20
#define C_T_HDR "content-type:"
#define C_T_HDR_LEN 13
/**
* \brief Parse the header block of one multipart part, line by line.
*
* Each "\r\n" separated line is checked: a line without ':' or with ':' as
* its last character raises HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER.
* A content-disposition line is searched for "filename=", a content-type
* line provides the file type.
*
* \param hstate HTTP state, used to set events
* \param htud tx user data, used to set events
* \param header start of the header block
* \param header_len length of the header block
* \param filename set to the filename found, or NULL
* \param filename_len set to the filename length, capped at USHRT_MAX
* \param filetype set to the content type found, or NULL
* \param filetype_len set to the content type length, capped at USHRT_MAX
*/
static void HtpRequestBodyMultipartParseHeader(HtpState *hstate,
HtpTxUserData *htud,
uint8_t *header, uint32_t header_len,
uint8_t **filename, uint16_t *filename_len,
uint8_t **filetype, uint16_t *filetype_len)
{
uint8_t *fn = NULL;
size_t fn_len = 0;
uint8_t *ft = NULL;
size_t ft_len = 0;
#ifdef PRINT
printf("HEADER START: \n");
PrintRawDataFp(stdout, header, header_len);
printf("HEADER END: \n");
#endif
while (header_len > 0) {
/* isolate the current line: up to the next "\r\n" or the end of the data */
uint8_t *next_line = Bs2bmSearch(header, header_len, (uint8_t *)"\r\n", 2);
uint8_t *line = header;
uint32_t line_len;
if (next_line == NULL) {
line_len = header_len;
} else {
line_len = next_line - header;
}
uint8_t *sc = (uint8_t *)memchr(line, ':', line_len);
if (sc == NULL) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
/* if the : we found is the final char, it means we have
* no value */
} else if (line_len > 0 && sc == &line[line_len - 1]) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
} else {
#ifdef PRINT
printf("LINE START: \n");
PrintRawDataFp(stdout, line, line_len);
printf("LINE END: \n");
#endif
if (line_len >= C_D_HDR_LEN &&
SCMemcmpLowercase(C_D_HDR, line, C_D_HDR_LEN) == 0) {
uint8_t *value = line + C_D_HDR_LEN;
uint32_t value_len = line_len - C_D_HDR_LEN;
/* parse content-disposition */
(void)HTTPParseContentDispositionHeader((uint8_t *)"filename=", 9,
value, value_len, &fn, &fn_len);
} else if (line_len >= C_T_HDR_LEN &&
SCMemcmpLowercase(C_T_HDR, line, C_T_HDR_LEN) == 0) {
SCLogDebug("content-type line");
uint8_t *value = line + C_T_HDR_LEN;
uint32_t value_len = line_len - C_T_HDR_LEN;
/* no name to match: take the first token of the content-type value */
(void)HTTPParseContentTypeHeader(NULL, 0,
value, value_len, &ft, &ft_len);
}
}
if (next_line == NULL) {
SCLogDebug("no next_line");
break;
}
/* move past the line and its "\r\n" */
header_len -= ((next_line + 2) - header);
header = next_line + 2;
} /* while (header_len > 0) */
/* the output lengths are 16 bit wide: cap before narrowing */
if (fn_len > USHRT_MAX)
fn_len = USHRT_MAX;
if (ft_len > USHRT_MAX)
ft_len = USHRT_MAX;
*filename = fn;
*filename_len = (uint16_t)fn_len;
*filetype = ft;
*filetype_len = (uint16_t)ft_len;
}
/**
* \brief Create a single buffer from the HtpBodyChunks in our list
*
@ -1364,336 +1174,104 @@ static void FlagDetectStateNewFile(HtpTxUserData *tx, int dir)
}
}
/**
* \brief Setup boundary buffers
*
* The output buffer is first filled with '-' and the stored boundary is then
* copied at offset 2, giving "--<boundary>" followed by dashes up to
* boundary_len.
*
* \param htud tx user data holding the boundary and its length
* \param boundary output buffer, written for boundary_len bytes
* \param boundary_len size of the output buffer; it has to hold
*                     htud->boundary_len + 2 bytes at least (the visible
*                     caller passes htud->boundary_len + 4)
*/
static void HtpRequestBodySetupBoundary(HtpTxUserData *htud,
uint8_t *boundary, uint32_t boundary_len)
{
memset(boundary, '-', boundary_len);
memcpy(boundary + 2, htud->boundary, htud->boundary_len);
}
static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, void *tx,
const uint8_t *chunks_buffer, uint32_t chunks_buffer_len)
const uint8_t *chunks_buffer, uint32_t chunks_buffer_len, bool eof)
{
int result = 0;
uint8_t boundary[htud->boundary_len + 4]; /**< size limited to HTP_BOUNDARY_MAX + 4 */
uint16_t expected_boundary_len = htud->boundary_len + 2;
uint16_t expected_boundary_end_len = htud->boundary_len + 4;
int tx_progress = 0;
#ifdef PRINT
printf("CHUNK START: \n");
PrintRawDataFp(stdout, chunks_buffer, chunks_buffer_len);
printf("CHUNK END: \n");
#endif
HtpRequestBodySetupBoundary(htud, boundary, htud->boundary_len + 4);
/* search for the header start, header end and form end */
const uint8_t *header_start = Bs2bmSearch(chunks_buffer, chunks_buffer_len,
boundary, expected_boundary_len);
/* end of the multipart form */
const uint8_t *form_end = NULL;
/* end marker belonging to header_start */
const uint8_t *header_end = NULL;
if (header_start != NULL) {
header_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer),
(uint8_t *)"\r\n\r\n", 4);
form_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer),
boundary, expected_boundary_end_len);
}
SCLogDebug("header_start %p, header_end %p, form_end %p", header_start,
header_end, form_end);
/* we currently only handle multipart for ts. When we support it for tc,
* we will need to supply right direction */
tx_progress = AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, STREAM_TOSERVER);
/* if we're in the file storage process, deal with that now */
if (htud->tsflags & HTP_FILENAME_SET) {
if (header_start != NULL || (tx_progress > HTP_REQUEST_BODY)) {
SCLogDebug("reached the end of the file");
const uint8_t *filedata = chunks_buffer;
uint32_t filedata_len = 0;
uint8_t flags = 0;
if (header_start != NULL) {
if (header_start == filedata + 2) {
/* last chunk had all data, but not the boundary */
SCLogDebug("last chunk had all data, but not the boundary");
filedata_len = 0;
} else if (header_start > filedata + 2) {
SCLogDebug("some data from last file before the boundary");
/* some data from last file before the boundary */
filedata_len = header_start - filedata - 2;
}
}
/* body parsing done, we did not get our form end. Use all data
* we still have and signal to files API we have an issue. */
if (tx_progress > HTP_REQUEST_BODY) {
filedata_len = chunks_buffer_len;
flags = FILE_TRUNCATED;
}
if (filedata_len > chunks_buffer_len) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
goto end;
}
#ifdef PRINT
printf("FILEDATA (final chunk) START: \n");
PrintRawDataFp(stdout, filedata, filedata_len);
printf("FILEDATA (final chunk) END: \n");
#endif
if (!(htud->tsflags & HTP_DONTSTORE)) {
if (HTPFileClose(htud, filedata, filedata_len, flags, STREAM_TOSERVER) == -1) {
goto end;
}
}
htud->tsflags &=~ HTP_FILENAME_SET;
/* fall through */
} else {
SCLogDebug("not yet at the end of the file");
if (chunks_buffer_len > expected_boundary_end_len) {
const uint8_t *filedata = chunks_buffer;
uint32_t filedata_len = chunks_buffer_len - expected_boundary_len;
for (; filedata_len < chunks_buffer_len; filedata_len++) {
// take as much as we can until the beginning of a new line
if (chunks_buffer[filedata_len] == '\r') {
if (filedata_len + 1 == chunks_buffer_len ||
chunks_buffer[filedata_len + 1] == '\n') {
break;
}
}
}
#ifdef PRINT
printf("FILEDATA (part) START: \n");
PrintRawDataFp(stdout, filedata, filedata_len);
printf("FILEDATA (part) END: \n");
#endif
if (!(htud->tsflags & HTP_DONTSTORE)) {
result = HTPFileStoreChunk(htud, filedata, filedata_len, STREAM_TOSERVER);
if (result == -1) {
goto end;
} else if (result == -2) {
/* we know for sure we're not storing the file */
htud->tsflags |= HTP_DONTSTORE;
}
}
// libhtp will not call us back too late
// should libhtp send a callback eof for 0 chunked ?
DEBUG_VALIDATE_BUG_ON(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx,
STREAM_TOSERVER) >= HTP_REQUEST_COMPLETE);
htud->request_body.body_parsed += filedata_len;
} else {
SCLogDebug("chunk too small to already process in part");
}
const uint8_t *cur_buf = chunks_buffer;
uint32_t cur_buf_len = chunks_buffer_len;
goto end;
if (eof) {
// abrupt end of connection
if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
/* we currently only handle multipart for ts. When we support it for tc,
* we will need to supply right direction */
HTPFileClose(htud, cur_buf, cur_buf_len, FILE_TRUNCATED, STREAM_TOSERVER);
}
htud->tsflags &= ~HTP_FILENAME_SET;
goto end;
}
while (header_start != NULL && header_end != NULL &&
header_end != form_end &&
header_start < (chunks_buffer + chunks_buffer_len) &&
header_end < (chunks_buffer + chunks_buffer_len) &&
header_start < header_end)
{
uint8_t *filename = NULL;
uint16_t filename_len = 0;
uint8_t *filetype = NULL;
uint16_t filetype_len = 0;
uint32_t header_len = header_end - header_start;
SCLogDebug("header_len %u", header_len);
uint8_t *header = (uint8_t *)header_start;
/* skip empty records */
if (expected_boundary_len == header_len) {
goto next;
} else if ((uint32_t)(expected_boundary_len + 2) <= header_len) {
header_len -= (expected_boundary_len + 2);
header = (uint8_t *)header_start + (expected_boundary_len + 2); // + for 0d 0a
uint32_t consumed;
uint32_t warnings;
int result = 0;
const uint8_t *filename = NULL;
uint16_t filename_len = 0;
// keep parsing mime and use callbacks when needed
while (cur_buf_len > 0) {
MimeParserResult r =
rs_mime_parse(htud->mime_state, cur_buf, cur_buf_len, &consumed, &warnings);
DEBUG_VALIDATE_BUG_ON(consumed > cur_buf_len);
htud->request_body.body_parsed += consumed;
if (warnings) {
if (warnings & MIME_EVENT_FLAG_INVALID_HEADER) {
HTPSetEvent(
hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
}
if (warnings & MIME_EVENT_FLAG_NO_FILEDATA) {
HTPSetEvent(
hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA);
}
}
HtpRequestBodyMultipartParseHeader(hstate, htud, header, header_len,
&filename, &filename_len, &filetype, &filetype_len);
if (filename != NULL) {
const uint8_t *filedata = NULL;
uint32_t filedata_len = 0;
SCLogDebug("we have a filename");
htud->tsflags |= HTP_FILENAME_SET;
htud->tsflags &= ~HTP_DONTSTORE;
SCLogDebug("header_end %p", header_end);
SCLogDebug("form_end %p", form_end);
/* everything until the final boundary is the file */
if (form_end != NULL) {
SCLogDebug("have form_end");
filedata = header_end + 4;
if (form_end == filedata) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA);
goto end;
} else if (form_end < filedata) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
goto end;
}
filedata_len = form_end - (header_end + 4 + 2);
SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len);
/* or is it? */
uint8_t *header_next = Bs2bmSearch(filedata, filedata_len,
boundary, expected_boundary_len);
if (header_next != NULL) {
filedata_len -= (form_end - header_next);
}
if (filedata_len > chunks_buffer_len) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
goto end;
}
SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len);
#ifdef PRINT
printf("FILEDATA START: \n");
PrintRawDataFp(stdout, filedata, filedata_len);
printf("FILEDATA END: \n");
#endif
result = HTPFileOpen(hstate, htud, filename, filename_len, filedata, filedata_len,
STREAM_TOSERVER);
if (result == -1) {
goto end;
} else if (result == -2) {
htud->tsflags |= HTP_DONTSTORE;
} else {
if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) {
goto end;
}
}
FlagDetectStateNewFile(htud, STREAM_TOSERVER);
htud->request_body.body_parsed += (header_end - chunks_buffer);
htud->tsflags &= ~HTP_FILENAME_SET;
} else {
SCLogDebug("chunk doesn't contain form end");
filedata = header_end + 4;
filedata_len = chunks_buffer_len - (filedata - chunks_buffer);
SCLogDebug("filedata_len %u (chunks_buffer_len %u)", filedata_len, chunks_buffer_len);
if (filedata_len > chunks_buffer_len) {
HTPSetEvent(hstate, htud, STREAM_TOSERVER,
HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
goto end;
}
#ifdef PRINT
printf("FILEDATA START: \n");
PrintRawDataFp(stdout, filedata, filedata_len);
printf("FILEDATA END: \n");
#endif
/* form doesn't end in this chunk, but the part might. Lets
* see if have another coming up */
uint8_t *header_next = Bs2bmSearch(filedata, filedata_len,
boundary, expected_boundary_len);
SCLogDebug("header_next %p", header_next);
if (header_next == NULL) {
SCLogDebug("more file data to come");
uint32_t offset = (header_end + 4) - chunks_buffer;
SCLogDebug("offset %u", offset);
htud->request_body.body_parsed += offset;
if (filedata_len >= (uint32_t)(expected_boundary_len + 2)) {
filedata_len -= (expected_boundary_len + 2 - 1);
// take as much as we can until start of boundary
for (size_t nb = 0; nb < (size_t)expected_boundary_len + 1; nb++) {
if (filedata[filedata_len] == '\r') {
if (nb == expected_boundary_len ||
filedata[filedata_len + 1] == '\n') {
break;
}
}
filedata_len++;
}
SCLogDebug("opening file with partial data");
} else {
filedata = NULL;
filedata_len = 0;
}
result = HTPFileOpen(hstate, htud, filename, filename_len, filedata,
filedata_len, STREAM_TOSERVER);
switch (r) {
case MimeNeedsMore:
// there is not enough data, wait for more next time
goto end;
case MimeFileOpen:
// get filename owned by mime state
rs_mime_state_get_filename(htud->mime_state, &filename, &filename_len);
if (filename_len > 0) {
htud->tsflags |= HTP_FILENAME_SET;
htud->tsflags &= ~HTP_DONTSTORE;
result = HTPFileOpen(
hstate, htud, filename, filename_len, NULL, 0, STREAM_TOSERVER);
if (result == -1) {
goto end;
} else if (result == -2) {
htud->tsflags |= HTP_DONTSTORE;
}
FlagDetectStateNewFile(htud, STREAM_TOSERVER);
htud->request_body.body_parsed += filedata_len;
SCLogDebug("htud->request_body.body_parsed %"PRIu64, htud->request_body.body_parsed);
} else if (header_next - filedata > 2) {
filedata_len = header_next - filedata - 2;
SCLogDebug("filedata_len %u", filedata_len);
result = HTPFileOpen(hstate, htud, filename, filename_len, filedata,
filedata_len, STREAM_TOSERVER);
}
break;
case MimeFileChunk:
if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
result = HTPFileStoreChunk(htud, cur_buf, consumed, STREAM_TOSERVER);
if (result == -1) {
goto end;
} else if (result == -2) {
/* we know for sure we're not storing the file */
htud->tsflags |= HTP_DONTSTORE;
} else {
if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) {
goto end;
}
}
break;
case MimeFileClose:
if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
uint32_t lastsize = consumed;
if (lastsize > 0 && cur_buf[lastsize - 1] == '\n') {
lastsize--;
if (lastsize > 0 && cur_buf[lastsize - 1] == '\r') {
lastsize--;
}
}
FlagDetectStateNewFile(htud, STREAM_TOSERVER);
htud->tsflags &= ~HTP_FILENAME_SET;
htud->request_body.body_parsed += (header_end - chunks_buffer);
HTPFileClose(htud, cur_buf, lastsize, 0, STREAM_TOSERVER);
}
}
}
next:
SCLogDebug("header_start %p, header_end %p, form_end %p",
header_start, header_end, form_end);
/* Search next boundary entry after the start of body */
uint32_t cursizeread = header_end - chunks_buffer;
header_start = Bs2bmSearch(header_end + 4,
chunks_buffer_len - (cursizeread + 4),
boundary, expected_boundary_len);
if (header_start != NULL) {
header_end = Bs2bmSearch(header_end + 4,
chunks_buffer_len - (cursizeread + 4),
(uint8_t *) "\r\n\r\n", 4);
}
}
/* if we're parsing the multipart and we're not currently processing a
* file, we move the body pointer forward. */
if (form_end == NULL && !(htud->tsflags & HTP_FILENAME_SET) && header_start == NULL) {
if (chunks_buffer_len > expected_boundary_end_len) {
uint32_t move = chunks_buffer_len - expected_boundary_end_len + 1;
htud->request_body.body_parsed += move;
SCLogDebug("form not ready, file not set, parsing non-file "
"record: moved %u", move);
htud->tsflags &= ~HTP_FILENAME_SET;
break;
// TODO event on parsing error ?
}
cur_buf += consumed;
cur_buf_len -= consumed;
}
end:
@ -1931,7 +1509,8 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
printf("REASSCHUNK END: \n");
#endif
HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len);
HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len,
(d->data == NULL && d->len == 0));
} else if (tx_ud->request_body_type == HTP_BODY_REQUEST_POST ||
tx_ud->request_body_type == HTP_BODY_REQUEST_PUT) {
@ -6030,7 +5609,11 @@ static int HTPBodyReassemblyTest01(void)
printf("REASSCHUNK END: \n");
#endif
HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len);
htud.mime_state = rs_mime_state_init((const uint8_t *)"multipart/form-data; boundary=toto",
strlen("multipart/form-data; boundary=toto"));
FAIL_IF_NULL(htud.mime_state);
htud.tsflags |= HTP_BOUNDARY_SET;
HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len, false);
if (htud.request_body.content_len_so_far != 669) {
printf("htud.request_body.content_len_so_far %"PRIu64": ", htud.request_body.content_len_so_far);

@ -58,9 +58,6 @@
#define HTP_CONFIG_DEFAULT_RANDOMIZE 1
#define HTP_CONFIG_DEFAULT_RANDOMIZE_RANGE 10
/** a boundary should be smaller in size */
#define HTP_BOUNDARY_MAX 200U
// 0x0001 not used
#define HTP_FLAG_STATE_CLOSED_TS 0x0002 /**< Flag to indicate that HTTP
connection is closed */
@ -212,8 +209,6 @@ typedef struct HtpTxUserData_ {
uint8_t request_has_trailers;
uint8_t response_has_trailers;
uint8_t boundary_len;
uint8_t tsflags;
uint8_t tcflags;
@ -229,10 +224,7 @@ typedef struct HtpTxUserData_ {
uint32_t request_headers_raw_len;
uint32_t response_headers_raw_len;
/** Holds the boundary identification string if any (used on
* multipart/form-data only)
*/
uint8_t *boundary;
MimeStateHTTP *mime_state;
HttpRangeContainerBlock *file_range; /**< used to assign track ids to range file */

Loading…
Cancel
Save