import of integrated htp lib and small libnet fixes

remotes/origin/master-1.0.x
William Metcalf 17 years ago committed by Victor Julien
parent 4768e42159
commit f7111f3847

@ -5,4 +5,8 @@ ACLOCAL_AMFLAGS = -I m4
EXTRA_DIST = ChangeLog COPYING LICENSE suricata.yaml \
doc/AUTHORS doc/GITGUIDE doc/INSTALL doc/NEWS \
doc/README doc/TODO
SUBDIRS = src
if BUILD_LIBHTP
HTP_DIR = htp
endif
SUBDIRS = $(HTP_DIR) src

@ -5,4 +5,6 @@ libtoolize --force --automake --copy
# Regenerate the configuration header template.
autoheader
# Generate the Makefile.in files, copying in any missing auxiliary files.
automake --add-missing --copy
# Generate the top-level configure script.
autoconf
# Bootstrap the bundled htp/ tree as well.
# Run in a subshell so the caller's working directory is never changed, and
# chain with && so autoreconf is not run in the wrong directory when htp/
# is missing (previously a failed `cd htp/` was ignored and `cd ..` then
# stepped out of the source tree).
(cd htp/ && autoreconf -i --force)

@ -412,6 +412,9 @@ AC_INIT(configure.in)
if test "x$with_libnet_includes" != "xno"; then
CPPFLAGS="${CPPFLAGS} -I${with_libnet_includes}"
libnet_dir="${with_libnet_includes}"
else
libnet_dir="/usr/include /usr/local/include /usr/local/include/libnet11 /opt/local/include"
fi
if test "x$with_libnet_libraries" != "xno"; then
@ -420,7 +423,6 @@ AC_INIT(configure.in)
LIBNET_INC_DIR=""
AC_MSG_CHECKING("for libnet.h version 1.1.x")
libnet_dir="/usr/include /usr/local/include /usr/local/include/libnet11 /opt/local/include"
for i in $libnet_dir; do
if test -r "$i/libnet.h"; then
LIBNET_INC_DIR="$i"
@ -429,7 +431,7 @@ AC_INIT(configure.in)
if test "$LIBNET_INC_DIR" != ""; then
if eval "grep LIBNET_VERSION $LIBNET_INC_DIR/libnet.h | grep -v 1.1 >/dev/null"; then
FAIL_MESSAGE("libnet 1.1.x (libnet.h)", $tmp)
FAIL_MESSAGE("libnet 1.1.x (libnet.h)", $libnet_dir)
fi
#CentOS, Fedora, Ubuntu-LTS, Ubuntu all set defines to the same values. libnet-config seems
@ -439,7 +441,7 @@ AC_INIT(configure.in)
if test "$LLIBNET" != "no"; then
CFLAGS="${CFLAGS} -D_BSD_SOURCE -D__BSD_SOURCE -D__FAVOR_BSD -DHAVE_NET_ETHERNET_H"
fi
AC_MSG_RESULT($i)
#AC_MSG_RESULT($i)
else
AC_MSG_RESULT(no)
AC_MSG_ERROR("libnet 1.1.x could not be found. please download and install the library from http://sourceforge.net/projects/libnet-dev/")
@ -554,6 +556,11 @@ AC_CHECK_HEADER(pcap.h,,[AC_ERROR(pcap.h not found ...)])
fi
#libhtp
AC_ARG_ENABLE(nonbundled-htp,
[ --enable-nonbundled-htp Enable the use of an already installed version of htp],
[enable_non_bundled_htp=yes],[enable_non_bundled_htp=no]
)
if test "$enable_non_bundled_htp" = "yes"; then
AC_ARG_WITH(libhtp_includes,
[ --with-libhtp-includes=DIR libhtp include directory],
[with_libhtp_includes="$withval"],[with_libhtp_includes=no])
@ -580,7 +587,21 @@ AC_CHECK_HEADER(pcap.h,,[AC_ERROR(pcap.h not found ...)])
exit 1
fi
#LDFLAGS="${LDFLAGS} -lhtp"
#we did not specify non-bundled-htp so use the built-in.
else
if test -d "htp"; then
echo "Going to try and build the bundled htp in htp/"
else
echo
echo " ERROR! htp/ dir not found in source"
echo
exit 1
fi
fi
#even if we are using an installed htp lib we still need to gen Makefiles inside of htp
AC_CONFIG_SUBDIRS([htp])
AM_CONDITIONAL([BUILD_LIBHTP], [test "$enable_non_bundled_htp" = "no"])
# enable CUDA output
AC_ARG_ENABLE(cuda,
@ -624,5 +645,5 @@ AC_SUBST(CFLAGS)
AC_SUBST(LDFLAGS)
AC_SUBST(CPPFLAGS)
AC_OUTPUT(Makefile src/Makefile)
AC_CONFIG_FILES([htp/Makefile htp/htp/Makefile Makefile src/Makefile])
AC_OUTPUT

@ -0,0 +1 @@
Ivan Ristic <ivanr@webkreator.com>

@ -0,0 +1,13 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/

@ -0,0 +1,237 @@
Installation Instructions
*************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
2006, 2007 Free Software Foundation, Inc.
This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it.
Basic Installation
==================
Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions. Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').
It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring. Caching is
disabled by default to prevent problems with accidental use of stale
cache files.
If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release. If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.
The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'. You need `configure.ac' if
you want to change it or regenerate `configure' using a newer version
of `autoconf'.
The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system.
Running `configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with
the package.
4. Type `make install' to install the programs and any data files and
documentation.
5. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is
also a `make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
6. Often, you can also type `make uninstall' to remove the installed
files again.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that the
`configure' script does not know about. Run `./configure --help' for
details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU `make'. `cd' to the
directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'.
With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use `make distclean' before
reconfiguring for another architecture.
Installation Names
==================
By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc. You
can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX'.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Optional Features
=================
Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.
For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.
Specifying the System Type
==========================
There may be some features `configure' cannot figure out automatically,
but needs to determine by the type of machine the package will run on.
Usually, assuming the package is built to be run on the _same_
architectures, `configure' can figure that out, but if it prints a
message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS KERNEL-OS
See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option `--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share, you
can create a site shell script called `config.site' that gives default
values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf bug. Until the bug is fixed you can use this workaround:
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
`configure' Invocation
======================
`configure' recognizes the following options to control how it operates.
`--help'
`-h'
Print a summary of the options to `configure', and exit.
`--version'
`-V'
Print the version of Autoconf used to generate the `configure'
script, and exit.
`--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally `config.cache'. FILE defaults to `/dev/null' to
disable caching.
`--config-cache'
`-C'
Alias for `--cache-file=config.cache'.
`--quiet'
`--silent'
`-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to `/dev/null' (any error
messages will still be shown).
`--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
`configure' can determine that directory automatically.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

@ -0,0 +1,120 @@
LIBHTP LICENSING EXCEPTION
===============================
Version 1.0, 6 January 2010
As a special exception ("Exception") to the terms and conditions of version 2
of the GPL, Ivan Ristic hereby grants you the rights described below, provided
you agree to the terms and conditions in this Exception, including its
obligations and restrictions on use.
Exception Intent
================
We want specified Free/Libre and Open Source Software ("FLOSS") programs to be
able to use LibHTP (the "Program") despite the fact that not all FLOSS
licenses are compatible with version 2 of the GNU General Public License (the
"GPLv2").
Legal Terms and Conditions
==========================
You are free to distribute a Derivative Work that is formed entirely from the
Program and one or more works (each, a "FLOSS Work") licensed under one or
more of the licenses listed below in section 1, as long as all of the
following conditions are met:
1. You obey the GPLv2 in all respects for the Program and the Derivative
Work, except for identifiable sections of the Derivative Work which are
1. not derived from the Program, and
2. are not designed to interact with the Program, and
3. which can reasonably be considered independent and separate works in
themselves.
2. All such identifiable sections of the Derivative Work are
1. distributed subject to one of the FLOSS licenses listed below, and
2. the object code or executable form of those sections are accompanied
by the complete corresponding machine-readable source code for those
sections on the same medium and under the same FLOSS license as the
corresponding object code or executable forms of those sections.
3. Any works which are aggregated with the Program or with a Derivative Work
on a volume of a storage or distribution medium in accordance with the
GPLv2, can reasonably be considered independent and separate works in
themselves which are not derivatives of either the Program, a Derivative
Work or a FLOSS Work, and are not designed to interact with the Program.
If the above conditions are not met, then the Program may only be copied,
modified, distributed or used under the terms and conditions of the GPLv2
or another valid licensing option from Ivan Ristic.
FLOSS License List
==================
License name Version(s)/Copyright Date
-----------------------------------------------------------------------
Academic Free License 2.0
Apache Software License 1.0/1.1/2.0
Apple Public Source License 2.0
Artistic license From Perl 5.8.0
BSD license "July 22 1999"
Common Development and Distribution License (CDDL) 1.0
Common Public License 1.0
Eclipse Public License 1.0
GNU Library or "Lesser" General Public License (LGPL) 2.0/2.1/3.0
Jabber Open Source License 1.0
MIT License (As listed in file MIT-License.txt) -
Mozilla Public License (MPL) 1.0/1.1
Open Software License 2.0
OpenSSL license (with original SSLeay license) "2003" ("1998")
PHP License 3.0
Python license (CNRI Python License) -
Python Software Foundation License 2.1.1
Sleepycat License "1999"
University of Illinois/NCSA Open Source License -
W3C License "2001"
X11 License "2001"
Zlib/libpng License -
Zope Public License 2.0
Due to the many variants of some of the above licenses, we require that for
any version of the listed FLOSS licenses to qualify under this exception, it
must follow the 2003 version of the Free Software Foundation's Free Software
Definition (http://www.gnu.org/philosophy/free-sw.html) or version 1.9 of the
Open Source Definition by the Open Source Initiative
(http://www.opensource.org/docs/definition.php).
Definitions
===========
1. Terms used, but not defined, herein shall have the meaning provided in the
version 2 of the GPL.
2. Derivative Work means a derivative work under copyright law.
Applicability
=============
This Exception applies to all Programs that contain a notice placed by Ivan
Ristic saying that the Program may be distributed under the terms of
this Exception. If you create or distribute a work which is a Derivative Work
of both the Program and any other work licensed under the GPL, then this FLOSS
Exception is not available for that work; thus, you must remove the FLOSS
Exception notice from that work and comply with the GPL in all respects,
including by retaining all GPL notices.
You may choose to redistribute a copy of the Program exclusively under the
terms of the GPLv2 by removing the Exception notice from that copy of the
Program, provided that the copy has never been modified by you or any third
party.

@ -0,0 +1,281 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS

@ -0,0 +1,8 @@
# Top-level Automake input for the LibHTP tree.
ACLOCAL_AMFLAGS = -I m4

# GENERIC_LIBRARY_NAME is substituted by configure, so Automake cannot derive
# DIST_SUBDIRS from SUBDIRS by itself and it has to be spelled out here.
# Every directory that SUBDIRS recurses into (and that configure generates a
# Makefile for) must also be listed in DIST_SUBDIRS; otherwise "make dist"
# omits test/ and "make distclean" never cleans it.
SUBDIRS = $(GENERIC_LIBRARY_NAME) test
DIST_SUBDIRS = $(GENERIC_LIBRARY_NAME) test

# Install the pkg-config metadata next to the library.
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = htp.pc

@ -0,0 +1,103 @@
LibHTP (http://www.libhtp.org)
Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
======================================================
LibHTP is a security-aware parser for the HTTP protocol and the related bits
and pieces. The goals of the project, in the order of importance, are as
follows:
1. Completeness of coverage; LibHTP must be able to parse virtually all
traffic that is found in practice.
2. Permissive parsing; LibHTP must never fail to parse a stream that would
be parsed by some other web server.
3. Awareness of evasion techniques; LibHTP must be able to detect and
effectively deal with various evasion techniques, producing, where
practical, identical or practically identical results as the web
server processing the same traffic stream.
4. Performance; The performance must be adequate for the desired tasks.
Completeness and security are often detrimental to performance. Our
idea of handling the conflicting requirements is to put the library
user in control, allowing him to choose the most desired library
characteristic.
| IMPORTANT LIBHTP IS NOT YET CONSIDERED STABLE. USE AT YOUR OWN RISK. DO NOT
| USE IN PRODUCTION. WORK IS CURRENTLY UNDER WAY TO ENSURE THAT
| LIBHTP IS SECURE AND THAT IT PERFORMS WELL.
| STATUS LIBHTP IS VERY YOUNG AT THIS POINT. IT WILL BE SOME TIME BEFORE
| IT CAN BE CONSIDERED COMPLETE. AT THE MOMENT, THE FOCUS OF DEVELOPMENT
| IS ON ACHIEVING THE FIRST TWO GOALS.
LibHTP is an open source product, released under terms of the General Public Licence
version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
of the license.
In addition, there is a special exception that allows LibHTP to be freely
used with any OSI-approved open source licence. Please refer to the file
LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
INSTALLATION
------------
The installation process should be as simple as:
$ ./configure
$ make
$ sudo make install
| NOTE If you already have an early 0.2.x version installed, you must
| uninstall it before proceeding. Initially /usr was used for the
| installation, but /usr/local is used now. If you forgot to uninstall,
| clean all traces of LibHTP from /usr/lib/libhtp* and /usr/include/htp/*.
If you want to use a repository version of LibHTP, do the following:
1. Use svn export to retrieve the sources of the version you wish to use
2. Run ./update_version, which will update htp.c with the latest Subversion
revision used
3. You may wish to also update htp.pc.in and configure.ac with the correct version
4. Run autoreconf -i --force, which will prepare the library for installation
5. Run doxygen to generate the API documentation
6. Continue to install as described above
DOCUMENTATION
-------------
The best documentation at this time is the code itself and the Doxygen output (which
should be all right). There's also a quick start guide in the doc/ folder, which
should give you enough information to get going.
NO WARRANTY
-----------
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

@ -0,0 +1,98 @@
dnl ----------------------
dnl Initialization macros
dnl ----------------------
dnl AC_INIT is given a source file that must exist in the tree; the package
dnl name and version are supplied later through AM_INIT_AUTOMAKE.
AC_INIT(htp/htp.h)
AM_CONFIG_HEADER(config.h)
dnl -----------------------------------------------
dnl Package name and version number (user defined)
dnl -----------------------------------------------
GENERIC_LIBRARY_NAME=htp
GENERIC_MAJOR_VERSION=0
GENERIC_MINOR_VERSION=3
dnl NOTE(review): "X" looks like a placeholder for the micro version; it ends
dnl up verbatim in GENERIC_VERSION below - confirm this is intended.
GENERIC_MICRO_VERSION=X
# API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
# NOTE(review): currently 1.0 while the package version is 0.3 - confirm.
GENERIC_API_VERSION=1.0
AC_SUBST(GENERIC_API_VERSION)
# Shared library versioning (libtool -version-info triple)
GENERIC_LIBRARY_VERSION=1:2:0
#                       | | |
#                +------+ | +---+
#                |        |     |
#             current:revision:age
#                |        |     |
#                |        |     +- increment if interfaces have been added
#                |        |        set to zero if interfaces have been removed
#                |        |        or changed
#                |        +- increment if source code has changed
#                |           set to zero if current is incremented
#                +- increment if interfaces have been added, removed or changed
dnl --------------------------------
dnl Package name and version number
dnl --------------------------------
AC_SUBST(GENERIC_LIBRARY_VERSION)
PACKAGE=$GENERIC_LIBRARY_NAME
AC_SUBST(GENERIC_LIBRARY_NAME)
dnl GENERIC_VERSION is major.minor.micro; GENERIC_RELEASE is major.minor only.
GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION
GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
AC_SUBST(GENERIC_RELEASE)
AC_SUBST(GENERIC_VERSION)
VERSION=$GENERIC_VERSION
dnl The third argument (no-define) asks Automake not to AC_DEFINE PACKAGE and
dnl VERSION.
AM_INIT_AUTOMAKE($PACKAGE, $VERSION, no-define)
AC_CONFIG_MACRO_DIR([m4])
dnl --enable-htp-debug adds -DHTP_DEBUG to CFLAGS.
dnl The action-if-given must honour $enableval: it is "no" for
dnl --disable-htp-debug and --enable-htp-debug=no, and forcing "yes" there
dnl would switch debugging on when the user asked for it to be off.
AC_ARG_ENABLE(htp-debug,
    [  --enable-htp-debug      Enable debug output],
    [ enable_htp_debug="$enableval" ],
    [ enable_htp_debug=no ])
if test "x$enable_htp_debug" = "xyes"; then
    CFLAGS="${CFLAGS} -DHTP_DEBUG"
    echo "Debug mode enabled"
fi
dnl -----------------------------------------------
dnl Checks for programs.
dnl -----------------------------------------------
dnl C compiler, libtool support, and a sanity check of the build environment.
AC_PROG_CC
AM_PROG_LIBTOOL
AM_SANITY_CHECK
dnl -----------------------------------------------
dnl Checks for libs.
dnl -----------------------------------------------
dnl zlib is mandatory: both the header and the library must be present.
dnl AC_MSG_ERROR (the replacement for the obsolete AC_ERROR) prints the
dnl message, records it in config.log and aborts configure with status 1.
AC_CHECK_HEADER(zlib.h,,[AC_MSG_ERROR([zlib.h not found ...])])
dnl The action-if-found is left empty on purpose so that the default action
dnl (prepend -lz to LIBS and define HAVE_LIBZ) still takes place.
AC_CHECK_LIB(z, inflate,, [AC_MSG_ERROR([zlib library not found])])
dnl -----------------------------------------------
dnl Generates Makefile's, configuration files and scripts
dnl -----------------------------------------------
dnl Install under /usr/local unless --prefix is given.
AC_PREFIX_DEFAULT(/usr/local)
dnl Each file listed here is generated from a matching .in template.
AC_OUTPUT(Makefile \
htp.pc \
htp/Makefile \
test/Makefile
)

@ -0,0 +1,102 @@
QUICK START
-----------
LibHTP is envisioned to be many things, but the only scenario in which it has been tested
so far is that when you need to parse a duplex HTTP stream which you have obtained by
passively intercepting a communication channel. The assumption is that you have raw TCP data
(after SSL, if SSL is used).
Every parsing operation needs to follow these steps:
1. Configure-time:
1.1. Create one or more parser configuration structures.
1.2. Tweak the configuration of each parser to match the behaviour of
the server you're intercepting the communication of (htp_config_set_* functions).
1.3. Register the parser callbacks you'll need. You will need to use parser callbacks
if you want to monitor parsing events as they occur, and gain access to partial
transaction information. If you are processing data in batch (off-line) you may
simply parse entire streams at a time and only analyze complete transaction data
after the fact.
If you need to gain access to request and response bodies, your only option at
this time is to use the callbacks, because the parser will not preserve that
information.
For callback registration, look up the htp_config_register_* functions.
If your program operates in real-time then it may be desirable to dispose of
the used resources after each transaction is parsed. To do that, you are allowed
to call htp_tx_destroy() at the end of the RESPONSE callback.
2. Run-time:
2.1. Create a parser instance for every TCP stream you want to process.
2.2. Feed the parser inbound and outbound data.
The parser will typically always consume complete data chunks and return
STREAM_STATE_DATA, which means that you can continue to feed it more data
when you have it. If you have a queue of data chunks, always send the
parser all the request chunks you have. That will ensure that the parser
never encounters a response for which it had not seen a request.
If you get STREAM_STATE_ERROR, the parser has encountered a fatal error and
is unable to continue to parse the stream. An error should never happen for
a valid HTTP stream. If you encounter such an error please send me the pcap
file for analysis.
There is one situation when the parser will not be able to consume a complete
request data chunk, in which case it will return STREAM_STATE_DATA_OTHER. You
will then need to do the following:
2.2.1. Remember how many bytes of data were consumed (using
htp_connp_req_data_consumed()).
2.2.2. Suspend request parsing until you get some response data.
2.2.3. Feed some response data to the parser.
Note that it is now possible to receive STREAM_STATE_DATA_OTHER
from the response parser. If that happens, you will need to
remember how many bytes were consumed using
htp_connp_res_data_consumed().
2.2.4. After each chunk of response data fed to the parser, attempt
to resume request stream parsing.
2.2.5. If you again receive STREAM_STATE_DATA_OTHER go back to 2.2.3.
2.2.6. At this point you should feed the parser all the request data
you have accumulated before giving it any response data. This is
necessary to prevent the case of the parser seeing more responses
than requests (which would inevitably result in an error).
2.2.7. Send unprocessed response data from 2.2.3 (if any).
2.2.8. Continue sending request/response data as normal.
The above situation should occur very rarely.
2.3. Analyze transaction data in callbacks (if any).
2.4. Analyze transaction data after an entire TCP stream has been processed.
2.5. Destroy parser instance to free up the allocated resources.
USER DATA
---------
If you're using the callbacks and you need to keep state between invocations, you have two
options:
1. Associate one opaque structure with a parser instance, using htp_connp_set_user_data().
2. Associate one opaque structure with a transaction instance, using htp_tx_set_user_data().
The best place to do this is in a TRANSACTION_START callback. Don't forget to free up
any resources you allocate on per-transaction basis, before you delete each transaction.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,11 @@
# pkg-config metadata for LibHTP.
# NOTE(review): prefix is hard-coded to /usr here, while configure defaults to
# /usr/local (AC_PREFIX_DEFAULT) and also lists htp.pc as a generated output -
# confirm whether this file should use configure substitutions instead.
prefix=/usr
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include
Name: HTP
Description: HTTP parser
Version: 0.3.X
Libs: -L${libdir} -lhtp
# Headers are installed into ${includedir}/htp by htp/Makefile.am.
# NOTE(review): the second -I path (${libdir}/htp/include) is not populated by
# anything visible in this tree - confirm it is still needed.
Cflags: -I${includedir}/htp -I${libdir}/htp/include

@ -0,0 +1,14 @@
# Public headers; installed into $(includedir)/$(GENERIC_LIBRARY_NAME).
h_sources = \
	bstr.h \
	bstr_builder.h \
	dslib.h \
	hooks.h \
	htp.h \
	utf8_decoder.h \
	htp_decompressors.h \
	htp_urlencoded.h \
	htp_multipart.h

# Library implementation files.
c_sources = \
	bstr.c \
	bstr_builder.c \
	hooks.c \
	htp_config.c \
	htp_connection_parser.c \
	htp_request_apache_2_2.c \
	htp_request_generic.c \
	htp_request_parsers.c \
	htp_response_generic.c \
	htp_util.c \
	dslib.c \
	htp.c \
	htp_connection.c \
	htp_parsers.c \
	htp_request.c \
	htp_response.c \
	htp_transaction.c \
	utf8_decoder.c \
	htp_decompressors.c \
	htp_urlencoded.c \
	htp_multipart.c

library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)
library_include_HEADERS = $(h_sources)

# AM_CPPFLAGS is the supported spelling of the deprecated INCLUDES variable.
AM_CPPFLAGS = -I$(top_srcdir)
AM_CFLAGS = -D_GNU_SOURCE -g -O2 -Wall -Wextra -std=gnu99 -pedantic

lib_LTLIBRARIES = libhtp.la
libhtp_la_SOURCES = $(h_sources) $(c_sources)
# -version-info sets the libtool ABI triple; -release embeds major.minor in
# the library file name.
libhtp_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) -release $(GENERIC_RELEASE)

@ -0,0 +1,88 @@
TODO
====
- A failure to parse a part of a transaction does not necessarily mean we have to
give up (on the transaction and on the connection).
- Inspect storage size choices for all structures
- Table key storage and element retrieval is inefficient.
- We're currently using a fixed-size line buffer. Although one buffer isn't very big,
they add up if we want to support tens of thousands of concurrent connections. For
example 20K connections x 20K buffer = 400M. We can perhaps start with a 2K buffer
(configurable) and grow as needed.
- Implement htp_get_version().
- Find all places where we use external information to determine input length. Ensure
the storage types are big enough to hold the biggest numbers we want to handle, and
ensure that we are able to detect wrapping and such.
MISC NOTES
==========
- Memory allocation strategies. We want to support two strategies:
#1 Supply a pair of functions (alloc and free) along with a void * pointer.
#2 Use memory pools for all allocations. Desired functions:
- create pool (w/hierarchy), destroy pool, clear pool
- alloc (calloc?), free
- register callback
The plan is to have a simple memory pool implementation that does not pool memory
but only tracks what is allocated so that it can free it all in one go. The library
users can provide an external implementation to use if they so wish.
- Consider enums where appropriate.
- The plan for SSL handling is as follows:
- For fully encrypted streams, upstream is free to decrypt SSL and feed the
parser just the data.
- On-demand SSL is not used with HTTP in practice but, in principle, the idea
is to have the parser return the HTP_TLS_UPGRADE code. Upon detecting the
code, upstream would handle the upgrade (either by passively decrypting the
traffic stream or handling SSL/TLS directly) and provide plain text data
to the HTTP parser on every subsequent invocation.
- Document the source for each request method
- At some point test the performance of the macros that fetch data and
determine if it makes more sense to implement the same functionality
as functions
- There will be two types of hook: connection and transaction hooks. If we want to allow
a hook to disconnect itself (as we should) then we need to make sure the disconnect is
applied to the correct scope. For example, a transaction hook that requires disconnection
should not be invoked for the same transaction, but should be invoked for the subsequent
transaction. This tells me that we need to keep a prototype of transaction hooks and to
make a copy of it whenever a new transaction begins.
- Does the API need to support closing one stream at a time? For example, when
a client sends his request(s), closes his side of a connection, then waits for
the server to respond.
- Detect if the request headers were submitted across several packets (which would
indicate manual access).
- Do we want to have separate limits for headers? Or should headers also use the line limits?
- Perhaps we also want to limit the size of the request line and headers combined, like the
IIS does?
- Chunk length evasion
- Chunk length limit
- Add callbacks to the list and table structures to automatically delete the elements
they contain when their respective destroy methods are invoked
- Perhaps best-fit maps should also have the replacement character?
- Test double-decoding with IIS4 or IIS5

@ -0,0 +1,614 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "bstr.h"
#include <ctype.h>
/**
 * Allocate a zero-length bstring, reserving space for at least len bytes.
 * The string data lives directly after the bstr_t header in the same
 * allocation, so the external buffer pointer is left NULL.
 *
 * @param len number of data bytes to reserve
 * @return New string, or NULL if the request is too large or memory
 *         allocation failed
 */
bstr *bstr_alloc(size_t len) {
    // Refuse requests where header + data would wrap around size_t and
    // silently produce an undersized allocation.
    if (len > ((size_t) -1) - sizeof (bstr_t)) return NULL;

    bstr_t *b = malloc(sizeof (bstr_t) + len);
    if (b == NULL) return NULL;

    b->len = 0;
    b->size = len;
    b->ptr = NULL;

    return (bstr *) b;
}
/**
 * Release a bstring. Passing NULL is allowed and does nothing.
 *
 * @param b bstring to release, or NULL
 */
void bstr_free(bstr *b) {
    if (b != NULL) {
        free(b);
    }
}
/**
 * Append the contents of one bstring to another, growing the
 * destination when it is too small.
 *
 * @param destination bstring to append to
 * @param source bstring whose bytes are appended
 * @return destination, at a potentially different memory location
 */
bstr *bstr_add_str(bstr *destination, bstr *source) {
    char *src_data = bstr_ptr(source);
    size_t src_len = bstr_len(source);

    return bstr_add_mem(destination, src_data, src_len);
}
/**
 * Append a NUL-terminated C string (without its terminator) to a
 * bstring, growing the destination when it is too small.
 *
 * @param destination bstring to append to
 * @param source NUL-terminated string
 * @return destination, at a potentially different memory location
 */
bstr *bstr_add_cstr(bstr *destination, char *source) {
    size_t src_len = strlen(source);

    return bstr_add_mem(destination, source, src_len);
}
/**
 * Append len bytes starting at data to a bstring. If the destination
 * buffer cannot hold the result it is expanded first via bstr_expand().
 *
 * @param destination bstring to append to
 * @param data first byte to append
 * @param len number of bytes to append
 * @return destination, at a potentially different memory location, or
 *         NULL if the expansion failed
 */
bstr *bstr_add_mem(bstr *destination, char *data, size_t len) {
    size_t needed = bstr_len(destination) + len;

    // Grow only when the current buffer is too small.
    if (needed > bstr_size(destination)) {
        destination = bstr_expand(destination, needed);
        if (destination == NULL) return NULL;
    }

    bstr_t *dst = (bstr_t *) destination;
    memcpy(bstr_ptr(destination) + dst->len, data, len);
    dst->len += len;

    return destination;
}
/**
 * Append the contents of one bstring to another WITHOUT expanding the
 * destination. The work is delegated to bstr_add_mem_noex(), which
 * copies only as many bytes as fit into the remaining capacity.
 *
 * @param destination bstring to append to
 * @param source bstring whose bytes are appended
 * @return destination
 */
bstr *bstr_add_str_noex(bstr *destination, bstr *source) {
    char *src_data = bstr_ptr(source);
    size_t src_len = bstr_len(source);

    return bstr_add_mem_noex(destination, src_data, src_len);
}
/**
 * Append a NUL-terminated C string to a bstring WITHOUT expanding the
 * destination. The work is delegated to bstr_add_mem_noex(), which
 * copies only as many bytes as fit into the remaining capacity.
 *
 * @param destination bstring to append to
 * @param source NUL-terminated string
 * @return destination
 */
bstr *bstr_add_cstr_noex(bstr *destination, char *source) {
    size_t src_len = strlen(source);

    return bstr_add_mem_noex(destination, source, src_len);
}
/**
 * Append a memory region to a bstring WITHOUT expanding the
 * destination. When the region does not fit, only the leading bytes
 * that fill the remaining capacity are copied; when there is no room
 * left at all, the destination is returned unchanged.
 *
 * @param destination bstring to append to
 * @param data first byte to append
 * @param len number of bytes requested
 * @return destination (never relocated)
 */
bstr *bstr_add_mem_noex(bstr *destination, char *data, size_t len) {
    bstr_t *dst = (bstr_t *) destination;
    size_t used = bstr_len(destination);
    size_t capacity = bstr_size(destination);
    size_t n = len;

    if (capacity < used + n) {
        // Truncate the copy to whatever space is left.
        n = capacity - used;
        if (n == 0) return destination;
    }

    memcpy(bstr_ptr(destination) + used, data, n);
    dst->len = used + n;

    return destination;
}
/**
 * Resize the storage of a bstring to newsize bytes. Two layouts are
 * handled: when the bstring uses an external buffer (ptr != NULL) only
 * that buffer is reallocated; otherwise the header and the data that
 * follows it are reallocated as one block, in which case the bstring
 * itself may move. Note that realloc() is invoked unconditionally, also
 * when the current buffer is already large enough. On failure the
 * original bstring is left untouched (it is not freed).
 *
 * @param s bstring to resize
 * @param newsize new buffer size, in bytes
 * @return the (possibly relocated) bstring, or NULL if memory
 *         allocation failed
 */
bstr *bstr_expand(bstr *s, size_t newsize) {
    bstr_t *hdr = (bstr_t *) s;

    if (hdr->ptr == NULL) {
        // Data is stored inline, right after the header.
        hdr = realloc(s, sizeof (bstr_t) + newsize);
        if (hdr == NULL) return NULL;
        s = (bstr *) hdr;
    } else {
        // Data is stored in a separately allocated buffer.
        void *buf = realloc(hdr->ptr, newsize);
        if (buf == NULL) return NULL;
        hdr->ptr = buf;
    }

    hdr->size = newsize;

    return s;
}
/**
 * Build a new bstring holding a copy of a NUL-terminated string
 * (the terminator itself is not copied).
 *
 * @param data NUL-terminated string
 * @return new bstring, or NULL on allocation failure
 */
bstr *bstr_cstrdup(char *data) {
    size_t data_len = strlen(data);

    return bstr_memdup(data, data_len);
}
/**
 * Build a new bstring holding a copy of a memory region.
 *
 * @param data first byte to copy
 * @param len number of bytes to copy
 * @return new bstring, or NULL on allocation failure
 */
bstr *bstr_memdup(char *data, size_t len) {
    bstr *copy = bstr_alloc(len);

    if (copy != NULL) {
        memcpy(bstr_ptr(copy), data, len);
        ((bstr_t *) copy)->len = len;
    }

    return copy;
}
/**
 * Build a new bstring holding a full copy of an existing bstring.
 *
 * @param b bstring to copy
 * @return new bstring, or NULL on allocation failure
 */
bstr *bstr_strdup(bstr *b) {
    size_t whole = bstr_len(b);

    return bstr_strdup_ex(b, 0, whole);
}
/**
 * Build a new bstring from a slice of an existing bstring. The slice
 * is not validated against the length of the source string.
 *
 * @param b source bstring
 * @param offset index of the first byte to copy
 * @param len number of bytes to copy
 * @return new bstring, or NULL on allocation failure
 */
bstr *bstr_strdup_ex(bstr *b, size_t offset, size_t len) {
    bstr *slice = bstr_alloc(len);

    if (slice != NULL) {
        char *from = bstr_ptr(b) + offset;
        memcpy(bstr_ptr(slice), from, len);
        ((bstr_t *) slice)->len = len;
    }

    return slice;
}
/**
 * Convert a memory region into a freshly allocated NUL-terminated
 * string. Every NUL byte in the input is written out as the two
 * characters backslash and '0', so the result has no embedded
 * terminators. The caller owns the returned buffer.
 *
 * @param data first byte of the region
 * @param len length of the region
 * @return new NUL-terminated string, or NULL on allocation failure
 */
char *bstr_memtocstr(char *data, size_t len) {
    size_t i, nul_count = 0;

    // Each NUL byte needs one extra output character.
    for (i = 0; i < len; i++) {
        if (data[i] == '\0') nul_count++;
    }

    char *out = malloc(len + nul_count + 1);
    if (out == NULL) return NULL;

    size_t w = 0;
    for (i = 0; i < len; i++) {
        if (data[i] == '\0') {
            out[w++] = '\\';
            out[w++] = '0';
        } else {
            out[w++] = data[i];
        }
    }

    out[w] = '\0';

    return out;
}
/**
 * Convert a bstring into a freshly allocated NUL-terminated string,
 * using bstr_memtocstr() for the conversion.
 *
 * @param b bstring to convert; NULL is accepted
 * @return new NUL-terminated string, or NULL if b is NULL or the
 *         conversion failed
 */
char *bstr_tocstr(bstr *b) {
    if (b != NULL) {
        return bstr_memtocstr(bstr_ptr(b), bstr_len(b));
    }

    return NULL;
}
/**
 * Locate the first occurrence of a character (byte) in a bstring.
 *
 * @param b bstring to search
 * @param c character to look for
 * @return index of the first match, or -1 if there is none
 */
int bstr_chr(bstr *b, int c) {
    char *data = bstr_ptr(b);
    size_t len = bstr_len(b);
    size_t pos;

    for (pos = 0; pos < len; pos++) {
        if (data[pos] == c) return pos;
    }

    return -1;
}
/**
 * Return the last position of a character (byte).
 *
 * The scan starts at the final byte of the string (index len - 1) and
 * walks backwards; it never touches the byte just past the end, and an
 * empty string yields -1 straight away.
 *
 * @param b bstring to search
 * @param c character to look for
 * @return the last position of the character, or -1 if it could not be found
 */
int bstr_rchr(bstr *b, int c) {
    char *data = bstr_ptr(b);
    size_t i = bstr_len(b);

    // i is always one past the next byte to examine, which keeps the
    // index unsigned and avoids both underflow and reading data[len].
    while (i > 0) {
        i--;
        if (data[i] == c) {
            return (int) i;
        }
    }

    return -1;
}
/**
 * Compare two memory regions byte by byte. The first differing byte
 * decides the result; if one region is a prefix of the other, the
 * shorter region sorts first.
 *
 * @param s1 first region
 * @param l1 length of the first region
 * @param s2 second region
 * @param l2 length of the second region
 * @return 0 if the memory regions are identical, -1 or +1 if they're not
 */
int bstr_cmp_ex(char *s1, size_t l1, char *s2, size_t l2) {
    size_t common = (l1 < l2) ? l1 : l2;
    size_t i;

    // Compare the part both regions have in common.
    for (i = 0; i < common; i++) {
        if (s1[i] != s2[i]) {
            return (s1[i] < s2[i]) ? -1 : 1;
        }
    }

    // No difference found; the lengths decide.
    if (l1 == l2) return 0;

    return (l1 < l2) ? -1 : 1;
}
/**
 * Compare a bstring against a NUL-terminated string using
 * bstr_cmp_ex().
 *
 * @param b bstring
 * @param c NUL-terminated string
 * @return 0, -1 or +1
 */
int bstr_cmpc(bstr *b, char *c) {
    size_t c_len = strlen(c);

    return bstr_cmp_ex(bstr_ptr(b), bstr_len(b), c, c_len);
}
/**
 * Compare the contents of two bstrings using bstr_cmp_ex().
 *
 * @param b1 first bstring
 * @param b2 second bstring
 * @return 0, -1 or +1
 */
int bstr_cmp(bstr *b1, bstr *b2) {
    char *left = bstr_ptr(b1);
    char *right = bstr_ptr(b2);

    return bstr_cmp_ex(left, bstr_len(b1), right, bstr_len(b2));
}
/**
 * Lowercase a bstring in place, one byte at a time, using tolower().
 *
 * @param b bstring to convert; NULL is accepted
 * @return b, or NULL if b is NULL
 */
bstr *bstr_tolowercase(bstr *b) {
    if (b == NULL) return NULL;

    unsigned char *p = (unsigned char *) bstr_ptr(b);
    unsigned char *end = p + bstr_len(b);

    for (; p < end; p++) {
        *p = tolower(*p);
    }

    return b;
}
/**
 * Duplicate a bstring and lowercase the duplicate; the original is not
 * modified. A failed duplication is passed through as NULL.
 *
 * @param b bstring to copy
 * @return lowercase copy, or NULL on allocation failure
 */
bstr *bstr_dup_lower(bstr *b) {
    bstr *copy = bstr_strdup(b);

    return bstr_tolowercase(copy);
}
/**
 * Parse a non-negative integer in the given base from the start of a
 * memory region. Digits are '0'-'9', then 'a'-'z' / 'A'-'Z' for the
 * values 10 to 35; parsing stops at the first byte that is not a valid
 * digit for the base.
 *
 * @param data region to parse
 * @param len length of the region
 * @param base numeric base; digit values must be smaller than this
 * @param lastlen output; set to the index of the byte that stopped the
 *        parse. When every byte was consumed it is set to len + 1.
 *        NOTE(review): len + 1 (rather than len) looks off by one -
 *        confirm against callers before changing.
 * @return the parsed value; -1 if the first byte is not a valid digit;
 *         -2 if the overflow check tripped. An empty region (len == 0)
 *         returns 0.
 */
int bstr_util_memtoip(char *data, size_t len, int base, size_t *lastlen) {
// rval: running result; tval: result before the current step (used by
// the overflow check); tflag: set once at least one digit has been seen.
int rval = 0, tval = 0, tflag = 0;
size_t i = *lastlen = 0;
for (i = 0; i < len; i++) {
int d = data[i];
*lastlen = i;
// Convert character to digit.
if ((d >= '0') && (d <= '9')) {
d -= '0';
} else if ((d >= 'a') && (d <= 'z')) {
// Same as d = d - 'a' + 10, so 'a' becomes 10.
d -= 'a' - 10;
} else if ((d >= 'A') && (d <= 'Z')) {
d -= 'A' - 10;
} else {
d = -1;
}
// Check that the digit makes sense with the base
// we are using.
if ((d == -1) || (d >= base)) {
if (tflag) {
// Return what we have so far; lastlen points
// to the first non-digit position.
return rval;
} else {
// We didn't see a single digit.
return -1;
}
}
if (tflag) {
rval *= base;
// Overflow is detected after the fact: a result smaller than the
// previous value means the multiplication wrapped.
// NOTE(review): this relies on signed wrap-around, which the C
// standard leaves undefined - confirm it holds for the target compilers.
if (tval > rval) {
// Overflow
return -2;
}
rval += d;
if (tval > rval) {
// Overflow
return -2;
}
tval = rval;
} else {
// First digit: it seeds both the result and the previous value.
tval = rval = d;
tflag = 1;
}
}
// Reached only when the loop consumed the whole region (i == len).
*lastlen = i + 1;
return rval;
}
/**
 * Search a bstring for another bstring (case-sensitive).
 *
 * @param haystack bstring to search in
 * @param needle bstring to search for
 * @return the result of bstr_indexofmem(): index of the match, or -1
 */
int bstr_indexof(bstr *haystack, bstr *needle) {
    char *needle_data = bstr_ptr(needle);
    size_t needle_len = bstr_len(needle);

    return bstr_indexofmem(haystack, needle_data, needle_len);
}
/**
 * Search a bstring for a NUL-terminated string (case-sensitive).
 *
 * @param haystack bstring to search in
 * @param needle NUL-terminated string to search for
 * @return the result of bstr_indexofmem(): index of the match, or -1
 */
int bstr_indexofc(bstr *haystack, char *needle) {
    size_t needle_len = strlen(needle);

    return bstr_indexofmem(haystack, needle, needle_len);
}
/**
 * Search a bstring for another bstring, ignoring case differences.
 *
 * @param haystack bstring to search in
 * @param needle bstring to search for
 * @return the result of bstr_indexofmem_nocase(): index of the match,
 *         or -1
 */
int bstr_indexof_nocase(bstr *haystack, bstr *needle) {
    char *needle_data = bstr_ptr(needle);
    size_t needle_len = bstr_len(needle);

    return bstr_indexofmem_nocase(haystack, needle_data, needle_len);
}
/**
 * Search a bstring for a NUL-terminated string, ignoring case
 * differences.
 *
 * @param haystack bstring to search in
 * @param needle NUL-terminated string to search for
 * @return the result of bstr_indexofmem_nocase(): index of the match,
 *         or -1
 */
int bstr_indexofc_nocase(bstr *haystack, char *needle) {
    size_t needle_len = strlen(needle);

    return bstr_indexofmem_nocase(haystack, needle, needle_len);
}
/**
 * Find index in the haystack, with the needle being a memory region.
 *
 * A position counts as a match only when all len2 needle bytes were
 * compared and found equal. Both sides are compared as unsigned bytes,
 * so needles containing bytes above 0x7f can be found regardless of the
 * signedness of plain char.
 *
 * @param haystack bstring to search in
 * @param data2 first byte of the needle
 * @param len2 length of the needle
 * @return index of the first match, or -1 if the needle was not found.
 *         An empty needle matches at 0 in a non-empty haystack and is
 *         not found in an empty haystack.
 */
int bstr_indexofmem(bstr *haystack, char *data2, size_t len2) {
    unsigned char *data = (unsigned char *) bstr_ptr(haystack);
    unsigned char *needle = (unsigned char *) data2;
    size_t len = bstr_len(haystack);
    size_t i, j;

    // TODO Is an optimisation here justified?
    //      http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm

    for (i = 0; i < len; i++) {
        // j counts the needle bytes that matched at offset i; it reaches
        // len2 only if no mismatch occurred and the haystack did not run out.
        for (j = 0; (j < len2) && (i + j < len); j++) {
            if (data[i + j] != needle[j]) break;
        }

        if (j == len2) {
            return (int) i;
        }
    }

    return -1;
}
/**
 * Find index in the haystack, with the needle being a memory region.
 * Ignore case differences (bytes are compared after toupper()).
 *
 * A position counts as a match only when all len2 needle bytes were
 * compared and found equal.
 *
 * @param haystack bstring to search in
 * @param data2 first byte of the needle
 * @param len2 length of the needle
 * @return index of the first match, or -1 if the needle was not found.
 *         An empty needle matches at 0 in a non-empty haystack and is
 *         not found in an empty haystack.
 */
int bstr_indexofmem_nocase(bstr *haystack, char *data2, size_t len2) {
    unsigned char *data = (unsigned char *) bstr_ptr(haystack);
    unsigned char *needle = (unsigned char *) data2;
    size_t len = bstr_len(haystack);
    size_t i, j;

    // TODO No need to inspect the last len2 - 1 bytes
    for (i = 0; i < len; i++) {
        // j counts the needle bytes that matched at offset i; it reaches
        // len2 only if no mismatch occurred and the haystack did not run out.
        for (j = 0; (j < len2) && (i + j < len); j++) {
            if (toupper(data[i + j]) != toupper(needle[j])) break;
        }

        if (j == len2) {
            return (int) i;
        }
    }

    return -1;
}
/**
 * Drop the last byte of a bstring by shortening its length by one.
 * An empty bstring is left as it is.
 *
 * @param s bstring to shorten
 */
void bstr_chop(bstr *s) {
    bstr_t *hdr = (bstr_t *) s;

    if (hdr->len == 0) return;

    hdr->len -= 1;
}
/**
 * Overwrite the recorded length of a bstring. Intended for callers
 * that edit the string contents directly and change how many bytes are
 * in use. The new length is stored as given, without any check against
 * the buffer size.
 *
 * @param s bstring to update
 * @param newlen new length, in bytes
 */
void bstr_len_adjust(bstr *s, size_t newlen) {
    ((bstr_t *) s)->len = newlen;
}
/**
 * Return the character (byte) at the given position.
 *
 * Valid positions are 0 .. len - 1; anything else, including
 * pos == len, is rejected rather than read from past the end of the
 * string. Because the return type is char, the -1 error value cannot be
 * told apart from a 0xFF data byte on platforms where char is signed.
 *
 * @param s bstring to read from
 * @param pos zero-based position
 * @return the character, or -1 if the bstring is too short
 */
char bstr_char_at(bstr *s, size_t pos) {
    unsigned char *data = (unsigned char *) bstr_ptr(s);
    size_t len = bstr_len(s);

    if (pos >= len) return -1;

    return data[pos];
}

@ -0,0 +1,105 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _BSTR_H
#define _BSTR_H
// bstr_t is the concrete string header (defined below).
typedef struct bstr_t bstr_t;
// bstr is an opaque alias: the API passes "bstr *" values around and the
// implementation casts them to "bstr_t *" to reach the header.
typedef void * bstr;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bstr_builder.h"
// IMPORTANT This binary string library is used internally by the parser and you should
// not rely on it in your code. The implementation may change.
//
// TODO
// - Add a function that wraps existing data
// - Support Unicode bstrings
struct bstr_t {
/** The length of the string stored in the buffer. */
size_t len;
/** The current size of the buffer. If the buffer is bigger than the
 * string then it will be able to expand without having to reallocate.
 */
size_t size;
/** Optional buffer pointer. If this pointer is NULL (as it currently is
 * in virtually all cases), the string buffer will immediately follow
 * this structure. If the pointer is not NULL, it points to the actual
 * buffer used, and there's no data following this structure.
 */
char *ptr;
};
// Defines
// Accessors that take a bstr pointer and cast it to the bstr_t header.
// bstr_len:  number of bytes currently stored in the string.
// bstr_size: capacity of the buffer, in bytes.
// bstr_ptr:  start of the string data; this is the external buffer when
//            ptr is set, otherwise the memory right after the header.
// Note that bstr_ptr expands its argument more than once, so do not pass
// an expression with side effects.
#define bstr_len(X) ((*(bstr_t *)(X)).len)
#define bstr_size(X) ((*(bstr_t *)(X)).size)
#define bstr_ptr(X) ( ((*(bstr_t *)(X)).ptr == NULL) ? (char *)((char *)(X) + sizeof(bstr_t)) : (char *)(*(bstr_t *)(X)).ptr )
// Functions
// Allocation and resizing
bstr *bstr_alloc(size_t newsize);
void bstr_free(bstr *s);
bstr *bstr_expand(bstr *s, size_t newsize);
// Duplication
bstr *bstr_cstrdup(char *);
bstr *bstr_memdup(char *data, size_t len);
bstr *bstr_strdup(bstr *b);
bstr *bstr_strdup_ex(bstr *b, size_t offset, size_t len);
char *bstr_tocstr(bstr *);
// Character search and comparison
int bstr_chr(bstr *, int);
int bstr_rchr(bstr *, int);
int bstr_cmpc(bstr *, char *);
int bstr_cmp(bstr *, bstr *);
// Case conversion
bstr *bstr_dup_lower(bstr *);
bstr *bstr_tolowercase(bstr *);
// Appending; these variants expand the destination when needed
bstr *bstr_add_mem(bstr *, char *, size_t);
bstr *bstr_add_str(bstr *, bstr *);
bstr *bstr_add_cstr(bstr *, char *);
// Appending; the _noex variants never expand the destination
bstr *bstr_add_mem_noex(bstr *, char *, size_t);
bstr *bstr_add_str_noex(bstr *, bstr *);
bstr *bstr_add_cstr_noex(bstr *, char *);
// Utilities working on raw memory regions
int bstr_util_memtoip(char *data, size_t len, int base, size_t *lastlen);
char *bstr_memtocstr(char *data, size_t len);
// Substring search
int bstr_indexof(bstr *haystack, bstr *needle);
int bstr_indexofc(bstr *haystack, char *needle);
int bstr_indexof_nocase(bstr *haystack, bstr *needle);
int bstr_indexofc_nocase(bstr *haystack, char *needle);
int bstr_indexofmem(bstr *haystack, char *data, size_t len);
int bstr_indexofmem_nocase(bstr *haystack, char *data, size_t len);
// Length manipulation and element access
void bstr_chop(bstr *b);
void bstr_len_adjust(bstr *s, size_t newlen);
char bstr_char_at(bstr *s, size_t pos);
#endif /* _BSTR_H */

@ -0,0 +1,161 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "bstr.h"
#include "bstr_builder.h"
#include "dslib.h"
/**
 * Report how many pieces a string builder currently holds.
 *
 * @param bb string builder
 * @return number of pieces
 */
size_t bstr_builder_size(bstr_builder_t *bb) {
    size_t count = list_size(bb->pieces);

    return count;
}
/**
 * Empty a string builder: every stored piece is freed, the piece list
 * is destroyed and a fresh list of the default size is created in its
 * place. A builder that holds no pieces is left untouched.
 *
 * @param bb string builder
 */
void bstr_builder_clear(bstr_builder_t *bb) {
    bstr *piece;

    // Nothing to release when there are no pieces.
    if (list_size(bb->pieces) == 0) return;

    // Free each piece, then the list that held them.
    list_iterator_reset(bb->pieces);
    for (piece = list_iterator_next(bb->pieces);
         piece != NULL;
         piece = list_iterator_next(bb->pieces)) {
        bstr_free(piece);
    }
    list_destroy(bb->pieces);

    // TODO What should we do on allocation failure?
    bb->pieces = list_array_create(BSTR_BUILDER_DEFAULT_SIZE);
}
/**
 * Allocate a string builder together with its (empty) piece list.
 *
 * @return New string builder, or NULL if either allocation failed
 */
bstr_builder_t * bstr_builder_create() {
    bstr_builder_t *builder = calloc(1, sizeof(bstr_builder_t));

    if (builder != NULL) {
        builder->pieces = list_array_create(BSTR_BUILDER_DEFAULT_SIZE);
        if (builder->pieces == NULL) {
            // Do not hand out a builder without a piece list.
            free(builder);
            builder = NULL;
        }
    }

    return builder;
}
/**
 * Tear down a string builder: all stored pieces are freed, followed by
 * the piece list and the builder itself. NULL is accepted and ignored.
 *
 * @param bb string builder, or NULL
 */
void bstr_builder_destroy(bstr_builder_t *bb) {
    bstr *piece;

    if (bb == NULL) return;

    // Free each piece before the list that holds them.
    list_iterator_reset(bb->pieces);
    for (piece = list_iterator_next(bb->pieces);
         piece != NULL;
         piece = list_iterator_next(bb->pieces)) {
        bstr_free(piece);
    }

    list_destroy(bb->pieces);
    free(bb);
}
/**
 * Appends an existing string to the builder as a new piece. The pointer
 * itself is stored; no copy is made.
 *
 * @param bb String builder to append to
 * @param b String to store as the next piece
 * @return Result of pushing onto the builder's list
 */
int bstr_builder_append(bstr_builder_t *bb, bstr *b) {
    int status = list_push(bb->pieces, b);

    return status;
}
/**
 * Adds one new piece, defined with the supplied pointer and
 * length, to the builder. The data is copied into a new string
 * owned by the builder.
 *
 * @param bb String builder to append to
 * @param data Start of the bytes to copy
 * @param len Number of bytes to copy
 * @return Result of the list push on success; -1 if the copy could not
 *         be allocated; the (negative) push result if the push failed
 */
int bstr_builder_append_mem(bstr_builder_t *bb, char *data, size_t len) {
    bstr *b = bstr_memdup(data, len);
    if (b == NULL) return -1; // TODO Is the return code correct?

    int rc = list_push(bb->pieces, b);
    if (rc < 0) {
        // The list did not take ownership, so the copy must not be leaked
        bstr_free(b);
    }

    return rc;
}
/**
 * Adds one new piece, in the form of a NUL-terminated string, to
 * the builder. The string is copied into a new string owned by the
 * builder.
 *
 * @param bb String builder to append to
 * @param cstr NUL-terminated string to copy
 * @return Result of the list push on success; -1 if the copy could not
 *         be allocated; the (negative) push result if the push failed
 */
int bstr_builder_append_cstr(bstr_builder_t *bb, char *cstr) {
    bstr *b = bstr_cstrdup(cstr);
    if (b == NULL) return -1; // TODO Is the return code correct?

    int rc = list_push(bb->pieces, b);
    if (rc < 0) {
        // The list did not take ownership, so the copy must not be leaked
        bstr_free(b);
    }

    return rc;
}
/**
 * Concatenates all pieces held in a string builder into one newly
 * allocated string. The pieces themselves are left untouched.
 *
 * @param bb String builder to read from
 * @return The combined string, or NULL if it could not be allocated
 */
bstr * bstr_builder_to_str(bstr_builder_t *bb) {
    size_t total = 0;
    bstr *piece;
    bstr *joined;

    // First pass: add up the lengths of all pieces
    list_iterator_reset(bb->pieces);
    for (piece = list_iterator_next(bb->pieces);
         piece != NULL;
         piece = list_iterator_next(bb->pieces)) {
        total += bstr_len(piece);
    }

    // Reserve room for the combined string
    joined = bstr_alloc(total);
    if (joined == NULL) {
        return NULL;
    }

    // Second pass: copy every piece into the result, in order
    list_iterator_reset(bb->pieces);
    for (piece = list_iterator_next(bb->pieces);
         piece != NULL;
         piece = list_iterator_next(bb->pieces)) {
        bstr_add_str_noex(joined, piece);
    }

    return joined;
}

@ -0,0 +1,40 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _BSTR_BUILDER_H
#define _BSTR_BUILDER_H
typedef struct bstr_builder_t bstr_builder_t;
#include "dslib.h"
// A string builder: an ordered collection of bstr pieces that can later be
// joined into a single string with bstr_builder_to_str().
struct bstr_builder_t {
// List holding the pieces in insertion order
list_t *pieces;
};
#define BSTR_BUILDER_DEFAULT_SIZE 16
bstr_builder_t * bstr_builder_create();
void bstr_builder_destroy(bstr_builder_t *bb);
size_t bstr_builder_size(bstr_builder_t *bb);
void bstr_builder_clear(bstr_builder_t *bb);
int bstr_builder_append(bstr_builder_t *bb, bstr *b);
int bstr_builder_append_mem(bstr_builder_t *bb, char *data, size_t len);
int bstr_builder_append_cstr(bstr_builder_t *bb, char *str);
bstr * bstr_builder_to_str(bstr_builder_t *bb);
#endif /* _BSTR_BUILDER_H */

@ -0,0 +1,554 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <stdlib.h>
#include <stdio.h>
#include "dslib.h"
// -- Queue List --
/**
 * Appends an element to the tail of a linked list.
 *
 * @param _q List to append to (must be a list_linked_t)
 * @param element Pointer to store
 * @return 1 on success, -1 if the list node could not be allocated
 */
static int list_linked_push(list_t *_q, void *element) {
    list_linked_t *list = (list_linked_t *) _q;
    list_linked_element_t *node = calloc(1, sizeof (list_linked_element_t));

    if (node == NULL) {
        return -1;
    }

    node->data = element;

    // Hook the node behind the current tail, if there is one
    if (list->last != NULL) {
        list->last->next = node;
    }

    // An empty list gets the node as its head as well
    if (list->first == NULL) {
        list->first = node;
    }

    list->last = node;

    return 1;
}
/**
 * Detaches the head node of a linked list and hands back its data.
 *
 * @param _q List to pop from (must be a list_linked_t)
 * @return The data pointer of the removed node, or NULL if the list is empty
 */
static void *list_linked_pop(list_t *_q) {
    list_linked_t *list = (list_linked_t *) _q;
    list_linked_element_t *head = list->first;
    void *payload;

    if (head == NULL) {
        return NULL;
    }

    payload = head->data;
    list->first = head->next;

    // Removing the only node leaves no tail either
    if (list->first == NULL) {
        list->last = NULL;
    }

    free(head);

    return payload;
}
/**
 * Tells whether a linked list holds no elements.
 *
 * @param _q List to inspect (must be a list_linked_t)
 * @return 1 if the list has no head node, 0 otherwise
 */
static int list_linked_empty(list_t *_q) {
    list_linked_t *list = (list_linked_t *) _q;

    return (list->first == NULL) ? 1 : 0;
}
/**
 * Destroy a linked list. Every remaining node is freed, and so is the data
 * pointer each node holds (via free()), followed by the list structure.
 *
 * NOTE(review): the data pointers ARE passed to free() here, so only
 * heap-allocated data (or NULL) may remain in the list when it is
 * destroyed. Pop any elements that must survive, or that were not
 * allocated with malloc/calloc, before calling this function.
 *
 * @param l List to destroy; NULL is accepted and ignored
 */
void list_linked_destroy(list_linked_t *l) {
    if (l == NULL) return;

    // Free the list structures
    list_linked_element_t *node = l->first;
    while (node != NULL) {
        // Remember the successor before the node is released
        list_linked_element_t *next = node->next;
        free(node->data);
        free(node);
        node = next;
    }

    // Free the list itself
    free(l);
}
/**
* Create a new linked list.
*
* @return a pointer to the newly creted list (list_t), or NULL on memory allocation failure
*/
list_t *list_linked_create(void) {
list_linked_t *q = calloc(1, sizeof (list_linked_t));
if (q == NULL) return NULL;
q->push = list_linked_push;
q->pop = list_linked_pop;
q->empty = list_linked_empty;
q->destroy = (void (*)(list_t *))list_linked_destroy;
return (list_t *) q;
}
// -- Queue Array --
/**
 * Add new element to the end of the list, expanding the list
 * as necessary. The storage is used as a circular buffer; when it is
 * full its capacity is doubled (a zero-capacity list grows to one slot)
 * and the elements are rearranged so that the oldest one sits in slot 0.
 *
 * @param _q List to append to (must be a list_array_t)
 * @param element Pointer to store
 *
 * @return 1 on success or -1 on failure (memory allocation, or the
 *         required capacity cannot be represented)
 */
static int list_array_push(list_t *_q, void *element) {
    list_array_t *q = (list_array_t *) _q;

    // Check if we're full
    if (q->current_size >= q->max_size) {
        // Doubling zero would stay at zero, so start from a single slot
        size_t new_size = (q->max_size == 0) ? 1 : q->max_size * 2;
        void *newblock = NULL;

        // Refuse to grow if the slot count or the byte count would wrap
        if ((new_size <= q->max_size) || (new_size > ((size_t) -1) / sizeof (void *))) {
            return -1;
        }

        if (q->first == 0) {
            // The simple case of expansion is when the first
            // element in the list resides in the first slot. In
            // that case we just add some new space to the end,
            // adjust the max_size and that's that.
            newblock = realloc(q->elements, new_size * sizeof (void *));
            if (newblock == NULL) return -1;
        } else {
            // When the first element is not in the first
            // memory slot, we need to rearrange the order
            // of the elements in order to expand the storage area.
            size_t head_count = q->max_size - q->first;

            newblock = malloc(new_size * sizeof (void *));
            if (newblock == NULL) return -1;

            // Copy the beginning of the list to the beginning of the new memory block
            memcpy(newblock, q->elements + q->first, head_count * sizeof (void *));

            // Append the second part of the list to the end
            memcpy((void **) newblock + head_count, q->elements, q->first * sizeof (void *));

            free(q->elements);
        }

        q->first = 0;
        q->last = q->current_size;
        q->max_size = new_size;
        q->elements = newblock;
    }

    q->elements[q->last] = element;
    q->current_size++;

    // Advance the write position, wrapping at the end of the storage
    q->last++;
    if (q->last == q->max_size) {
        q->last = 0;
    }

    return 1;
}
/**
 * Takes the oldest element out of an array-backed list.
 *
 * @param _q List to pop from (must be a list_array_t)
 * @return The removed element, or NULL if the list is empty
 */
static void *list_array_pop(list_t *_q) {
    list_array_t *list = (list_array_t *) _q;
    void *item;

    if (list->current_size == 0) {
        return NULL;
    }

    item = list->elements[list->first];
    list->current_size--;

    // Move the read position forward, wrapping at the end of the storage
    list->first = (list->first + 1 == list->max_size) ? 0 : list->first + 1;

    return item;
}
/**
 * Reports the number of elements stored in an array-backed list.
 *
 * @param _l List to query (must be a list_array_t)
 * @return Current element count
 */
static size_t list_array_size(list_t *_l) {
    list_array_t *list = (list_array_t *) _l;

    return list->current_size;
}
/**
 * Return the element at the given index. Index 0 is the oldest element
 * (the one the next pop would return).
 *
 * @param _l List to read from (must be a list_array_t)
 * @param idx Zero-based position, counted from the oldest element
 * @return the desired element, or NULL if the list is too small, or
 *         if the element at that position carries a NULL
 */
static void *list_array_get(list_t *_l, size_t idx) {
    list_array_t *l = (list_array_t *) _l;

    // Compare without "idx + 1", which would wrap for the largest size_t
    if (idx >= l->current_size) return NULL;

    // first < max_size and idx < current_size <= max_size, so the sum
    // overshoots the storage by less than one full lap
    size_t slot = l->first + idx;
    if (slot >= l->max_size) {
        slot -= l->max_size;
    }

    return l->elements[slot];
}
/**
 * Replace the element at the given index with the provided element.
 * Index 0 is the oldest element. The previous occupant is overwritten,
 * not freed.
 *
 * @param _l List to modify (must be a list_array_t)
 * @param idx Zero-based position, counted from the oldest element
 * @param element Pointer to store at that position
 *
 * @return 1 if the element was replaced, or 0 if the list is too small
 */
static int list_array_replace(list_t *_l, size_t idx, void *element) {
    list_array_t *l = (list_array_t *) _l;

    // Compare without "idx + 1", which would wrap for the largest size_t
    if (idx >= l->current_size) return 0;

    // first < max_size and idx < current_size <= max_size, so the sum
    // overshoots the storage by less than one full lap
    size_t slot = l->first + idx;
    if (slot >= l->max_size) {
        slot -= l->max_size;
    }

    l->elements[slot] = element;

    return 1;
}
/**
 * Rewinds the list's built-in iterator so that the next call to the
 * iterator returns the oldest element.
 *
 * @param l List whose iterator is rewound
 */
void list_array_iterator_reset(list_array_t *l) {
    const size_t start = 0;

    l->iterator_index = start;
}
/**
 * Returns the element at the iterator's current position and moves the
 * iterator one step forward.
 *
 * @param l List being iterated
 * @return The next element; NULL once all elements have been visited,
 *         or when the element itself is NULL
 */
void *list_array_iterator_next(list_array_t *l) {
    size_t position = l->iterator_index;

    if (position >= l->current_size) {
        // Exhausted; the iterator position is left where it is
        return NULL;
    }

    l->iterator_index = position + 1;

    return list_get(l, position);
}
/**
 * Releases an array-backed list: its element storage and the list
 * structure. The stored pointers themselves are not freed, so the caller
 * must have dealt with them beforehand.
 *
 * @param l List to destroy
 */
void list_array_destroy(list_array_t *l) {
    void **storage = l->elements;

    free(storage);
    free(l);
}
/**
 * Create new array-based list. Installs push, pop, get, replace, size,
 * the iterator functions and destroy; the "empty" function pointer is
 * not set and stays NULL.
 *
 * @param size Initial capacity in elements; 0 is treated as 1 so that
 *             the list always owns at least one slot
 * @return newly allocated list (list_t), or NULL on memory allocation
 *         failure or if the requested capacity is too large to allocate
 */
list_t *list_array_create(size_t size) {
    // malloc(0) is allowed to return NULL, which would look like an
    // out-of-memory condition, so never ask for an empty block
    if (size == 0) size = 1;

    // Reject capacities whose byte count would wrap around
    if (size > ((size_t) -1) / sizeof (void *)) return NULL;

    // Allocate the list structure
    list_array_t *q = calloc(1, sizeof (list_array_t));
    if (q == NULL) return NULL;

    // Allocate the initial batch of elements
    q->elements = malloc(size * sizeof (void *));
    if (q->elements == NULL) {
        free(q);
        return NULL;
    }

    // Initialise structure
    q->first = 0;
    q->last = 0;
    q->max_size = size;
    q->push = list_array_push;
    q->pop = list_array_pop;
    q->get = list_array_get;
    q->replace = list_array_replace;
    q->size = list_array_size;
    q->iterator_reset = (void (*)(list_t *))list_array_iterator_reset;
    q->iterator_next = (void *(*)(list_t *))list_array_iterator_next;
    q->destroy = (void (*)(list_t *))list_array_destroy;

    return (list_t *) q;
}
// -- Table --
/**
* Create a new table structure.
*
* @param size
* @return newly created table_t
*/
table_t *table_create(size_t size) {
table_t *t = calloc(1, sizeof (table_t));
if (t == NULL) return NULL;
// Use a list behind the scenes
t->list = list_array_create(size * 2);
if (t->list == NULL) {
free(t);
return NULL;
}
return t;
}
/**
 * Destroy a table. The keys (copies made by table_add) are freed; the
 * values are left alone and remain the caller's responsibility.
 *
 * @param table Table to destroy; NULL is accepted and ignored
 */
void table_destroy(table_t * table) {
    size_t count;
    size_t i;

    if (table == NULL) return;

    // Keys sit at the even positions of the backing list. Walk by index
    // rather than until a NULL, because a stored value may itself be NULL
    // and must not cut the walk short.
    count = list_size(table->list);
    for (i = 0; i < count; i += 2) {
        free(list_get(table->list, i));
    }

    list_destroy(table->list);
    free(table);
}
/**
 * Add a new table element. This function currently makes a lowercased
 * copy of the key, which is inefficient. The copy is owned by the table;
 * the element pointer is stored as given.
 *
 * @param table Table to add to
 * @param key Key for the new element (not modified, not retained)
 * @param element Value to associate with the key
 * @return 1 on success, -1 on memory allocation failure (in which case
 *         the table is left unchanged)
 */
int table_add(table_t *table, bstr *key, void *element) {
    // Lowercase key
    bstr *lkey = bstr_dup_lower(key);
    if (lkey == NULL) {
        return -1;
    }

    // Add key
    if (list_add(table->list, lkey) != 1) {
        free(lkey);
        return -1;
    }

    // Add element
    if (list_add(table->list, element) != 1) {
        // Take back the key that was just appended. list_pop() cannot be
        // used for this because it removes the OLDEST element, so step the
        // array list's write position back by one slot instead. Tables are
        // always backed by an array list (see table_create/table_clear).
        list_array_t *l = (list_array_t *) table->list;
        l->last = (l->last == 0) ? l->max_size - 1 : l->last - 1;
        l->current_size--;

        free(lkey);
        return -1;
    }

    return 1;
}
/**
 * Linear search over the table's key/value pairs using the backing list's
 * iterator. The key is compared with bstr_cmp() as given, without any
 * case conversion.
 *
 * @param table Table to search
 * @param key Key to look for
 * @return The value paired with the first matching key, or NULL if no
 *         key matches
 */
static void *table_get_internal(table_t *table, bstr *key) {
    bstr *candidate;
    void *value;

    list_iterator_reset(table->list);

    for (;;) {
        // Entries come in pairs: first the key...
        candidate = list_iterator_next(table->list);
        if (candidate == NULL) {
            break;
        }

        // ...then its value, consumed whether or not the key matches
        value = list_iterator_next(table->list);

        if (bstr_cmp(candidate, key) == 0) {
            return value;
        }
    }

    return NULL;
}
/**
 * Retrieve the first element in the table with the given
 * key (as a NUL-terminated string). The key is lowercased before the
 * lookup.
 *
 * @param table Table to search
 * @param cstr Key as a NUL-terminated string
 * @return table element, or NULL if not found or if the temporary copy
 *         of the key could not be allocated
 */
void *table_getc(table_t *table, char *cstr) {
    // TODO This is very inefficient
    bstr *key = bstr_cstrdup(cstr);
    if (key == NULL) {
        // Without a key copy there is nothing to look up with
        return NULL;
    }

    bstr_tolowercase(key);
    void *data = table_get_internal(table, key);
    free(key);

    return data;
}
/**
 * Retrieve the first element in the table with the given key. The lookup
 * is done with a lowercased copy of the key.
 *
 * @param table Table to search
 * @param key Key to look for (not modified)
 * @return table element, or NULL if not found or if the temporary copy
 *         of the key could not be allocated
 */
void *table_get(table_t *table, bstr *key) {
    // TODO This is very inefficient
    bstr *lkey = bstr_dup_lower(key);
    if (lkey == NULL) {
        // Without a key copy there is nothing to look up with
        return NULL;
    }

    void *data = table_get_internal(table, lkey);
    free(lkey);

    return data;
}
/**
 * Rewinds the table iterator by rewinding the iterator of the list that
 * backs the table.
 *
 * @param table Table whose iterator is rewound
 */
void table_iterator_reset(table_t *table) {
    list_t *backing = table->list;

    list_iterator_reset(backing);
}
/**
 * Steps the table iterator to the next key/value pair.
 *
 * @param t Table being iterated
 * @param data Receives the value of the pair; left untouched when there
 *             is no further pair
 * @return The key of the pair, or NULL when there are no more pairs
 */
bstr *table_iterator_next(table_t *t, void **data) {
    bstr *key = list_iterator_next(t->list);

    if (key == NULL) {
        return NULL;
    }

    // The value is stored right after its key
    *data = list_iterator_next(t->list);

    return key;
}
/**
 * Reports the number of key/value pairs in the table, which is half the
 * number of entries in the backing list.
 *
 * @param table Table to query
 * @return Number of pairs
 */
size_t table_size(table_t *table) {
    size_t entries = list_size(table->list);

    return entries / 2;
}
/**
 * Remove all elements from the table. The keys (copies owned by the
 * table) are freed; the values are dropped without being freed and
 * remain the caller's responsibility. The backing list is emptied in
 * place, so clearing cannot fail and the table stays usable.
 *
 * @param table Table to clear
 */
void table_clear(table_t *table) {
    // Entries are stored as key, value, key, value, ... and pop removes
    // them oldest first, so each round takes one complete pair
    while (list_size(table->list) > 0) {
        void *key = list_pop(table->list);
        (void) list_pop(table->list); // the value is not ours to free
        free(key);
    }

    // Any iteration in progress refers to entries that no longer exist
    list_iterator_reset(table->list);
}
#if 0
// Disabled demo (compiled out by the surrounding #if 0): pushes four string
// literals onto a linked list, then pops and prints them until the list
// returns NULL.
int main(int argc, char **argv) {
list_t *q = list_linked_create();
list_push(q, "1");
list_push(q, "2");
list_push(q, "3");
list_push(q, "4");
char *s = NULL;
// list_pop returns NULL once the list is empty, which ends the loop
while ((s = (char *) list_pop(q)) != NULL) {
printf("Got: %s\n", s);
}
// Every node was released by the pops above, so only the list structure
// itself is left to free
free(q);
}
#endif

@ -0,0 +1,105 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _DSLIB_H
#define _DSLIB_H
typedef struct list_t list_t;
typedef struct list_array_t list_array_t;
typedef struct list_linked_element_t list_linked_element_t;
typedef struct list_linked_t list_linked_t;
typedef struct table_t table_t;
#include "bstr.h"
// IMPORTANT This library is used internally by the parser and you should
// not rely on it in your code. The implementation may change at
// some point in the future.
// What we have here is two implementations of a list structure (array- and link-list-based),
// and one implementation of a table (case-insensitive keys; multiple key values are allowed).
// The lists can be used as a stack.
//
// TODO The table element retrieval if very inefficient at the moment.
// Convenience wrappers that call a list operation through the function
// pointers every list structure starts with (see LIST_COMMON). L is the
// list; it is expanded more than once, so it must not have side effects.
// list_get and list_replace cast L to list_t * so that they can also be
// used with a pointer to a concrete list type; the argument is
// parenthesised inside the cast so that the cast applies to the whole
// expression passed as L.
#define list_push(L, E) (L)->push(L, E)
#define list_pop(L) (L)->pop(L)
#define list_empty(L) (L)->empty(L)
#define list_get(L, N) (L)->get((list_t *)(L), N)
#define list_replace(L, N, E) (L)->replace((list_t *)(L), N, E)
#define list_add(L, N) (L)->push(L, N)
#define list_size(L) (L)->size(L)
#define list_iterator_reset(L) (L)->iterator_reset(L)
#define list_iterator_next(L) (L)->iterator_next(L)
#define list_destroy(L) (L)->destroy(L)
// The function-pointer members shared by every list structure. Each list
// struct begins with LIST_COMMON, which is what allows a pointer to a
// concrete list to be used as a list_t * by the list_* macros. A given
// implementation may leave some of these pointers unset (NULL).
#define LIST_COMMON \
int (*push)(list_t *, void *); \
void *(*pop)(list_t *); \
int (*empty)(list_t *); \
void *(*get)(list_t *, size_t index); \
int (*replace)(list_t *, size_t index, void *); \
size_t (*size)(list_t *); \
void (*iterator_reset)(list_t *); \
void *(*iterator_next)(list_t *); \
void (*destroy)(list_t *)
// Generic list handle: only the shared operation pointers. Concrete list
// types below start with the same members.
struct list_t {
LIST_COMMON;
};
// One node of a linked list.
struct list_linked_element_t {
// Pointer stored by the user of the list
void *data;
// Following node, or NULL for the last node
list_linked_element_t *next;
};
// Linked-list implementation of a list.
struct list_linked_t {
LIST_COMMON;
// Head node (the one removed by pop), or NULL when the list is empty
list_linked_element_t *first;
// Tail node (new nodes are attached after it), or NULL when the list is empty
list_linked_element_t *last;
};
// Array implementation of a list. The element storage is used as a
// circular buffer: positions wrap to 0 when they reach max_size.
struct list_array_t {
LIST_COMMON;
// Slot of the oldest element (the one removed by pop)
size_t first;
// Slot the next pushed element is written to
size_t last;
// Number of slots in elements
size_t max_size;
// Number of elements currently stored
size_t current_size;
// Element storage
void **elements;
// Position of the built-in iterator, counted from the oldest element
size_t iterator_index;
};
list_t *list_linked_create(void);
list_t *list_array_create(size_t size);
// A table of key/value pairs with case-insensitive keys.
struct table_t {
// Backing list holding the entries as alternating key, value, key, value, ...
list_t *list;
};
// Table API. Keys are copied and lowercased on insertion; values are
// stored as given and are never freed by the table.
table_t *table_create(size_t size);
int table_add(table_t *, bstr *, void *);
// NOTE(review): no definition of table_set appears alongside the other
// table functions in dslib.c as seen in this change -- confirm whether it is
// implemented elsewhere or the declaration is stale.
void table_set(table_t *, bstr *, void *);
void *table_get(table_t *, bstr *);
void *table_getc(table_t *, char *);
void table_iterator_reset(table_t *);
bstr *table_iterator_next(table_t *, void **);
size_t table_size(table_t *t);
void table_destroy(table_t *);
void table_clear(table_t *);
#endif /* _DSLIB_H */

@ -0,0 +1,167 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "hooks.h"
/**
 * Allocates an empty hook together with its callback list (initial
 * capacity of four callbacks).
 *
 * @return New htp_hook_t structure on success, NULL on failure
 */
htp_hook_t *hook_create() {
    htp_hook_t *created = calloc(1, sizeof (htp_hook_t));

    if (created != NULL) {
        created->callbacks = list_array_create(4);
        if (created->callbacks != NULL) {
            return created;
        }

        // The callback list could not be created; release the hook
        free(created);
    }

    return NULL;
}
/**
 * Duplicates a hook by registering each of its callback functions, in
 * order, with a freshly created hook.
 *
 * @param hook Hook to copy; may be NULL
 * @return The copy, or NULL if the provided hook was NULL or if memory
 *         could not be allocated while building the copy
 */
htp_hook_t * hook_copy(htp_hook_t *hook) {
    htp_hook_t *copy;
    htp_callback_t *source_cb;

    if (hook == NULL) {
        return NULL;
    }

    copy = hook_create();
    if (copy == NULL) {
        return NULL;
    }

    list_iterator_reset(hook->callbacks);
    for (source_cb = list_iterator_next(hook->callbacks);
         source_cb != NULL;
         source_cb = list_iterator_next(hook->callbacks)) {
        if (hook_register(&copy, source_cb->fn) < 0) {
            // Give up and discard the partial copy
            hook_destroy(copy);
            return NULL;
        }
    }

    return copy;
}
/**
 * Releases a hook: each callback structure reached by the list iterator,
 * then the callback list, then the hook itself.
 *
 * @param hook Hook to destroy; NULL is accepted and ignored
 */
void hook_destroy(htp_hook_t *hook) {
    htp_callback_t *entry;

    if (hook == NULL) {
        return;
    }

    list_iterator_reset(hook->callbacks);
    for (entry = list_iterator_next(hook->callbacks);
         entry != NULL;
         entry = list_iterator_next(hook->callbacks)) {
        free(entry);
    }

    list_destroy(hook->callbacks);
    free(hook);
}
/**
 * Registers a new callback with the hook. If *hook is NULL a new hook is
 * created first and stored in *hook.
 *
 * @param hook Address of the hook pointer; *hook may be NULL
 * @param callback_fn Function to call when the hook runs
 * @return 1 on success, -1 on memory allocation error. On error a hook
 *         created by this call is destroyed again and *hook is reset to
 *         NULL; a hook that already existed is left unchanged.
 */
int hook_register(htp_hook_t **hook, int (*callback_fn)()) {
    int hook_created = 0;
    htp_callback_t *callback = calloc(1, sizeof (htp_callback_t));
    if (callback == NULL) return -1;

    callback->fn = callback_fn;

    // Create a new hook if one does not exist
    if (*hook == NULL) {
        *hook = hook_create();
        if (*hook == NULL) {
            free(callback);
            return -1;
        }

        hook_created = 1;
    }

    // Add callback
    if (list_add((*hook)->callbacks, callback) < 0) {
        if (hook_created) {
            // Release the callback list as well as the hook, and do not
            // leave the caller holding a pointer to freed memory
            hook_destroy(*hook);
            *hook = NULL;
        }

        free(callback);
        return -1;
    }

    return 1;
}
/**
 * Invokes the hook's callbacks one after another, in list order, passing
 * each of them the same data pointer. Processing stops early only when a
 * callback reports HOOK_ERROR.
 *
 * @param hook Hook to run; NULL means there is nothing to run
 * @param data Argument handed to every callback
 * @return HOOK_ERROR if a callback returned it, HOOK_OK otherwise
 */
int hook_run_all(htp_hook_t *hook, void *data) {
    htp_callback_t *entry;

    if (hook != NULL) {
        list_iterator_reset(hook->callbacks);
        for (entry = list_iterator_next(hook->callbacks);
             entry != NULL;
             entry = list_iterator_next(hook->callbacks)) {
            if (entry->fn(data) == HOOK_ERROR) {
                return HOOK_ERROR;
            }
        }
    }

    return HOOK_OK;
}
/**
 * Offers the data to the hook's callbacks in list order until one of
 * them returns something other than HOOK_DECLINED.
 *
 * @param hook Hook to run; NULL means no callback can accept
 * @param data Argument handed to each callback tried
 * @return The status of the first callback that did not decline, or
 *         HOOK_DECLINED if the hook is NULL or every callback declined
 */
int hook_run_one(htp_hook_t *hook, void *data) {
    htp_callback_t *entry;
    int outcome;

    if (hook != NULL) {
        list_iterator_reset(hook->callbacks);
        for (entry = list_iterator_next(hook->callbacks);
             entry != NULL;
             entry = list_iterator_next(hook->callbacks)) {
            outcome = entry->fn(data);

            // Anything but a decline ends the search
            if (outcome != HOOK_DECLINED) {
                return outcome;
            }
        }
    }

    return HOOK_DECLINED;
}

@ -0,0 +1,51 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _HOOKS_H
#define _HOOKS_H
#include "dslib.h"
#ifdef _HTP_H
#define HOOK_ERROR HTP_ERROR
#define HOOK_OK HTP_OK
#define HOOK_DECLINED HTP_DECLINED
#else
#define HOOK_ERROR -1
#define HOOK_OK 0
#define HOOK_DECLINED 1
#endif
typedef struct htp_hook_t htp_hook_t;
typedef struct htp_callback_t htp_callback_t;
// A hook: a named extension point to which callbacks can be registered.
struct htp_hook_t {
// Registered callbacks (htp_callback_t *), in registration order
list_t *callbacks;
};
// One registered callback.
struct htp_callback_t {
// Callback function; the hook runners call it with a single data pointer
// and compare its result against the HOOK_* status codes
int (*fn)();
};
int hook_register(htp_hook_t **hook, int (*callback_fn)());
int hook_run_one(htp_hook_t *hook, void *data);
int hook_run_all(htp_hook_t *hook, void *data);
htp_hook_t *hook_create();
htp_hook_t *hook_copy(htp_hook_t *hook);
void hook_destroy(htp_hook_t *hook);
#endif /* _HOOKS_H */

@ -0,0 +1,27 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <stdlib.h>
#include "htp.h"
/**
* Returns the library version.
*/
const char *htp_get_version() {
return HTP_BASE_VERSION_TEXT " (r$REVISION_MISSING)";
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,716 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
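/**
* Default best-fit map for the Unicode code points U+0100-FFFF. The table
* is a flat sequence of three-byte entries: the high byte of the code point,
* the low byte of the code point, and the single-byte character to use in
* its place. An all-zero entry marks the end of the table.
*/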
static unsigned char bestfit_1252[] =
{ 0x01, 0x00, 0x41, 0x01, 0x01, 0x61, 0x01, 0x02, 0x41, 0x01, 0x03, 0x61,
0x01, 0x04, 0x41, 0x01, 0x05, 0x61, 0x01, 0x06, 0x43, 0x01, 0x07, 0x63,
0x01, 0x08, 0x43, 0x01, 0x09, 0x63, 0x01, 0x0a, 0x43, 0x01, 0x0b, 0x63,
0x01, 0x0c, 0x43, 0x01, 0x0d, 0x63, 0x01, 0x0e, 0x44, 0x01, 0x0f, 0x64,
0x01, 0x11, 0x64, 0x01, 0x12, 0x45, 0x01, 0x13, 0x65, 0x01, 0x14, 0x45,
0x01, 0x15, 0x65, 0x01, 0x16, 0x45, 0x01, 0x17, 0x65, 0x01, 0x18, 0x45,
0x01, 0x19, 0x65, 0x01, 0x1a, 0x45, 0x01, 0x1b, 0x65, 0x01, 0x1c, 0x47,
0x01, 0x1d, 0x67, 0x01, 0x1e, 0x47, 0x01, 0x1f, 0x67, 0x01, 0x20, 0x47,
0x01, 0x21, 0x67, 0x01, 0x22, 0x47, 0x01, 0x23, 0x67, 0x01, 0x24, 0x48,
0x01, 0x25, 0x68, 0x01, 0x26, 0x48, 0x01, 0x27, 0x68, 0x01, 0x28, 0x49,
0x01, 0x29, 0x69, 0x01, 0x2a, 0x49, 0x01, 0x2b, 0x69, 0x01, 0x2c, 0x49,
0x01, 0x2d, 0x69, 0x01, 0x2e, 0x49, 0x01, 0x2f, 0x69, 0x01, 0x30, 0x49,
0x01, 0x31, 0x69, 0x01, 0x34, 0x4a, 0x01, 0x35, 0x6a, 0x01, 0x36, 0x4b,
0x01, 0x37, 0x6b, 0x01, 0x39, 0x4c, 0x01, 0x3a, 0x6c, 0x01, 0x3b, 0x4c,
0x01, 0x3c, 0x6c, 0x01, 0x3d, 0x4c, 0x01, 0x3e, 0x6c, 0x01, 0x41, 0x4c,
0x01, 0x42, 0x6c, 0x01, 0x43, 0x4e, 0x01, 0x44, 0x6e, 0x01, 0x45, 0x4e,
0x01, 0x46, 0x6e, 0x01, 0x47, 0x4e, 0x01, 0x48, 0x6e, 0x01, 0x4c, 0x4f,
0x01, 0x4d, 0x6f, 0x01, 0x4e, 0x4f, 0x01, 0x4f, 0x6f, 0x01, 0x50, 0x4f,
0x01, 0x51, 0x6f, 0x01, 0x54, 0x52, 0x01, 0x55, 0x72, 0x01, 0x56, 0x52,
0x01, 0x57, 0x72, 0x01, 0x58, 0x52, 0x01, 0x59, 0x72, 0x01, 0x5a, 0x53,
0x01, 0x5b, 0x73, 0x01, 0x5c, 0x53, 0x01, 0x5d, 0x73, 0x01, 0x5e, 0x53,
0x01, 0x5f, 0x73, 0x01, 0x62, 0x54, 0x01, 0x63, 0x74, 0x01, 0x64, 0x54,
0x01, 0x65, 0x74, 0x01, 0x66, 0x54, 0x01, 0x67, 0x74, 0x01, 0x68, 0x55,
0x01, 0x69, 0x75, 0x01, 0x6a, 0x55, 0x01, 0x6b, 0x75, 0x01, 0x6c, 0x55,
0x01, 0x6d, 0x75, 0x01, 0x6e, 0x55, 0x01, 0x6f, 0x75, 0x01, 0x70, 0x55,
0x01, 0x71, 0x75, 0x01, 0x72, 0x55, 0x01, 0x73, 0x75, 0x01, 0x74, 0x57,
0x01, 0x75, 0x77, 0x01, 0x76, 0x59, 0x01, 0x77, 0x79, 0x01, 0x79, 0x5a,
0x01, 0x7b, 0x5a, 0x01, 0x7c, 0x7a, 0x01, 0x80, 0x62, 0x01, 0x97, 0x49,
0x01, 0x9a, 0x6c, 0x01, 0x9f, 0x4f, 0x01, 0xa0, 0x4f, 0x01, 0xa1, 0x6f,
0x01, 0xab, 0x74, 0x01, 0xae, 0x54, 0x01, 0xaf, 0x55, 0x01, 0xb0, 0x75,
0x01, 0xb6, 0x7a, 0x01, 0xc0, 0x7c, 0x01, 0xc3, 0x21, 0x01, 0xcd, 0x41,
0x01, 0xce, 0x61, 0x01, 0xcf, 0x49, 0x01, 0xd0, 0x69, 0x01, 0xd1, 0x4f,
0x01, 0xd2, 0x6f, 0x01, 0xd3, 0x55, 0x01, 0xd4, 0x75, 0x01, 0xd5, 0x55,
0x01, 0xd6, 0x75, 0x01, 0xd7, 0x55, 0x01, 0xd8, 0x75, 0x01, 0xd9, 0x55,
0x01, 0xda, 0x75, 0x01, 0xdb, 0x55, 0x01, 0xdc, 0x75, 0x01, 0xde, 0x41,
0x01, 0xdf, 0x61, 0x01, 0xe4, 0x47, 0x01, 0xe5, 0x67, 0x01, 0xe6, 0x47,
0x01, 0xe7, 0x67, 0x01, 0xe8, 0x4b, 0x01, 0xe9, 0x6b, 0x01, 0xea, 0x4f,
0x01, 0xeb, 0x6f, 0x01, 0xec, 0x4f, 0x01, 0xed, 0x6f, 0x01, 0xf0, 0x6a,
0x02, 0x61, 0x67, 0x02, 0xb9, 0x27, 0x02, 0xba, 0x22, 0x02, 0xbc, 0x27,
0x02, 0xc4, 0x5e, 0x02, 0xc8, 0x27, 0x02, 0xcb, 0x60, 0x02, 0xcd, 0x5f,
0x03, 0x00, 0x60, 0x03, 0x02, 0x5e, 0x03, 0x03, 0x7e, 0x03, 0x0e, 0x22,
0x03, 0x31, 0x5f, 0x03, 0x32, 0x5f, 0x03, 0x7e, 0x3b, 0x03, 0x93, 0x47,
0x03, 0x98, 0x54, 0x03, 0xa3, 0x53, 0x03, 0xa6, 0x46, 0x03, 0xa9, 0x4f,
0x03, 0xb1, 0x61, 0x03, 0xb4, 0x64, 0x03, 0xb5, 0x65, 0x03, 0xc0, 0x70,
0x03, 0xc3, 0x73, 0x03, 0xc4, 0x74, 0x03, 0xc6, 0x66, 0x04, 0xbb, 0x68,
0x05, 0x89, 0x3a, 0x06, 0x6a, 0x25, 0x20, 0x00, 0x20, 0x20, 0x01, 0x20,
0x20, 0x02, 0x20, 0x20, 0x03, 0x20, 0x20, 0x04, 0x20, 0x20, 0x05, 0x20,
0x20, 0x06, 0x20, 0x20, 0x10, 0x2d, 0x20, 0x11, 0x2d, 0x20, 0x17, 0x3d,
0x20, 0x32, 0x27, 0x20, 0x35, 0x60, 0x20, 0x44, 0x2f, 0x20, 0x74, 0x34,
0x20, 0x75, 0x35, 0x20, 0x76, 0x36, 0x20, 0x77, 0x37, 0x20, 0x78, 0x38,
0x20, 0x7f, 0x6e, 0x20, 0x80, 0x30, 0x20, 0x81, 0x31, 0x20, 0x82, 0x32,
0x20, 0x83, 0x33, 0x20, 0x84, 0x34, 0x20, 0x85, 0x35, 0x20, 0x86, 0x36,
0x20, 0x87, 0x37, 0x20, 0x88, 0x38, 0x20, 0x89, 0x39, 0x20, 0xa7, 0x50,
0x21, 0x02, 0x43, 0x21, 0x07, 0x45, 0x21, 0x0a, 0x67, 0x21, 0x0b, 0x48,
0x21, 0x0c, 0x48, 0x21, 0x0d, 0x48, 0x21, 0x0e, 0x68, 0x21, 0x10, 0x49,
0x21, 0x11, 0x49, 0x21, 0x12, 0x4c, 0x21, 0x13, 0x6c, 0x21, 0x15, 0x4e,
0x21, 0x18, 0x50, 0x21, 0x19, 0x50, 0x21, 0x1a, 0x51, 0x21, 0x1b, 0x52,
0x21, 0x1c, 0x52, 0x21, 0x1d, 0x52, 0x21, 0x24, 0x5a, 0x21, 0x28, 0x5a,
0x21, 0x2a, 0x4b, 0x21, 0x2c, 0x42, 0x21, 0x2d, 0x43, 0x21, 0x2e, 0x65,
0x21, 0x2f, 0x65, 0x21, 0x30, 0x45, 0x21, 0x31, 0x46, 0x21, 0x33, 0x4d,
0x21, 0x34, 0x6f, 0x22, 0x12, 0x2d, 0x22, 0x15, 0x2f, 0x22, 0x16, 0x5c,
0x22, 0x17, 0x2a, 0x22, 0x1a, 0x76, 0x22, 0x1e, 0x38, 0x22, 0x23, 0x7c,
0x22, 0x29, 0x6e, 0x22, 0x36, 0x3a, 0x22, 0x3c, 0x7e, 0x22, 0x61, 0x3d,
0x22, 0x64, 0x3d, 0x22, 0x65, 0x3d, 0x23, 0x03, 0x5e, 0x23, 0x20, 0x28,
0x23, 0x21, 0x29, 0x23, 0x29, 0x3c, 0x23, 0x2a, 0x3e, 0x25, 0x00, 0x2d,
0x25, 0x0c, 0x2b, 0x25, 0x10, 0x2b, 0x25, 0x14, 0x2b, 0x25, 0x18, 0x2b,
0x25, 0x1c, 0x2b, 0x25, 0x2c, 0x2d, 0x25, 0x34, 0x2d, 0x25, 0x3c, 0x2b,
0x25, 0x50, 0x2d, 0x25, 0x52, 0x2b, 0x25, 0x53, 0x2b, 0x25, 0x54, 0x2b,
0x25, 0x55, 0x2b, 0x25, 0x56, 0x2b, 0x25, 0x57, 0x2b, 0x25, 0x58, 0x2b,
0x25, 0x59, 0x2b, 0x25, 0x5a, 0x2b, 0x25, 0x5b, 0x2b, 0x25, 0x5c, 0x2b,
0x25, 0x5d, 0x2b, 0x25, 0x64, 0x2d, 0x25, 0x65, 0x2d, 0x25, 0x66, 0x2d,
0x25, 0x67, 0x2d, 0x25, 0x68, 0x2d, 0x25, 0x69, 0x2d, 0x25, 0x6a, 0x2b,
0x25, 0x6b, 0x2b, 0x25, 0x6c, 0x2b, 0x25, 0x84, 0x5f, 0x27, 0x58, 0x7c,
0x30, 0x00, 0x20, 0x30, 0x08, 0x3c, 0x30, 0x09, 0x3e, 0x30, 0x1a, 0x5b,
0x30, 0x1b, 0x5d, 0xff, 0x01, 0x21, 0xff, 0x02, 0x22, 0xff, 0x03, 0x23,
0xff, 0x04, 0x24, 0xff, 0x05, 0x25, 0xff, 0x06, 0x26, 0xff, 0x07, 0x27,
0xff, 0x08, 0x28, 0xff, 0x09, 0x29, 0xff, 0x0a, 0x2a, 0xff, 0x0b, 0x2b,
0xff, 0x0c, 0x2c, 0xff, 0x0d, 0x2d, 0xff, 0x0e, 0x2e, 0xff, 0x0f, 0x2f,
0xff, 0x10, 0x30, 0xff, 0x11, 0x31, 0xff, 0x12, 0x32, 0xff, 0x13, 0x33,
0xff, 0x14, 0x34, 0xff, 0x15, 0x35, 0xff, 0x16, 0x36, 0xff, 0x17, 0x37,
0xff, 0x18, 0x38, 0xff, 0x19, 0x39, 0xff, 0x1a, 0x3a, 0xff, 0x1b, 0x3b,
0xff, 0x1c, 0x3c, 0xff, 0x1d, 0x3d, 0xff, 0x1e, 0x3e, 0xff, 0x20, 0x40,
0xff, 0x21, 0x41, 0xff, 0x22, 0x42, 0xff, 0x23, 0x43, 0xff, 0x24, 0x44,
0xff, 0x25, 0x45, 0xff, 0x26, 0x46, 0xff, 0x27, 0x47, 0xff, 0x28, 0x48,
0xff, 0x29, 0x49, 0xff, 0x2a, 0x4a, 0xff, 0x2b, 0x4b, 0xff, 0x2c, 0x4c,
0xff, 0x2d, 0x4d, 0xff, 0x2e, 0x4e, 0xff, 0x2f, 0x4f, 0xff, 0x30, 0x50,
0xff, 0x31, 0x51, 0xff, 0x32, 0x52, 0xff, 0x33, 0x53, 0xff, 0x34, 0x54,
0xff, 0x35, 0x55, 0xff, 0x36, 0x56, 0xff, 0x37, 0x57, 0xff, 0x38, 0x58,
0xff, 0x39, 0x59, 0xff, 0x3a, 0x5a, 0xff, 0x3b, 0x5b, 0xff, 0x3c, 0x5c,
0xff, 0x3d, 0x5d, 0xff, 0x3e, 0x5e, 0xff, 0x3f, 0x5f, 0xff, 0x40, 0x60,
0xff, 0x41, 0x61, 0xff, 0x42, 0x62, 0xff, 0x43, 0x63, 0xff, 0x44, 0x64,
0xff, 0x45, 0x65, 0xff, 0x46, 0x66, 0xff, 0x47, 0x67, 0xff, 0x48, 0x68,
0xff, 0x49, 0x69, 0xff, 0x4a, 0x6a, 0xff, 0x4b, 0x6b, 0xff, 0x4c, 0x6c,
0xff, 0x4d, 0x6d, 0xff, 0x4e, 0x6e, 0xff, 0x4f, 0x6f, 0xff, 0x50, 0x70,
0xff, 0x51, 0x71, 0xff, 0x52, 0x72, 0xff, 0x53, 0x73, 0xff, 0x54, 0x74,
0xff, 0x55, 0x75, 0xff, 0x56, 0x76, 0xff, 0x57, 0x77, 0xff, 0x58, 0x78,
0xff, 0x59, 0x79, 0xff, 0x5a, 0x7a, 0xff, 0x5b, 0x7b, 0xff, 0x5c, 0x7c,
0xff, 0x5d, 0x7d, 0xff, 0x5e, 0x7e, 0x00, 0x00, 0x00
};
/**
 * Allocates a configuration structure and fills in the defaults: header
 * field limits, log level, the best-fit map and replacement character
 * for paths, and finally the minimal server personality. Configuration
 * structures created at configuration time must not be changed
 * afterwards in order to support lock-less copying.
 *
 * @return New configuration structure, or NULL on memory allocation failure
 */
htp_cfg_t *htp_config_create() {
    htp_cfg_t *config = calloc(1, sizeof(htp_cfg_t));

    if (config != NULL) {
        config->log_level = HTP_LOG_NOTICE;
        config->field_limit_soft = HTP_HEADER_LIMIT_SOFT;
        config->field_limit_hard = HTP_HEADER_LIMIT_HARD;
        config->path_replacement_char = '?';
        config->path_u_bestfit_map = bestfit_1252;

        // Hooks are not created here; callback registration creates
        // them on demand

        // The personality is applied last, after the plain defaults
        htp_config_set_server_personality(config, HTP_SERVER_MINIMAL);
    }

    return config;
}
/**
 * Creates a copy of the supplied configuration structure. The idea is to create
 * one or more configuration objects at configuration-time, but to use this
 * function to create per-connection copies. That way it will be possible to
 * adjust per-connection configuration as necessary, without affecting the
 * global configuration. Make sure no other thread changes the configuration
 * object while this function is operating.
 *
 * All plain fields (limits, log level, parser callbacks, path options, the
 * best-fit map pointer) are copied by value; every hook that is present in
 * the source is duplicated with hook_copy() so that the copy owns its hooks.
 *
 * @param cfg Configuration to copy.
 * @return A copy of the configuration structure, or NULL if cfg is NULL or
 *         if memory could not be allocated. Nothing is leaked on failure.
 */
htp_cfg_t *htp_config_copy(htp_cfg_t *cfg) {
    if (cfg == NULL) return NULL;

    htp_cfg_t *copy = malloc(sizeof (htp_cfg_t));
    if (copy == NULL) return NULL;

    // Begin with a shallow copy so that every non-hook setting is carried
    // over (previously only the hooks were copied and the rest was zeroed).
    *copy = *cfg;

    // The hook pointers now alias the original's hooks. Clear them so that
    // a failure part-way through never destroys hooks we do not own.
    copy->hook_transaction_start = NULL;
    copy->hook_request_line = NULL;
    copy->hook_request_headers = NULL;
    copy->hook_request_body_data = NULL;
    copy->hook_request_trailer = NULL;
    copy->hook_request = NULL;
    copy->hook_response_line = NULL;
    copy->hook_response_headers = NULL;
    copy->hook_response_body_data = NULL;
    copy->hook_response_trailer = NULL;
    copy->hook_response = NULL;
    copy->hook_log = NULL;

    // Create copies of the hooks' structures
    if (cfg->hook_transaction_start != NULL) {
        copy->hook_transaction_start = hook_copy(cfg->hook_transaction_start);
        if (copy->hook_transaction_start == NULL) goto fail;
    }

    if (cfg->hook_request_line != NULL) {
        copy->hook_request_line = hook_copy(cfg->hook_request_line);
        if (copy->hook_request_line == NULL) goto fail;
    }

    if (cfg->hook_request_headers != NULL) {
        copy->hook_request_headers = hook_copy(cfg->hook_request_headers);
        if (copy->hook_request_headers == NULL) goto fail;
    }

    if (cfg->hook_request_body_data != NULL) {
        copy->hook_request_body_data = hook_copy(cfg->hook_request_body_data);
        if (copy->hook_request_body_data == NULL) goto fail;
    }

    if (cfg->hook_request_trailer != NULL) {
        copy->hook_request_trailer = hook_copy(cfg->hook_request_trailer);
        if (copy->hook_request_trailer == NULL) goto fail;
    }

    if (cfg->hook_request != NULL) {
        copy->hook_request = hook_copy(cfg->hook_request);
        if (copy->hook_request == NULL) goto fail;
    }

    if (cfg->hook_response_line != NULL) {
        copy->hook_response_line = hook_copy(cfg->hook_response_line);
        if (copy->hook_response_line == NULL) goto fail;
    }

    if (cfg->hook_response_headers != NULL) {
        copy->hook_response_headers = hook_copy(cfg->hook_response_headers);
        if (copy->hook_response_headers == NULL) goto fail;
    }

    if (cfg->hook_response_body_data != NULL) {
        copy->hook_response_body_data = hook_copy(cfg->hook_response_body_data);
        if (copy->hook_response_body_data == NULL) goto fail;
    }

    if (cfg->hook_response_trailer != NULL) {
        copy->hook_response_trailer = hook_copy(cfg->hook_response_trailer);
        if (copy->hook_response_trailer == NULL) goto fail;
    }

    if (cfg->hook_response != NULL) {
        copy->hook_response = hook_copy(cfg->hook_response);
        if (copy->hook_response == NULL) goto fail;
    }

    if (cfg->hook_log != NULL) {
        copy->hook_log = hook_copy(cfg->hook_log);
        if (copy->hook_log == NULL) goto fail;
    }

    return copy;

fail:
    // Releases the hooks copied so far (the remaining ones are NULL, which
    // htp_config_destroy() already has to cope with for freshly created
    // configurations) and then the structure itself.
    htp_config_destroy(copy);
    return NULL;
}
/**
 * Destroy a configuration structure, together with all the hooks it owns.
 *
 * @param cfg Configuration to destroy; NULL is accepted and ignored.
 */
void htp_config_destroy(htp_cfg_t *cfg) {
    // A private configuration copy may have failed to allocate, in which
    // case callers end up handing us NULL; treat that like free(NULL).
    if (cfg == NULL) return;

    // Destroy the hooks
    hook_destroy(cfg->hook_transaction_start);
    hook_destroy(cfg->hook_request_line);
    hook_destroy(cfg->hook_request_headers);
    hook_destroy(cfg->hook_request_body_data);
    hook_destroy(cfg->hook_request_trailer);
    hook_destroy(cfg->hook_request);
    hook_destroy(cfg->hook_response_line);
    hook_destroy(cfg->hook_response_headers);
    hook_destroy(cfg->hook_response_body_data);
    hook_destroy(cfg->hook_response_trailer);
    hook_destroy(cfg->hook_response);
    hook_destroy(cfg->hook_log);

    // Free the structure itself
    free(cfg);
}
/**
 * Registers a transaction_start callback by passing it to hook_register()
 * together with the address of cfg->hook_transaction_start.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_transaction_start(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_transaction_start, callback_fn);
}
/**
 * Registers a request_line callback by passing it to hook_register()
 * together with the address of cfg->hook_request_line.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_request_line(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_request_line, callback_fn);
}
/**
 * Registers a request_headers callback by passing it to hook_register()
 * together with the address of cfg->hook_request_headers.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_request_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_request_headers, callback_fn);
}
/**
 * Registers a request_trailer callback by passing it to hook_register()
 * together with the address of cfg->hook_request_trailer.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_request_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_request_trailer, callback_fn);
}
/**
 * Registers a request_body_data callback by passing it to hook_register()
 * together with the address of cfg->hook_request_body_data. Unlike most other
 * callbacks, this one receives a transaction data structure rather than
 * the connection parser.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking an htp_tx_data_t pointer and returning int.
 */
void htp_config_register_request_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
hook_register(&cfg->hook_request_body_data, callback_fn);
}
/**
 * Registers a request callback by passing it to hook_register()
 * together with the address of cfg->hook_request.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_request(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_request, callback_fn);
}
/**
 * Registers a response_line callback by passing it to hook_register()
 * together with the address of cfg->hook_response_line.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_response_line(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_response_line, callback_fn);
}
/**
 * Registers a response_headers callback by passing it to hook_register()
 * together with the address of cfg->hook_response_headers.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_response_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_response_headers, callback_fn);
}
/**
 * Registers a response_trailer callback by passing it to hook_register()
 * together with the address of cfg->hook_response_trailer.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_response_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_response_trailer, callback_fn);
}
/**
 * Registers a response_body_data callback by passing it to hook_register()
 * together with the address of cfg->hook_response_body_data. Unlike most other
 * callbacks, this one receives a transaction data structure rather than
 * the connection parser.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking an htp_tx_data_t pointer and returning int.
 */
void htp_config_register_response_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
hook_register(&cfg->hook_response_body_data, callback_fn);
}
/**
 * Registers a response callback by passing it to hook_register()
 * together with the address of cfg->hook_response.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking a connection parser and returning int.
 */
void htp_config_register_response(htp_cfg_t *cfg, int (*callback_fn)(htp_connp_t *)) {
hook_register(&cfg->hook_response, callback_fn);
}
/**
 * Registers a callback that is invoked every time there is a log message.
 * The callback is passed to hook_register() together with the address of
 * cfg->hook_log.
 *
 * @param cfg Configuration that receives the callback.
 * @param callback_fn Callback taking an htp_log_t pointer and returning int.
 */
void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *)) {
hook_register(&cfg->hook_log, callback_fn);
}
/**
 * Update the best-fit map, which is used to convert UCS-2 characters into
 * single-byte characters. By default a Windows 1252 best-fit map is used. The map
 * is a list of triplets, the first 2 bytes being a UCS-2 character to map from,
 * and the third byte being the single byte to map to. Make sure that your map contains
 * the mappings to cover the fullwidth form characters (U+FF00-FFEF).
 *
 * Only the pointer is stored; the map itself is not copied here.
 *
 * @param cfg Configuration to update.
 * @param map Pointer to the new best-fit map.
 */
void htp_config_set_bestfit_map(htp_cfg_t *cfg, unsigned char *map) {
cfg->path_u_bestfit_map = map;
}
/**
 * Whether to generate the request_uri_normalized field.
 *
 * @param cfg Configuration to update.
 * @param generate Value stored as-is in cfg->generate_request_uri_normalized.
 */
void htp_config_set_generate_request_uri_normalized(htp_cfg_t *cfg, int generate) {
cfg->generate_request_uri_normalized = generate;
}
/**
 * Configures whether backslash characters are treated as path segment separators. They
 * are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
 * such as "/one\two/three" will be converted to "/one/two/three".
 *
 * @param cfg Configuration to update.
 * @param backslash_separators New setting; the server personalities use YES and NO.
 */
void htp_config_set_path_backslash_separators(htp_cfg_t *cfg, int backslash_separators) {
cfg->path_backslash_separators = backslash_separators;
}
/**
 * Configures filesystem sensitivity. This setting affects
 * how URL paths are normalized. There are no path modifications by default, but
 * on case-insensitive systems the path will be converted to lowercase.
 *
 * @param cfg Configuration to update.
 * @param case_insensitive New setting; the IDS personality uses YES.
 */
void htp_config_set_path_case_insensitive(htp_cfg_t *cfg, int case_insensitive) {
cfg->path_case_insensitive = case_insensitive;
}
/**
 * Configures whether consecutive path segment separators will be compressed. When
 * enabled, a path such as "/one//two" will be normalized to "/one/two". The backslash_separators
 * and decode_separators parameters are used before compression takes place. For example, if
 * backslash_separators and decode_separators are both enabled, the path "/one\\/two\/%5cthree/%2f//four"
 * will be converted to "/one/two/three/four".
 *
 * @param cfg Configuration to update.
 * @param compress_separators New setting; the server personalities use YES.
 */
void htp_config_set_path_compress_separators(htp_cfg_t *cfg, int compress_separators) {
cfg->path_compress_separators = compress_separators;
}
/**
 * This parameter is used to predict how a server will react when control
 * characters are present in a request path, but does not affect path
 * normalization.
 *
 * @param cfg Configuration to update.
 * @param control_char_handling Use NONE with servers that ignore control characters in
 *                              request path, and STATUS_400 with servers that respond
 *                              with 400.
 */
void htp_config_set_path_control_char_handling(htp_cfg_t *cfg, int control_char_handling) {
cfg->path_control_char_handling = control_char_handling;
}
/**
 * Controls the UTF-8 treatment of request paths. One option is to only validate
 * path as UTF-8. In this case, the UTF-8 flags will be raised as appropriate, and
 * the path will remain in UTF-8 (if it was UTF-8 in the first place). The other option
 * is to convert a UTF-8 path into a single byte stream using best-fit mapping.
 *
 * @param cfg Configuration to update.
 * @param convert_utf8 New setting; the IDS personality uses YES to request conversion.
 */
void htp_config_set_path_convert_utf8(htp_cfg_t *cfg, int convert_utf8) {
cfg->path_convert_utf8 = convert_utf8;
}
/**
 * Configures whether encoded path segment separators will be decoded. Apache does
 * not do this, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
 * to "/one/two". If the backslash_separators option is also enabled, encoded backslash
 * characters will be converted too (and subsequently normalized to forward slashes).
 *
 * @param cfg Configuration to update.
 * @param decode_separators New setting; the server personalities use YES and NO.
 */
void htp_config_set_path_decode_separators(htp_cfg_t *cfg, int decode_separators) {
cfg->path_decode_separators = decode_separators;
}
/**
 * Configures whether %u-encoded sequences in path will be decoded. Such sequences
 * will be treated as invalid URL encoding if decoding is not desirable.
 *
 * @param cfg Configuration to update.
 * @param decode_u_encoding New setting; the server personalities use YES to enable decoding.
 */
void htp_config_set_path_decode_u_encoding(htp_cfg_t *cfg, int decode_u_encoding) {
cfg->path_decode_u_encoding = decode_u_encoding;
}
/**
 * Configures how the server reacts to invalid encoding in path.
 *
 * @param cfg Configuration to update.
 * @param invalid_encoding_handling The available options are: URL_DECODER_PRESERVE_PERCENT,
 *                                  URL_DECODER_REMOVE_PERCENT, URL_DECODER_DECODE_INVALID
 *                                  and URL_DECODER_STATUS_400.
 */
void htp_config_set_path_invalid_encoding_handling(htp_cfg_t *cfg, int invalid_encoding_handling) {
cfg->path_invalid_encoding_handling = invalid_encoding_handling;
}
/**
 * Configures how the server reacts to invalid UTF-8 characters in path. This setting will
 * not affect path normalization; it only controls what response status we expect for
 * a request that contains invalid UTF-8 characters.
 *
 * @param cfg Configuration to update.
 * @param invalid_utf8_handling Possible values: NONE or STATUS_400.
 */
void htp_config_set_path_invalid_utf8_handling(htp_cfg_t *cfg, int invalid_utf8_handling) {
cfg->path_invalid_utf8_handling = invalid_utf8_handling;
}
/**
 * Configures how the server reacts to encoded NUL bytes. Some servers will terminate
 * path at NUL, while some will respond with 400 or 404. When the termination option
 * is not used, the NUL byte will remain in the path.
 *
 * @param cfg Configuration to update.
 * @param nul_encoded_handling Possible values: TERMINATE, STATUS_400, STATUS_404
 */
void htp_config_set_path_nul_encoded_handling(htp_cfg_t *cfg, int nul_encoded_handling) {
cfg->path_nul_encoded_handling = nul_encoded_handling;
}
/**
 * Configures how the server reacts to raw NUL bytes. Some servers will terminate
 * path at NUL, while some will respond with 400 or 404. When the termination option
 * is not used, the NUL byte will remain in the path.
 *
 * @param cfg Configuration to update.
 * @param nul_raw_handling Possible values: TERMINATE, STATUS_400, STATUS_404
 */
void htp_config_set_path_nul_raw_handling(htp_cfg_t *cfg, int nul_raw_handling) {
cfg->path_nul_raw_handling = nul_raw_handling;
}
/**
 * Sets the replacement character that will be used in the lossy best-fit
 * mapping from Unicode characters into single-byte streams. The question mark
 * is the default replacement character.
 *
 * @param cfg Configuration to update.
 * @param replacement_char The new replacement character.
 */
void htp_config_set_path_replacement_char(htp_cfg_t *cfg, int replacement_char) {
cfg->path_replacement_char = replacement_char;
}
/**
 * Controls what the library does when it encounters a Unicode character where
 * only a single-byte would do (e.g., the %u-encoded characters). Conversion always
 * takes place; this parameter is used to correctly predict the status code used
 * in response. In the future there will probably be an option to convert such
 * characters to UCS-2 or UTF-8.
 *
 * @param cfg Configuration to update.
 * @param unicode_mapping Possible values: BESTFIT, STATUS_400, STATUS_404.
 */
void htp_config_set_path_unicode_mapping(htp_cfg_t *cfg, int unicode_mapping) {
cfg->path_unicode_mapping = unicode_mapping;
}
/**
 * Controls how the server reacts to overlong UTF-8 characters.
 * XXX Not used at the moment.
 *
 * @param cfg Configuration to update.
 * @param utf8_overlong_handling Value stored as-is in cfg->path_utf8_overlong_handling.
 */
void htp_config_set_path_utf8_overlong_handling(htp_cfg_t *cfg, int utf8_overlong_handling) {
cfg->path_utf8_overlong_handling = utf8_overlong_handling;
}
/**
 * Installs the generic request/response line parsers and header processors.
 * Shared by every personality that has no server-specific parsing functions.
 *
 * @param cfg Configuration to update.
 */
static void htp_config_use_generic_parsers(htp_cfg_t *cfg) {
    cfg->parse_request_line = htp_parse_request_line_generic;
    cfg->process_request_header = htp_process_request_header_generic;
    cfg->parse_response_line = htp_parse_response_line_generic;
    cfg->process_response_header = htp_process_response_header_generic;
}

/**
 * Selects the server personality: the set of parsing functions and path
 * handling options used to emulate a particular server. Only the options
 * listed for a personality are written; everything else in the configuration
 * keeps its current value. An unsupported personality leaves the
 * configuration untouched.
 *
 * @param cfg Configuration to update.
 * @param personality One of the HTP_SERVER_* constants.
 * @return HTP_OK if the personality is supported, HTP_ERROR if it isn't.
 */
int htp_config_set_server_personality(htp_cfg_t *cfg, int personality) {
    switch (personality) {
        case HTP_SERVER_MINIMAL:
            // Generic parsing and no path processing at all
            htp_config_use_generic_parsers(cfg);
            break;

        case HTP_SERVER_GENERIC:
            htp_config_use_generic_parsers(cfg);

            cfg->path_backslash_separators = YES;
            cfg->path_decode_separators = YES;
            cfg->path_compress_separators = YES;
            break;

        case HTP_SERVER_IDS:
            htp_config_use_generic_parsers(cfg);

            cfg->path_backslash_separators = YES;
            cfg->path_case_insensitive = YES;
            cfg->path_decode_separators = YES;
            cfg->path_compress_separators = YES;
            cfg->path_decode_u_encoding = YES;
            cfg->path_unicode_mapping = BESTFIT;
            cfg->path_convert_utf8 = YES;
            break;

        case HTP_SERVER_APACHE:
        case HTP_SERVER_APACHE_2_2:
            // Apache has its own request-side functions; the
            // response side is still handled generically
            cfg->parse_request_line = htp_parse_request_line_apache_2_2;
            cfg->process_request_header = htp_process_request_header_apache_2_2;
            cfg->parse_response_line = htp_parse_response_line_generic;
            cfg->process_response_header = htp_process_response_header_generic;

            cfg->path_backslash_separators = NO;
            cfg->path_decode_separators = NO;
            cfg->path_compress_separators = YES;
            cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400;
            cfg->path_control_char_handling = NONE;
            break;

        case HTP_SERVER_IIS_5_1:
            htp_config_use_generic_parsers(cfg);

            cfg->path_backslash_separators = YES;
            cfg->path_decode_separators = NO;
            cfg->path_compress_separators = YES;
            cfg->path_invalid_encoding_handling = URL_DECODER_PRESERVE_PERCENT;
            cfg->path_decode_u_encoding = YES;
            cfg->path_unicode_mapping = BESTFIT;
            cfg->path_control_char_handling = NONE;
            break;

        case HTP_SERVER_IIS_6_0:
            htp_config_use_generic_parsers(cfg);

            cfg->path_backslash_separators = YES;
            cfg->path_decode_separators = YES;
            cfg->path_compress_separators = YES;
            cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400;
            cfg->path_decode_u_encoding = YES;
            cfg->path_unicode_mapping = STATUS_400;
            cfg->path_control_char_handling = STATUS_400;
            break;

        case HTP_SERVER_IIS_7_0:
        case HTP_SERVER_IIS_7_5:
            htp_config_use_generic_parsers(cfg);

            cfg->path_backslash_separators = YES;
            cfg->path_decode_separators = YES;
            cfg->path_compress_separators = YES;
            cfg->path_invalid_encoding_handling = URL_DECODER_STATUS_400;
            cfg->path_control_char_handling = STATUS_400;
            break;

        default:
            // Unknown personality; nothing has been modified
            return HTP_ERROR;
    }

    // Record which personality is in effect
    cfg->spersonality = personality;

    return HTP_OK;
}

@ -0,0 +1,116 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Allocates a connection structure along with its (initially empty)
 * transaction and message lists.
 *
 * @param connp Connection parser that will own the connection.
 * @return A new htp_conn_t structure on success, NULL on memory allocation failure.
 */
htp_conn_t *htp_conn_create(htp_connp_t *connp) {
    htp_conn_t *c = calloc(1, sizeof (htp_conn_t));
    if (c == NULL) return NULL;

    c->connp = connp;

    c->transactions = list_array_create(16);
    if (c->transactions != NULL) {
        c->messages = list_array_create(8);
        if (c->messages != NULL) {
            // Both lists are in place
            return c;
        }

        // The message list failed; undo the transaction list
        list_destroy(c->transactions);
    }

    free(c);
    return NULL;
}
/**
 * Destroys a connection together with every transaction and log message
 * it holds. A connection cannot be destroyed while leaving its transactions
 * alive, because transactions need their connection (and the connection
 * structure holds little data anyway). The reverse is possible: a single
 * transaction can be deleted while its connection stays alive.
 *
 * @param conn Connection to destroy; NULL is ignored.
 */
void htp_conn_destroy(htp_conn_t *conn) {
    if (conn == NULL) return;

    // Walk the transactions by index rather than with the iterator: slots
    // may hold NULL, and the iterator cannot tell a NULL element apart
    // from the end of the list.
    size_t idx = 0;
    while (idx < list_size(conn->transactions)) {
        htp_tx_t *tx = (htp_tx_t *) list_get(conn->transactions, idx);
        if (tx != NULL) htp_tx_destroy(tx);
        idx++;
    }

    list_destroy(conn->transactions);

    // Release the log messages
    htp_log_t *entry;
    list_iterator_reset(conn->messages);
    for (entry = list_iterator_next(conn->messages);
            entry != NULL;
            entry = list_iterator_next(conn->messages)) {
        free((void *) entry->msg);
        free(entry);
    }

    list_destroy(conn->messages);

    // free() accepts NULL, so the addresses need no separate checks
    free(conn->local_addr);
    free(conn->remote_addr);

    // Lastly, the connection structure itself
    free(conn);
}
/**
 * Detaches the given transaction from the connection so that it can be
 * destroyed safely. The transaction's slot is overwritten with NULL instead
 * of being removed, which keeps the indexes of all other transactions stable.
 *
 * @param conn Connection to search.
 * @param tx Transaction to detach.
 * @return 1 if transaction was removed or 0 if it wasn't found
 */
int htp_conn_remove_tx(htp_conn_t *conn, htp_tx_t *tx) {
    if (tx == NULL || conn == NULL) return 0;

    int removed = 0;
    unsigned int pos;

    for (pos = 0; !removed && pos < list_size(conn->transactions); pos++) {
        htp_tx_t *candidate = list_get(conn->transactions, pos);
        if (candidate == tx) {
            list_replace(conn->transactions, pos, NULL);
            removed = 1;
        }
    }

    return removed;
}

@ -0,0 +1,274 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
// NOTE The parser contains a lot of duplicated code. That is on purpose.
//
// Within the request parser alone there are several states in which
// bytes are copied into the line buffer and lines are processed one at a time.
// This code could be made more elegant by adding a new line-reading state along
// with a what-fn-to-invoke-to-handle-line pointer.
//
// Furthermore, the entire request parser is terribly similar to the response parser.
// It is imaginable that a single parser could handle both.
//
// After some thought, I decided not to make any changes (at least not for the time
// being). State-based parsers are sometimes difficult to understand. I remember trying
// to figure one out once and I had a hard time following the logic because each function
// was small and did exactly one thing. There was jumping all around. You could probably
// say that it was elegant, but I saw it as difficult to understand, verify and maintain.
//
// Thus, I am keeping this inelegant but quite straightforward parser with duplicated code,
// mostly for the sake of maintenance.
//
// For the time being, anyway. I will review at a later time.
/**
 * Clears an existing parser error, if any, by resetting connp->last_error
 * to NULL. The log entry that was referenced is not freed here.
 *
 * @param connp Connection parser whose last error is cleared.
 */
void htp_connp_clear_error(htp_connp_t *connp) {
connp->last_error = NULL;
}
/**
 * Closes the connection associated with the supplied parser: records the
 * close timestamp, marks both streams as closed and then invokes the request
 * and response parsers once more with no data.
 *
 * @param connp Connection parser whose connection is being closed.
 * @param timestamp Time of closure; stored in conn->close_timestamp and
 *                  passed on to both parsers.
 */
void htp_connp_close(htp_connp_t *connp, htp_time_t timestamp) {
// Update internal information
connp->conn->close_timestamp = timestamp;
connp->in_status = STREAM_STATE_CLOSED;
connp->out_status = STREAM_STATE_CLOSED;
// Call the parsers one last time (with a NULL buffer of length 0),
// which will allow them to process the events that depend on
// stream closure. The statuses must already be CLOSED at this point.
htp_connp_req_data(connp, timestamp, NULL, 0);
htp_connp_res_data(connp, timestamp, NULL, 0);
}
/**
 * Creates a new connection parser using the provided configuration. Because
 * the configuration structure is used directly, in a multithreaded environment
 * you are not allowed to change the structure, ever. If you have a need to
 * change configuration on per-connection basis, make a copy of the configuration
 * structure to go along with every connection parser.
 *
 * The request and response line buffers are both sized from
 * cfg->field_limit_hard.
 *
 * @param cfg Configuration to use; it is referenced, not copied.
 * @return A pointer to a newly created htp_connp_t instance, or NULL if cfg
 *         is NULL or if memory could not be allocated.
 */
htp_connp_t *htp_connp_create(htp_cfg_t *cfg) {
    // The buffer sizes are read from the configuration, so it is mandatory
    if (cfg == NULL) return NULL;

    htp_connp_t *connp = calloc(1, sizeof (htp_connp_t));
    if (connp == NULL) return NULL;

    // Use the supplied configuration structure
    connp->cfg = cfg;

    // Create a new connection object
    connp->conn = htp_conn_create(connp);
    if (connp->conn == NULL) goto fail;

    // Request parsing
    connp->in_line_size = cfg->field_limit_hard;
    connp->in_line_len = 0;
    connp->in_line = malloc(connp->in_line_size);
    if (connp->in_line == NULL) goto fail;

    connp->in_header_line_index = -1;
    connp->in_state = htp_connp_REQ_IDLE;

    // Response parsing
    connp->out_line_size = cfg->field_limit_hard;
    connp->out_line_len = 0;
    connp->out_line = malloc(connp->out_line_size);
    if (connp->out_line == NULL) goto fail;

    connp->out_header_line_index = -1;
    connp->out_state = htp_connp_RES_IDLE;

    // Both streams start out in the NEW state; htp_connp_open() refuses
    // to run on anything else.
    connp->in_status = STREAM_STATE_NEW;
    connp->out_status = STREAM_STATE_NEW;

    return connp;

fail:
    // The structure was zero-initialised, so whatever has not been created
    // yet is NULL; free() and htp_conn_destroy() both accept NULL.
    free(connp->in_line);
    htp_conn_destroy(connp->conn);
    free(connp);
    return NULL;
}
/**
 * Creates a new connection parser, making a private copy of the supplied
 * configuration structure. The copy is owned by the parser and is destroyed
 * together with it.
 *
 * @param cfg Configuration to copy.
 * @return A pointer to a newly created htp_connp_t instance, or NULL if
 *         either the parser or the configuration copy could not be allocated.
 */
htp_connp_t *htp_connp_create_copycfg(htp_cfg_t *cfg) {
    htp_connp_t *connp = htp_connp_create(cfg);
    if (connp == NULL) return NULL;

    htp_cfg_t *copy = htp_config_copy(cfg);
    if (copy == NULL) {
        // Never hand out a parser with a NULL configuration. The parser has
        // only just been created, so its line buffers and its connection
        // are all there is to release.
        free(connp->out_line);
        free(connp->in_line);
        htp_conn_destroy(connp->conn);
        free(connp);
        return NULL;
    }

    connp->cfg = copy;
    connp->is_cfg_private = 1;

    return connp;
}
/**
 * Destroys the connection parser and its data structures, leaving
 * the connection data intact.
 *
 * @param connp Connection parser to destroy; NULL is accepted and ignored.
 */
void htp_connp_destroy(htp_connp_t *connp) {
    if (connp == NULL) return;

    if (connp->out_decompressor != NULL) {
        connp->out_decompressor->destroy(connp->out_decompressor);
        connp->out_decompressor = NULL;
    }

    // Partially assembled request header line, if any
    if (connp->in_header_line != NULL) {
        free(connp->in_header_line->line);
        free(connp->in_header_line);
    }

    free(connp->in_line);

    // Partially assembled response header line, if any
    if (connp->out_header_line != NULL) {
        free(connp->out_header_line->line);
        free(connp->out_header_line);
    }

    free(connp->out_line);

    // Destroy the configuration structure, but only if it is our private
    // copy. The copy can be NULL if duplicating the configuration failed.
    if ((connp->is_cfg_private) && (connp->cfg != NULL)) {
        htp_config_destroy(connp->cfg);
    }

    free(connp);
}
/**
 * Destroys the connection (including its transactions) and then the
 * connection parser with its data structures. If the parser has no
 * connection, a diagnostic is written to stderr and nothing is destroyed.
 *
 * @param connp Connection parser to destroy.
 */
void htp_connp_destroy_all(htp_connp_t *connp) {
    if (connp->conn != NULL) {
        // The connection goes first...
        htp_conn_destroy(connp->conn);
        connp->conn = NULL;

        // ...followed by the parser itself
        htp_connp_destroy(connp);
        return;
    }

    fprintf(stderr, "HTP: htp_connp_destroy_all was invoked, but conn is NULL\n");
}
/**
 * Retrieve the user data associated with this connection parser
 * (the pointer stored in connp->user_data).
 *
 * @param connp Connection parser to query.
 * @return User data, or NULL if there isn't any.
 */
void *htp_connp_get_data(htp_connp_t *connp) {
return connp->user_data;
}
/**
 * Returns the last error that occurred with this connection parser. Do note, however,
 * that the value in this field will only be valid immediately after an error condition,
 * but it is not guaranteed to remain valid if the parser is invoked again.
 *
 * @param connp Connection parser to query.
 * @return A pointer to an htp_log_t instance if there is an error, or NULL
 *         if there isn't.
 */
htp_log_t *htp_connp_get_last_error(htp_connp_t *connp) {
return connp->last_error;
}
/**
 * Opens connection. Both address strings are duplicated, so the caller keeps
 * ownership of its own buffers. If either duplication fails the function
 * returns without touching the connection, which stays in the NEW state.
 *
 * @param connp Connection parser; both of its streams must be in the NEW state.
 * @param remote_addr Remote address (may be NULL)
 * @param remote_port Remote port
 * @param local_addr Local address (may be NULL)
 * @param local_port Local port
 * @param timestamp Stored as the connection's open timestamp.
 */
void htp_connp_open(htp_connp_t *connp, const char *remote_addr, int remote_port, const char *local_addr, int local_port, htp_time_t timestamp) {
    if ((connp->in_status != STREAM_STATE_NEW) || (connp->out_status != STREAM_STATE_NEW)) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Connection is already open");
        return;
    }

    // Duplicate both addresses before modifying the connection. Earlier a
    // failure on the second strdup() freed remote_addr but left the stale
    // pointer in place, which htp_conn_destroy() would then free again.
    char *remote_copy = NULL;
    char *local_copy = NULL;

    if (remote_addr != NULL) {
        remote_copy = strdup(remote_addr);
        if (remote_copy == NULL) return;
    }

    if (local_addr != NULL) {
        local_copy = strdup(local_addr);
        if (local_copy == NULL) {
            free(remote_copy);
            return;
        }
    }

    if (remote_copy != NULL) connp->conn->remote_addr = remote_copy;
    connp->conn->remote_port = remote_port;

    if (local_copy != NULL) connp->conn->local_addr = local_copy;
    connp->conn->local_port = local_port;

    connp->conn->open_timestamp = timestamp;
    connp->in_status = STREAM_STATE_OPEN;
    connp->out_status = STREAM_STATE_OPEN;
}
/**
 * Associate user data with the supplied parser. Only the pointer is stored
 * (in connp->user_data); it can be read back with htp_connp_get_data().
 *
 * @param connp Connection parser to update.
 * @param user_data Opaque pointer to associate with the parser.
 */
void htp_connp_set_user_data(htp_connp_t *connp, void *user_data) {
connp->user_data = user_data;
}

@ -0,0 +1,214 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
#include "htp_decompressors.h"
/**
 * Validates the fixed 10-byte gzip header and records the outcome in
 * drec->initialized (1 when accepted, -1 when rejected). A warning is logged
 * on rejection.
 *
 * @param drec Decompressor state.
 * @param d Data chunk being processed; used only to reach the parser for logging.
 * @param header Pointer to the 10 header bytes.
 * @return 1 if the header is acceptable, -1 otherwise.
 */
static int htp_gzip_decompressor_check_header(htp_decompressor_gzip_t *drec, htp_tx_data_t *d,
        const unsigned char *header) {
    if ((header[0] != DEFLATE_MAGIC_1) || (header[1] != DEFLATE_MAGIC_2)) {
        htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                "GZip decompressor: Magic bytes mismatch");
        drec->initialized = -1;
        return -1;
    }

    // Byte 3 carries the flags; none of them are supported
    if (header[3] != 0) {
        htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                "GZip decompressor: Unable to handle flags: %d", header[3]);
        drec->initialized = -1;
        return -1;
    }

    drec->initialized = 1;
    return 1;
}

/**
 * Adds the first len bytes of the output buffer to the running CRC and hands
 * them to the registered callback. If the callback fails, the zlib stream is
 * shut down.
 *
 * @param drec Decompressor state.
 * @param d Data chunk being processed; supplies the transaction for the callback.
 * @param len Number of decompressed bytes available in drec->buffer.
 * @return 0 on success, -1 if the callback reported an error.
 */
static int htp_gzip_decompressor_emit(htp_decompressor_gzip_t *drec, htp_tx_data_t *d, size_t len) {
    // Update CRC
    drec->crc = crc32(drec->crc, drec->buffer, len);

    // Prepare data for callback
    htp_tx_data_t d2;
    d2.tx = d->tx;
    d2.data = drec->buffer;
    d2.len = len;

    // Send decompressed data to callback
    if (drec->super.callback(&d2) < 0) {
        inflateEnd(&drec->stream);
        drec->zlib_initialized = 0;
        return -1;
    }

    return 0;
}

/**
 * Decompress a chunk of gzip-compressed data. The 10-byte gzip header is
 * consumed first (buffering it across calls if it arrives in pieces), after
 * which the remaining input is inflated and delivered to the callback in
 * blocks of at most GZIP_BUF_SIZE bytes.
 *
 * @param drec Decompressor state.
 * @param d Chunk of compressed data.
 * @return 1 on success (including when more header bytes are still needed),
 *         a negative value on error.
 */
static int htp_gzip_decompressor_decompress(htp_decompressor_gzip_t *drec, htp_tx_data_t *d) {
    size_t consumed = 0;

    // Return if we've previously had an error
    if (drec->initialized < 0) {
        return drec->initialized;
    }

    // Do we need to initialize?
    if (drec->initialized == 0) {
        if ((drec->header_len == 0) && (d->len >= 10)) {
            // The whole header is in this chunk; check it in place
            if (htp_gzip_decompressor_check_header(drec, d, d->data) < 0) return -1;
            consumed = 10;
        } else {
            // We do not (or did not) have enough bytes, so we have
            // to copy some data into our internal header buffer.
            size_t copylen = 10 - drec->header_len;
            if (copylen > d->len) copylen = d->len;

            if (copylen > 0) {
                memcpy(drec->header + drec->header_len, d->data, copylen);
            }

            drec->header_len += copylen;
            consumed = copylen;

            // Need more data?
            if (drec->header_len != 10) return 1;

            // Validate the buffered header. This used to report the flags
            // from d->data[3], which is not the header's flags byte here and
            // may lie outside a short chunk.
            if (htp_gzip_decompressor_check_header(drec, d, drec->header) < 0) return -1;
        }
    }

    // Decompress data
    int rc = 0;
    drec->stream.next_in = d->data + consumed;
    drec->stream.avail_in = d->len - consumed;

    while (drec->stream.avail_in != 0) {
        // If the output buffer is full, send its contents
        // out and start filling it from the beginning
        if (drec->stream.avail_out == 0) {
            if (htp_gzip_decompressor_emit(drec, d, GZIP_BUF_SIZE) < 0) return -1;

            drec->stream.next_out = drec->buffer;
            drec->stream.avail_out = GZIP_BUF_SIZE;
        }

        rc = inflate(&drec->stream, Z_NO_FLUSH);

        if (rc == Z_STREAM_END) {
            // Flush whatever is left in the output buffer
            size_t len = GZIP_BUF_SIZE - drec->stream.avail_out;
            if (htp_gzip_decompressor_emit(drec, d, len) < 0) return -1;

            // TODO Handle trailer

            return 1;
        }

        if (rc != Z_OK) {
            htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                    "GZip decompressor: inflate failed with %d", rc);

            inflateEnd(&drec->stream);
            drec->zlib_initialized = 0;

            return -1;
        }
    }

    return 1;
}
/**
 * Releases a gzip decompressor: ends the zlib stream if it is still
 * active, then frees the output buffer and the structure itself.
 *
 * @param drec Decompressor to destroy; NULL is ignored.
 */
static void htp_gzip_decompressor_destroy(htp_decompressor_gzip_t * drec) {
    if (drec != NULL) {
        if (drec->zlib_initialized != 0) {
            inflateEnd(&drec->stream);
            drec->zlib_initialized = 0;
        }

        free(drec->buffer);
        free(drec);
    }
}
/**
 * Creates a gzip decompressor: allocates the state and a GZIP_BUF_SIZE
 * output buffer, installs the decompress/destroy functions and sets up the
 * zlib stream. The data callback is not assigned here.
 *
 * @param connp Connection parser, used for logging.
 * @return The new decompressor, or NULL on allocation or zlib failure.
 */
htp_decompressor_t * htp_gzip_decompressor_create(htp_connp_t *connp) {
    htp_decompressor_gzip_t *gz = calloc(1, sizeof (htp_decompressor_gzip_t));
    if (gz == NULL) return NULL;

    gz->buffer = malloc(GZIP_BUF_SIZE);
    if (gz->buffer == NULL) {
        free(gz);
        return NULL;
    }

    gz->super.decompress = (int (*)(htp_decompressor_t *, htp_tx_data_t *)) htp_gzip_decompressor_decompress;
    gz->super.destroy = (void (*)(htp_decompressor_t *))htp_gzip_decompressor_destroy;

    int rc = inflateInit2(&gz->stream, GZIP_WINDOW_SIZE);
    if (rc == Z_OK) {
        gz->zlib_initialized = 1;

        // The whole output buffer is available to begin with
        gz->stream.next_out = gz->buffer;
        gz->stream.avail_out = GZIP_BUF_SIZE;

        return (htp_decompressor_t *) gz;
    }

    htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
            "GZip decompressor: inflateInit2 failed with code %d", rc);

    inflateEnd(&gz->stream);
    free(gz->buffer);
    free(gz);

    return NULL;
}

@ -0,0 +1,50 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _HTP_DECOMPRESSORS_H
#define _HTP_DECOMPRESSORS_H
typedef struct htp_decompressor_gzip_t htp_decompressor_gzip_t;
typedef struct htp_decompressor_t htp_decompressor_t;
#include "htp.h"
#include "zlib.h"
// Size, in bytes, of the buffer that receives inflated output before it is
// handed to the decompressor callback.
#define GZIP_BUF_SIZE 8192
// windowBits argument given to inflateInit2(). Per the zlib manual a negative
// value selects raw deflate processing (no zlib/gzip wrapper handled by zlib).
#define GZIP_WINDOW_SIZE -15
// NOTE(review): 0x1f 0x8b are the two identification bytes of a gzip member
// header (RFC 1952), despite the DEFLATE_ prefix -- confirm against the code
// that consumes the header.
#define DEFLATE_MAGIC_1 0x1f
#define DEFLATE_MAGIC_2 0x8b
/**
 * Generic decompressor interface. Concrete decompressors embed this structure
 * as their first member and are handed around as htp_decompressor_t pointers.
 */
struct htp_decompressor_t {
// Feeds one chunk of transaction data to the decompressor.
int (*decompress)(htp_decompressor_t *, htp_tx_data_t *);
// Receives each block of decompressed output; the gzip implementation aborts
// decompression when this returns a negative value.
int (*callback)(htp_tx_data_t *);
// Releases the decompressor and everything it owns.
void (*destroy)(htp_decompressor_t *);
};
/**
 * State of the gzip decompressor.
 */
struct htp_decompressor_gzip_t {
// Generic interface; kept first so the structure can be cast to
// htp_decompressor_t (as htp_gzip_decompressor_create() does).
htp_decompressor_t super;
// NOTE(review): not referenced by the create/destroy routines; presumably
// tracks whether the gzip header has been processed -- confirm in decompress.
int initialized;
// Non-zero while the zlib inflate state in "stream" is live and must be
// released with inflateEnd().
int zlib_initialized;
// NOTE(review): presumably the bytes of the fixed-size gzip header collected
// so far, and how many of them have been stored -- confirm in decompress.
uint8_t header[10];
uint8_t header_len;
// zlib inflate state.
z_stream stream;
// Output buffer of GZIP_BUF_SIZE bytes that zlib inflates into.
unsigned char *buffer;
// Running crc32() of the decompressed output.
unsigned long crc;
};
htp_decompressor_t * htp_gzip_decompressor_create(htp_connp_t *connp);
#endif /* _HTP_DECOMPRESSORS_H */

@ -0,0 +1,894 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
#include "htp_multipart.h"
#define PARAM_OTHER 0
#define PARAM_NAME 1
#define PARAM_FILENAME 2
/**
 * Classifies a Content-Disposition parameter by its name, which occupies
 * the bytes [startpos, pos) of the supplied buffer. The comparison is
 * case-sensitive and requires an exact length match.
 *
 * @param data Buffer holding the header value.
 * @param startpos Offset of the first byte of the parameter name.
 * @param pos Offset one past the last byte of the parameter name.
 * @return PARAM_NAME for "name", PARAM_FILENAME for "filename",
 *         PARAM_OTHER for anything else.
 */
static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t pos) {
    const unsigned char *param = data + startpos;

    switch (pos - startpos) {
        case 4:
            return (memcmp(param, "name", 4) == 0) ? PARAM_NAME : PARAM_OTHER;

        case 8:
            return (memcmp(param, "filename", 8) == 0) ? PARAM_FILENAME : PARAM_OTHER;

        default:
            return PARAM_OTHER;
    }
}
/**
 * Process part headers. In the current implementation, we only parse the
 * Content-Disposition header if it is present, extracting the "name" and
 * "filename" parameters into the part.
 *
 * @param part Part whose headers have all been collected.
 * @return 1 on success, 0 when there is no Content-Disposition header, or a
 *         negative value when the header is not "form-data" (-1) or is
 *         malformed (-2 .. -9, one code per failed parsing step).
 */
int htp_mpart_part_process_headers(htp_mpart_part_t *part) {
    // Find C-D header
    htp_header_t *h = (htp_header_t *) table_getc(part->headers, "content-disposition");
    if (h == NULL) {
        // TODO Error message
        return 0;
    }

    // The header value must begin with "form-data"
    if (bstr_indexofc(h->value, "form-data") != 0) {
        return -1;
    }

    // The parsing starts here
    unsigned char *data = (unsigned char *) bstr_ptr(h->value);
    size_t len = bstr_len(h->value);
    size_t pos = 9; // Start after "form-data"

    // Main parameter parsing loop (once per parameter)
    while (pos < len) {
        // Find semicolon and go over it
        while ((pos < len) && ((data[pos] == '\t') || (data[pos] == ' '))) pos++;
        if (pos == len) return -2;

        // Semicolon
        if (data[pos] != ';') return -3;
        pos++;

        // Go over the whitespace before parameter name
        while ((pos < len) && ((data[pos] == '\t') || (data[pos] == ' '))) pos++;
        if (pos == len) return -4;

        // Found starting position (name)
        size_t start = pos;

        // Look for ending position
        while ((pos < len) && (data[pos] != '\t') && (data[pos] != ' ') && (data[pos] != '=')) pos++;
        if (pos == len) return -5;

        // Ending position is in "pos" now

        // Is it a parameter we are interested in?
        int param_type = htp_mpartp_cd_param_type(data, start, pos);

        // Ignore whitespace
        while ((pos < len) && ((data[pos] == '\t') || (data[pos] == ' '))) pos++;
        if (pos == len) return -6;

        // Equals
        if (data[pos] != '=') return -7;
        pos++;

        // Go over the whitespace before value
        while ((pos < len) && ((data[pos] == '\t') || (data[pos] == ' '))) pos++;
        if (pos == len) return -8;

        // Found starting point (value)
        start = pos;

        // Quoting char indicator
        int qchar = -1;

        // Different handling for quoted and bare strings
        if (data[start] == '"') {
            // Quoted string
            qchar = data[start];
            start = ++pos;

            // Find the end of the value
            while ((pos < len) && (data[pos] != qchar)) {
                if (data[pos] == '\\') {
                    // A backslash must be followed by the character it
                    // quotes; one at the very end of the header is invalid.
                    // (The check used to be inverted, which rejected every
                    // valid quoted pair and let a trailing backslash move
                    // "pos" beyond "len".)
                    if (pos + 1 >= len) return -9;

                    // Go over the quoted character
                    pos++;
                }

                pos++;
            }
        } else {
            // Bare string: the value extends for as long as we see token
            // characters. (The test used to be negated, so bare values
            // were never consumed.)
            while ((pos < len) && (htp_is_token(data[pos]))) pos++;
        }

        switch (param_type) {
            case PARAM_NAME:
                // TODO Unquote
                // Release the value of an earlier "name" parameter, if any,
                // so that a repeated parameter does not leak memory.
                bstr_free(part->name);
                part->name = bstr_memdup((char *) data + start, pos - start);
                break;
            case PARAM_FILENAME:
                // TODO Unquote
                // Same as above, for a repeated "filename" parameter.
                bstr_free(part->filename);
                part->filename = bstr_memdup((char *) data + start, pos - start);
                break;
            default:
                // Ignore unknown parameter
                // TODO Warn/log?
                break;
        }

        // Skip over the closing quoting character, when there is one (an
        // unterminated quoted string ends at the end of the header)
        if ((qchar != -1) && (pos < len)) {
            pos++;
        }

        // Continue to parse the next parameter, if any
    }

    return 1;
}
/**
 * Parses one part header line ("name: value") and stores it in the part's
 * header table. When a header with the same name already exists, the new
 * value is appended to the existing one, separated by ", ", and the
 * existing header is flagged with HTP_FIELD_REPEATED.
 *
 * @param part Part that owns the header table.
 * @param data Header line, without the line terminator.
 * @param len Length of the header line.
 * @return 1 on success, -1 when the line has no colon or memory could not
 *         be allocated.
 */
int htp_mpartp_parse_header(htp_mpart_part_t *part, unsigned char *data, size_t len) {
    size_t name_start, name_end;
    size_t value_start, value_end;

    name_start = 0;

    // Look for the colon
    size_t colon_pos = 0;
    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;

    if (colon_pos == len) {
        // Missing colon
        // TODO Error message
        return -1;
    }

    if (colon_pos == 0) {
        // Empty header name
        // TODO Error message
    }

    name_end = colon_pos;

    // Ignore LWS after field-name. The first byte of the name is never
    // stripped (as before). The loop is phrased in terms of name_end so that
    // an empty name (colon at position 0) cannot underflow the index and
    // read before the start of the buffer.
    while ((name_end > name_start + 1) && (htp_is_lws(data[name_end - 1]))) {
        name_end--;
        // LWS after field name
        // TODO Error message
    }

    // Value starts right after the colon (colon_pos < len is guaranteed here)
    value_start = colon_pos + 1;

    // Ignore LWS before field-content
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // The field-content extends to the end of the line
    value_end = len;

    // Ignore LWS after field-content; the first byte of the value is kept
    while ((value_end > value_start + 1) && (htp_is_lws(data[value_end - 1]))) {
        value_end--;
    }

    // Check that the header name is a token
    size_t i = name_start;
    while (i < name_end) {
        if (!htp_is_token(data[i])) {
            // Request field is not a token
            // TODO Error message
            break;
        }

        i++;
    }

    // Now extract the name and the value
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
    if (h == NULL) return -1;

    h->name = bstr_memdup((char *) data + name_start, name_end - name_start);
    h->value = bstr_memdup((char *) data + value_start, value_end - value_start);
    if ((h->name == NULL) || (h->value == NULL)) {
        // Out of memory; bstr_free() is given NULL elsewhere in this file
        // too, so releasing a missing string is fine.
        bstr_free(h->name);
        bstr_free(h->value);
        free(h);
        return -1;
    }

    // Check if the header already exists
    htp_header_t * h_existing = table_get(part->headers, h->name);
    if (h_existing != NULL) {
        // Add to existing header. Keep the old pointer until the expansion
        // is known to have succeeded.
        // NOTE(review): assumes bstr_expand() returns NULL on failure and
        // leaves the original string untouched -- confirm.
        bstr *expanded = bstr_expand(h_existing->value, bstr_len(h_existing->value)
            + 2 + bstr_len(h->value));
        if (expanded == NULL) {
            bstr_free(h->name);
            bstr_free(h->value);
            free(h);
            return -1;
        }

        h_existing->value = expanded;
        bstr_add_mem_noex(h_existing->value, ", ", 2);
        bstr_add_str_noex(h_existing->value, h->value);

        // The header is no longer needed
        bstr_free(h->name);
        bstr_free(h->value);
        free(h);

        // Keep track of same-name headers
        h_existing->flags |= HTP_FIELD_REPEATED;
    } else {
        // Add as a new header
        table_add(part->headers, h->name, h);
    }

    return 1;
}
/**
 * Allocates a new, empty multipart part attached to the given parser. As a
 * side effect the parser's pieces_form_line flag is reset and its part_pieces
 * builder is emptied.
 *
 * @param mpartp Parser the part belongs to.
 * @return The new part, or NULL if memory could not be allocated.
 */
htp_mpart_part_t *htp_mpart_part_create(htp_mpartp_t *mpartp) {
    htp_mpart_part_t *new_part = calloc(1, sizeof (htp_mpart_part_t));
    if (new_part == NULL) {
        return NULL;
    }

    new_part->headers = table_create(32);
    if (new_part->headers != NULL) {
        new_part->mpartp = mpartp;

        // Start the part with a clean slate on the parser side
        mpartp->pieces_form_line = 0;
        bstr_builder_clear(mpartp->part_pieces);

        return new_part;
    }

    // No header table; give up on the part
    free(new_part);
    return NULL;
}
/**
 * Frees a multipart part together with its name, filename, value and all of
 * its headers.
 *
 * @param part Part to free; a NULL pointer is ignored.
 */
void htp_mpart_part_destroy(htp_mpart_part_t *part) {
    if (part == NULL) {
        return;
    }

    bstr_free(part->name);
    bstr_free(part->filename);
    bstr_free(part->value);

    if (part->headers != NULL) {
        // Release every stored header before the table itself
        htp_header_t *hdr = NULL;

        table_iterator_reset(part->headers);
        for (;;) {
            if (table_iterator_next(part->headers, (void **) & hdr) == NULL) break;

            bstr_free(hdr->name);
            bstr_free(hdr->value);
            free(hdr);
        }

        table_destroy(part->headers);
    }

    free(part);
}
/**
 * Completes a part: whatever has been accumulated in the parser's
 * part_pieces builder becomes the part's value and the builder is emptied.
 * Preamble and epilogue parts are left untouched.
 *
 * @param part Part to finalize.
 * @return Always 1.
 */
int htp_mpart_part_finalize_data(htp_mpart_part_t *part) {
    switch (part->type) {
        case MULTIPART_PART_PREAMBLE:
        case MULTIPART_PART_EPILOGUE:
            // We currently do not process these two kinds of part
            return 1;

        default:
            break;
    }

    if (bstr_builder_size(part->mpartp->part_pieces) > 0) {
        part->value = bstr_builder_to_str(part->mpartp->part_pieces);
        bstr_builder_clear(part->mpartp->part_pieces);
    }

    return 1;
}
/**
 * Feeds a piece of raw data to a part.
 *
 * In line mode the data is treated as (a fragment of) a header line: complete
 * lines are parsed as headers, folded or incomplete lines are stored in the
 * parser's part_pieces builder, and an empty line switches the parser to data
 * mode and triggers Content-Disposition processing. In data mode the bytes
 * are stored in part_pieces unless the part is a file. Preamble and epilogue
 * data is only counted.
 *
 * @param part Part receiving the data.
 * @param data Data bytes.
 * @param len Number of data bytes.
 * @param is_line Non-zero when the data ends a line.
 * @return Always 1 (results of the helpers are not yet checked).
 */
int htp_mpart_part_handle_data(htp_mpart_part_t *part, unsigned char *data, size_t len, int is_line) {
    // TODO We don't actually need the is_line parameter, because we can
    //      discover that ourselves by looking at the last byte in the buffer.

    // Keep track of part length
    part->len += len;

    // We currently do not process the preamble and epilogue parts
    if ((part->type == MULTIPART_PART_PREAMBLE) || (part->type == MULTIPART_PART_EPILOGUE)) {
        return 1;
    }

    // Data mode: keep the chunk for later, but not if it is a file
    if (part->mpartp->current_mode != MULTIPART_MODE_LINE) {
        if (part->type != MULTIPART_PART_FILE) {
            bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
        }

        return 1;
    }

    // Line mode from here on
    // TODO Remove the extra characters from folded lines

    if (!is_line) {
        // The line is not complete yet; remember this fragment
        bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
        part->mpartp->pieces_form_line = 0;
        return 1;
    }

    // We have the end of a line; drop the line ending
    if (len > 1) {
        if (data[len - 1] == LF) len--;
        if (data[len - 1] == CR) len--;
    } else if (len == 1) {
        if (data[0] == LF) len = 0;
    }

    // An empty line (with nothing stored) ends the headers
    if ((len == 0) && (bstr_builder_size(part->mpartp->part_pieces) == 0)) {
        part->mpartp->current_mode = MULTIPART_MODE_DATA;
        htp_mpart_part_process_headers(part); // TODO RC
        return 1;
    }

    // The first byte of the following line tells us whether that line is a
    // continuation (fold) of this one
    int next_line_is_folded = (part->mpartp->first_boundary_byte == ' ')
        || (part->mpartp->first_boundary_byte == '\t');

    if (next_line_is_folded) {
        // Just store this piece for later
        bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
        part->mpartp->pieces_form_line = 1;
        return 1;
    }

    // The header is complete, so parse it
    if (bstr_builder_size(part->mpartp->part_pieces) > 0) {
        // The line arrived in pieces; join them first
        bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);

        bstr *whole_line = bstr_builder_to_str(part->mpartp->part_pieces); // TODO RC
        htp_mpartp_parse_header(part, (unsigned char *) bstr_ptr(whole_line), bstr_len(whole_line)); // TODO RC
        bstr_free(whole_line);

        bstr_builder_clear(part->mpartp->part_pieces);
    } else {
        // The line is entirely in this buffer
        htp_mpartp_parse_header(part, data, len); // TODO RC
    }

    part->mpartp->pieces_form_line = 0;

    return 1;
}
/**
 * Default data callback of the parser: routes data to the current part,
 * creating (and registering) a new part first when none is open. A part
 * created before the first boundary is a preamble; one created after the
 * last boundary is an epilogue.
 *
 * @param mpartp Parser.
 * @param data Data bytes.
 * @param len Number of data bytes; nothing happens when zero.
 * @param is_line Non-zero when the data ends a line.
 * @return 1 on success, -1 if a new part could not be created.
 */
static int htp_mpartp_handle_data(htp_mpartp_t *mpartp, unsigned char *data, size_t len, int is_line) {
    if (len == 0) {
        return 1;
    }

    if (mpartp->current_part == NULL) {
        // No part is open, so start one
        htp_mpart_part_t *fresh = htp_mpart_part_create(mpartp);
        mpartp->current_part = fresh;
        if (fresh == NULL) return -1; // TODO RC

        if (mpartp->boundary_count == 0) {
            fresh->type = MULTIPART_PART_PREAMBLE;
            mpartp->current_mode = MULTIPART_MODE_DATA;
        } else if (mpartp->seen_last_boundary) {
            fresh->type = MULTIPART_PART_EPILOGUE;
            mpartp->current_mode = MULTIPART_MODE_DATA;
        }

        // Register the part with the parser.
        // TODO Perhaps we need a flag to know if a part has been finalized.
        list_push(mpartp->parts, fresh);
    }

    // Hand the data over to the part
    htp_mpart_part_handle_data(mpartp->current_part, data, len, is_line); // TODO RC

    return 1;
}
/**
 * Default boundary callback of the parser: closes the part that is currently
 * open (if any) and puts the parser back into line mode, ready for the
 * headers of the next part.
 *
 * @param mpartp Parser.
 * @return 1 on success, -1 if finalizing the current part failed.
 */
static int htp_mpartp_handle_boundary(htp_mpartp_t * mpartp) {
    // TODO Having mpartp->seen_last_boundary set here means that there's
    //      a boundary after the "last boundary".

    if (mpartp->current_part == NULL) {
        // Nothing to close
        return 1;
    }

    if (htp_mpart_part_finalize_data(mpartp->current_part) < 0) return -1; // TODO RC

    // The part is complete; headers of the next one come line by line
    mpartp->current_part = NULL;
    mpartp->current_mode = MULTIPART_MODE_LINE;

    return 1;
}
/**
 * Creates a new multipart/form-data parser.
 *
 * The stored boundary is CR LF "--" followed by a lowercased copy of the
 * supplied boundary, NUL-terminated. Parsing starts in the boundary state,
 * positioned after the CR LF prefix.
 *
 * @param boundary Boundary string (NUL-terminated), as given in the
 *                 Content-Type header.
 * @return New parser, or NULL on memory allocation failure or when boundary
 *         is NULL.
 */
htp_mpartp_t * htp_mpartp_create(char *boundary) {
    if (boundary == NULL) return NULL;

    htp_mpartp_t *mpartp = calloc(1, sizeof (htp_mpartp_t));
    if (mpartp == NULL) return NULL;

    mpartp->boundary_pieces = bstr_builder_create();
    if (mpartp->boundary_pieces == NULL) {
        free(mpartp);
        return NULL;
    }

    mpartp->part_pieces = bstr_builder_create();
    if (mpartp->part_pieces == NULL) {
        bstr_builder_destroy(mpartp->boundary_pieces);
        free(mpartp);
        return NULL;
    }

    mpartp->parts = list_array_create(64);
    if (mpartp->parts == NULL) {
        bstr_builder_destroy(mpartp->part_pieces);
        bstr_builder_destroy(mpartp->boundary_pieces);
        free(mpartp);
        return NULL;
    }

    // Copy the boundary and convert it to lowercase
    mpartp->boundary_len = strlen(boundary) + 4;
    mpartp->boundary = malloc(mpartp->boundary_len + 1);
    if (mpartp->boundary == NULL) {
        // Release everything allocated so far (the part list and the
        // part_pieces builder used to be leaked on this path)
        list_destroy(mpartp->parts);
        bstr_builder_destroy(mpartp->part_pieces);
        bstr_builder_destroy(mpartp->boundary_pieces);
        free(mpartp);
        return NULL;
    }

    // TODO Not using the CR and LF any more
    mpartp->boundary[0] = CR;
    mpartp->boundary[1] = LF;
    mpartp->boundary[2] = '-';
    mpartp->boundary[3] = '-';

    size_t i = 4;
    while (i < mpartp->boundary_len) {
        mpartp->boundary[i] = tolower((int) ((unsigned char) boundary[i - 4]));
        i++;
    }

    // The buffer has room for a terminator; make the stored boundary a
    // proper C string
    mpartp->boundary[mpartp->boundary_len] = '\0';

    mpartp->state = MULTIPART_STATE_BOUNDARY;
    mpartp->bpos = 2; // Skip the CR LF prefix for the very first boundary

    mpartp->handle_data = htp_mpartp_handle_data;
    mpartp->handle_boundary = htp_mpartp_handle_boundary;

    return mpartp;
}
/**
 * Frees a multipart/form-data parser, including its boundary, both string
 * builders and every part it has collected.
 *
 * @param mpartp Parser to free; a NULL pointer is ignored.
 */
void htp_mpartp_destroy(htp_mpartp_t * mpartp) {
    if (mpartp == NULL) {
        return;
    }

    free(mpartp->boundary);

    bstr_builder_destroy(mpartp->part_pieces);
    bstr_builder_destroy(mpartp->boundary_pieces);

    // Walk the list of parts, releasing each of them in turn
    list_iterator_reset(mpartp->parts);
    for (;;) {
        htp_mpart_part_t *doomed = list_iterator_next(mpartp->parts);
        if (doomed == NULL) break;

        htp_mpart_part_destroy(doomed);
    }

    list_destroy(mpartp->parts);

    free(mpartp);
}
/**
 * Processes set-aside data, i.e. the chunks stored in boundary_pieces while a
 * possible boundary was being examined, plus a CR byte that may have been
 * held back at the end of a chunk. The stored chunks are always discarded
 * (cleared) once they have been looked at.
 *
 * @param mpartp Parser.
 * @param matched Non-zero when the examined bytes turned out to be a
 *                boundary; zero when they were ordinary content.
 * @return Always 1.
 */
static int htp_martp_process_aside(htp_mpartp_t *mpartp, int matched) {
// The store data pieces can contain up to one line. If we're in data mode and there
// was no boundary match, things are straightforward -- we process everything as data.
// If there was a match, we need to take care to not send the line ending as data, nor
// anything that follows (because it's going to be a part of the boundary). Similarly,
// when we are in line mode, we need to split the first data chunk, processing the first
// part as line and the second part as data.
// Do we need to do any chunk splitting?
if (matched || (mpartp->current_mode == MULTIPART_MODE_LINE)) {
// Line mode or boundary match
// In line mode, we ignore lone CR bytes
mpartp->cr_aside = 0;
// We know that we went to match a boundary because
// we saw a new line. Now we have to find that line and
// process it. It's either going to be in the current chunk,
// or in the first stored chunk.
if (bstr_builder_size(mpartp->boundary_pieces) > 0) {
// We have stored chunks
bstr *b = NULL;
int first = 1;
list_iterator_reset(mpartp->boundary_pieces->pieces);
while ((b = list_iterator_next(mpartp->boundary_pieces->pieces)) != NULL) {
if (first) {
// Split the first chunk at boundarypos, the offset just past the
// line ending at which boundary matching started
if (!matched) {
// In line mode, we are OK with line endings
mpartp->handle_data(mpartp, (unsigned char *) bstr_ptr(b), mpartp->boundarypos, 1);
} else {
// But if there was a match, the line ending belongs to the boundary
unsigned char *dx = (unsigned char *) bstr_ptr(b);
size_t lx = mpartp->boundarypos;
// Remove LF or CRLF
if ((lx > 0) && (dx[lx - 1] == LF)) {
lx--;
// Remove CR
if ((lx > 0) && (dx[lx - 1] == CR)) {
lx--;
}
}
mpartp->handle_data(mpartp, dx, lx, 0);
}
// The second part of the split chunks belongs to the boundary
// when matched, data otherwise.
if (!matched) {
mpartp->handle_data(mpartp, (unsigned char *) bstr_ptr(b) + mpartp->boundarypos,
bstr_len(b) - mpartp->boundarypos, 0);
}
first = 0;
} else {
// Do not send data if there was a boundary match. The stored
// data belongs to the boundary.
if (!matched) {
mpartp->handle_data(mpartp, (unsigned char *) bstr_ptr(b), bstr_len(b), 0);
}
}
}
bstr_builder_clear(mpartp->boundary_pieces);
}
} else {
// Data mode and no match
// In data mode, we process the lone CR byte as data
if (mpartp->cr_aside) {
mpartp->handle_data(mpartp, (unsigned char *) &"\r", 1, 0 /* Not end of line */);
mpartp->cr_aside = 0;
}
// We then process any pieces that we might have stored, also as data
if (bstr_builder_size(mpartp->boundary_pieces) > 0) {
bstr *b = NULL;
list_iterator_reset(mpartp->boundary_pieces->pieces);
while ((b = list_iterator_next(mpartp->boundary_pieces->pieces)) != NULL) {
mpartp->handle_data(mpartp, (unsigned char *) bstr_ptr(b), bstr_len(b), 0);
}
bstr_builder_clear(mpartp->boundary_pieces);
}
}
return 1;
}
/**
 * Ends parsing. When a part is still open, any set-aside data is flushed to
 * it as ordinary content and the part is finalized. The set-aside storage is
 * emptied in every successful case.
 *
 * @param mpartp Parser.
 * @return 1 on success, -1 if the open part could not be finalized.
 */
int htp_mpartp_finalize(htp_mpartp_t * mpartp) {
    if (mpartp->current_part == NULL) {
        // No open part; only the set-aside storage needs attention
        bstr_builder_clear(mpartp->boundary_pieces);
        return 1;
    }

    // Whatever we were holding back cannot be a boundary any more
    htp_martp_process_aside(mpartp, 0);

    if (htp_mpart_part_finalize_data(mpartp->current_part) < 0) return -1; // TODO RC

    bstr_builder_clear(mpartp->boundary_pieces);

    return 1;
}
/**
 * Parses a chunk of multipart/form-data data. This function should be called
 * as many times as necessary until all data has been consumed.
 *
 * The parser is a state machine (see the MULTIPART_STATE_* constants) whose
 * state is kept in the parser structure between calls, so boundaries may
 * span chunks. Data is delivered through the handle_data callback and
 * boundaries through the handle_boundary callback.
 *
 * @param mpartp Parser.
 * @param data Input chunk.
 * @param len Length of the input chunk.
 * @return Status indicator; currently always 1.
 */
int htp_mpartp_parse(htp_mpartp_t *mpartp, unsigned char *data, size_t len) {
    size_t pos = 0; // Current position in the input chunk.
    size_t startpos = 0; // The starting position of data.
    size_t data_return_pos = 0; // The position of the (possible) boundary.

    // Loop while there's data in the buffer
    while (pos < len) {
STATE_SWITCH:
        switch (mpartp->state) {

            case MULTIPART_STATE_DATA:
                // A CR held back from the end of the previous chunk is
                // released as data at the start of this one
                if ((pos == 0) && (mpartp->cr_aside) && (pos < len)) {
                    mpartp->handle_data(mpartp, (unsigned char *) &"\r", 1, 0);
                    mpartp->cr_aside = 0;
                }

                // Loop through available data
                while (pos < len) {
                    if (data[pos] == CR) {
                        // We have a CR byte

                        // Is this CR the last byte?
                        if (pos + 1 == len) {
                            // We have CR as the last byte in input. We are going to process
                            // what we have in the buffer as data, except for the CR byte,
                            // which we're going to leave for later. If it happens that a
                            // CR is followed by a LF and then a boundary, the CR is going
                            // to be discarded.
                            pos++; // Take CR from input
                            mpartp->cr_aside = 1;
                        } else {
                            // We have CR and at least one more byte in the buffer, so we
                            // are able to test for the LF byte too.
                            if (data[pos + 1] == LF) {
                                pos += 2; // Take CR and LF from input

                                // Prepare to switch to boundary testing
                                data_return_pos = pos;
                                mpartp->boundarypos = pos - startpos;
                                mpartp->bpos = 2; // After LF/first dash
                                mpartp->state = MULTIPART_STATE_BOUNDARY;

                                goto STATE_SWITCH;
                            } else {
                                // A CR that is not followed by LF is not a line
                                // ending; treat it as an ordinary data byte. Without
                                // this branch "pos" was never advanced and the loop
                                // would spin forever on such input.
                                pos++;
                                mpartp->cr_aside = 0;
                            }
                        }
                    } else if (data[pos] == LF) {
                        // Possible boundary start position (LF line)
                        pos++; // Take LF from input

                        // Prepare to switch to boundary testing
                        data_return_pos = pos;
                        mpartp->boundarypos = pos - startpos;
                        mpartp->bpos = 2; // After LF/first dash
                        mpartp->state = MULTIPART_STATE_BOUNDARY;

                        goto STATE_SWITCH;
                    } else {
                        // Take one byte from input
                        pos++;
                        mpartp->cr_aside = 0;
                    }
                } // while

                // End of data; process data chunk (minus a held-back CR)
                mpartp->handle_data(mpartp, data + startpos, pos - startpos - mpartp->cr_aside, 0);

                break;

            case MULTIPART_STATE_BOUNDARY:
                // Possible boundary
                while (pos < len) {
                    // Remember the first byte in the new line; we'll need to
                    // determine if the line is a part of a folded header.
                    if (mpartp->bpos == 2) {
                        mpartp->first_boundary_byte = data[pos];
                    }

                    // Check if the bytes match
                    if (!(data[pos] == mpartp->boundary[mpartp->bpos])) {
                        // Boundary mismatch

                        // Process stored data
                        htp_martp_process_aside(mpartp, 0);

                        // Return back where DATA parsing left off
                        if (mpartp->current_mode == MULTIPART_MODE_LINE) {
                            // In line mode, we process the line
                            mpartp->handle_data(mpartp, data + startpos, data_return_pos - startpos, 1);
                            startpos = data_return_pos;
                        } else {
                            // In data mode, we go back where we left off
                            pos = data_return_pos;
                        }

                        mpartp->state = MULTIPART_STATE_DATA;

                        goto STATE_SWITCH;
                    }

                    // Consume one matched boundary byte
                    pos++;

                    // Have we seen all boundary bytes?
                    if (++mpartp->bpos == mpartp->boundary_len) {
                        // Boundary match!

                        // Process stored data
                        htp_martp_process_aside(mpartp, 1);

                        // Process data prior to the boundary in the local chunk
                        mpartp->handle_data(mpartp, data + startpos, data_return_pos - startpos, 0);

                        // Keep track of how many boundaries we've seen.
                        mpartp->boundary_count++;

                        // Run boundary match.
                        mpartp->handle_boundary(mpartp);

                        // We now need to check if this is the last boundary in the payload
                        mpartp->state = MULTIPART_STATE_BOUNDARY_IS_LAST2;

                        // The boundary may end exactly at the end of this chunk. The
                        // next state inspects data[pos], so jumping to it now would
                        // read one byte past the input; wait for the next chunk.
                        if (pos >= len) return 1;

                        goto STATE_SWITCH;
                    }
                } // while

                // No more data in the local chunk; store the unprocessed part for later
                bstr_builder_append_mem(mpartp->boundary_pieces, (char *) data + startpos, len - startpos);

                break;

            case MULTIPART_STATE_BOUNDARY_IS_LAST2:
                // We're expecting two dashes
                if (data[pos] == '-') {
                    // Still hoping!
                    pos++;
                    mpartp->state = MULTIPART_STATE_BOUNDARY_IS_LAST1;
                } else {
                    // Hmpf, it's not the last boundary.
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                }
                break;

            case MULTIPART_STATE_BOUNDARY_IS_LAST1:
                // One more dash left to go
                if (data[pos] == '-') {
                    // This is indeed the last boundary in the payload
                    pos++;
                    mpartp->seen_last_boundary = 1;
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                } else {
                    // The second character is not a dash. This means that we have
                    // an error in the payload. We should report the error and
                    // continue to eat the rest of the line.
                    // TODO Error
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                }
                break;

            case MULTIPART_STATE_BOUNDARY_EAT_LF:
                if (data[pos] == LF) {
                    pos++;
                    startpos = pos;
                    mpartp->state = MULTIPART_STATE_DATA;
                } else {
                    // Error!
                    // Unexpected byte; remain in the same state
                    pos++;
                }
                break;
        } // switch
    }

    return 1;
}

@ -0,0 +1,120 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _HTP_MULTIPART_H
#define _HTP_MULTIPART_H
typedef struct htp_mpartp_t htp_mpartp_t;
typedef struct htp_mpart_part_t htp_mpart_part_t;
#include "bstr.h"
#include "dslib.h"
// Part types, stored in htp_mpart_part_t::type.
#define MULTIPART_PART_UNKNOWN 0
#define MULTIPART_PART_TEXT 1
#define MULTIPART_PART_FILE 2
#define MULTIPART_PART_PREAMBLE 3
#define MULTIPART_PART_EPILOGUE 4
// Parser modes (htp_mpartp_t::current_mode): in line mode incoming data is
// interpreted as part header lines, in data mode as part content.
#define MULTIPART_MODE_LINE 0
#define MULTIPART_MODE_DATA 1
// Parser states (htp_mpartp_t::state). After a boundary match the parser
// enters IS_LAST2 (two dashes still expected), then IS_LAST1 (one dash
// expected), and finally EAT_LF, which discards bytes up to and including
// the next LF before returning to the DATA state.
#define MULTIPART_STATE_DATA 1
#define MULTIPART_STATE_BOUNDARY 2
#define MULTIPART_STATE_BOUNDARY_IS_LAST1 3
#define MULTIPART_STATE_BOUNDARY_IS_LAST2 4
//#define MULTIPART_STATE_BOUNDARY_EAT_CRLF 5
#define MULTIPART_STATE_BOUNDARY_EAT_LF 6
// Line ending bytes; only defined here when not already provided elsewhere.
#ifndef CR
#define CR '\r'
#endif
#ifndef LF
#define LF '\n'
#endif
/**
 * One part of a multipart/form-data payload.
 */
struct htp_mpart_part_t {
/** Pointer to the parser. */
htp_mpartp_t *mpartp;
/** Part type; see the MULTIPART_PART_* constants. */
int type;
/** Raw part length; the sum of the lengths of all data given to the part. */
size_t len;
/** Part name, from the Content-Disposition header. */
bstr *name;
/** Part filename, from the Content-Disposition header. */
bstr *filename;
/** Part value; currently only available for MULTIPART_PART_TEXT parts. */
// NOTE(review): the finalize routine assigns a value to every part that is
// not a preamble or an epilogue and has stored data -- confirm the intended
// restriction to TEXT parts.
bstr *value;
/** Part headers (htp_header_t instances), indexed by name. */
table_t *headers;
};
/**
 * State of a multipart/form-data parser.
 */
struct htp_mpartp_t {
/** Boundary to be used to extract parts. */
// Stored by htp_mpartp_create() as CR LF "--" followed by the lowercased
// boundary string.
char *boundary;
/** Boundary length. */
// Includes the four-byte CR LF "--" prefix.
size_t boundary_len;
/** How many boundaries were seen? */
int boundary_count;
/** Did we see the last boundary? */
int seen_last_boundary;
/** List of parts. */
list_t *parts;
// Parsing callbacks
// Invoked for every piece of part data; line_end is non-zero when the piece
// ends a line.
int (*handle_data)(htp_mpartp_t *mpartp, unsigned char *data, size_t len, int line_end);
// Invoked each time a complete boundary has been matched.
int (*handle_boundary)(htp_mpartp_t *mpartp);
// Internal parsing fields
// TODO Consider prefixing them with an underscore.
// Current parser state; one of the MULTIPART_STATE_* constants.
int state;
// Index into "boundary" of the next byte to be matched.
size_t bpos;
// NOTE(review): not referenced by the parser code in htp_multipart.c.
unsigned char *current_data;
// Part currently receiving data; NULL when no part is open.
htp_mpart_part_t *current_part;
// Current mode; one of the MULTIPART_MODE_* constants.
int current_mode;
// NOTE(review): not referenced by the parser code in htp_multipart.c.
size_t current_len;
// Input chunks set aside while a possible boundary is being matched across
// chunk borders.
bstr_builder_t *boundary_pieces;
// Pieces of the current header line (line mode) or part content (data mode).
bstr_builder_t *part_pieces;
// Set to 1 when a piece of a folded header line has been stored, 0 otherwise.
int pieces_form_line;
// First byte of the line that follows a line ending; used to detect folded
// header lines (which start with a space or a tab).
unsigned char first_boundary_byte;
// Offset, relative to the start of the pending data, of the position just
// past the line ending where boundary matching began.
size_t boundarypos;
// Non-zero when a CR found at the very end of a chunk is being held back.
int cr_aside;
};
// Parser lifecycle and input
htp_mpartp_t *htp_mpartp_create(char *boundary);
void htp_mpartp_destroy(htp_mpartp_t *mpartp);
int htp_mpartp_parse(htp_mpartp_t *mpartp, unsigned char *data, size_t len);
int htp_mpartp_finalize(htp_mpartp_t *mpartp);
// Parses a single part header line and stores it with the part.
int htp_mpartp_parse_header(htp_mpart_part_t *part, unsigned char *data, size_t len);

// Part lifecycle and input
htp_mpart_part_t *htp_mpart_part_create(htp_mpartp_t *mpartp);
// Processes the collected headers of a part (Content-Disposition).
int htp_mpart_part_process_headers(htp_mpart_part_t *part);
// Feeds data to a part; this is the function htp_multipart.c defines.
int htp_mpart_part_handle_data(htp_mpart_part_t *part, unsigned char *data, size_t len, int is_line);
// NOTE(review): no definition of this function is visible in htp_multipart.c,
// where the equivalent routine is named htp_mpart_part_handle_data. The
// declaration is kept for compatibility; confirm whether it can be removed.
int htp_mpart_part_receive_data(htp_mpart_part_t *part, unsigned char *data, size_t len, int line);
int htp_mpart_part_finalize_data(htp_mpart_part_t *part);
void htp_mpart_part_destroy(htp_mpart_part_t *part);
#endif /* _HTP_MULTIPART_H */

@ -0,0 +1,56 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Converts the textual protocol of a request or response line (for example
 * "HTTP/1.1") into a protocol number. Only the exact eight-character form
 * "HTTP/x.y" is recognised; no attempt is made to be flexible.
 *
 * @param protocol Protocol string.
 * @return HTTP_0_9, HTTP_1_0 or HTTP_1_1, or PROTOCOL_UNKNOWN for anything
 *         else.
 */
int htp_parse_protocol(bstr *protocol) {
    if (bstr_len(protocol) != 8) {
        return PROTOCOL_UNKNOWN;
    }

    char *p = bstr_ptr(protocol);

    // Fixed part of the string: "HTTP/" and the dot between the digits
    int well_formed = (p[0] == 'H') && (p[1] == 'T') && (p[2] == 'T') && (p[3] == 'P')
        && (p[4] == '/') && (p[6] == '.');
    if (!well_formed) {
        return PROTOCOL_UNKNOWN;
    }

    // Major version first, then minor
    switch (p[5]) {
        case '0':
            if (p[7] == '9') return HTTP_0_9;
            break;

        case '1':
            if (p[7] == '0') return HTTP_1_0;
            if (p[7] == '1') return HTTP_1_1;
            break;

        default:
            break;
    }

    return PROTOCOL_UNKNOWN;
}
/**
 * Converts a textual response status into a number by handing the bytes of
 * the string to htp_parse_positive_integer_whitespace() in base 10.
 *
 * @param status Status string.
 * @return Whatever htp_parse_positive_integer_whitespace() reports; the
 *         original documentation describes this as the status code on
 *         success, or -1 on error.
 */
int htp_parse_status(bstr *status) {
    unsigned char *text = (unsigned char *) bstr_ptr(status);

    return htp_parse_positive_integer_whitespace(text, bstr_len(status), 10);
}

@ -0,0 +1,861 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <stdlib.h>
#include "htp.h"
/**
 * Decides, for the current inbound transaction, whether request parsing has
 * to pause because the request uses the CONNECT method.
 *
 * @param connp Connection parser.
 * @return HTP_OK if the request does not use CONNECT, HTP_DATA_OTHER if
 *         inbound parsing needs to be suspended until we hear from the
 *         other side.
 */
int htp_connp_REQ_CONNECT_CHECK(htp_connp_t *connp) {
    if (connp->in_tx->request_method_number != M_CONNECT) {
        // Ordinary request: move on to working out whether
        // there is a request body
        connp->in_state = htp_connp_REQ_BODY_DETERMINE;
        return HTP_OK;
    }

    // A CONNECT request has no body, but whether the tunnel
    // is actually established is only known once the response
    // has been seen, so we wait for it.
    connp->in_state = htp_connp_REQ_CONNECT_WAIT_RESPONSE;
    connp->in_status = STREAM_STATE_DATA_OTHER;
    connp->in_tx->progress = TX_PROGRESS_WAIT;

    return HTP_DATA_OTHER;
}
/**
 * Resumes inbound parsing after a CONNECT request, once the response line of
 * the same transaction has been processed. A 2xx response marks the inbound
 * stream as a tunnel; in every case the parser then returns to the idle
 * request state.
 *
 * @param connp Connection parser.
 * @return HTP_OK if the parser can resume parsing, HTP_DATA_OTHER if
 *         it needs to continue waiting.
 */
int htp_connp_REQ_CONNECT_WAIT_RESPONSE(htp_connp_t *connp) {
    // Nothing can be decided until the response line of the
    // current inbound transaction has been seen.
    if (connp->in_tx->progress <= TX_PROGRESS_RES_LINE) {
        return HTP_DATA_OTHER;
    }

    int tunnel_established = (connp->in_tx->response_status_number >= 200)
        && (connp->in_tx->response_status_number <= 299);

    if (tunnel_established) {
        // TODO Check that the server did not accept a connection
        //      to itself.

        // The remaining data on this stream belongs to the
        // tunnel and is going to be ignored
        connp->in_status = STREAM_STATE_TUNNEL;
    }

    // With or without a tunnel, the next thing to expect
    // is a new transaction
    connp->in_state = htp_connp_REQ_IDLE;

    return HTP_OK;
}
/**
 * Skips the remainder of the line that follows a chunk of data, counting
 * every consumed byte towards the request message length. When the LF is
 * reached the parser moves on to reading the next chunk length.
 *
 * @param connp Connection parser.
 * @returns HTP_OK on state change. When the input runs out first, the
 *          function returns from inside IN_NEXT_BYTE_OR_RETURN (presumably
 *          with HTP_DATA, to ask for more data).
 */
int htp_connp_REQ_BODY_CHUNKED_DATA_END(htp_connp_t *connp) {
    // TODO We shouldn't really see anything apart from CR and LF,
    //      so we should warn about anything else.

    do {
        IN_NEXT_BYTE_OR_RETURN(connp);

        connp->in_tx->request_message_len++;
    } while (connp->in_next_byte != LF);

    connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH;

    return HTP_OK;
}
/**
 * Consumes the data of one chunk of a chunked request body and passes it to
 * the request body data hooks, either when the current input buffer is
 * exhausted or when the whole chunk has been read.
 *
 * @param connp Connection parser.
 * @returns HTP_OK on state change (end of chunk), HTP_ERROR when a callback
 *          fails, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_BODY_CHUNKED_DATA(htp_connp_t *connp) {
    // The data handed to the hooks starts at the current input position
    // and grows by one byte for every byte consumed below
    htp_tx_data_t d;

    d.tx = connp->in_tx;
    d.data = &connp->in_current_data[connp->in_current_offset];
    d.len = 0;

    for (;;) {
        IN_NEXT_BYTE(connp);

        if (connp->in_next_byte == -1) {
            // Out of input: pass on what we have so far
            int rc = hook_run_all(connp->cfg->hook_request_body_data, &d);
            if (rc != HOOK_OK) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Request body data callback returned error (%d)", rc);
                return HTP_ERROR;
            }

            // Ask for more data
            return HTP_DATA;
        }

        // One more byte of chunk data
        connp->in_tx->request_message_len++;
        connp->in_tx->request_entity_len++;
        connp->in_chunked_length--;
        d.len++;

        if (connp->in_chunked_length != 0) {
            continue;
        }

        // The chunk is complete: pass the data on
        int rc = hook_run_all(connp->cfg->hook_request_body_data, &d);
        if (rc != HOOK_OK) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Request body data callback returned error (%d)", rc);
            return HTP_ERROR;
        }

        connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA_END;

        return HTP_OK;
    }
}
/**
 * Reads the line that announces the size of the next chunk and decides
 * what to parse next: chunk payload for a positive length, trailer headers
 * for a zero length.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_BODY_CHUNKED_LENGTH(htp_connp_t *connp) {
    // Collect bytes into the line buffer until the line is complete
    do {
        IN_COPY_BYTE_OR_RETURN(connp);

        connp->in_tx->request_message_len++;
    } while (connp->in_next_byte != LF);

    // Strip the line ending and convert the remainder into a number
    htp_chomp(connp->in_line, &connp->in_line_len);
    connp->in_chunked_length = htp_parse_chunked_length(connp->in_line, connp->in_line_len);

    // The line buffer is free to be reused
    connp->in_line_len = 0;

    if (connp->in_chunked_length < 0) {
        // Invalid chunk length
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
            "Request chunk encoding: Invalid chunk length");
        return HTP_ERROR;
    }

    if (connp->in_chunked_length == 0) {
        // Last chunk: what follows is parsed like headers (the trailer)
        connp->in_state = htp_connp_REQ_HEADERS;
        connp->in_tx->progress = TX_PROGRESS_REQ_TRAILER;
    } else {
        // Payload follows
        // TODO Add a check for chunk length
        connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA;
    }

    return HTP_OK;
}
/**
 * Consumes a request body whose size is known up front
 * (connp->in_body_data_left), passing the bytes found in the current input
 * buffer to the request body data hooks.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_BODY_IDENTITY(htp_connp_t *connp) {
    htp_tx_data_t d;
    int rc;

    // The callback data is a window onto the current input buffer
    d.tx = connp->in_tx;
    d.data = &connp->in_current_data[connp->in_current_offset];
    d.len = 0;

    for (;;) {
        IN_NEXT_BYTE(connp);

        if (connp->in_next_byte == -1) {
            // Nothing left in this buffer; flush what was gathered, if anything
            if (d.len != 0) {
                rc = hook_run_all(connp->cfg->hook_request_body_data, &d);
                if (rc != HOOK_OK) {
                    htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                        "Request body data callback returned error (%d)", rc);
                    return HTP_ERROR;
                }
            }

            // Ask for more data
            return HTP_DATA;
        }

        // Account for one more body byte
        connp->in_tx->request_message_len++;
        connp->in_tx->request_entity_len++;
        connp->in_body_data_left--;
        d.len++;

        if (connp->in_body_data_left != 0) {
            // More body bytes are expected
            continue;
        }

        // The whole body has been seen; flush the final span
        if (d.len != 0) {
            rc = hook_run_all(connp->cfg->hook_request_body_data, &d);
            if (rc != HOOK_OK) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Request body data callback returned error (%d)", rc);
                return HTP_ERROR;
            }
        }

        // This request is complete
        connp->in_state = htp_connp_REQ_IDLE;
        connp->in_tx->progress = TX_PROGRESS_WAIT;

        return HTP_OK;
    }
}
/**
 * Decides, from the Transfer-Encoding and Content-Length request headers,
 * whether a request body follows and how it is framed. Also reconciles the
 * Host header with the host found in the URI, then runs the
 * REQUEST_HEADERS hook.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_BODY_DETERMINE(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;
    htp_header_t *cl = table_getc(tx->request_headers, "content-length");
    htp_header_t *te = table_getc(tx->request_headers, "transfer-encoding");

    if ((te == NULL) && (cl == NULL)) {
        // Neither framing header is present: there is no
        // body and the request is complete
        connp->in_state = htp_connp_REQ_IDLE;
        tx->progress = TX_PROGRESS_WAIT;
    } else if (te != NULL) {
        // Transfer-Encoding takes precedence and indicates a chunked body.
        // The only acceptable value is "chunked".
        if (bstr_cmpc(te->value, "chunked") != 0) {
            // Invalid T-E header value
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Invalid T-E value in request");
        }

        // Chunking is an HTTP/1.1 feature; flag its use with an older
        // protocol (or with a protocol that could not be parsed).
        //
        // TODO IIS 7.0, for example, would ignore the T-E header when
        //      it is used with a protocol below HTTP 1.1.
        if (tx->request_protocol_number < HTTP_1_1) {
            tx->flags |= HTP_INVALID_CHUNKING;
            // TODO Log
        }

        // The T-E header is honoured whenever it is present
        tx->request_transfer_coding = CHUNKED;

        // Having C-L alongside T-E violates the RFC
        if (cl != NULL) {
            tx->flags |= HTP_REQUEST_SMUGGLING;
            // TODO Log
        }

        connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH;
        tx->progress = TX_PROGRESS_REQ_BODY;
    } else {
        // Only Content-Length is present
        tx->request_transfer_coding = IDENTITY;

        // A folded or a repeated C-L header is suspicious
        if (cl->flags & HTP_FIELD_FOLDED) {
            tx->flags |= HTP_REQUEST_SMUGGLING;
            // TODO Log
        }

        if (cl->flags & HTP_FIELD_REPEATED) {
            tx->flags |= HTP_REQUEST_SMUGGLING;
            // TODO Log
        }

        // Get body length
        int body_len = htp_parse_content_length(cl->value);
        if (body_len < 0) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in request");
            return HTP_ERROR;
        }

        connp->in_content_length = body_len;
        connp->in_body_data_left = connp->in_content_length;

        if (connp->in_content_length == 0) {
            // An empty body: nothing more to read for this request
            connp->in_state = htp_connp_REQ_IDLE;
            tx->progress = TX_PROGRESS_WAIT;
        } else {
            connp->in_state = htp_connp_REQ_BODY_IDENTITY;
            tx->progress = TX_PROGRESS_REQ_BODY;
        }
    }

    // Host resolution
    htp_header_t *h = table_getc(tx->request_headers, "host");
    if (h != NULL) {
        if (tx->parsed_uri->hostname != NULL) {
            // Host information is present both in the headers and in
            // the URI. The HTTP RFC states that the headers copy
            // should be ignored.
            tx->flags |= HTP_AMBIGUOUS_HOST;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Host information ambiguous");
        } else {
            // The URI carries no host; take it from the headers
            htp_replace_hostname(connp, tx->parsed_uri, h->value);
        }
    } else if (tx->request_protocol_number >= HTTP_1_1) {
        // HTTP/1.1 requires host information in the headers
        tx->flags |= HTP_HOST_MISSING;
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
            "Host information in request headers required by HTTP/1.1");
    }

    // Run hook REQUEST_HEADERS
    int rc = hook_run_all(connp->cfg->hook_request_headers, connp);
    if (rc != HOOK_OK) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
            "Request headers callback returned error (%d)", rc);
        return HTP_ERROR;
    }

    return HTP_OK;
}
/**
 * Parses request headers, one line at a time. The same state is used for
 * the trailer that follows a chunked body; the two uses are told apart by
 * the transaction progress value.
 *
 * Every raw line is stored in the transaction's request_header_lines list.
 * A complete (possibly folded) header is handed to the configured
 * process_request_header callback as soon as the line that follows it
 * shows that it has ended.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_HEADERS(htp_connp_t *connp) {
    for (;;) {
        IN_COPY_BYTE_OR_RETURN(connp);

        // Allocate the per-line structure on the first byte of a line
        if (connp->in_header_line == NULL) {
            connp->in_header_line = calloc(1, sizeof (htp_header_line_t));
            if (connp->in_header_line == NULL) return HTP_ERROR;
            connp->in_header_line->first_nul_offset = -1;
        }

        // Keep track of NUL bytes
        if (connp->in_next_byte == 0) {
            // Store the offset of the first NUL
            if (connp->in_header_line->has_nulls == 0) {
                connp->in_header_line->first_nul_offset = connp->in_line_len;
            }

            // Remember how many NULs there were
            connp->in_header_line->flags |= HTP_FIELD_NUL_BYTE;
            connp->in_header_line->has_nulls++;
        }

        // Keep reading until the end of the line
        if (connp->in_next_byte != LF) continue;

#ifdef HTP_DEBUG
        fprint_raw_data(stderr, __FUNCTION__, connp->in_line, connp->in_line_len);
#endif

        // Should we terminate headers?
        if (htp_connp_is_line_terminator(connp, connp->in_line, connp->in_line_len)) {
            // Terminator line

            // Parse previous header, if any
            if (connp->in_header_line_index != -1) {
                if (connp->cfg->process_request_header(connp) != HTP_OK) {
                    // Note: downstream responsible for error logging
                    return HTP_ERROR;
                }
            }

            // Cleanup; the terminator line itself is not stored
            free(connp->in_header_line);
            connp->in_line_len = 0;
            connp->in_header_line = NULL;
            connp->in_header_line_index = -1;
            connp->in_header_line_counter = 0;

            // We've seen all request headers
            if (connp->in_chunk_count != connp->in_chunk_request_index) {
                connp->in_tx->flags |= HTP_MULTI_PACKET_HEAD;
            }

            // Move onto the next processing phase
            if (connp->in_tx->progress == TX_PROGRESS_REQ_HEADERS) {
                // These were the request headers proper; the
                // CONNECT check runs next
                connp->in_state = htp_connp_REQ_CONNECT_CHECK;
            } else {
                // This was the trailer

                // Run hook REQUEST_TRAILER
                int rc = hook_run_all(connp->cfg->hook_request_trailer, connp);
                if (rc != HOOK_OK) {
                    htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                        "Request trailer callback returned error (%d)", rc);
                    return HTP_ERROR;
                }

                // We've completed parsing this request
                connp->in_state = htp_connp_REQ_IDLE;
                connp->in_tx->progress = TX_PROGRESS_WAIT;
            }

            return HTP_OK;
        }

        // Prepare line for consumption
        htp_chomp(connp->in_line, &connp->in_line_len);

        // Check for header folding
        if (htp_connp_is_line_folded(connp->in_line, connp->in_line_len) == 0) {
            // New header line

            // Parse previous header, if any
            if (connp->in_header_line_index != -1) {
                if (connp->cfg->process_request_header(connp) != HTP_OK) {
                    // Note: downstream responsible for error logging
                    return HTP_ERROR;
                }
            }

            // Remember the index of the first header line
            connp->in_header_line_index = connp->in_header_line_counter;
        } else {
            // Folding; check that there's a previous header line to add to
            if (connp->in_header_line_index == -1) {
                if (!(connp->in_tx->flags & HTP_INVALID_FOLDING)) {
                    connp->in_tx->flags |= HTP_INVALID_FOLDING;
                    htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                        "Invalid request field folding");
                }
            }
        }

        // Add the raw header line to the list. The copy must succeed:
        // the header processors read hl->line without checking it.
        connp->in_header_line->line = bstr_memdup((char *) connp->in_line, connp->in_line_len);
        if (connp->in_header_line->line == NULL) {
            // Memory allocation failure. The line structure remains
            // attached to the parser and is not added to the list.
            return HTP_ERROR;
        }

        list_add(connp->in_tx->request_header_lines, connp->in_header_line);
        connp->in_header_line = NULL;

        // Cleanup for the next line
        connp->in_line_len = 0;

        // A folded line without a predecessor starts a header of its own
        if (connp->in_header_line_index == -1) {
            connp->in_header_line_index = connp->in_header_line_counter;
        }

        connp->in_header_line_counter++;
    }
}
/**
 * Chooses what follows the request line, based on whether the request
 * used the short HTTP/0.9 form.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_PROTOCOL(htp_connp_t *connp) {
    if (connp->in_tx->protocol_is_simple != 0) {
        // A short-style HTTP/0.9 request has no headers,
        // which means this request is complete.
        connp->in_state = htp_connp_REQ_IDLE;
        connp->in_tx->progress = TX_PROGRESS_WAIT;

        return HTP_OK;
    }

    // Anything else continues with request headers
    connp->in_state = htp_connp_REQ_HEADERS;
    connp->in_tx->progress = TX_PROGRESS_REQ_HEADERS;

    return HTP_OK;
}
/**
 * Parses request line.
 *
 * Bytes are collected until a complete line is available. Ignorable lines
 * preceding the request line are counted and skipped. The request line is
 * then parsed with the configured parse_request_line callback, the URI
 * (or, for CONNECT, the authority) is parsed, and for non-CONNECT requests
 * the normalized URI is completed with a default scheme, port and path.
 * Finally the REQUEST_LINE hook is run.
 *
 * Memory allocation failures are reported by returning HTP_ERROR without
 * logging.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_LINE(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;

    for (;;) {
        // Get one byte
        IN_COPY_BYTE_OR_RETURN(connp);

        // Keep track of NUL bytes
        if (connp->in_next_byte == 0) {
            // Remember how many NULs there were
            tx->request_line_nul++;

            // Store the offset of the first NUL byte
            if (tx->request_line_nul_offset == -1) {
                tx->request_line_nul_offset = connp->in_line_len;
            }
        }

        // Keep reading until the end of the line
        if (connp->in_next_byte != LF) continue;

#ifdef HTP_DEBUG
        fprint_raw_data(stderr, __FUNCTION__, connp->in_line, connp->in_line_len);
#endif

        // Is this a line that should be ignored?
        if (htp_connp_is_line_ignorable(connp, connp->in_line, connp->in_line_len)) {
            // We have an empty/whitespace line, which we'll note, ignore and move on
            tx->request_ignored_lines++;

            // TODO How many empty lines are we willing to accept?

            // Start again
            connp->in_line_len = 0;

            return HTP_OK;
        }

        // Process request line
        htp_chomp(connp->in_line, &connp->in_line_len);
        tx->request_line = bstr_memdup((char *) connp->in_line, connp->in_line_len);
        if (tx->request_line == NULL) {
            // The request line parsers read request_line unconditionally
            return HTP_ERROR;
        }

        // Parse request line
        if (connp->cfg->parse_request_line(connp) != HTP_OK) {
            // Note: downstream responsible for error logging
            return HTP_ERROR;
        }

        if (tx->request_method_number == M_CONNECT) {
            // Parse authority
            if (htp_parse_authority(connp, tx->request_uri, &(tx->parsed_uri_incomplete)) != HTP_OK) {
                // Note: downstream responsible for error logging
                return HTP_ERROR;
            }
        } else {
            // Parse the request URI
            if (htp_parse_uri(tx->request_uri, &(tx->parsed_uri_incomplete)) != HTP_OK) {
                // Note: downstream responsible for error logging
                return HTP_ERROR;
            }

            // Keep the original URI components, but
            // create a copy which we can normalize and use internally
            if (htp_normalize_parsed_uri(connp, tx->parsed_uri_incomplete, tx->parsed_uri)) {
                // Note: downstream responsible for error logging
                return HTP_ERROR;
            }

            // Now is a good time to generate request_uri_normalized, before we finalize
            // parsed_uri (and lose the information which parts were provided in the request and
            // which parts we added).
            if (connp->cfg->generate_request_uri_normalized) {
                tx->request_uri_normalized = htp_unparse_uri_noencode(tx->parsed_uri);

                if (tx->request_uri_normalized == NULL) {
                    // There's no sense in logging anything on a memory allocation failure
                    return HTP_ERROR;
                }

#ifdef HTP_DEBUG
                fprint_raw_data(stderr, "request_uri_normalized",
                    (unsigned char *) bstr_ptr(tx->request_uri_normalized),
                    bstr_len(tx->request_uri_normalized));
#endif
            }

            // Finalize parsed_uri

            // Scheme
            if (tx->parsed_uri->scheme != NULL) {
                if (bstr_cmpc(tx->parsed_uri->scheme, "http") != 0) {
                    // TODO Invalid scheme
                }
            } else {
                tx->parsed_uri->scheme = bstr_cstrdup("http");
                if (tx->parsed_uri->scheme == NULL) {
                    // Memory allocation failure
                    return HTP_ERROR;
                }
            }

            // Port
            if (tx->parsed_uri->port != NULL) {
                if (tx->parsed_uri->port_number != -1) {
                    // Check that the port in the URI is the same
                    // as the port on which the client is talking
                    // to the server
                    if (tx->parsed_uri->port_number != connp->conn->local_port) {
                        // Incorrect port; use the real port instead
                        tx->parsed_uri->port_number = connp->conn->local_port;
                        // TODO Log
                    }
                } else {
                    // Invalid port; use the real port instead
                    tx->parsed_uri->port_number = connp->conn->local_port;
                    // TODO Log
                }
            } else {
                // No port in the URI; use the real port
                tx->parsed_uri->port_number = connp->conn->local_port;
            }

            // Path
            if (tx->parsed_uri->path == NULL) {
                tx->parsed_uri->path = bstr_cstrdup("/");
                if (tx->parsed_uri->path == NULL) {
                    // Memory allocation failure
                    return HTP_ERROR;
                }
            }
        }

        // Run hook REQUEST_LINE
        int rc = hook_run_all(connp->cfg->hook_request_line, connp);
        if (rc != HOOK_OK) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Request line callback returned error (%d)", rc);
            return HTP_ERROR;
        }

        // Clean up.
        connp->in_line_len = 0;

        // Move on to the next phase.
        connp->in_state = htp_connp_REQ_PROTOCOL;

        return HTP_OK;
    }
}
/**
 * The state between transactions. It is entered before the first request
 * and after every completed one: a finished transaction gets its REQUEST
 * hook run here, and a new transaction is created as soon as there is
 * input to parse.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
int htp_connp_REQ_IDLE(htp_connp_t * connp) {
    int rc;

    // A transaction that is still attached has just been
    // parsed in full: run the final hook and let go of it.
    if (connp->in_tx != NULL) {
        // Run hook REQUEST
        rc = hook_run_all(connp->cfg->hook_request, connp);
        if (rc != HOOK_OK) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Request callback returned error (%d)", rc);
            return HTP_ERROR;
        }

        // Start afresh
        connp->in_tx = NULL;
    }

    // Do not leave the idle state (and do not create any
    // structures) unless at least one byte of data is
    // available; the connection may have nothing more to say.
    IN_TEST_NEXT_BYTE_OR_RETURN(connp);

    // A request that begins while earlier transactions are
    // still awaiting their responses means pipelining
    if (connp->out_next_tx_index < list_size(connp->conn->transactions)) {
        connp->conn->flags |= PIPELINED_CONNECTION;
    }

    // Create the transaction for the new request
    connp->in_tx = htp_tx_create(connp->cfg, CFG_SHARED, connp->conn);
    if (connp->in_tx == NULL) return HTP_ERROR;

    connp->in_tx->connp = connp;
    list_add(connp->conn->transactions, connp->in_tx);

    // Reset the per-request parser fields
    connp->in_content_length = -1;
    connp->in_body_data_left = -1;
    connp->in_header_line_index = -1;
    connp->in_header_line_counter = 0;
    connp->in_chunk_request_index = connp->in_chunk_count;

    // Run hook TRANSACTION_START
    rc = hook_run_all(connp->cfg->hook_transaction_start, connp);
    if (rc != HOOK_OK) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
            "Transaction start callback returned error (%d)", rc);
        return HTP_ERROR;
    }

    // The request line comes first
    connp->in_state = htp_connp_REQ_LINE;
    connp->in_tx->progress = TX_PROGRESS_REQ_LINE;

    return HTP_OK;
}
/**
 * Reports the request parser's position within the current inbound data
 * chunk.
 *
 * @param connp
 * @return The current value of connp->in_current_offset.
 */
size_t htp_connp_req_data_consumed(htp_connp_t *connp) {
    size_t consumed = connp->in_current_offset;

    return consumed;
}
/**
 * Process a chunk of inbound (client or request) data.
 *
 * The chunk is recorded on the parser and the current state function is
 * then invoked repeatedly until one of them asks for more data, suspends
 * parsing or fails. A failure is sticky: it puts the inbound stream into
 * STREAM_STATE_ERROR and all subsequent calls are refused.
 *
 * @param connp
 * @param timestamp
 * @param data
 * @param len
 * @return STREAM_STATE_DATA when the chunk has been consumed (or ignored
 *         because the stream is a tunnel), STREAM_STATE_DATA_OTHER when
 *         parsing was suspended with part of the chunk still unconsumed,
 *         or STREAM_STATE_ERROR on error.
 */
int htp_connp_req_data(htp_connp_t *connp, htp_time_t timestamp, unsigned char *data, size_t len) {
#ifdef HTP_DEBUG
    fprintf(stderr, "htp_connp_req_data(connp->in_status %x)\n", connp->in_status);
    fprint_raw_data(stderr, __FUNCTION__, data, len);
#endif

    // Return if the connection has had a fatal error
    if (connp->in_status == STREAM_STATE_ERROR) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Inbound parser is in STREAM_STATE_ERROR");

#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_ERROR (previous error)\n");
#endif

        return STREAM_STATE_ERROR;
    }

    // If the length of the supplied data chunk is zero, proceed
    // only if the stream has been closed. We do not allow zero-sized
    // chunks in the API, but we use it internally to force the parsers
    // to finalize parsing.
    if ((len == 0) && (connp->in_status != STREAM_STATE_CLOSED)) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");

#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_ERROR (zero-length chunk)\n");
#endif

        return STREAM_STATE_ERROR;
    }

    // Store the current chunk information
    connp->in_timestamp = timestamp;
    connp->in_current_data = data;
    connp->in_current_len = len;
    connp->in_current_offset = 0;
    connp->in_chunk_count++;
    connp->conn->in_data_counter += len;
    connp->conn->in_packet_counter++;

    // Return without processing any data if the stream is in tunneling
    // mode (which it would be after an initial CONNECT transaction).
    // The counters above are updated even so.
    if (connp->in_status == STREAM_STATE_TUNNEL) {
#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_DATA (tunnel)\n");
#endif

        return STREAM_STATE_DATA;
    }

    // Invoke a processor, in a loop, until an error
    // occurs or until we run out of data. Many processors
    // will process a request, each pointing to the next
    // processor that needs to run.
    for (;;) {
#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_req_data: in state=%s, progress=%s\n",
            htp_connp_in_state_as_string(connp),
            htp_tx_progress_as_string(connp->in_tx));
#endif

        // Return if there's been an error
        // or if we've run out of data. We are relying
        // on processors to add error messages, so we'll
        // keep quiet here.
        int rc = connp->in_state(connp);
        if (rc != HTP_OK) {
            // Do we need more data?
            if (rc == HTP_DATA) {
#ifdef HTP_DEBUG
                fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_DATA\n");
#endif

                return STREAM_STATE_DATA;
            }

            // Check for suspended parsing
            if (rc == HTP_DATA_OTHER) {
                // We might have actually consumed the entire data chunk?
                if (connp->in_current_offset >= connp->in_current_len) {
                    // Do not send STREAM_STATE_DATA_OTHER if we've
                    // consumed the entire chunk
#ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_DATA (suspended parsing)\n");
#endif

                    return STREAM_STATE_DATA;
                } else {
                    // Partial chunk consumption
#ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_DATA_OTHER\n");
#endif

                    return STREAM_STATE_DATA_OTHER;
                }
            }

            // Remember that we've had an error. Errors are
            // (at least at present) not possible to recover from.
            connp->in_status = STREAM_STATE_ERROR;

#ifdef HTP_DEBUG
            fprintf(stderr, "htp_connp_req_data: returning STREAM_STATE_ERROR (state response)\n");
#endif

            return STREAM_STATE_ERROR;
        }
    }
}

@ -0,0 +1,300 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Extract one request header. A header can span multiple lines, in
 * which case they will be folded into one before parsing is attempted.
 *
 * The lines that make up the header are those stored in the transaction's
 * request_header_lines list at indexes in_header_line_index (inclusive)
 * to in_header_line_counter (exclusive). If a header with the same name
 * already exists, the new value is appended to it, separated by ", ", and
 * the existing header is flagged with HTP_FIELD_REPEATED.
 *
 * On success every contributing line is linked, through its header field,
 * to the header structure that ends up stored in the request_headers
 * table. On failure the lines are left untouched.
 *
 * @param connp
 * @return HTP_OK or HTP_ERROR
 */
int htp_process_request_header_apache_2_2(htp_connp_t *connp) {
    bstr *tempstr = NULL;
    unsigned char *data = NULL;
    size_t len = 0;
    int i;

    // Create new header structure
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
    if (h == NULL) return HTP_ERROR;

    // Ensure we have the necessary header data in a single buffer
    if (connp->in_header_line_index + 1 == connp->in_header_line_counter) {
        // Single line; parse it in place
        htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines,
            connp->in_header_line_index);
        if (hl == NULL) {
            // Internal error
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process request header (Apache 2.2): Internal error");
            free(h);
            return HTP_ERROR;
        }

        data = (unsigned char *) bstr_ptr(hl->line);
        len = bstr_len(hl->line);
    } else {
        // Multiple lines (folded)

        // First pass: determine the combined length
        for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
            if (hl == NULL) {
                // Internal error
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Process request header (Apache 2.2): Internal error");
                free(h);
                return HTP_ERROR;
            }

            len += bstr_len(hl->line);
        }

        tempstr = bstr_alloc(len);
        if (tempstr == NULL) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process request header (Apache 2.2): Failed to allocate bstring of %lu bytes",
                (unsigned long) len);
            free(h);
            return HTP_ERROR;
        }

        // Second pass: concatenate the lines. Every index
        // was verified to hold a line in the first pass.
        for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
            bstr_add_str_noex(tempstr, hl->line);
        }

        data = (unsigned char *) bstr_ptr(tempstr);
    }

    // Now try to parse the header
    if (htp_parse_request_header_apache_2_2(connp, h, data, len) != HTP_OK) {
        // Note: downstream responsible for error logging
        free(tempstr);
        free(h);
        return HTP_ERROR;
    }

    // The header structure the lines will be linked to
    htp_header_t *h_final = h;

    // Do we already have a header with the same name?
    htp_header_t *h_existing = table_get(connp->in_tx->request_headers, h->name);
    if (h_existing != NULL) {
        // TODO Do we want to keep a list of the headers that are
        //      allowed to be combined in this way?

        // Add to existing header. The result of the expansion is checked
        // before it replaces the stored value, so that a failure does not
        // overwrite the only reference to it.
        // NOTE(review): assumes bstr_expand() reports failure with NULL
        //               and leaves its argument valid - confirm.
        bstr *expanded = bstr_expand(h_existing->value, bstr_len(h_existing->value)
            + 2 + bstr_len(h->value));
        if (expanded == NULL) {
            free(h->name);
            free(h->value);
            free(h);
            free(tempstr);
            return HTP_ERROR;
        }

        h_existing->value = expanded;
        bstr_add_mem_noex(h_existing->value, ", ", 2);
        bstr_add_str_noex(h_existing->value, h->value);

        // The header is no longer needed
        free(h->name);
        free(h->value);
        free(h);

        // Keep track of same-name headers
        h_existing->flags |= HTP_FIELD_REPEATED;

        h_final = h_existing;
    } else {
        // Add as a new header
        table_add(connp->in_tx->request_headers, h->name, h);
    }

    // Link the raw lines to the header that survived. This is done last
    // so that no line is ever left pointing at a header that was freed.
    for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
        htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
        if (hl != NULL) {
            hl->header = h_final;
        }
    }

    free(tempstr);

    return HTP_OK;
}
/**
 * Parses a message header line as Apache 2.2 does.
 *
 * The line is treated as ending at the first NUL byte. The name is
 * whatever precedes the first colon, with trailing LWS removed; the value
 * is whatever follows the colon, with LWS removed on both sides. Problems
 * with the name (empty, followed by LWS, not a token) are flagged with
 * HTP_FIELD_INVALID on the header and on the transaction and logged once
 * per transaction, but do not prevent the header from being extracted.
 *
 * @param connp
 * @param h Header structure to fill in; name and value are allocated here
 *          and are left NULL when HTP_ERROR is returned.
 * @param data
 * @param len
 * @return HTP_OK, or HTP_ERROR if the colon is missing or memory cannot
 *         be allocated
 */
int htp_parse_request_header_apache_2_2(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
    size_t name_start, name_end;
    size_t value_start, value_end;
    size_t i;

    name_start = 0;

    // Look for the colon
    size_t colon_pos = 0;

    while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;

    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
        // Missing colon
        h->flags |= HTP_FIELD_UNPARSEABLE;

        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request field invalid: colon missing");
        }

        return HTP_ERROR;
    }

    if (colon_pos == 0) {
        // Empty header name
        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
        }
    }

    name_end = colon_pos;

    // Ignore LWS after field-name. The first byte of the name is never
    // examined. The bound is tested before indexing, so an empty name
    // (name_end == 0) cannot wrap around to a read outside the buffer.
    while ((name_end > name_start + 1) && (htp_is_lws(data[name_end - 1]))) {
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
        }
    }

    // Value

    value_start = colon_pos;

    // Go over the colon
    if (value_start < len) {
        value_start++;
    }

    // Ignore LWS before field-content
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // Look for the end of field-content
    value_end = value_start;

    while ((value_end < len) && (data[value_end] != '\0')) value_end++;

    // Ignore LWS after field-content
    while ((value_end > value_start + 1) && (htp_is_lws(data[value_end - 1]))) {
        value_end--;
    }

    // Check that the header name is a token
    for (i = name_start; i < name_end; i++) {
        if (!htp_is_token(data[i])) {
            h->flags |= HTP_FIELD_INVALID;

            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
                connp->in_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
            }

            break;
        }
    }

    // Now extract the name and the value
    h->name = bstr_memdup((char *) data + name_start, name_end - name_start);
    h->value = bstr_memdup((char *) data + value_start, value_end - value_start);

    if ((h->name == NULL) || (h->value == NULL)) {
        // Memory allocation failure. The caller frees only the header
        // structure on error, so release whichever copy was made.
        free(h->name);
        free(h->value);
        h->name = NULL;
        h->value = NULL;

        return HTP_ERROR;
    }

    return HTP_OK;
}
/**
 * Parse request line as Apache 2.2 does.
 *
 * Splits tx->request_line into method, URI and protocol and stores copies
 * of them, together with the numeric method and protocol values, in the
 * transaction. A line without protocol information is marked as a
 * short-style (HTTP/0.9) request.
 *
 * @param connp
 * @return HTP_OK, or HTP_ERROR if memory cannot be allocated
 */
int htp_parse_request_line_apache_2_2(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *data = (unsigned char *) bstr_ptr(tx->request_line);
    size_t len = bstr_len(tx->request_line);
    size_t pos = 0;

    // In this implementation we assume the
    // line ends with the first NUL byte.
    if (tx->request_line_nul_offset != -1) {
        size_t nul_len = 0;

        if (tx->request_line_nul_offset > 0) {
            nul_len = (size_t) (tx->request_line_nul_offset - 1);
        }

        // Only ever shorten the line; never read past its stored length
        if (nul_len < len) {
            len = nul_len;
        }
    }

    // The request method starts at the beginning of the
    // line and ends with the first whitespace character.
    while ((pos < len) && (!htp_is_space(data[pos]))) {
        pos++;
    }

    // No, we don't care if the method is empty.
    tx->request_method = bstr_memdup((char *) data, pos);
    if (tx->request_method == NULL) {
        // Memory allocation failure
        return HTP_ERROR;
    }

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->request_method), bstr_len(tx->request_method));
#endif

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    // Ignore whitespace after request method. The RFC allows
    // for only one SP, but then suggests any number of SP and HT
    // should be permitted. Apache uses isspace(), which is even
    // more permitting, so that's what we use here.
    while ((pos < len) && (isspace(data[pos]))) {
        pos++;
    }

    size_t start = pos;

    // The URI ends with the first whitespace.
    while ((pos < len) && (!htp_is_space(data[pos]))) {
        pos++;
    }

    tx->request_uri = bstr_memdup((char *) data + start, pos - start);
    if (tx->request_uri == NULL) {
        // Memory allocation failure
        return HTP_ERROR;
    }

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
#endif

    // Ignore whitespace after URI
    while ((pos < len) && (htp_is_space(data[pos]))) {
        pos++;
    }

    // Is there protocol information available?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.
        tx->protocol_is_simple = 1;
        return HTP_OK;
    }

    // The protocol information spreads until the end of the line.
    tx->request_protocol = bstr_memdup((char *) data + pos, len - pos);
    if (tx->request_protocol == NULL) {
        // Memory allocation failure
        return HTP_ERROR;
    }

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
#endif

    return HTP_OK;
}

@ -0,0 +1,285 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Extract one request header. A header can span multiple lines, in
 * which case they will be folded into one before parsing is attempted.
 *
 * The lines that make up the header are those stored in the transaction's
 * request_header_lines list at indexes in_header_line_index (inclusive)
 * to in_header_line_counter (exclusive). If a header with the same name
 * already exists, the new value is appended to it, separated by ", ", and
 * the existing header is flagged with HTP_FIELD_REPEATED.
 *
 * On success every contributing line is linked, through its header field,
 * to the header structure that ends up stored in the request_headers
 * table. On failure the lines are left untouched.
 *
 * @param connp
 * @return HTP_OK or HTP_ERROR
 */
int htp_process_request_header_generic(htp_connp_t *connp) {
    bstr *tempstr = NULL;
    unsigned char *data = NULL;
    size_t len = 0;
    int i;

    // Create new header structure
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
    if (h == NULL) {
        // There's no sense in logging anything on a memory allocation failure
        return HTP_ERROR;
    }

    // Ensure we have the necessary header data in a single buffer
    if (connp->in_header_line_index + 1 == connp->in_header_line_counter) {
        // Single line; parse it in place
        htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines,
            connp->in_header_line_index);
        if (hl == NULL) {
            // Internal error
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process request header (generic): Internal error");
            free(h);
            return HTP_ERROR;
        }

        data = (unsigned char *)bstr_ptr(hl->line);
        len = bstr_len(hl->line);
    } else {
        // Multiple lines (folded)

        // First pass: determine the combined length
        for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
            if (hl == NULL) {
                // Internal error
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Process request header (generic): Internal error");
                free(h);
                return HTP_ERROR;
            }

            len += bstr_len(hl->line);
        }

        tempstr = bstr_alloc(len);
        if (tempstr == NULL) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process request header (generic): Failed to allocate bstring of %lu bytes",
                (unsigned long) len);
            free(h);
            return HTP_ERROR;
        }

        // Second pass: concatenate the lines. Every index
        // was verified to hold a line in the first pass.
        for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
            bstr_add_str_noex(tempstr, hl->line);
        }

        data = (unsigned char *)bstr_ptr(tempstr);
    }

    // Now try to parse the header
    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
        // Note: downstream responsible for error logging
        if (tempstr != NULL) {
            bstr_free(tempstr);
        }

        free(h);
        return HTP_ERROR;
    }

    // The header structure the lines will be linked to
    htp_header_t *h_final = h;

    // Do we already have a header with the same name?
    htp_header_t *h_existing = table_get(connp->in_tx->request_headers, h->name);
    if (h_existing != NULL) {
        // TODO Do we want to keep a list of the headers that are
        //      allowed to be combined in this way?

        // Add to existing header. The result of the expansion is checked
        // before it replaces the stored value, so that a failure does not
        // overwrite the only reference to it.
        // NOTE(review): assumes bstr_expand() reports failure with NULL
        //               and leaves its argument valid - confirm.
        bstr *expanded = bstr_expand(h_existing->value, bstr_len(h_existing->value)
            + 2 + bstr_len(h->value));
        if (expanded == NULL) {
            bstr_free(h->name);
            bstr_free(h->value);
            free(h);

            if (tempstr != NULL) {
                bstr_free(tempstr);
            }

            return HTP_ERROR;
        }

        h_existing->value = expanded;
        bstr_add_mem_noex(h_existing->value, ", ", 2);
        bstr_add_str_noex(h_existing->value, h->value);

        // The header is no longer needed
        bstr_free(h->name);
        bstr_free(h->value);
        free(h);

        // Keep track of same-name headers
        h_existing->flags |= HTP_FIELD_REPEATED;

        h_final = h_existing;
    } else {
        // Add as a new header
        table_add(connp->in_tx->request_headers, h->name, h);
    }

    // Link the raw lines to the header that survived. This is done last
    // so that no line is ever left pointing at a header that was freed.
    for (i = connp->in_header_line_index; i < connp->in_header_line_counter; i++) {
        htp_header_line_t *hl = list_get(connp->in_tx->request_header_lines, i);
        if (hl != NULL) {
            hl->header = h_final;
        }
    }

    if (tempstr != NULL) {
        bstr_free(tempstr);
    }

    return HTP_OK;
}
/**
 * Generic request header parser.
 *
 * The name is whatever precedes the first colon, with trailing LWS
 * removed; the value is whatever follows the colon up to the end of the
 * buffer, with LWS removed on both sides. Problems with the name (empty,
 * followed by LWS, not a token) are flagged with HTP_FIELD_INVALID on the
 * header and on the transaction and logged once per transaction, but do
 * not prevent the header from being extracted.
 *
 * @param connp
 * @param h Header structure to fill in; name and value are allocated here
 *          and are left NULL when HTP_ERROR is returned.
 * @param data
 * @param len
 * @return HTP_OK, or HTP_ERROR if the colon is missing or memory cannot
 *         be allocated
 */
int htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
    size_t name_start, name_end;
    size_t value_start, value_end;
    size_t i;

    name_start = 0;

    // Look for the colon
    size_t colon_pos = 0;

    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;

    if (colon_pos == len) {
        // Missing colon
        h->flags |= HTP_FIELD_UNPARSEABLE;

        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request field invalid: colon missing");
        }

        return HTP_ERROR;
    }

    if (colon_pos == 0) {
        // Empty header name
        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
        }
    }

    name_end = colon_pos;

    // Ignore LWS after field-name. The first byte of the name is never
    // examined. The bound is tested before indexing, so an empty name
    // (name_end == 0) cannot wrap around to a read outside the buffer.
    while ((name_end > name_start + 1) && (htp_is_lws(data[name_end - 1]))) {
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
        }
    }

    // Value

    value_start = colon_pos;

    // Go over the colon
    if (value_start < len) {
        value_start++;
    }

    // Ignore LWS before field-content
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // The field-content runs to the end of the buffer
    value_end = value_start;

    if (value_end < len) {
        value_end = len;
    }

    // Ignore LWS after field-content
    while ((value_end > value_start + 1) && (htp_is_lws(data[value_end - 1]))) {
        value_end--;
    }

    // Check that the header name is a token
    for (i = name_start; i < name_end; i++) {
        if (!htp_is_token(data[i])) {
            h->flags |= HTP_FIELD_INVALID;

            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
                connp->in_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
            }

            break;
        }
    }

    // Now extract the name and the value
    h->name = bstr_memdup((char *)data + name_start, name_end - name_start);
    h->value = bstr_memdup((char *)data + value_start, value_end - value_start);

    if ((h->name == NULL) || (h->value == NULL)) {
        // Memory allocation failure. The caller frees only the header
        // structure on error, so release whichever copy was made.
        if (h->name != NULL) {
            bstr_free(h->name);
        }

        if (h->value != NULL) {
            bstr_free(h->value);
        }

        h->name = NULL;
        h->value = NULL;

        return HTP_ERROR;
    }

    return HTP_OK;
}
/**
 * Generic request line parser.
 *
 * Reads connp->in_tx->request_line and splits it on whitespace into the
 * request method, the request URI and (when present) the protocol string.
 * A line with nothing after the URI is marked as a simple (HTTP/0.9 style)
 * request.
 *
 * @param connp connection parser whose inbound transaction is parsed
 * @return HTP_OK (this implementation has no error path)
 */
int htp_parse_request_line_generic(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *line = (unsigned char *) bstr_ptr(tx->request_line);
    size_t line_len = bstr_len(tx->request_line);
    size_t cursor;

    // Method: everything up to the first whitespace character.
    // An empty method is accepted as-is.
    for (cursor = 0; (cursor < line_len) && (!htp_is_space(line[cursor])); cursor++) {
        // just scanning
    }

    tx->request_method = bstr_memdup((char *) line, cursor);
    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    // Skip the separator. The RFC asks for a single SP but also suggests
    // tolerating any amount of SP and HT, so consume all of it.
    for (; (cursor < line_len) && (isspace(line[cursor])); cursor++) {
        // just scanning
    }

    // URI: from here up to the next whitespace character.
    size_t uri_start = cursor;

    for (; (cursor < line_len) && (!htp_is_space(line[cursor])); cursor++) {
        // just scanning
    }

    tx->request_uri = bstr_memdup((char *) line + uri_start, cursor - uri_start);

    // Skip whitespace after the URI
    for (; (cursor < line_len) && (htp_is_space(line[cursor])); cursor++) {
        // just scanning
    }

    if (cursor != line_len) {
        // Whatever remains, up to the end of the line, is the protocol.
        tx->request_protocol = bstr_memdup((char *) line + cursor, line_len - cursor);
        tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
    } else {
        // Nothing left: this looks like a HTTP/0.9 request.
        tx->protocol_is_simple = 1;
    }

    return HTP_OK;
}

@ -0,0 +1,123 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
#if 0
/**
 * Strict header line parser (this code sits inside an "#if 0" region).
 *
 * Locates "name [LWS] : [LWS] value" in data and records the offsets and
 * lengths of the name and value in h; nothing is copied. The value ends at
 * the first LWS character that follows it.
 *
 * @param data header line bytes
 * @param len number of bytes in data
 * @param h header structure receiving name/value offsets and lengths
 * @return 1 on success, -1 when the line does not have the expected shape
 */
int htp_header_parse_internal_strict(unsigned char *data, size_t len, htp_header_t *h) {
    size_t name_first = 0;
    size_t name_last = 0;
    size_t cursor;
    size_t value_first, value_last;

    // The name runs until LWS or a colon is seen
    while ((name_last < len) && !(htp_is_lws(data[name_last]) || (data[name_last] == ':'))) {
        name_last++;
    }

    // Empty header name
    if (name_last == 0) return -1;

    // TODO Ran off the end without finding a separator
    if (name_last == len) return -1;

    // Step over any LWS between the name and the colon
    // TODO Decide whether LWS here should be rejected outright
    for (cursor = name_last; (cursor < len) && htp_is_lws(data[cursor]); cursor++) {
        // just scanning
    }

    // TODO Nothing after the name
    if (cursor == len) return -1;

    // TODO The next character must be a colon
    if (data[cursor] != ':') return -1;

    // Step over the colon and the LWS that follows it
    for (cursor++; (cursor < len) && htp_is_lws(data[cursor]); cursor++) {
        // just scanning
    }

    // TODO No value present
    if (cursor == len) return -1;

    // The value runs until the next LWS character (or the end of data)
    value_first = cursor;
    for (value_last = cursor; (value_last < len) && !htp_is_lws(data[value_last]); value_last++) {
        // just scanning
    }

    h->name_offset = name_first;
    h->name_len = name_last - name_first;
    h->value_offset = value_first;
    h->value_len = value_last - value_first;

    return 1;
}
*/
/**
 * Allocates a header structure and fills it from one header line (this
 * code sits inside an "#if 0" region).
 *
 * The line is handed to reqp->impl_header_parse(). When that reports a
 * negative result the whole line is stored as the name and the header is
 * marked unparsed; otherwise name, value and a lowercase copy of the name
 * are extracted using the offsets the parser recorded.
 *
 * @param reqp parser providing impl_header_parse
 * @param data header line bytes
 * @param len number of bytes in data
 * @return the new header, or NULL if the allocation failed
 */
htp_header_t *htp_connp_header_parse(htp_connp_t *reqp, unsigned char *data, size_t len) {
    htp_header_t *hdr = calloc(1, sizeof (htp_header_t));
    if (hdr == NULL) return NULL;

    if (reqp->impl_header_parse(data, len, hdr) >= 0) {
        // Parsed successfully: copy out the name and the value
        hdr->name = bstr_memdup(data + hdr->name_offset, hdr->name_len);
        hdr->value = bstr_memdup(data + hdr->value_offset, hdr->value_len);
        hdr->is_parsed = 1;

        // Header names are case-insensitive, so keep a lowercase
        // copy of the name to serve as a lookup key.
        hdr->name_lowercase = bstr_tolowercase(hdr->name);
    } else {
        // Invalid header line: keep the raw bytes as the name
        hdr->is_parsed = 0;
        hdr->name = bstr_memdup(data, len);
    }

    return hdr;
}
#endif

@ -0,0 +1,806 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <stdlib.h>
#include "htp.h"
/**
 * Invoked whenever decompressed response body data becomes available.
 * Forwards the data to every registered response-body-data hook.
 *
 * @param d decompressed data together with the transaction it belongs to
 * @return HTP_OK when the hooks accepted the data, HTP_ERROR (after
 *         logging) when they returned anything else.
 */
static int htp_connp_RES_BODY_DECOMPRESSOR_CALLBACK(htp_tx_data_t *d) {
    int hook_rc = hook_run_all(d->tx->connp->cfg->hook_response_body_data, d);

    if (hook_rc == HTP_OK) {
        return HTP_OK;
    }

    htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
            "Response body data callback returned error (%d)", hook_rc);

    return HTP_ERROR;
}
/**
 * Consumes bytes until the end of the current line, i.e. the line
 * terminator that follows the data of a response chunk. Once LF is seen
 * the parser moves on to reading the next chunk length.
 *
 * @param connp
 * @returns HTP_OK on state change; any other value comes from the
 *          OUT_NEXT_BYTE_OR_RETURN macro (presumably HTP_DATA when the
 *          current buffer is exhausted - confirm against its definition).
 */
int htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp) {
    // TODO We shouldn't really see anything apart from CR and LF,
    //      so we should warn about anything else.

    for (;;) {
        OUT_NEXT_BYTE_OR_RETURN(connp);

        // These bytes belong to the response, so they are accounted for in
        // the response counter (the request counter must not be touched by
        // the outbound parser).
        connp->out_tx->response_message_len++;

        if (connp->out_next_byte == LF) {
            connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH;
            return HTP_OK;
        }
    }
}
/**
 * Processes the data portion of a response chunk.
 *
 * Bytes are consumed from the current buffer until either the buffer runs
 * out or out_chunked_length reaches zero. The consumed slice is handed to
 * the decompressor when the response uses a content encoding, or straight
 * to the response-body-data hooks otherwise.
 *
 * @param connp
 * @returns HTP_OK when the chunk is complete (state changes to
 *          RES_BODY_CHUNKED_DATA_END), HTP_ERROR when a body data hook
 *          fails, or HTP_DATA when more data is needed.
 */
int htp_connp_RES_BODY_CHUNKED_DATA(htp_connp_t *connp) {
    htp_tx_data_t d;

    // The slice starts at the current read position and grows by one
    // for every byte consumed below.
    d.tx = connp->out_tx;
    d.data = &connp->out_current_data[connp->out_current_offset];
    d.len = 0;

    for (;;) {
        OUT_NEXT_BYTE(connp);

        if (connp->out_next_byte == -1) {
            // No more bytes in the current buffer. Deliver what has been
            // collected so far, but never an empty slice: that mirrors
            // RES_BODY_IDENTITY and avoids invoking the decompressor or
            // the hooks with zero bytes.
            if (d.len != 0) {
                if (connp->out_tx->response_content_encoding != COMPRESSION_NONE) {
                    connp->out_decompressor->decompress(connp->out_decompressor, &d);
                } else {
                    // Send data to callbacks
                    int rc = hook_run_all(connp->cfg->hook_response_body_data, &d);
                    if (rc != HOOK_OK) {
                        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                                "Response body data callback returned error (%d)", rc);
                        return HTP_ERROR;
                    }
                }
            }

            // Ask for more data
            return HTP_DATA;
        }

        connp->out_tx->response_message_len++;
        connp->out_tx->response_entity_len++;
        connp->out_chunked_length--;
        d.len++;

        if (connp->out_chunked_length == 0) {
            // End of data chunk (d.len is at least 1 here)
            if (connp->out_tx->response_content_encoding != COMPRESSION_NONE) {
                connp->out_decompressor->decompress(connp->out_decompressor, &d);
            } else {
                // Send data to callbacks
                int rc = hook_run_all(connp->cfg->hook_response_body_data, &d);
                if (rc != HOOK_OK) {
                    htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                            "Response body data callback returned error (%d)", rc);
                    return HTP_ERROR;
                }
            }

            connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA_END;

            return HTP_OK;
        }
    }
}
/**
 * Extracts chunk length.
 *
 * Accumulates one line of response data, parses it as a chunk length and
 * selects the next state: chunk data for a positive length, trailer
 * headers for a zero length, and an error for anything else.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on an invalid chunk length;
 *          other values may come from OUT_COPY_BYTE_OR_RETURN.
 */
int htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp) {
    for (;;) {
        OUT_COPY_BYTE_OR_RETURN(connp);

        connp->out_tx->response_message_len++;

        // Keep collecting until the line is complete
        if (connp->out_next_byte != LF) {
            continue;
        }

        htp_chomp(connp->out_line, &connp->out_line_len);

        // Extract chunk length
        connp->out_chunked_length = htp_parse_chunked_length(connp->out_line, connp->out_line_len);

        // The line buffer is reused for the next line
        connp->out_line_len = 0;

        if (connp->out_chunked_length < 0) {
            // Invalid chunk length
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Response chunk encoding: Invalid chunk length: %d", connp->out_chunked_length);
            return HTP_ERROR;
        }

        if (connp->out_chunked_length == 0) {
            // Last chunk: what follows is parsed as (trailer) headers
            connp->out_state = htp_connp_RES_HEADERS;
            connp->out_tx->progress = TX_PROGRESS_RES_TRAILER;
        } else {
            // More data available
            // TODO Add a check for chunk length
            connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA;
        }

        return HTP_OK;
    }
}
/**
 * Processes identity response body.
 *
 * Consumes body bytes from the current buffer and hands them to the
 * decompressor (when a content encoding is in use) or to the
 * response-body-data hooks. The body ends either when the announced number
 * of bytes has been consumed or, when the length is unknown
 * (out_content_length == -1), when the stream has been closed.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR when a body data hook fails,
 *          or HTP_DATA when more data is needed.
 */
int htp_connp_RES_BODY_IDENTITY(htp_connp_t *connp) {
    htp_tx_data_t d;

    // The slice starts at the current read position and grows by one
    // for every byte consumed below.
    d.tx = connp->out_tx;
    d.data = &connp->out_current_data[connp->out_current_offset];
    d.len = 0;

    for (;;) {
        OUT_NEXT_BYTE(connp);

        if (connp->out_next_byte == -1) {
            // End of the current buffer: send what we have to callbacks
            if (d.len != 0) {
                if (connp->out_tx->response_content_encoding != COMPRESSION_NONE) {
                    connp->out_decompressor->decompress(connp->out_decompressor, &d);
                } else {
                    int rc = hook_run_all(connp->cfg->hook_response_body_data, &d);
                    if (rc != HOOK_OK) {
                        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                                "Response body data callback returned error (%d)", rc);
                        return HTP_ERROR;
                    }
                }
            }

            // If we don't know the length, then we must check
            // to see if the stream closed; that would signal the
            // end of the response body (and the end of the transaction).
            if ((connp->out_content_length == -1) && (connp->out_status == STREAM_STATE_CLOSED)) {
                connp->out_state = htp_connp_RES_IDLE;
                connp->out_tx->progress = TX_PROGRESS_DONE;

                return HTP_OK;
            }

            // Ask for more data
            return HTP_DATA;
        }

        connp->out_tx->response_message_len++;
        connp->out_tx->response_entity_len++;

        // Every consumed byte is part of the slice, whether or not the body
        // length is known. Counting it only in the known-length branch would
        // leave d.len at zero for close-delimited bodies (assuming
        // out_body_data_left is a signed field holding -1 in that case -
        // TODO confirm its type), so their data would never reach callbacks.
        d.len++;

        if (connp->out_body_data_left > 0) {
            // We know the length of response body
            connp->out_body_data_left--;

            if (connp->out_body_data_left == 0) {
                // End of body: send data to callbacks (d.len is at least 1)
                if (connp->out_tx->response_content_encoding != COMPRESSION_NONE) {
                    connp->out_decompressor->decompress(connp->out_decompressor, &d);
                } else {
                    int rc = hook_run_all(connp->cfg->hook_response_body_data, &d);
                    if (rc != HOOK_OK) {
                        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                                "Response body data callback returned error (%d)", rc);
                        return HTP_ERROR;
                    }
                }

                // Done
                connp->out_state = htp_connp_RES_IDLE;
                connp->out_tx->progress = TX_PROGRESS_DONE;

                return HTP_OK;
            }
        }

        // Otherwise the length of the response body is not known, which
        // means that the body consumes all data until the connection is
        // closed; that case is handled when the buffer runs out, above.
    }
}
/**
* Determines presence (and encoding) of a response body.
*
* Runs once the response headers are complete and picks the next parser
* state. The checks are made in this order:
*  - a 2xx response to a CONNECT request switches the stream to tunnel mode;
*  - a 100 response is discarded and the parser returns to RES_LINE;
*  - a gzip/x-gzip Content-Encoding sets up a decompressor;
*  - the body framing is then chosen: no body, chunked, Content-Length, or
*    read until the connection closes.
* The response-headers hook runs at the very end, so it is not invoked on
* the paths that return early (CONNECT tunnel, 100 Continue, errors).
*
* @param connp
* @returns HTP_OK on state change, HTP_ERROR on error (repeated 100 response,
*          invalid Content-Length, multipart/byteranges, or a failing hook).
*/
int htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp) {
// If the request uses the CONNECT method, then not only are we
// to assume there's no body, but we need to ignore all
// subsequent data in the stream.
if ((connp->out_tx->request_method_number == M_CONNECT)
&&(connp->out_tx->response_status_number >= 200)
&&(connp->out_tx->response_status_number <= 299))
{
connp->out_status = STREAM_STATE_TUNNEL;
connp->out_state = htp_connp_RES_IDLE;
connp->out_tx->progress = TX_PROGRESS_DONE;
return HTP_OK;
}
// Check for an interim "100 Continue"
// response. Ignore it if found, and revert back to RES_LINE.
if (connp->out_tx->response_status_number == 100) {
// Only one interim 100 response is tolerated per transaction
if (connp->out_tx->seen_100continue != 0) {
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Already seen 100-Continue");
return HTP_ERROR;
}
// Ignore any response headers set
table_clear(connp->out_tx->response_headers);
connp->out_state = htp_connp_RES_LINE;
connp->out_tx->progress = TX_PROGRESS_RES_LINE;
connp->out_tx->seen_100continue++;
return HTP_OK;
}
// Check for compression
htp_header_t *ce = table_getc(connp->out_tx->response_headers, "content-encoding");
if (ce != NULL) {
// TODO Improve detection
// TODO How would a Content-Range header affect us?
if ((bstr_cmpc(ce->value, "gzip") == 0) || (bstr_cmpc(ce->value, "x-gzip") == 0)) {
connp->out_decompressor = (htp_decompressor_t *) htp_gzip_decompressor_create(connp);
if (connp->out_decompressor != NULL) {
// Decompressed output is routed to the body data hooks via the callback
connp->out_tx->response_content_encoding = COMPRESSION_GZIP;
connp->out_decompressor->callback = htp_connp_RES_BODY_DECOMPRESSOR_CALLBACK;
} else {
// No need to do anything; the error will have already
// been reported by the failed decompressor.
}
}
}
// 1. Any response message which MUST NOT include a message-body
// (such as the 1xx, 204, and 304 responses and any response to a HEAD
// request) is always terminated by the first empty line after the
// header fields, regardless of the entity-header fields present in the
// message.
if (((connp->out_tx->response_status_number >= 100) && (connp->out_tx->response_status_number <= 199))
|| (connp->out_tx->response_status_number == 204) || (connp->out_tx->response_status_number == 304)
|| (connp->out_tx->request_method_number == M_HEAD)) {
// There's no response body
connp->out_state = htp_connp_RES_IDLE;
} else {
// We have a response body
htp_header_t *cl = table_getc(connp->out_tx->response_headers, "content-length");
htp_header_t *te = table_getc(connp->out_tx->response_headers, "transfer-encoding");
// 2. If a Transfer-Encoding header field (section 14.40) is present and
// indicates that the "chunked" transfer coding has been applied, then
// the length is defined by the chunked encoding (section 3.6).
if (te != NULL) {
// A value other than "chunked" is only logged; the body is still
// parsed as chunked below.
if (bstr_cmpc(te->value, "chunked") != 0) {
// Invalid T-E header value
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
"Invalid T-E value in response");
}
// If the T-E header is present we are going to use it.
connp->out_tx->response_transfer_coding = CHUNKED;
// We are still going to check for the presence of C-L
if (cl != NULL) {
// This is a violation of the RFC
connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
// TODO
}
connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH;
connp->out_tx->progress = TX_PROGRESS_RES_BODY;
}// 3. If a Content-Length header field (section 14.14) is present, its
// value in bytes represents the length of the message-body.
else if (cl != NULL) {
// We know the exact length
connp->out_tx->response_transfer_coding = IDENTITY;
// Check for multiple C-L headers
if (cl->flags & HTP_FIELD_REPEATED) {
connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
// TODO Log
}
// Get body length; a negative result is treated as a parse failure
int i = htp_parse_content_length(cl->value);
if (i < 0) {
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in response");
return HTP_ERROR;
} else {
connp->out_content_length = i;
connp->out_body_data_left = connp->out_content_length;
// A zero-length body completes the transaction immediately
if (connp->out_content_length != 0) {
connp->out_state = htp_connp_RES_BODY_IDENTITY;
connp->out_tx->progress = TX_PROGRESS_RES_BODY;
} else {
connp->out_state = htp_connp_RES_IDLE;
connp->out_tx->progress = TX_PROGRESS_DONE;
}
}
} else {
// 4. If the message uses the media type "multipart/byteranges", which is
// self-delimiting, then that defines the length. This media type MUST
// NOT be used unless the sender knows that the recipient can parse it;
// the presence in a request of a Range header with multiple byte-range
// specifiers implies that the client can parse multipart/byteranges
// responses.
htp_header_t *ct = table_getc(connp->out_tx->response_headers, "content-type");
if (ct != NULL) {
// TODO Handle multipart/byteranges
if (bstr_indexofc_nocase(ct->value, "multipart/byteranges") != -1) {
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
"C-T multipart/byteranges in responses not supported");
return HTP_ERROR;
}
}
// 5. By the server closing the connection. (Closing the connection
// cannot be used to indicate the end of a request body, since that
// would leave no possibility for the server to send back a response.)
connp->out_state = htp_connp_RES_BODY_IDENTITY;
connp->out_tx->progress = TX_PROGRESS_RES_BODY;
}
}
// NOTE We do not need to check for short-style HTTP/0.9 requests here because
// that is done earlier, before response line parsing begins
// Run hook RESPONSE_HEADERS_COMPLETE
int rc = hook_run_all(connp->cfg->hook_response_headers, connp);
if (rc != HOOK_OK) {
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
"Response headers callback returned error (%d)", rc);
return HTP_ERROR;
}
return HTP_OK;
}
/**
* Parses response headers.
*
* Collects response data one line at a time. Each completed line is stored
* in the transaction's response_header_lines list; when a new (non-folded)
* line or the terminator line arrives, the previously collected header is
* handed to cfg->process_response_header. The same state is used for
* trailer headers after a chunked body: when the headers end, the
* transaction's progress value decides whether to go on to
* RES_BODY_DETERMINE or to run the trailer hook and go to RES_IDLE.
*
* @param connp
* @returns HTP_OK on state change, HTP_ERROR on error (allocation failure,
*          header processing failure, or a failing trailer hook); other
*          values may come from OUT_COPY_BYTE_OR_RETURN.
*/
int htp_connp_RES_HEADERS(htp_connp_t *connp) {
for (;;) {
OUT_COPY_BYTE_OR_RETURN(connp);
// Lazily allocate the per-line bookkeeping structure
if (connp->out_header_line == NULL) {
connp->out_header_line = calloc(1, sizeof (htp_header_line_t));
if (connp->out_header_line == NULL) return HTP_ERROR;
connp->out_header_line->first_nul_offset = -1;
}
// Keep track of NUL bytes
if (connp->out_next_byte == 0) {
// Store the offset of the first NUL
if (connp->out_header_line->has_nulls == 0) {
connp->out_header_line->first_nul_offset = connp->out_line_len;
}
// Remember how many NULs there were
connp->out_header_line->flags |= HTP_FIELD_NUL_BYTE;
connp->out_header_line->has_nulls++;
}
// Have we reached the end of the line?
if (connp->out_next_byte == LF) {
#ifdef HTP_DEBUG
fprint_raw_data(stderr, __FUNCTION__, connp->out_line, connp->out_line_len);
#endif
// Should we terminate headers?
if (htp_connp_is_line_terminator(connp, connp->out_line, connp->out_line_len)) {
// Terminator line
// Parse previous header, if any
if (connp->out_header_line_index != -1) {
if (connp->cfg->process_response_header(connp) != HTP_OK) {
// Note: downstream responsible for error logging
return HTP_ERROR;
}
// Reset index
connp->out_header_line_index = -1;
}
// Cleanup: the terminator line itself is not stored
free(connp->out_header_line);
connp->out_line_len = 0;
connp->out_header_line = NULL;
connp->out_header_line_index = -1;
connp->out_header_line_counter = 0;
// We've seen all response headers
if (connp->out_tx->progress == TX_PROGRESS_RES_HEADERS) {
// Determine if this response has a body
connp->out_state = htp_connp_RES_BODY_DETERMINE;
} else {
// Any other progress value is handled as the end of the trailer
// Run hook response_TRAILER
int rc = hook_run_all(connp->cfg->hook_response_trailer, connp);
if (rc != HOOK_OK) {
htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
"Response trailer callback returned error (%d)", rc);
return HTP_ERROR;
}
// We've completed parsing this response
connp->out_state = htp_connp_RES_IDLE;
}
return HTP_OK;
}
// Prepare line for consumption
htp_chomp(connp->out_line, &connp->out_line_len);
// Check for header folding
if (htp_connp_is_line_folded(connp->out_line, connp->out_line_len) == 0) {
// New header line
// Parse previous header, if any
if (connp->out_header_line_index != -1) {
if (connp->cfg->process_response_header(connp) != HTP_OK) {
// Note: downstream responsible for error logging
return HTP_ERROR;
}
// Reset index
connp->out_header_line_index = -1;
}
// Remember the index of the first header line
connp->out_header_line_index = connp->out_header_line_counter;
} else {
// Folding; check that there's a previous header line to add to
if (connp->out_header_line_index == -1) {
// Flag and log the anomaly only once per transaction
if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) {
connp->out_tx->flags |= HTP_INVALID_FOLDING;
htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding");
}
}
}
// Add the raw header line to the list; the list entry now holds the
// structure, so the parser's own pointer is cleared
connp->out_header_line->line = bstr_memdup((char *) connp->out_line, connp->out_line_len);
list_add(connp->out_tx->response_header_lines, connp->out_header_line);
connp->out_header_line = NULL;
// Cleanup for the next line
connp->out_line_len = 0;
// Still -1 only for a folded line with no preceding header line;
// that line then starts a header of its own
if (connp->out_header_line_index == -1) {
connp->out_header_line_index = connp->out_header_line_counter;
}
connp->out_header_line_counter++;
}
}
}
/**
 * Parses response line.
 *
 * Collects one line of response data. Ignorable (empty/whitespace) lines
 * are counted and skipped; any other line is stored as the response line,
 * parsed through cfg->parse_response_line, announced to the response-line
 * hooks, and the parser then moves on to the response headers.
 *
 * @param connp
 * @returns HTP_OK on state change (or after skipping an ignorable line),
 *          HTP_ERROR when parsing or a hook fails; other values may come
 *          from OUT_COPY_BYTE_OR_RETURN.
 */
int htp_connp_RES_LINE(htp_connp_t *connp) {
    for (;;) {
        // Get one byte
        OUT_COPY_BYTE_OR_RETURN(connp);

        // Have we reached the end of the line?
        if (connp->out_next_byte == LF) {
#ifdef HTP_DEBUG
            fprint_raw_data(stderr, __FUNCTION__, connp->out_line, connp->out_line_len);
#endif

            // Is this a line that should be ignored?
            if (htp_connp_is_line_ignorable(connp, connp->out_line, connp->out_line_len)) {
                // We have an empty/whitespace line, which we'll note, ignore and move on
                connp->out_tx->response_ignored_lines++;

                // TODO How many lines are we willing to accept?

                // Start again
                connp->out_line_len = 0;

                return HTP_OK;
            }

            // Process response line
            htp_chomp(connp->out_line, &connp->out_line_len);

            // Deallocate previous response line allocations, which we would have on a 100 response.
            // Each pointer is cleared after it is released: the line parser is
            // pluggable and may fail or leave a field unset, and a stale pointer
            // would then be freed again on the next pass through this state.
            // TODO Consider moving elsewhere; no need to make these checks on every response
            if (connp->out_tx->response_line != NULL) {
                bstr_free(connp->out_tx->response_line);
                connp->out_tx->response_line = NULL;
            }

            if (connp->out_tx->response_protocol != NULL) {
                bstr_free(connp->out_tx->response_protocol);
                connp->out_tx->response_protocol = NULL;
            }

            if (connp->out_tx->response_status != NULL) {
                bstr_free(connp->out_tx->response_status);
                connp->out_tx->response_status = NULL;
            }

            if (connp->out_tx->response_message != NULL) {
                bstr_free(connp->out_tx->response_message);
                connp->out_tx->response_message = NULL;
            }

            connp->out_tx->response_line = bstr_memdup((char *) connp->out_line, connp->out_line_len);

            // Parse response line
            if (connp->cfg->parse_response_line(connp) != HTP_OK) {
                // Note: downstream responsible for error logging
                return HTP_ERROR;
            }

            // Run hook RESPONSE_LINE
            int rc = hook_run_all(connp->cfg->hook_response_line, connp);
            if (rc != HOOK_OK) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                        "Response line callback returned error (%d)", rc);
                return HTP_ERROR;
            }

            // Clean up.
            connp->out_line_len = 0;

            // Move on to the next phase.
            connp->out_state = htp_connp_RES_HEADERS;
            connp->out_tx->progress = TX_PROGRESS_RES_HEADERS;

            return HTP_OK;
        }
    }
}
/**
 * Reports the parser's current read offset within the outbound
 * (response) data chunk it was last given.
 *
 * @param connp
 * @return value of connp->out_current_offset
 */
size_t htp_connp_res_data_consumed(htp_connp_t *connp) {
    size_t consumed = connp->out_current_offset;

    return consumed;
}
/**
 * The response idle state finalizes the transaction that has just been
 * parsed (if any) and prepares the parser for the next response.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error (failing response
 *          hook, or no transaction to match the response to),
 *          HTP_DATA_OTHER when the inbound parser is waiting on the
 *          transaction just completed; other values may come from
 *          OUT_TEST_NEXT_BYTE_OR_RETURN.
 */
int htp_connp_RES_IDLE(htp_connp_t * connp) {
    // An existing outgoing transaction means a response has just been
    // parsed in full: run the final hook and let go of the transaction.
    if (connp->out_tx != NULL) {
        // Shut down the decompressor, if we've used one
        if (connp->out_decompressor != NULL) {
            connp->out_decompressor->destroy(connp->out_decompressor);
            connp->out_decompressor = NULL;
        }

        connp->out_tx->progress = TX_PROGRESS_DONE;

        // Run hook RESPONSE
        int hook_rc = hook_run_all(connp->cfg->hook_response, connp);
        if (hook_rc != HTP_OK) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Response callback returned error (%d)", hook_rc);
            return HTP_ERROR;
        }

        // Is the inbound parser waiting on this very transaction? If so,
        // there may be request data it has not consumed yet, and carrying
        // on here could produce a response without a request.
        int inbound_is_waiting = (connp->in_status == STREAM_STATE_DATA_OTHER)
                && (connp->in_tx == connp->out_tx);

        // Start afresh, whichever way we leave
        connp->out_tx = NULL;

        if (inbound_is_waiting) {
            return HTP_DATA_OTHER;
        }
    }

    // Only leave the idle state when at least one byte of data is
    // available; otherwise new structures could be created even though
    // nothing more arrives on the connection.
    OUT_TEST_NEXT_BYTE_OR_RETURN(connp);

    // A new response begins: pick up the next outgoing transaction
    htp_tx_t *next_tx = list_get(connp->conn->transactions, connp->out_next_tx_index);
    connp->out_tx = next_tx;

    if (next_tx == NULL) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Unable to match response to request");
        return HTP_ERROR;
    }

    // We've used one transaction
    connp->out_next_tx_index++;

    // TODO Detect state mismatch

    // Reset the per-response bookkeeping
    connp->out_header_line_counter = 0;
    connp->out_header_line_index = -1;
    connp->out_body_data_left = -1;
    connp->out_content_length = -1;

    if (!next_tx->protocol_is_simple) {
        // Regular response: begin with the response line
        connp->out_state = htp_connp_RES_LINE;
        next_tx->progress = TX_PROGRESS_RES_LINE;
    } else {
        // A short HTTP/0.9 request is answered without a response line
        // and headers, so go straight to the body.
        next_tx->response_transfer_coding = IDENTITY;
        connp->out_state = htp_connp_RES_BODY_IDENTITY;
        next_tx->progress = TX_PROGRESS_RES_BODY;
    }

    return HTP_OK;
}
/**
 * Process a chunk of outbound (server or response) data.
 *
 * Records the chunk on the connection parser and then repeatedly invokes
 * the current state handler (connp->out_state) until a handler asks for
 * more data, suspends parsing, or fails.
 *
 * @param connp
 * @param timestamp
 * @param data
 * @param len
 * @return STREAM_STATE_DATA when the chunk was consumed (or the stream is
 *         tunnelled), STREAM_STATE_DATA_OTHER when parsing was suspended
 *         with part of the chunk unconsumed, STREAM_STATE_ERROR on error.
 */
int htp_connp_res_data(htp_connp_t *connp, htp_time_t timestamp, unsigned char *data, size_t len) {
#ifdef HTP_DEBUG
    fprintf(stderr, "htp_connp_res_data(connp->out_status %x)\n", connp->out_status);
    fprint_raw_data(stderr, __FUNCTION__, data, len);
#endif

    // Return if the connection has had a fatal error
    if (connp->out_status == STREAM_STATE_ERROR) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Outbound parser is in STREAM_STATE_ERROR");

#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning STREAM_STATE_ERROR (previous error)\n");
#endif

        return STREAM_STATE_ERROR;
    }

    // If the length of the supplied data chunk is zero, proceed
    // only if the stream has been closed. We do not allow zero-sized
    // chunks in the API, but we use it internally to force the parsers
    // to finalize parsing.
    if ((len == 0) && (connp->out_status != STREAM_STATE_CLOSED)) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");

#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning STREAM_STATE_ERROR (zero-length chunk)\n");
#endif

        return STREAM_STATE_ERROR;
    }

    // Store the current chunk information
    connp->out_timestamp = timestamp;
    connp->out_current_data = data;
    connp->out_current_len = len;
    connp->out_current_offset = 0;
    connp->conn->out_data_counter += len;
    connp->conn->out_packet_counter++;

    // Return without processing any data if the stream is in tunneling
    // mode (which it would be after an initial CONNECT transaction).
    if (connp->out_status == STREAM_STATE_TUNNEL) {
#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning STREAM_STATE_DATA (tunnel)\n");
#endif

        return STREAM_STATE_DATA;
    }

    // Invoke a processor, in a loop, until an error
    // occurs or until we run out of data. Many processors
    // will process a response, each pointing to the next
    // processor that needs to run.
    for (;;) {
#ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: out state=%s, progress=%s\n",
                htp_connp_out_state_as_string(connp),
                htp_tx_progress_as_string(connp->out_tx));
#endif

        // Return if there's been an error
        // or if we've run out of data. We are relying
        // on processors to add error messages, so we'll
        // keep quiet here.
        int rc = connp->out_state(connp);
        if (rc != HTP_OK) {
            // Do we need more data?
            if (rc == HTP_DATA) {
                return STREAM_STATE_DATA;
            }

            // Check for suspended parsing
            if (rc == HTP_DATA_OTHER) {
                // We might have actually consumed the entire data chunk?
                if (connp->out_current_offset >= connp->out_current_len) {
                    // Do not send STREAM_STATE_DATA_OTHER if we've
                    // consumed the entire chunk
#ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_res_data: returning STREAM_STATE_DATA (suspended parsing)\n");
#endif

                    return STREAM_STATE_DATA;
                } else {
                    // Partial chunk consumption
#ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_res_data: returning STREAM_STATE_DATA_OTHER\n");
#endif

                    return STREAM_STATE_DATA_OTHER;
                }
            }

            // Remember that we've had an error. Errors are
            // not possible to recover from.
            connp->out_status = STREAM_STATE_ERROR;

            return STREAM_STATE_ERROR;
        }
    }
}

@ -0,0 +1,279 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Generic response line parser.
 *
 * Reads connp->out_tx->response_line and splits it on whitespace into the
 * protocol, the status code and the remaining response message. The
 * protocol and status are additionally converted to their numeric forms.
 *
 * @param connp connection parser whose outbound transaction is parsed
 * @return HTP status (always HTP_OK in this implementation)
 */
int htp_parse_response_line_generic(htp_connp_t *connp) {
    htp_tx_t *tx = connp->out_tx;
    unsigned char *line = (unsigned char *) bstr_ptr(tx->response_line);
    size_t line_len = bstr_len(tx->response_line);
    size_t cursor;

    // Protocol: everything up to the first whitespace character.
    for (cursor = 0; (cursor < line_len) && (!htp_is_space(line[cursor])); cursor++) {
        // just scanning
    }

    tx->response_protocol = bstr_memdup((char *) line, cursor);
    tx->response_protocol_number = htp_parse_protocol(tx->response_protocol);

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->response_protocol), bstr_len(tx->response_protocol));
#endif

    // Skip whitespace after the protocol
    for (; (cursor < line_len) && (isspace(line[cursor])); cursor++) {
        // just scanning
    }

    // Status: from here up to the next whitespace character.
    size_t status_start = cursor;

    for (; (cursor < line_len) && (!htp_is_space(line[cursor])); cursor++) {
        // just scanning
    }

    tx->response_status = bstr_memdup((char *) line + status_start, cursor - status_start);
    tx->response_status_number = htp_parse_status(tx->response_status);

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->response_status), bstr_len(tx->response_status));
#endif

    // Skip whitespace after the status
    for (; (cursor < line_len) && (isspace(line[cursor])); cursor++) {
        // just scanning
    }

    // Whatever remains is the response message.
    tx->response_message = bstr_memdup((char *) line + cursor, line_len - cursor);

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *)bstr_ptr(tx->response_message), bstr_len(tx->response_message));
#endif

    return HTP_OK;
}
/**
 * Generic response header parser.
 *
 * Splits one header line at the first colon, trims linear whitespace (LWS)
 * around the name and the value, and stores newly allocated copies of both
 * in h->name and h->value. Anomalies are recorded in h->flags and, once per
 * transaction, in connp->out_tx->flags together with a log message.
 *
 * @param connp connection parser; connp->out_tx receives the anomaly flags
 * @param h header structure to fill in
 * @param data bytes of the header line
 * @param len number of bytes available in data
 * @return HTP_OK when a name/value pair was extracted, HTP_ERROR when the
 *         line contains no colon (h->name and h->value are not set then)
 */
int htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t *h, char *data, size_t len) {
    const size_t name_start = 0;
    size_t name_end;
    size_t value_start, value_end;

    // Look for the colon
    size_t colon_pos = 0;
    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;

    if (colon_pos == len) {
        // Missing colon
        h->flags |= HTP_FIELD_UNPARSEABLE;

        if (!(connp->out_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->out_tx->flags |= HTP_FIELD_UNPARSEABLE;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Response field invalid: colon missing");
        }

        return HTP_ERROR;
    }

    if (colon_pos == 0) {
        // Empty header name
        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
            connp->out_tx->flags |= HTP_FIELD_INVALID;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: empty name");
        }
    }

    name_end = colon_pos;

    // Ignore LWS after field-name. The test is written against name_end
    // itself (rather than a separate "name_end - 1" index) so that an empty
    // name (colon at offset 0) cannot wrap around size_t and index before
    // the start of the buffer. At least one name byte is always kept.
    while ((name_end > name_start + 1) && (htp_is_lws(data[name_end - 1]))) {
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
            connp->out_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: LWS after name");
        }
    }

    // Value: starts right after the colon (colon_pos < len is guaranteed here)
    value_start = colon_pos + 1;

    // Ignore LWS before field-content
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // The field-content runs to the end of the supplied data
    value_end = len;

    // Ignore LWS after field-content
    while ((value_end > value_start + 1) && (htp_is_lws(data[value_end - 1]))) {
        value_end--;
    }

    // Check that the header name is a token
    for (size_t i = name_start; i < name_end; i++) {
        if (!htp_is_token(data[i])) {
            h->flags |= HTP_FIELD_INVALID;

            if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
                connp->out_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header name is not a token");
            }

            break;
        }
    }

    // Now extract the name and the value
    h->name = bstr_memdup(data + name_start, name_end - name_start);
    h->value = bstr_memdup(data + value_start, value_end - value_start);

    return HTP_OK;
}
/**
 * Generic response header line(s) processor, which assembles folded lines
 * into a single buffer before invoking the parsing function.
 *
 * The lines from connp->out_header_line_index up to (but not including)
 * connp->out_header_line_counter are treated as one header. After a
 * successful parse the header is either added to the transaction's
 * response_headers table or, when a header with the same name already
 * exists, appended to the existing value with a ", " separator. Every
 * contributing line is then linked to the header that survives.
 *
 * @param connp connection parser holding the outbound transaction
 * @return HTP_OK on success, HTP_ERROR on allocation, lookup or parse failure
 */
int htp_process_response_header_generic(htp_connp_t *connp) {
    bstr *tempstr = NULL;
    char *data = NULL;
    size_t len = 0;
    int i = 0;

    // Parse header
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
    if (h == NULL) return HTP_ERROR;

    // Ensure we have the necessary header data in a single buffer
    if (connp->out_header_line_index + 1 == connp->out_header_line_counter) {
        // Single line
        htp_header_line_t *hl = list_get(connp->out_tx->response_header_lines,
            connp->out_header_line_index);
        if (hl == NULL) {
            // Internal error
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process response header (generic): Internal error");
            free(h);
            return HTP_ERROR;
        }

        data = bstr_ptr(hl->line);
        len = bstr_len(hl->line);
    } else {
        // Multiple lines (folded): first work out the combined length
        for (i = connp->out_header_line_index; i < connp->out_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->out_tx->response_header_lines, i);
            if (hl == NULL) {
                // Internal error
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Process response header (generic): Internal error");
                free(h);
                return HTP_ERROR;
            }

            len += bstr_len(hl->line);
        }

        tempstr = bstr_alloc(len);
        if (tempstr == NULL) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                "Process response header (generic): Failed to allocate bstring of %zu bytes", len);
            free(h);
            return HTP_ERROR;
        }

        // Every index was verified to resolve to a line in the loop above
        for (i = connp->out_header_line_index; i < connp->out_header_line_counter; i++) {
            htp_header_line_t *hl = list_get(connp->out_tx->response_header_lines, i);
            bstr_add_str_noex(tempstr, hl->line);
        }

        data = bstr_ptr(tempstr);
    }

    if (htp_parse_response_header_generic(connp, h, data, len) != HTP_OK) {
        // Note: downstream responsible for error logging
        if (tempstr != NULL) {
            free(tempstr);
        }

        free(h);
        return HTP_ERROR;
    }

    // Do we already have a header with the same name?
    htp_header_t *h_existing = table_get(connp->out_tx->response_headers, h->name);
    if (h_existing != NULL) {
        // TODO Do we want to keep a list of the headers that are
        //      allowed to be combined in this way?

        // Add to existing header. The result goes through a temporary so
        // that a failed expansion does not overwrite the stored value.
        // NOTE(review): assumes bstr_expand leaves the original string
        // intact when it returns NULL - confirm against bstr.c.
        bstr *expanded = bstr_expand(h_existing->value, bstr_len(h_existing->value)
            + 2 + bstr_len(h->value));
        if (expanded == NULL) {
            free(h->name);
            free(h->value);
            free(h);

            if (tempstr != NULL) {
                free(tempstr);
            }

            return HTP_ERROR;
        }

        h_existing->value = expanded;
        bstr_add_mem_noex(h_existing->value, ", ", 2);
        bstr_add_str_noex(h_existing->value, h->value);

        // The header is no longer needed
        free(h->name);
        free(h->value);
        free(h);

        // Keep track of same-name headers
        h_existing->flags |= HTP_FIELD_REPEATED;

        // From here on the existing header represents these lines
        h = h_existing;
    } else {
        // Add as a new header
        table_add(connp->out_tx->response_headers, h->name, h);
    }

    // Link the lines to the header only now, once it is known which header
    // structure survives; linking earlier would leave the lines pointing
    // at freed memory after a parse failure or a merge.
    for (i = connp->out_header_line_index; i < connp->out_header_line_counter; i++) {
        htp_header_line_t *hl = list_get(connp->out_tx->response_header_lines, i);
        if (hl != NULL) {
            hl->header = h;
        }
    }

    if (tempstr != NULL) {
        free(tempstr);
    }

    return HTP_OK;
}

@ -0,0 +1,181 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "htp.h"
/**
 * Creates a new transaction structure.
 *
 * The transaction is zero-initialised, bound to the supplied connection and
 * configuration, and given empty request/response header containers plus
 * two empty parsed-URI structures.
 *
 * @param cfg configuration to use for this transaction
 * @param is_cfg_shared stored as-is in tx->is_cfg_shared
 * @param conn connection the transaction belongs to
 * @return The newly created transaction, or NULL on memory allocation failure.
 */
htp_tx_t *htp_tx_create(htp_cfg_t *cfg, int is_cfg_shared, htp_conn_t *conn) {
    htp_tx_t *tx = calloc(1, sizeof (htp_tx_t));
    if (tx == NULL) return NULL;

    tx->conn = conn;
    tx->cfg = cfg;
    tx->is_cfg_shared = is_cfg_shared;

    // -1 marks these values as "not determined yet"
    tx->request_line_nul_offset = -1;
    tx->request_protocol_number = -1;

    tx->request_header_lines = list_array_create(32);
    tx->request_headers = table_create(32);

    tx->parsed_uri = calloc(1, sizeof (htp_uri_t));
    tx->parsed_uri_incomplete = calloc(1, sizeof (htp_uri_t));

    tx->response_header_lines = list_array_create(32);
    tx->response_headers = table_create(32);

    // NOTE(review): assumes list_array_create() reports failure by
    // returning NULL, as table_create() does - confirm.
    if ((tx->request_header_lines == NULL) || (tx->request_headers == NULL)
        || (tx->parsed_uri == NULL) || (tx->parsed_uri_incomplete == NULL)
        || (tx->response_header_lines == NULL) || (tx->response_headers == NULL)) {
        // Release whatever was obtained; a partially built transaction
        // must never reach the caller.
        if (tx->request_header_lines != NULL) list_destroy(tx->request_header_lines);
        if (tx->request_headers != NULL) table_destroy(tx->request_headers);
        if (tx->response_header_lines != NULL) list_destroy(tx->response_header_lines);
        if (tx->response_headers != NULL) table_destroy(tx->response_headers);

        free(tx->parsed_uri);
        free(tx->parsed_uri_incomplete);
        free(tx);

        return NULL;
    }

    return tx;
}
/**
* Destroys the supplied transaction.
*
* @param tx
*/
void htp_tx_destroy(htp_tx_t *tx) {
bstr_free(tx->request_line);
bstr_free(tx->request_method);
bstr_free(tx->request_uri);
bstr_free(tx->request_uri_normalized);
bstr_free(tx->request_protocol);
if (tx->parsed_uri != NULL) {
bstr_free(tx->parsed_uri->scheme);
bstr_free(tx->parsed_uri->username);
bstr_free(tx->parsed_uri->password);
bstr_free(tx->parsed_uri->hostname);
bstr_free(tx->parsed_uri->port);
bstr_free(tx->parsed_uri->path);
bstr_free(tx->parsed_uri->query);
bstr_free(tx->parsed_uri->fragment);
free(tx->parsed_uri);
}
if (tx->parsed_uri_incomplete != NULL) {
bstr_free(tx->parsed_uri_incomplete->scheme);
bstr_free(tx->parsed_uri_incomplete->username);
bstr_free(tx->parsed_uri_incomplete->password);
bstr_free(tx->parsed_uri_incomplete->hostname);
bstr_free(tx->parsed_uri_incomplete->port);
bstr_free(tx->parsed_uri_incomplete->path);
bstr_free(tx->parsed_uri_incomplete->query);
bstr_free(tx->parsed_uri_incomplete->fragment);
free(tx->parsed_uri_incomplete);
}
// Destroy request_header_lines
htp_header_line_t *hl = NULL;
list_iterator_reset(tx->request_header_lines);
while ((hl = list_iterator_next(tx->request_header_lines)) != NULL) {
bstr_free(hl->line);
// No need to destroy hl->header because
// htp_header_line_t does not own it.
free(hl);
}
list_destroy(tx->request_header_lines);
// Destroy request_headers
htp_header_t *h = NULL;
table_iterator_reset(tx->request_headers);
while (table_iterator_next(tx->request_headers, (void **) & h) != NULL) {
bstr_free(h->name);
bstr_free(h->value);
free(h);
}
table_destroy(tx->request_headers);
bstr_free(tx->response_line);
bstr_free(tx->response_protocol);
bstr_free(tx->response_status);
bstr_free(tx->response_message);
// Destroy response_header_lines
hl = NULL;
list_iterator_reset(tx->response_header_lines);
while ((hl = list_iterator_next(tx->response_header_lines)) != NULL) {
bstr_free(hl->line);
// No need to destroy hl->header because
// htp_header_line_t does not own it.
free(hl);
}
list_destroy(tx->response_header_lines);
// Destroy response headers
h = NULL;
table_iterator_reset(tx->response_headers);
while (table_iterator_next(tx->response_headers, (void **) & h) != NULL) {
bstr_free(h->name);
bstr_free(h->value);
free(h);
}
table_destroy(tx->response_headers);
// Tell the connection to remove this transaction
// from the list
htp_conn_remove_tx(tx->conn, tx);
// Invalidate the pointer to this transactions held
// by the connection parser. This is to allow a transaction
// to be destroyed from within the final response callback.
if (tx->connp != NULL) {
if (tx->connp->out_tx == tx) {
tx->connp->out_tx = NULL;
}
}
free(tx);
}
/**
 * Fetches the opaque user pointer previously stored on the transaction.
 *
 * @param tx transaction to query
 * @return The value of tx->user_data (NULL if nothing was stored on a
 *         zero-initialised transaction)
 */
void *htp_tx_get_user_data(htp_tx_t *tx) {
    void *stored = tx->user_data;

    return stored;
}
/**
 * Replaces the configuration used by this transaction.
 *
 * @param tx transaction to update
 * @param cfg configuration to attach
 * @param is_cfg_shared stored verbatim in tx->is_cfg_shared alongside cfg
 */
void htp_tx_set_config(htp_tx_t *tx, htp_cfg_t *cfg, int is_cfg_shared) {
    // The flag always travels together with the configuration pointer
    tx->is_cfg_shared = is_cfg_shared;
    tx->cfg = cfg;
}
/**
 * Stores an opaque user pointer on the transaction; it can be read back
 * with htp_tx_get_user_data().
 *
 * @param tx transaction to update
 * @param user_data pointer to remember; it is only stored, never dereferenced here
 */
void htp_tx_set_user_data(htp_tx_t *tx, void *user_data) {
    void **slot = &tx->user_data;

    *slot = user_data;
}

@ -0,0 +1,256 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include "stdlib.h"
#include "htp_urlencoded.h"
/**
* This method is invoked whenever a piece of data, belonging to a single field (name or value)
* becomes available. It will either create a new parameter or store the transient information
* until a parameter can be created.
*
* @param urlenp
* @param data
* @param startpos
* @param endpos
* @param c Should contain -1 if the reason this function is called is because the end of
* the current data chunk is reached.
*/
static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, unsigned char *data, size_t startpos, size_t endpos, int c) {
// Add field if we know it ended or if we know that
// we've used all of the input data
if ((c != -1) || (urlenp->_complete)) {
// Add field
bstr *field = NULL;
// Did we use the string builder for this field?
if (bstr_builder_size(urlenp->_bb) > 0) {
// The current field consists of more than once piece,
// we have to use the string builder
// Add current piece to string builder
if (endpos - startpos > 0) {
bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, endpos - startpos);
}
// Generate the field and clear the string builder
field = bstr_builder_to_str(urlenp->_bb);
bstr_builder_clear(urlenp->_bb);
} else {
// We only have the current piece to work with, so
// no need to involve the string builder
field = bstr_memdup((char *) data + startpos, endpos - startpos);
}
// Process the field differently, depending on the current state
if (urlenp->_state == HTP_URLENP_STATE_KEY) {
// Store the name for later
urlenp->_name = field;
if (urlenp->_complete) {
// Param with key but no value
htp_urlen_param_t *param = calloc(1, sizeof (htp_urlen_param_t));
param->name = urlenp->_name;
urlenp->_name = NULL;
param->value = bstr_cstrdup("");
if (urlenp->decode_url_encoding) {
htp_uriencoding_normalize_inplace(param->name);
}
table_add(urlenp->params, param->name, param);
#ifdef HTP_DEBUG
fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(param->name), bstr_len(param->name));
fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(param->value), bstr_len(param->value));
#endif
}
} else {
// Param with key and value
htp_urlen_param_t *param = calloc(1, sizeof (htp_urlen_param_t));
param->name = urlenp->_name;
urlenp->_name = NULL;
param->value = field;
if (urlenp->decode_url_encoding) {
htp_uriencoding_normalize_inplace(param->name);
htp_uriencoding_normalize_inplace(param->value);
}
table_add(urlenp->params, param->name, param);
#ifdef HTP_DEBUG
fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(param->name), bstr_len(param->name));
fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(param->value), bstr_len(param->value));
#endif
}
} else {
// Make a copy of the data and store it in an array for later
if (endpos - startpos > 0) {
bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, endpos - startpos);
}
}
}
/**
 * Allocates a URLENCODED parser and gives it its default settings:
 * '&' as the argument separator, URL-decoding switched on, and the
 * state machine positioned at the start of a key.
 *
 * @return New parser, or NULL on memory allocation failure.
 */
htp_urlenp_t *htp_urlenp_create() {
    htp_urlenp_t *parser = calloc(1, sizeof (htp_urlenp_t));
    if (parser == NULL) return NULL;

    // Plain defaults first; they need no cleanup if a later step fails
    parser->_state = HTP_URLENP_STATE_KEY;
    parser->decode_url_encoding = 1;
    parser->argument_separator = '&';

    parser->params = table_create(HTP_URLENP_DEFAULT_PARAMS_SIZE);
    if (parser->params != NULL) {
        parser->_bb = bstr_builder_create();
        if (parser->_bb != NULL) {
            return parser;
        }

        // The string builder could not be created: undo the table
        table_destroy(parser->params);
    }

    free(parser);
    return NULL;
}
/**
 * Tears down a URLENCODED parser: the pending name (if any), the string
 * builder, every stored parameter with its name and value, the parameter
 * table, and finally the parser structure. Passing NULL is a no-op.
 *
 * @param urlenp
 */
void htp_urlenp_destroy(htp_urlenp_t *urlenp) {
    if (urlenp != NULL) {
        htp_urlen_param_t *entry = NULL;

        // A name may still be waiting for its value
        if (urlenp->_name != NULL) {
            bstr_free(urlenp->_name);
        }

        bstr_builder_destroy(urlenp->_bb);

        // Release the parameters one by one, then the table that held them
        table_iterator_reset(urlenp->params);
        for (;;) {
            if (table_iterator_next(urlenp->params, (void **) & entry) == NULL) break;

            bstr_free(entry->name);
            bstr_free(entry->value);
            free(entry);
        }

        table_destroy(urlenp->params);

        free(urlenp);
    }
}
/**
 * Ends a streaming parse started with htp_urlenp_parse_partial() by
 * running a "complete" parse over an empty chunk, which makes the parser
 * turn any data it is still holding into parameters.
 *
 * @param urlenp
 * @return Success indication
 */
int htp_urlenp_finalize(htp_urlenp_t *urlenp) {
    // No new bytes: the call only flushes what is already buffered
    int status = htp_urlenp_parse_complete(urlenp, NULL, 0);

    return status;
}
/**
 * Parses a chunk that is known to hold all of the data there will ever
 * be. The parser is marked as complete before parsing starts, so callers
 * using this method should not invoke the finalization method afterwards.
 *
 * @param urlenp
 * @param data
 * @param len
 * @return The result of htp_urlenp_parse_partial() for the chunk
 */
int htp_urlenp_parse_complete(htp_urlenp_t *urlenp, unsigned char *data, size_t len) {
    int status;

    // TODO urlenp->complete must not already be 1
    urlenp->_complete = 1;

    status = htp_urlenp_parse_partial(urlenp, data, len);

    return status;
}
/**
 * Parses the provided data chunk, keeping state to allow streaming parsing, i.e., the
 * parsing where only partial information is available at any one time. The method
 * htp_urlenp_finalize() must be invoked at the end to finalize parsing.
 *
 * A key ends at "=" (a value follows) or at the argument separator (the key has no
 * value). In the latter case the key is recorded with an empty value and the parser
 * stays ready for the next key, so that "a&b=c" yields the two parameters "a" (empty)
 * and "b" = "c" rather than a single parameter "a" = "b=c".
 *
 * @param urlenp
 * @param data May be NULL, in which case len is ignored and treated as 0
 * @param len
 * @return HTP_OK
 */
int htp_urlenp_parse_partial(htp_urlenp_t *urlenp, unsigned char *data, size_t len) {
    size_t startpos = 0;
    size_t pos = 0;
    int c;

    if (data == NULL) len = 0;

    for (;;) {
        // Get the next character, or -1
        if (pos < len) c = data[pos];
        else c = -1;

        switch (urlenp->_state) {
            // Process key
            case HTP_URLENP_STATE_KEY:
                // Look for =, argument separator, or end of input
                if ((c == '=') || (c == urlenp->argument_separator) || (c == -1)) {
                    // Data from startpos to pos
                    htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c);

                    if (c != -1) {
                        startpos = pos + 1;

                        if (c == '=') {
                            // Next state
                            urlenp->_state = HTP_URLENP_STATE_VALUE;
                        } else {
                            // The key was ended by the separator, so it has
                            // no value. If it is still pending, complete it
                            // with a zero-length value taken at the current
                            // position.
                            if (urlenp->_name != NULL) {
                                urlenp->_state = HTP_URLENP_STATE_VALUE;
                                htp_urlenp_add_field_piece(urlenp, data, pos, pos, c);
                            }

                            // Another key follows
                            urlenp->_state = HTP_URLENP_STATE_KEY;
                        }
                    }
                }
                break;

            // Process value
            case HTP_URLENP_STATE_VALUE:
                // Look for argument separator or end of input
                if ((c == urlenp->argument_separator) || (c == -1)) {
                    // Data from startpos to pos
                    htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c);

                    if (c != -1) {
                        // Next state
                        startpos = pos + 1;
                        urlenp->_state = HTP_URLENP_STATE_KEY;
                    }
                }
                break;
        }

        // Have we reached the end of input?
        if (c == -1) break;

        pos++;
    }

    return HTP_OK;
}

@ -0,0 +1,73 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _HTP_URLENCODED_H
#define _HTP_URLENCODED_H
typedef struct htp_urlenp_t htp_urlenp_t;
typedef struct htp_urlen_param_t htp_urlen_param_t;
#include "htp.h"
#define HTP_URLENP_DEFAULT_PARAMS_SIZE 32
#define HTP_URLENP_STATE_KEY 1
#define HTP_URLENP_STATE_VALUE 2
/**
 * This is the main URLENCODED parser structure. It is used to store
 * parser configuration, temporary parsing data, as well as the parameters.
 * Instances are created by htp_urlenp_create() and released by
 * htp_urlenp_destroy().
 */
struct htp_urlenp_t {
/** The character used to separate parameters. Defaults to & and should
 * not be changed without good reason.
 */
unsigned char argument_separator;
/** Whether to perform URL-decoding on parameters. Defaults to 1 (enabled). */
int decode_url_encoding;
/** This table contains the list of parameters (htp_urlen_param_t), indexed by name. */
table_t *params;
// Private fields; they are used during the parsing process
// Current parser state: HTP_URLENP_STATE_KEY or HTP_URLENP_STATE_VALUE.
int _state;
// Set to 1 by htp_urlenp_parse_complete() to signal that no more data will follow.
int _complete;
// Name of the parameter currently being parsed, kept until its value is known.
bstr *_name;
// String builder that collects the pieces of a field that spans several data chunks.
bstr_builder_t *_bb;
};
/**
 * Holds one application/x-www-form-urlencoded parameter. Instances are
 * stored in htp_urlenp_t::params and are released, together with both
 * strings, by htp_urlenp_destroy().
 */
struct htp_urlen_param_t {
/** Parameter name. */
bstr *name;
/** Parameter value; an empty string when the parameter has a name but no value. */
bstr *value;
};
/* Parser lifecycle: create returns NULL on memory allocation failure; destroy accepts NULL. */
htp_urlenp_t *htp_urlenp_create();
void htp_urlenp_destroy(htp_urlenp_t *urlenp);
/* Configuration setters.
 * NOTE(review): no definition of these two functions was seen alongside the
 * other htp_urlenp_* functions - confirm that they are implemented.
 */
void htp_urlenp_set_argument_separator(htp_urlenp_t *urlenp, unsigned char argument_separator);
void htp_urlenp_set_decode_url_encoding(htp_urlenp_t *urlenp, int decode_url_encoding);
/* Parsing: use parse_partial for each chunk followed by one finalize call,
 * or a single parse_complete call when all of the data is available at once.
 */
int htp_urlenp_parse_partial(htp_urlenp_t *urlenp, unsigned char *data, size_t len);
int htp_urlenp_parse_complete(htp_urlenp_t *urlenp, unsigned char *data, size_t len);
int htp_urlenp_finalize(htp_urlenp_t *urlenp);
#endif /* _HTP_URLENCODED_H */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,94 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
/*
Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
#include "utf8_decoder.h"
// Lookup table for the strict UTF-8 decoder (utf8_decode).
// The first 256 entries map an input byte to a character class; the entries
// that follow form the state transition table, which the decoder reads at
// index 256 + state*16 + class.
static const uint8_t utf8d[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};
// Lookup table for the lenient UTF-8 decoder (utf8_decode_allow_overlong).
// Same layout as utf8d: 256 byte-to-class entries followed by the state
// transition rows. Only the classes of bytes c0, c1, e0 and f0 differ (see
// the "changed" notes below); the transition rows are the same as in utf8d.
static const uint8_t utf8d_allow_overlong[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df; changed c0 and c1
0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef; changed e0
0x6,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff; changed f0
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};
/**
 * Feeds one byte to the strict UTF-8 decoder.
 *
 * @param state decoder state, updated in place; UTF8_ACCEPT marks the
 *        start of a new character
 * @param codep code point accumulator, updated in place
 * @param byte next input byte; used directly as an index into the first
 *        256 entries of utf8d
 * @return the new decoder state (same value as *state)
 */
inline uint32_t utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
    const uint32_t cls = utf8d[byte];

    if (*state == UTF8_ACCEPT) {
        // First byte of a character: keep only the payload bits of the lead byte
        *codep = (0xff >> cls) & (byte);
    } else {
        // Continuation byte: shift in six more bits
        *codep = (byte & 0x3fu) | (*codep << 6);
    }

    *state = utf8d[256 + (*state * 16) + cls];

    return *state;
}
/**
 * Feeds one byte to the lenient UTF-8 decoder, which does not treat
 * overlong characters as invalid.
 *
 * Both the byte classification and the state transition are read from
 * utf8d_allow_overlong, so this decoder depends on a single table.
 *
 * @param state decoder state, updated in place; UTF8_ACCEPT marks the
 *        start of a new character
 * @param codep code point accumulator, updated in place
 * @param byte next input byte; used directly as an index into the first
 *        256 entries of the table
 * @return the new decoder state (same value as *state)
 */
inline uint32_t utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte) {
    uint32_t type = utf8d_allow_overlong[byte];

    *codep = (*state != UTF8_ACCEPT) ?
        (byte & 0x3fu) | (*codep << 6) :
        (0xff >> type) & (byte);

    // Use this decoder's own transition rows rather than those of utf8d
    *state = utf8d_allow_overlong[256 + *state*16 + type];

    return *state;
}

@ -0,0 +1,55 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _UTF8_DECODER_H
#define _UTF8_DECODER_H
/* HTP changes:
*
* - Changed the name of the function from "decode" to "utf8_decode"
* - Created a separate header file
* - Copied the licence from the web page
* - Created a copy of the data and function "utf8_decode_allow_overlong", which
* does not treat overlong characters as invalid.
*/
/*
Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdint.h>
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
uint32_t utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
uint32_t utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte);
#endif /* _UTF8_DECODER_H */

2
htp/m4/.gitignore vendored

@ -0,0 +1,2 @@
*
!.gitignore

@ -0,0 +1,12 @@
# Test driver for libhtp; as a check_PROGRAMS entry it is only built by "make check".
check_PROGRAMS = main
main_SOURCES = main.c test.c test.h test-tcpick.c
# Links against the static archive in the library's build directory, plus zlib.
LDADD = ../htp/.libs/libhtp.a -lz
AM_CFLAGS = -g -O2
# Disabled rule that would run the driver as part of "make check".
#check: all
# ./main

@ -0,0 +1,14 @@
>>>
GET / HTTP/1.0
User-Agent: Mozilla
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Content-Length: 12
Hello World!

@ -0,0 +1,34 @@
>>>
POST / HTTP/1.0
Content-Length: 12
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla
p=0123456789
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Content-Length: 12
Hello World!
>>>
GET / HTTP/1.0
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Transfer-Encoding: chunked
9
012345678
1
9
0

@ -0,0 +1,24 @@
>>>
POST / HTTP/1.1
Transfer-Encoding: chunked
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla
Cookie: 1
9
012345678
1
9
0
Cookie: 2
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Content-Length: 12
Hello World!

@ -0,0 +1,37 @@
>>>
POST / HTTP/1.1
User-Agent: curl/7.18.2 (i486-pc-linux-gnu) libcurl/7.18.2 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.8 libssh2/0.18
Accept: */*
Content-Length: 216
Expect: 100-continue
Content-Type: multipart/form-data; boundary=----------------------------07869933ca1b
<<<
HTTP/1.1 100 Continue
>>>
------------------------------07869933ca1b
Content-Disposition: form-data; name="file"; filename="404.php"
Content-Type: application/octet-stream
>>>
<? echo "404"; ?>
>>>
------------------------------07869933ca1b--
<<<
HTTP/1.1 200 OK
Date: Tue, 03 Nov 2009 09:27:47 GMT
Server: Apache
Last-Modified: Thu, 30 Apr 2009 12:20:49 GMT
ETag: "2dcada-2d-468c4b9ec6a40"
Accept-Ranges: bytes
Content-Length: 45
Vary: Accept-Encoding
Content-Type: text/html
<html><body><h1>It works!</h1></body></html>

@ -0,0 +1,9 @@
>>>
GET http://username:password@www.example.com:8080/sub/folder/file.jsp?p=q#f HTTP/1.0
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!

@ -0,0 +1,15 @@
>>>
GET /first HTTP/1.1
GET /second HTTP/1.1
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!
HTTP/1.0 200 OK
Content-Length: 12
Hello World!

@ -0,0 +1,18 @@
>>>
GET /first HTTP/1.1
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!
>>>
GET /second HTTP/1.1
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!

@ -0,0 +1,14 @@
>>>
GET / HTTP/1.0
>>>
Host: www.example.com
>>>
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!

@ -0,0 +1,34 @@
>>>
GET / HTTP/1.1
Host: www.example.com
<<<
HTTP/1.0 200 OK
Content-Length: 12
Hello World!
>>>
GET / HTTP/1.1
Host: www.example.com...
<<<
HTTP/1.0 200 OK
Content-Length: 12
>>>
GET / HTTP/1.1
Host: WwW.ExamPle.cOm
<<<
HTTP/1.0 200 OK
Content-Length: 12
>>>
GET / HTTP/1.1
Host: www.example.com:80
<<<
HTTP/1.0 200 OK
Content-Length: 12

@ -0,0 +1,13 @@
>>>
GET / HTTP/1.0
User-Agent: Mozilla
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Hello World!

@ -0,0 +1,21 @@
>>>
CONNECT www.ssllabs.com:443 HTTP/1.0
<<<
HTTP/1.1 405 Method Not Allowed
Date: Sat, 12 Dec 2009 05:08:45 GMT
Server: Apache/2.2.14 (Unix) mod_ssl/2.2.14 OpenSSL/0.9.8g PHP/5.3.0
Allow: GET,HEAD,POST,OPTIONS,TRACE
Vary: Accept-Encoding
Content-Length: 230
Connection: close
Content-Type: text/html; charset=iso-8859-1
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>405 Method Not Allowed</title>
</head><body>
<h1>Method Not Allowed</h1>
<p>The requested method CONNECT is not allowed for the URL /.</p>
</body></html>

@ -0,0 +1,32 @@
>>>
CONNECT www.feistyduck.com:80 HTTP/1.1
Host: www.feistyduck.com
HEAD / HTTP/1.0
<<<
HTTP/1.1 301 Moved Permanently
Date: Wed, 06 Jan 2010 17:41:34 GMT
Server: Apache
Location: https://www.feistyduck.com/
Vary: Accept-Encoding
Content-Length: 235
Content-Type: text/html; charset=iso-8859-1
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="https://www.feistyduck.com/">here</a>.</p>
</body></html>
HTTP/1.1 301 Moved Permanently
Date: Wed, 06 Jan 2010 17:41:46 GMT
Server: Apache
Location: https://www.feistyduck.com/
Vary: Accept-Encoding
Connection: close
Content-Type: text/html; charset=iso-8859-1

@ -0,0 +1,41 @@
>>>
POST /upload.php HTTP/1.1
Host: 192.168.3.100:8080
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: en-us,en;q=0.5
Accept-Encoding: gzip,deflate
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive: 300
Connection: keep-alive
Content-Type: multipart/form-data; boundary=---------------------------41184676334
Content-Length: 610
-----------------------------41184676334
Content-Disposition: form-data; name="field1"
0123456789
-----------------------------41184676334
Content-Disposition: form-data; name="field2"
9876543210
-----------------------------41184676334
Content-Disposition: form-data; name="file1"; filename="New Text Document.txt"
Content-Type: text/plain
FFFFFFFFFFFFFFFFFFFFFFFFFFFF
-----------------------------41184676334
Content-Disposition: form-data; name="file2"; filename="New Text Document.txt"
Content-Type: text/plain
FFFFFFFFFFFFFFFFFFFFFFFFFFFF
-----------------------------41184676334--
<<<
HTTP/1.0 200 OK
Date: Mon, 31 Aug 2009 20:25:50 GMT
Server: Apache
Connection: close
Content-Type: text/html
Hello World!

@ -0,0 +1,11 @@
>>>
GET /../../images.gif HTTP/1.1
Host: www.ExAmPlE.cOM
<<<
HTTP/1.1 200 OK
Content-Type: text/html
Content-Length: 12
Hello World!

File diff suppressed because it is too large Load Diff

@ -0,0 +1,335 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <dirent.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include "../htp/htp.h"
#define CLIENT 1
#define SERVER 2
static int parse_filename(const char *filename, char **remote_addr, char **local_addr) {
char *copy = strdup(filename);
char *p, *saveptr;
char *start = copy;
char *q = strrchr(copy, '/');
if (q != NULL) start = q;
q = strrchr(start, '\\');
if (q != NULL) start = q;
int count = 0;
p = strtok_r(start, "_", &saveptr);
while (p != NULL) {
count++;
// printf("%i %s\n", count, p);
switch (count) {
case 3:
*remote_addr = strdup(p);
break;
case 4:
*local_addr = strdup(p);
break;
}
p = strtok_r(NULL, "_", &saveptr);
}
free(copy);
return 0;
}
/**
 * Parses a chunk header line: the number that follows the first ']' is
 * stored as the offset, and the number that follows the next '(' is
 * stored as the length.
 *
 * @param buf NUL-terminated header line
 * @param response_offset receives the number found after ']'
 * @param response_len receives the number found after '('
 * @return 1 on success, -1 if the ']' or the '(' delimiter is missing
 */
static int parse_chunk_info(char *buf, size_t *response_offset, size_t *response_len) {
    char *p = buf;
    size_t lastlen = 0;

    // Find the closing bracket; test the character (not the pointer)
    // so that the scan stops at the end of the string
    while ((*p != ']') && (*p != '\0')) p++;
    if (*p == '\0') return -1;
    p++;

    // isspace() requires a value representable as unsigned char
    while (isspace((unsigned char) *p)) p++;

    *response_offset = bstr_util_memtoip(p, strlen(p), 10, &lastlen);

    p += lastlen;

    // Find the opening parenthesis that precedes the length
    while ((*p != '(') && (*p != '\0')) p++;
    if (*p == '\0') return -1;
    p++;

    *response_len = bstr_util_memtoip(p, strlen(p), 10, &lastlen);

    return 1;
}
/**
 * Replays one tcpick capture file through a newly created connection parser.
 *
 * The file is a sequence of header lines, each followed by the raw bytes of
 * one chunk. The direction seen first is treated as the request direction.
 * A chunk that is byte-for-byte identical to the previous chunk of the same
 * direction is not fed to the parser again.
 *
 * @param filename capture file; the connection addresses are taken from its name
 * @param cfg parser configuration
 * @param connp receives the parser. It is NULL when the file could not be
 *        processed at all; it is left in place when the parser itself
 *        reported HTP_ERROR, so that the caller can fetch the error.
 * @return 1 on success, -1 on failure.
 */
static int tcpick_run_file(const char *filename, htp_cfg_t *cfg, htp_connp_t **connp) {
    struct timeval tv;
    char buf[1025];
    int first = -1, current = -1;
    char *remote_addr = NULL, *local_addr = NULL;
    char *request_last_chunk = NULL;
    char *response_last_chunk = NULL;
    char *data = NULL;
    size_t request_offset, request_len;
    size_t request_last_len = 0;
    size_t response_offset, response_len;
    size_t response_last_len = 0;
    FILE *f = NULL;
    int destroy_parser = 0;
    int rc = -1;

    // The caller inspects *connp on failure, so it must always be defined
    *connp = NULL;

    if (parse_filename(filename, &remote_addr, &local_addr) < 0) {
        printf("Failed to parse filename: %s\n", filename);
        goto cleanup;
    }

    f = fopen(filename, "rb");
    if (f == NULL) {
        printf("Unable to open file: %s\n", filename);
        goto cleanup;
    }

    gettimeofday(&tv, NULL);

    // Create parser
    *connp = htp_connp_create(cfg);
    if (*connp == NULL) {
        printf("Failed to create connection parser\n");
        goto cleanup;
    }

    // Find all chunks and feed them to the parser
    while (fgets(buf, 1024, f) != NULL) {
        // Ignore empty lines
        if (buf[0] == LF) {
            continue;
        }

        current = (strncmp(buf, "[server", 7) == 0) ? SERVER : CLIENT;

        if (first == -1) {
            // Whoever speaks first decides which side is "remote"
            first = current;

            if (first == SERVER) {
                htp_connp_open(*connp, local_addr, 80, remote_addr, 80, tv.tv_usec);
            } else {
                htp_connp_open(*connp, remote_addr, 80, local_addr, 80, tv.tv_usec);
            }
        }

        int len = 0;

        if (first == current) {
            if (parse_chunk_info(buf, &request_offset, &request_len) < 0) {
                printf("Invalid line: %s", buf);
                destroy_parser = 1;
                goto cleanup;
            }

            len = request_len;
        } else {
            if (parse_chunk_info(buf, &response_offset, &response_len) < 0) {
                printf("Invalid line: %s", buf);
                destroy_parser = 1;
                goto cleanup;
            }

            len = response_len;
        }

        if (len <= 0) {
            printf("Invalid length: %i\n", len);
            destroy_parser = 1;
            goto cleanup;
        }

        data = malloc(len);
        if (data == NULL) {
            printf("Failed to allocate %i bytes\n", len);
            destroy_parser = 1;
            goto cleanup;
        }

        if (fread(data, 1, len, f) != (size_t) len) {
            // Truncated capture
            destroy_parser = 1;
            goto cleanup;
        }

        if (first == current) {
            if ((request_last_chunk == NULL) || (request_len != request_last_len) || (memcmp(data, request_last_chunk, request_len) != 0)) {
                if (htp_connp_req_data(*connp, tv.tv_usec, data, len) == HTP_ERROR) {
                    // Keep the parser so that the caller can report its error
                    goto cleanup;
                }
            }

            request_last_len = request_len;
            free(request_last_chunk);
            request_last_chunk = data;
            data = NULL;
        } else {
            if ((response_last_chunk == NULL) || (response_len != response_last_len) || (memcmp(data, response_last_chunk, response_len) != 0)) {
                if (htp_connp_res_data(*connp, tv.tv_usec, data, len) == HTP_ERROR) {
                    // Keep the parser so that the caller can report its error
                    goto cleanup;
                }
            }

            response_last_len = response_len;
            free(response_last_chunk);
            response_last_chunk = data;
            data = NULL;
        }
    }

    htp_connp_close(*connp, tv.tv_usec);
    rc = 1;

cleanup:
    if (destroy_parser) {
        htp_connp_destroy_all(*connp);
        *connp = NULL;
    }

    // NOTE(review): releasing the chunk buffers and address strings while the
    // parser is still alive mirrors what test_run() does with its own buffers;
    // confirm that the parser keeps no pointers into caller-supplied data.
    free(data);
    free(request_last_chunk);
    free(response_last_chunk);
    free(remote_addr);
    free(local_addr);

    if (f != NULL) fclose(f);

    return rc;
}
/**
 * Writes one transaction to stdout as a single access-log style line:
 * remote address, current local time, request line, response status,
 * response message length, referer and user agent. A missing Referer or
 * User-Agent header is printed as "-".
 *
 * @param connp parser that owns the connection the transaction belongs to
 * @param tx transaction to print
 */
static void print_tx(htp_connp_t *connp, htp_tx_t *tx) {
    // The timestamp is the time of printing, not the time of the transaction
    char timestamp[256];
    time_t now = time(NULL);
    strftime(timestamp, 255, "%d/%b/%Y:%T %z", localtime(&now));

    htp_header_t *agent_header = table_getc(tx->request_headers, "user-agent");
    htp_header_t *referer_header = table_getc(tx->request_headers, "referer");

    // All three strings are heap copies, so they can be released uniformly below
    char *line = bstr_tocstr(tx->request_line);
    char *agent = (agent_header == NULL) ? strdup("-") : bstr_tocstr(agent_header->value);
    char *referer = (referer_header == NULL) ? strdup("-") : bstr_tocstr(referer_header->value);

    printf("%s - - [%s] \"%s\" %i %i \"%s\" \"%s\"\n", connp->conn->remote_addr, timestamp,
        line, tx->response_status_number, tx->response_message_len,
        referer, agent);

    free(referer);
    free(agent);
    free(line);
}
/**
 * Replays one capture file and prints the outcome: either the parser's last
 * error, or the number of transactions followed by one line per transaction.
 *
 * @param filename capture file to replay
 * @param cfg parser configuration
 * @return 1 if the file was parsed, 0 if the parser rejected the traffic,
 *         -1 if the file could not be processed at all.
 */
static int run_file(char *filename, htp_cfg_t *cfg) {
    // Must start out as NULL: tcpick_run_file() can fail before it has
    // stored anything through the pointer
    htp_connp_t *connp = NULL;
    int rc;

    fprintf(stdout, "Running file %s", filename);

    rc = tcpick_run_file(filename, cfg, &connp);
    if (rc < 0) {
        if (connp == NULL) {
            return -1;
        }

        htp_log_t *last_error = htp_connp_get_last_error(connp);
        if (last_error != NULL) {
            printf(" -- failed: %s\n", last_error->msg);
        } else {
            printf(" -- failed: ERROR NOT AVAILABLE\n");
        }

        // The parser is ours to release on this path as well
        htp_connp_destroy_all(connp);

        return 0;
    }

    printf(" -- %i transaction(s)\n", list_size(connp->conn->transactions));

    htp_tx_t *tx = NULL;
    list_iterator_reset(connp->conn->transactions);
    while ((tx = list_iterator_next(connp->conn->transactions)) != NULL) {
        printf(" ");
        print_tx(connp, tx);
    }

    printf("\n");

    htp_connp_destroy_all(connp);

    return 1;
}
/**
 * Replays every file in a directory whose name starts with "tcpick".
 * The result of each individual file is ignored, so one bad capture does not
 * stop the run.
 *
 * @param dirname directory to scan
 * @param cfg parser configuration
 * @return 1 once the directory has been scanned, -1 if it could not be opened.
 */
static int run_directory(char *dirname, htp_cfg_t *cfg) {
    struct dirent *entry;
    char buf[1025];

    DIR *d = opendir(dirname);
    if (d == NULL) {
        printf("Failed to open directory: %s\n", dirname);
        return -1;
    }

    while ((entry = readdir(d)) != NULL) {
        if (strncmp(entry->d_name, "tcpick", 6) != 0) {
            continue;
        }

        // snprintf() always terminates the buffer, unlike strncpy()
        int n = snprintf(buf, sizeof (buf), "%s/%s", dirname, entry->d_name);
        if ((n < 0) || ((size_t) n >= sizeof (buf))) {
            printf("Path too long, skipping: %s/%s\n", dirname, entry->d_name);
            continue;
        }

        run_file(buf, cfg);
    }

    closedir(d);

    return 1;
}
/**
 * Ad-hoc driver: replays every tcpick capture found in a hard-coded
 * directory using a default parser configuration. To replay a single
 * capture instead, call run_file() with the path of that capture.
 *
 * @param argc unused
 * @param argv unused
 * @return always 0
 */
int main_xxx(int argc, char** argv) {
    (void) argc;
    (void) argv;

    htp_cfg_t *default_cfg = htp_config_create();

    run_directory("c:/http_traces/run1/", default_cfg);

    return 0;
}

@ -0,0 +1,375 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include "../htp/htp.h"
#include "test.h"
/**
 * Releases the data buffer held by a test. The test structure itself is not
 * freed, and calling this more than once is harmless.
 *
 * @param test
 */
static void test_destroy(test_t *test) {
    // free(NULL) is a no-op, so the buffer needs no separate NULL check
    free(test->buf);
    test->buf = NULL;
}
/**
 * Tells whether a chunk boundary line starts at the given position. A
 * boundary is "<<<" (server data follows) or ">>>" (client data follows),
 * terminated by LF or CRLF.
 *
 * @param test
 * @param pos offset into the test buffer
 * @return SERVER or CLIENT when a boundary starts at pos, zero when it does
 *         not, and a negative value when too little data remains to decide.
 */
static int test_is_boundary(test_t *test, int pos) {
    // The marker and at least one line-ending byte must fit in the buffer
    if (pos + 3 >= test->len) return -1;

    const char *at = test->buf + pos;
    int direction;

    // The first character selects the direction; the next two must repeat it
    if (at[0] == '<') {
        direction = SERVER;
    } else if (at[0] == '>') {
        direction = CLIENT;
    } else {
        return 0;
    }

    if ((at[1] != at[0]) || (at[2] != at[0])) return 0;

    // Plain LF line ending
    if (at[3] == '\n') return direction;

    // CRLF line ending; the LF needs one more byte of data
    if (at[3] == '\r') {
        if (pos + 4 >= test->len) return -1;
        if (at[4] == '\n') return direction;
    }

    return 0;
}
/**
 * Initializes test by loading the entire data file into a memory block.
 *
 * @param test structure to initialize; any previous contents are discarded
 * @param filename file to load
 * @return 1 on success, -1 if the file cannot be opened or examined or the
 *         buffer cannot be allocated, -2 if the file cannot be read in full.
 *         On failure test->buf is NULL and no file descriptor is left open.
 */
static int test_init(test_t *test, const char *filename) {
    struct stat st;
    ssize_t bytes_read;
    int fd;

    memset(test, 0, sizeof (test_t));

    fd = open(filename, O_RDONLY | O_BINARY);
    if (fd < 0) return -1;

    if (fstat(fd, &st) < 0) {
        close(fd);
        return -1;
    }

    test->buf = malloc(st.st_size);
    // malloc(0) may legitimately return NULL, so only a non-empty file counts
    if ((test->buf == NULL) && (st.st_size > 0)) {
        close(fd);
        return -1;
    }

    // read() may return less than requested; keep going until the whole
    // file is in memory or the read fails
    while (test->len < (size_t) st.st_size) {
        bytes_read = read(fd, test->buf + test->len, st.st_size - test->len);
        if (bytes_read <= 0) break;
        test->len += bytes_read;
    }

    close(fd);

    if (test->len != (size_t) st.st_size) {
        free(test->buf);
        test->buf = NULL;
        test->len = 0;
        return -2;
    }

    return 1;
}
/**
 * Rewinds the test so that the next chunk search starts at the beginning of
 * the loaded data.
 *
 * @param test
 */
void test_start(test_t *test) {
test->pos = 0;
}
/**
 * Finds the next data chunk in the given test.
 *
 * @param test
 * @return One if a chunk is found, zero if there are no more chunks in the
 *         test, and a negative value if the data at the current position
 *         does not start with a boundary line. On success, test->chunk
 *         points to the beginning of the chunk, test->chunk_len contains
 *         its length and test->chunk_direction its direction.
 */
static int test_next_chunk(test_t *test) {
    if (test->pos >= test->len) {
        return 0;
    }

    test->chunk = NULL;

    while (test->pos < test->len) {
        // Do we need to start another chunk?
        if (test->chunk == NULL) {
            // Are we at a boundary
            test->chunk_direction = test_is_boundary(test, test->pos);
            if (test->chunk_direction <= 0) {
                // Error
                return -1;
            }

            // Move over the boundary. The marker may be the very last thing
            // in the file, so check the position before looking at the byte.
            test->pos += 4;
            if ((test->pos < test->len) && (test->buf[test->pos] == '\n')) test->pos++;

            if (test->pos >= test->len) {
                // A boundary with no data after it: nothing left to hand out
                test->chunk_len = 0;
                return 0;
            }

            // Start new chunk
            test->chunk = test->buf + test->pos;
            test->chunk_offset = test->pos;
        }

        // Are we at the end of a line?
        if (test->buf[test->pos] == '\n') {
            int r = test_is_boundary(test, test->pos + 1);
            if ((r == CLIENT) || (r == SERVER)) {
                // We got ourselves a chunk
                test->chunk_len = test->pos - test->chunk_offset;

                // Remove one '\r' (in addition to the '\n' that we've already removed),
                // which belongs to the next boundary
                if ((test->chunk_len > 0) && (test->chunk[test->chunk_len - 1] == '\r')) {
                    test->chunk_len--;
                }

                // Position at the next boundary line
                test->pos++;

                return 1;
            }
        }

        test->pos++;
    }

    // End of data: whatever follows the last boundary is the final chunk
    if (test->chunk != NULL) {
        test->chunk_len = test->pos - test->chunk_offset;
        return 1;
    }

    return 0;
}
static int parse_filename(const char *filename, char **remote_addr, int *remote_port, char **local_addr, int *local_port) {
char *copy = strdup(filename);
char *p, *saveptr;
char *start = copy;
char *q = strrchr(copy, '/');
if (q != NULL) start = q;
q = strrchr(start, '\\');
if (q != NULL) start = q;
int count = 0;
p = strtok_r(start, "_", &saveptr);
while (p != NULL) {
count++;
// printf("%i %s\n", count, p);
switch (count) {
case 2:
*remote_addr = strdup(p);
break;
case 3:
*remote_port = atoi(p);
break;
case 4:
*local_addr = strdup(p);
case 5:
*local_port = atoi(p);
break;
}
p = strtok_r(NULL, "_", &saveptr);
}
free(copy);
return 0;
}
/**
 * Runs a single test: loads <testsdir>/<testname>, feeds every chunk in it
 * to a newly created connection parser and then closes the connection.
 *
 * @param testsdir directory that contains the test files
 * @param testname file name of the test; names starting with "stream" are
 *        expected to carry the connection addresses and ports
 * @param cfg parser configuration
 * @param connp receives the parser created for the test. It is NULL when the
 *        test file could not be loaded; after a parsing failure it is left
 *        in place so that the caller can inspect it.
 * @return 1 on success and a negative value on failure: the code returned by
 *         test_init() (or -1 for an over-long path) when the file cannot be
 *         loaded, -1 when a second inbound chunk would have to be buffered,
 *         and -101 to -104 when the parser reports a stream error.
 */
int test_run(const char *testsdir, const char *testname, htp_cfg_t *cfg, htp_connp_t **connp) {
    char filename[1025];
    test_t test;
    struct timeval tv_start, tv_end;
    int rc;

    *connp = NULL;

    // snprintf() always terminates the buffer, unlike strncpy()
    rc = snprintf(filename, sizeof (filename), "%s/%s", testsdir, testname);
    if ((rc < 0) || ((size_t) rc >= sizeof (filename))) {
        fprintf(stderr, "Test file name is too long: %s/%s\n", testsdir, testname);
        return -1;
    }

    printf("Filename: %s\n", filename);

    // Initialize test
    rc = test_init(&test, filename);
    if (rc < 0) {
        return rc;
    }

    gettimeofday(&tv_start, NULL);

    test_start(&test);

    // Create parser
    *connp = htp_connp_create(cfg);
    if (*connp == NULL) {
        fprintf(stderr, "Failed to create connection parser\n");
        exit(1);
    }

    htp_connp_set_user_data(*connp, (void *) 0x02);

    // Does the filename contain connection metadata?
    if (strncmp(testname, "stream", 6) == 0) {
        // It does; use it
        char *remote_addr, *local_addr;
        int remote_port, local_port;

        parse_filename(testname, &remote_addr, &remote_port, &local_addr, &local_port);
        htp_connp_open(*connp, (const char *) remote_addr, remote_port, (const char *) local_addr, local_port, tv_start.tv_usec);
        free(remote_addr);
        free(local_addr);
    } else {
        // No connection metadata; provide some fake information instead
        htp_connp_open(*connp, (const char *) "127.0.0.1", 10000, (const char *) "127.0.0.1", 80, tv_start.tv_usec);
    }

    // Find all chunks and feed them to the parser. When the parser answers
    // STREAM_STATE_DATA_OTHER, the unconsumed remainder of that chunk is
    // remembered here and offered again later.
    int in_data_other = 0;
    char *in_data = NULL;
    size_t in_data_len = 0;
    size_t in_data_offset = 0;

    int out_data_other = 0;
    char *out_data = NULL;
    size_t out_data_len = 0;
    size_t out_data_offset = 0;

    for (;;) {
        if (test_next_chunk(&test) <= 0) {
            break;
        }

        if (test.chunk_direction == CLIENT) {
            if (in_data_other) {
                test_destroy(&test);
                fprintf(stderr, "Unable to buffer more than one inbound chunk.\n");
                return -1;
            }

            rc = htp_connp_req_data(*connp, tv_start.tv_usec, test.chunk, test.chunk_len);
            if (rc == STREAM_STATE_ERROR) {
                test_destroy(&test);
                return -101;
            }

            if (rc == STREAM_STATE_DATA_OTHER) {
                // Parser needs to see the outbound stream in order to continue
                // parsing the inbound stream.
                in_data_other = 1;
                in_data = test.chunk;
                in_data_len = test.chunk_len;
                in_data_offset = htp_connp_req_data_consumed(*connp);
            }
        } else {
            if (out_data_other) {
                // Retry the outbound remainder before the new outbound chunk
                rc = htp_connp_res_data(*connp, tv_start.tv_usec, out_data + out_data_offset, out_data_len - out_data_offset);
                if (rc == STREAM_STATE_ERROR) {
                    test_destroy(&test);
                    return -104;
                }

                out_data_other = 0;
            }

            rc = htp_connp_res_data(*connp, tv_start.tv_usec, test.chunk, test.chunk_len);
            if (rc == STREAM_STATE_ERROR) {
                test_destroy(&test);
                return -102;
            }

            if (rc == STREAM_STATE_DATA_OTHER) {
                // Parser cannot continue with the outbound stream yet;
                // remember the unconsumed part and retry it later.
                out_data_other = 1;
                out_data = test.chunk;
                out_data_len = test.chunk_len;
                out_data_offset = htp_connp_res_data_consumed(*connp);
            }

            if (in_data_other) {
                // The outbound data has been seen; resume the inbound remainder
                rc = htp_connp_req_data(*connp, tv_start.tv_usec, in_data + in_data_offset, in_data_len - in_data_offset);
                if (rc == STREAM_STATE_ERROR) {
                    test_destroy(&test);
                    return -103;
                }

                in_data_other = 0;
            }
        }
    }

    // NOTE(review): only a pending outbound remainder is flushed here; a
    // pending inbound remainder is dropped -- confirm this is intended.
    if (out_data_other) {
        rc = htp_connp_res_data(*connp, tv_start.tv_usec, out_data + out_data_offset, out_data_len - out_data_offset);
        if (rc == STREAM_STATE_ERROR) {
            test_destroy(&test);
            return -104;
        }

        out_data_other = 0;
    }

    gettimeofday(&tv_end, NULL);

    // Close the connection
    htp_connp_close(*connp, tv_end.tv_usec);

    // Clean up
    test_destroy(&test);

    return 1;
}

@ -0,0 +1,48 @@
/*
* LibHTP (http://www.libhtp.org)
* Copyright 2009,2010 Ivan Ristic <ivanr@webkreator.com>
*
* LibHTP is an open source product, released under terms of the General Public Licence
* version 2 (GPLv2). Please refer to the file LICENSE, which contains the complete text
* of the license.
*
* In addition, there is a special exception that allows LibHTP to be freely
* used with any OSI-approved open source licence. Please refer to the file
* LIBHTP_LICENSING_EXCEPTION for the full text of the exception.
*
*/
#ifndef _TEST_H
#define _TEST_H
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#define UNKNOWN 0
#define CLIENT 1
#define SERVER 2
#ifndef O_BINARY
#define O_BINARY 0
#endif
/* State of one test: the raw contents of a test file plus a cursor that
 * walks through its chunks. */
typedef struct test_t test_t;
struct test_t {
/* Entire test file loaded into memory */
char *buf;
/* Current read position, as an offset into buf */
size_t pos;
/* Number of bytes held in buf */
size_t len;
/* Start of the most recently found chunk; points into buf */
char *chunk;
/* Offset of that chunk from the start of buf */
size_t chunk_offset;
/* Length of that chunk in bytes */
size_t chunk_len;
/* CLIENT or SERVER, taken from the boundary line that precedes the chunk */
int chunk_direction;
};
/* Loads testsdir/testname, feeds its chunks to a new parser stored in *connp,
 * and returns 1 on success or a negative value on failure. */
int test_run(const char *testsdir, const char *testname, htp_cfg_t *cfg, htp_connp_t **connp);
#endif /* _TEST_H */

@ -0,0 +1,29 @@
#!/bin/bash
#
# Replaces the $REVISION_MISSING placeholder in ./htp/htp.c with the revision
# reported by svnversion. Must be run from the main libhtp directory.
# Exits with a non-zero status when a precondition is not met.

# Are we in the correct working directory?
if [ ! -f ./htp/htp.c ]
then
    echo "ERROR: Please invoke this script from the main directory (of libhtp)"
    exit 1
fi

# Test for the presence of svnversion
if ! command -v svnversion > /dev/null 2>&1
then
    echo "ERROR: Unable to retrieve the revision number because 'svnversion' could not be found"
    exit 1
fi

# Test for the presence of sed
if ! command -v sed > /dev/null 2>&1
then
    echo "ERROR: Unable to retrieve the revision number because 'sed' could not be found"
    exit 1
fi

# Retrieve the revision number
REV=$(svnversion -n)

# The backslash keeps the shell from expanding $REVISION_MISSING, so sed sees
# the placeholder text itself
sed -e "s/\$REVISION_MISSING/$REV/" -i ./htp/htp.c

@ -187,8 +187,14 @@ win32-syslog.h
# set the include path found by configure
INCLUDES= $(all_includes)
# the library search path.
suricata_LDFLAGS = $(all_libraries)
#suricata_LDADD = -lnetfilter_queue -lpthread -lpcre
suricata_LDFLAGS = $(all_libraries)
# When BUILD_LIBHTP is set, link suricata against the libhtp libtool archive
# under htp/htp in the build tree and add htp/ to the include path.
if BUILD_LIBHTP
suricata_LDADD = $(top_builddir)/htp/htp/libhtp.la
INCLUDES += -I$(top_builddir)/htp
endif
#suricata_CFLAGS = -Wall -fno-strict-aliasing

Loading…
Cancel
Save