Compare commits

..

13 Commits
v0.2 ... v0.2.2

Author SHA1 Message Date
Aldo Cortesi
f8e10bd6ae Bump version. 2012-10-31 22:26:09 +13:00
Aldo Cortesi
6517d9e717 More info on disconnect exception. 2012-10-14 09:03:23 +13:00
Aldo Cortesi
77869634e2 Limit reads to block length. 2012-10-09 16:25:15 +13:00
Aldo Cortesi
15679e010d Add a settimeout method to tcp.BaseHandler. 2012-10-01 11:30:02 +13:00
Aldo Cortesi
064b4c8001 Make cleanBin escape carriage returns.
We get confusing output on terminals if we leave \r unescaped.
2012-09-27 10:59:46 +12:00
Aldo Cortesi
b308824193 Create netlib.utils, move cleanBin and hexdump from libmproxy.utils. 2012-09-24 11:21:48 +12:00
Aldo Cortesi
3a21e28bf1 Split FileLike into Writer and Reader, and add logging functionality. 2012-09-24 11:10:21 +12:00
Aldo Cortesi
8a6cca530c Don't create fresh FileLike objects when converting to SSL 2012-09-24 10:47:41 +12:00
Aldo Cortesi
1c80c2fdd7 Add a collection of standard User-Agent strings.
These will be used in both mitmproxy and pathod.
2012-09-01 23:04:44 +12:00
Aldo Cortesi
33557245bf v0.2.1 2012-08-23 12:57:22 +12:00
Aldo Cortesi
877a3e2062 Add a get_first convenience function to ODict. 2012-08-18 18:14:13 +12:00
Aldo Cortesi
1c21a28e64 read_headers: handle some crashes, return None on invalid data. 2012-07-30 12:50:35 +12:00
Aldo Cortesi
eafa5566c2 Handle disconnects on flush. 2012-07-30 11:30:31 +12:00
12 changed files with 337 additions and 56 deletions

14
README
View File

@@ -1,11 +1,7 @@
Netlib is a collection of network utility classes, used by pathod and mitmproxy
projects. It differs from other projects in some fundamental respects, because
both pathod and mitmproxy often need to violate standards. This means that
protocols are implemented as small, well-contained and flexible functions, and
servers are implemented to allow misbehaviour when needed.
At this point, I have no plans to make netlib useful beyond mitmproxy and
pathod. Please get in touch if you think parts of netlib might have broader
utility.
Netlib is a collection of network utility classes, used by the pathod and
mitmproxy projects. It differs from other projects in some fundamental
respects, because both pathod and mitmproxy often need to violate standards.
This means that protocols are implemented as small, well-contained and flexible
functions, and are designed to allow misbehaviour when needed.

View File

@@ -36,8 +36,8 @@ def parse_url(url):
def read_headers(fp):
"""
Read a set of headers from a file pointer. Stop once a blank line
is reached. Return a ODictCaseless object.
Read a set of headers from a file pointer. Stop once a blank line is
reached. Return a ODictCaseless object, or None if headers are invalid.
"""
ret = []
name = ''
@@ -46,6 +46,8 @@ def read_headers(fp):
if not line or line == '\r\n' or line == '\n':
break
if line[0] in ' \t':
if not ret:
return None
# continued header
ret[-1][1] = ret[-1][1] + '\r\n ' + line.strip()
else:
@@ -55,6 +57,8 @@ def read_headers(fp):
name = line[:i]
value = line[i+1:].strip()
ret.append([name, value])
else:
return None
return odict.ODictCaseless(ret)
@@ -282,6 +286,8 @@ def read_response(rfile, method, body_size_limit):
except ValueError:
raise HttpError(502, "Invalid server response: %s"%repr(line))
headers = read_headers(rfile)
if headers is None:
raise HttpError(502, "Invalid headers.")
if code >= 100 and code <= 199:
return read_response(rfile, method, body_size_limit)
if method == "HEAD" or code == 204 or code == 304:

77
netlib/http_uastrings.py Normal file
View File

@@ -0,0 +1,77 @@
"""
A small collection of useful user-agent header strings. These should be
kept reasonably current to reflect common usage.
"""
# Known user agents as (name, shortcut, string) tuples. Each shortcut is a
# single character; get_by_shortcut() matches against it.
UASTRINGS = [
    ("android", "a",
     "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02"),
    ("blackberry", "l",
     "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+"),
    ("bingbot", "b",
     "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"),
    ("chrome", "c",
     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"),
    ("firefox", "f",
     "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1"),
    ("googlebot", "g",
     "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"),
    ("ie9", "i",
     "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))"),
    ("ipad", "p",
     "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko ) Version/5.1 Mobile/9B176 Safari/7534.48.3"),
    ("iphone", "h",
     "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5"),
    ("safari", "s",
     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10"),
]
def get_by_shortcut(s):
    """
        Look up the UASTRINGS entry whose shortcut equals s.

        Returns the first matching (name, shortcut, string) tuple, or None
        when no entry uses that shortcut.
    """
    return next((entry for entry in UASTRINGS if s == entry[1]), None)

View File

@@ -80,6 +80,12 @@ class ODict:
else:
return d
def get_first(self, k, d=None):
    """
        Return the first element of self[k], or d if k is not in self.
    """
    if k not in self:
        return d
    return self[k][0]
def items(self):
return self.lst[:]

View File

@@ -39,27 +39,84 @@ class NetLibDisconnect(Exception): pass
class NetLibTimeout(Exception): pass
class FileLike:
class _FileLike:
BLOCKSIZE = 1024 * 32
def __init__(self, o):
self.o = o
self._log = None
def set_descriptor(self, o):
self.o = o
def __getattr__(self, attr):
return getattr(self.o, attr)
def flush(self):
if hasattr(self.o, "flush"):
self.o.flush()
def start_log(self):
"""
Starts or resets the log.
This will store all bytes read or written.
"""
self._log = []
def stop_log(self):
"""
Stops the log.
"""
self._log = None
def is_logging(self):
return self._log is not None
def get_log(self):
"""
Returns the log as a string.
"""
if not self.is_logging():
raise ValueError("Not logging!")
return "".join(self._log)
def add_log(self, v):
if self.is_logging():
self._log.append(v)
class Writer(_FileLike):
    """
        Write side of a wrapped connection or file-like object.

        Failures from the wrapped object are re-raised as NetLibDisconnect,
        and written data is handed to add_log().
    """
    def flush(self):
        """
            Flush the wrapped object if it has a flush method.

            Raises NetLibDisconnect if a socket.error occurs.
        """
        try:
            if hasattr(self.o, "flush"):
                self.o.flush()
        except socket.error as e:
            raise NetLibDisconnect(str(e))

    def write(self, v):
        """
            Write v to the wrapped object. Empty values are ignored.

            Objects exposing sendall receive v through it, and all of v is
            logged. Anything else goes through write(), and v[:r] is logged,
            where r is write()'s return value.

            Returns whatever the underlying call returns, or None if v is
            empty. Raises NetLibDisconnect on SSL.Error or socket.error.
        """
        if not v:
            return
        try:
            if hasattr(self.o, "sendall"):
                self.add_log(v)
                return self.o.sendall(v)
            r = self.o.write(v)
            # v[:None] is all of v, so a write() returning None logs everything.
            self.add_log(v[:r])
            return r
        # Bind the exception to its own name so it cannot rebind the v argument.
        except (SSL.Error, socket.error) as e:
            raise NetLibDisconnect(str(e))
class Reader(_FileLike):
def read(self, length):
"""
If length is None, we read until connection closes.
If length is -1, we read until connection closes.
"""
result = ''
start = time.time()
while length == -1 or length > 0:
if length == -1 or length > self.BLOCKSIZE:
rlen = self.BLOCKSIZE
else:
rlen = length
try:
data = self.o.read(self.BLOCKSIZE if length == -1 else length)
data = self.o.read(rlen)
except SSL.ZeroReturnError:
break
except SSL.WantReadError:
@@ -79,19 +136,9 @@ class FileLike:
result += data
if length != -1:
length -= len(data)
self.add_log(result)
return result
def write(self, v):
if v:
try:
if hasattr(self.o, "sendall"):
return self.o.sendall(v)
else:
r = self.o.write(v)
return r
except (SSL.Error, socket.error):
raise NetLibDisconnect()
def readline(self, size = None):
result = ''
bytes_read = 0
@@ -137,16 +184,16 @@ class TCPClient:
except SSL.Error, v:
raise NetLibError("SSL handshake error: %s"%str(v))
self.cert = certutils.SSLCert(self.connection.get_peer_certificate())
self.rfile = FileLike(self.connection)
self.wfile = FileLike(self.connection)
self.rfile.set_descriptor(self.connection)
self.wfile.set_descriptor(self.connection)
def connect(self):
try:
addr = socket.gethostbyname(self.host)
connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
connection.connect((addr, self.port))
self.rfile = FileLike(connection.makefile('rb', self.rbufsize))
self.wfile = FileLike(connection.makefile('wb', self.wbufsize))
self.rfile = Reader(connection.makefile('rb', self.rbufsize))
self.wfile = Writer(connection.makefile('wb', self.wbufsize))
except socket.error, err:
raise NetLibError('Error connecting to "%s": %s' % (self.host, err))
self.connection = connection
@@ -180,8 +227,8 @@ class BaseHandler:
wbufsize = -1
def __init__(self, connection, client_address, server):
self.connection = connection
self.rfile = FileLike(self.connection.makefile('rb', self.rbufsize))
self.wfile = FileLike(self.connection.makefile('wb', self.wbufsize))
self.rfile = Reader(self.connection.makefile('rb', self.rbufsize))
self.wfile = Writer(self.connection.makefile('wb', self.wbufsize))
self.client_address = client_address
self.server = server
@@ -206,8 +253,8 @@ class BaseHandler:
self.connection.do_handshake()
except SSL.Error, v:
raise NetLibError("SSL handshake error: %s"%str(v))
self.rfile = FileLike(self.connection)
self.wfile = FileLike(self.connection)
self.rfile.set_descriptor(self.connection)
self.wfile.set_descriptor(self.connection)
def finish(self):
self.finished = True
@@ -241,6 +288,9 @@ class BaseHandler:
def handle(self): # pragma: no cover
raise NotImplementedError
def settimeout(self, n):
self.connection.settimeout(n)
def close(self):
"""
Does a hard close of the socket, i.e. a shutdown, followed by a close.

36
netlib/utils.py Normal file
View File

@@ -0,0 +1,36 @@
def cleanBin(s, fixspacing=False):
    """
        Cleans binary data to make it safe to display.

        Characters with ordinals 32 to 126 are kept as they are. Newlines
        and tabs are kept only when fixspacing is False; when it is True
        they are replaced as well, so every input character maps to exactly
        one output character. Everything else, carriage returns included,
        becomes ".".
    """
    def _display(ch):
        code = ord(ch)
        if 31 < code < 127:
            return ch
        if not fixspacing and ch in "\n\t":
            return ch
        return "."

    return "".join(_display(ch) for ch in s)
def hexdump(s):
    """
        Returns a list of tuples, one per 16-character row of s:

            (offset, hex, str)

        offset is the row's start index as ten hex digits. hex is the row's
        characters as two-digit hex values separated by spaces, padded on
        the right so a short final row is as wide as a full one. str is the
        row passed through cleanBin(part, True).
    """
    # A full row is 16 two-digit values joined by single spaces.
    hexwidth = 16 * 2 + 15
    parts = []
    for offset in range(0, len(s), 16):
        part = s[offset:offset + 16]
        x = " ".join("%.2x" % ord(c) for c in part)
        parts.append(
            ("%.10x" % offset, x.ljust(hexwidth), cleanBin(part, True))
        )
    return parts

View File

@@ -1,4 +1,4 @@
IVERSION = (0, 2)
IVERSION = (0, 2, 2)
VERSION = ".".join(str(i) for i in IVERSION)
NAME = "netlib"
NAMEVERSION = NAME + " " + VERSION

View File

@@ -169,16 +169,20 @@ def test_parse_init_http():
class TestReadHeaders:
def _read(self, data, verbatim=False):
if not verbatim:
data = textwrap.dedent(data)
data = data.strip()
s = cStringIO.StringIO(data)
return http.read_headers(s)
def test_read_simple(self):
data = """
Header: one
Header2: two
\r\n
"""
data = textwrap.dedent(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
h = self._read(data)
assert h.lst == [["Header", "one"], ["Header2", "two"]]
def test_read_multi(self):
@@ -187,10 +191,7 @@ class TestReadHeaders:
Header: two
\r\n
"""
data = textwrap.dedent(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
h = self._read(data)
assert h.lst == [["Header", "one"], ["Header", "two"]]
def test_read_continued(self):
@@ -200,12 +201,19 @@ class TestReadHeaders:
Header2: three
\r\n
"""
data = textwrap.dedent(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
h = self._read(data)
assert h.lst == [["Header", "one\r\n two"], ["Header2", "three"]]
def test_read_continued_err(self):
data = "\tfoo: bar\r\n"
assert self._read(data, True) is None
def test_read_err(self):
data = """
foo
"""
assert self._read(data) is None
def test_read_response():
def tst(data, method, limit):
@@ -248,6 +256,14 @@ def test_read_response():
assert tst(data, "GET", None)[4] == 'foo'
assert tst(data, "HEAD", None)[4] == ''
data = """
HTTP/1.1 200 OK
\tContent-Length: 3
foo
"""
tutils.raises("invalid headers", tst, data, "GET", None)
def test_parse_url():
assert not http.parse_url("")

View File

@@ -0,0 +1,7 @@
from netlib import http_uastrings
def test_get_shortcut():
    # A known shortcut resolves to its entry; an unknown one gives a falsy result.
    found = http_uastrings.get_by_shortcut("c")
    assert found[0] == "chrome"
    missing = http_uastrings.get_by_shortcut("_")
    assert not missing

View File

@@ -85,6 +85,12 @@ class TestODict:
assert self.od.get("one") == ["two"]
assert self.od.get("two") == None
def test_get_first(self):
self.od.add("one", "two")
self.od.add("one", "three")
assert self.od.get_first("one") == "two"
assert self.od.get_first("two") == None
class TestODictCaseless:
def setUp(self):

View File

@@ -28,6 +28,11 @@ class ServerTestBase:
cls.server.shutdown()
@property
def last_handler(self):
return self.server.server.last_handler
class SNIHandler(tcp.BaseHandler):
sni = None
def handle_sni(self, connection):
@@ -63,15 +68,27 @@ class HangHandler(tcp.BaseHandler):
time.sleep(1)
class TimeoutHandler(tcp.BaseHandler):
def handle(self):
self.timeout = False
self.settimeout(0.01)
try:
self.rfile.read(10)
except tcp.NetLibTimeout:
self.timeout = True
class TServer(tcp.TCPServer):
def __init__(self, addr, ssl, q, handler, v3_only=False):
def __init__(self, addr, ssl, q, handler_klass, v3_only=False):
tcp.TCPServer.__init__(self, addr)
self.ssl, self.q = ssl, q
self.v3_only = v3_only
self.handler = handler
self.handler_klass = handler_klass
self.last_handler = None
def handle_connection(self, request, client_address):
h = self.handler(request, client_address, self)
h = self.handler_klass(request, client_address, self)
self.last_handler = h
if self.ssl:
if self.v3_only:
method = tcp.SSLv3_METHOD
@@ -194,12 +211,24 @@ class TestDisconnect(ServerTestBase):
c.close()
class TestServerTimeOut(ServerTestBase):
@classmethod
def makeserver(cls):
return TServer(("127.0.0.1", 0), False, cls.q, TimeoutHandler)
def test_timeout(self):
c = tcp.TCPClient("127.0.0.1", self.port)
c.connect()
time.sleep(0.3)
assert self.last_handler.timeout
class TestTimeOut(ServerTestBase):
@classmethod
def makeserver(cls):
return TServer(("127.0.0.1", 0), False, cls.q, HangHandler)
def test_timeout_client(self):
def test_timeout(self):
c = tcp.TCPClient("127.0.0.1", self.port)
c.connect()
c.settimeout(0.1)
@@ -226,10 +255,21 @@ class TestTCPClient:
class TestFileLike:
def test_blocksize(self):
s = cStringIO.StringIO("1234567890abcdefghijklmnopqrstuvwxyz")
s = tcp.Reader(s)
s.BLOCKSIZE = 2
assert s.read(1) == "1"
assert s.read(2) == "23"
assert s.read(3) == "456"
assert s.read(4) == "7890"
d = s.read(-1)
assert d.startswith("abc") and d.endswith("xyz")
def test_wrap(self):
s = cStringIO.StringIO("foobar\nfoobar")
s = tcp.FileLike(s)
s.flush()
s = tcp.Reader(s)
assert s.readline() == "foobar\n"
assert s.readline() == "foobar"
# Test __getattr__
@@ -237,11 +277,39 @@ class TestFileLike:
def test_limit(self):
s = cStringIO.StringIO("foobar\nfoobar")
s = tcp.FileLike(s)
s = tcp.Reader(s)
assert s.readline(3) == "foo"
def test_limitless(self):
s = cStringIO.StringIO("f"*(50*1024))
s = tcp.FileLike(s)
s = tcp.Reader(s)
ret = s.read(-1)
assert len(ret) == 50 * 1024
def test_readlog(self):
s = cStringIO.StringIO("foobar\nfoobar")
s = tcp.Reader(s)
assert not s.is_logging()
s.start_log()
assert s.is_logging()
s.readline()
assert s.get_log() == "foobar\n"
s.read(1)
assert s.get_log() == "foobar\nf"
s.start_log()
assert s.get_log() == ""
s.read(1)
assert s.get_log() == "o"
s.stop_log()
tutils.raises(ValueError, s.get_log)
def test_writelog(self):
s = cStringIO.StringIO()
s = tcp.Writer(s)
s.start_log()
assert s.is_logging()
s.write("x")
assert s.get_log() == "x"
s.write("x")
assert s.get_log() == "xx"

13
test/test_utils.py Normal file
View File

@@ -0,0 +1,13 @@
from netlib import utils
def test_hexdump():
    # 40 characters of input must produce a non-empty result.
    rows = utils.hexdump("one\0" * 10)
    assert rows
def test_cleanBin():
    # (arguments, expected output) pairs, checked in order.
    cases = [
        (("one",), "one"),
        (("\00ne",), ".ne"),
        (("\nne",), "\nne"),
        (("\nne", True), ".ne"),
    ]
    for args, expected in cases:
        assert utils.cleanBin(*args) == expected