Compare commits

..

13 Commits
v0.2 ... v0.2.2

Author SHA1 Message Date
Aldo Cortesi
f8e10bd6ae Bump version. 2012-10-31 22:26:09 +13:00
Aldo Cortesi
6517d9e717 More info on disconnect exception. 2012-10-14 09:03:23 +13:00
Aldo Cortesi
77869634e2 Limit reads to block length. 2012-10-09 16:25:15 +13:00
Aldo Cortesi
15679e010d Add a settimeout method to tcp.BaseHandler. 2012-10-01 11:30:02 +13:00
Aldo Cortesi
064b4c8001 Make cleanBin escape carriage returns.
We get confusing output on terminals if we leave \r unescaped.
2012-09-27 10:59:46 +12:00
Aldo Cortesi
b308824193 Create netlib.utils, move cleanBin and hexdump from libmproxy.utils. 2012-09-24 11:21:48 +12:00
Aldo Cortesi
3a21e28bf1 Split FileLike into Writer and Reader, and add logging functionality. 2012-09-24 11:10:21 +12:00
Aldo Cortesi
8a6cca530c Don't create fresh FileLike objects when converting to SSL 2012-09-24 10:47:41 +12:00
Aldo Cortesi
1c80c2fdd7 Add a collection of standard User-Agent strings.
These will be used in both mitmproxy and pathod.
2012-09-01 23:04:44 +12:00
Aldo Cortesi
33557245bf v0.2.1 2012-08-23 12:57:22 +12:00
Aldo Cortesi
877a3e2062 Add a get_first convenience function to ODict. 2012-08-18 18:14:13 +12:00
Aldo Cortesi
1c21a28e64 read_headers: handle some crashes, return None on invalid data. 2012-07-30 12:50:35 +12:00
Aldo Cortesi
eafa5566c2 Handle disconnects on flush. 2012-07-30 11:30:31 +12:00
12 changed files with 337 additions and 56 deletions

14
README
View File

@@ -1,11 +1,7 @@
Netlib is a collection of network utility classes, used by pathod and mitmproxy Netlib is a collection of network utility classes, used by the pathod and
projects. It differs from other projects in some fundamental respects, because mitmproxy projects. It differs from other projects in some fundamental
both pathod and mitmproxy often need to violate standards. This means that respects, because both pathod and mitmproxy often need to violate standards.
protocols are implemented as small, well-contained and flexible functions, and This means that protocols are implemented as small, well-contained and flexible
servers are implemented to allow misbehaviour when needed. functions, and are designed to allow misbehaviour when needed.
At this point, I have no plans to make netlib useful beyond mitmproxy and
pathod. Please get in touch if you think parts of netlib might have broader
utility.

View File

@@ -36,8 +36,8 @@ def parse_url(url):
def read_headers(fp): def read_headers(fp):
""" """
Read a set of headers from a file pointer. Stop once a blank line Read a set of headers from a file pointer. Stop once a blank line is
is reached. Return a ODictCaseless object. reached. Return a ODictCaseless object, or None if headers are invalid.
""" """
ret = [] ret = []
name = '' name = ''
@@ -46,6 +46,8 @@ def read_headers(fp):
if not line or line == '\r\n' or line == '\n': if not line or line == '\r\n' or line == '\n':
break break
if line[0] in ' \t': if line[0] in ' \t':
if not ret:
return None
# continued header # continued header
ret[-1][1] = ret[-1][1] + '\r\n ' + line.strip() ret[-1][1] = ret[-1][1] + '\r\n ' + line.strip()
else: else:
@@ -55,6 +57,8 @@ def read_headers(fp):
name = line[:i] name = line[:i]
value = line[i+1:].strip() value = line[i+1:].strip()
ret.append([name, value]) ret.append([name, value])
else:
return None
return odict.ODictCaseless(ret) return odict.ODictCaseless(ret)
@@ -282,6 +286,8 @@ def read_response(rfile, method, body_size_limit):
except ValueError: except ValueError:
raise HttpError(502, "Invalid server response: %s"%repr(line)) raise HttpError(502, "Invalid server response: %s"%repr(line))
headers = read_headers(rfile) headers = read_headers(rfile)
if headers is None:
raise HttpError(502, "Invalid headers.")
if code >= 100 and code <= 199: if code >= 100 and code <= 199:
return read_response(rfile, method, body_size_limit) return read_response(rfile, method, body_size_limit)
if method == "HEAD" or code == 204 or code == 304: if method == "HEAD" or code == 204 or code == 304:

77
netlib/http_uastrings.py Normal file
View File

@@ -0,0 +1,77 @@
"""
A small collection of useful user-agent header strings. These should be
kept reasonably current to reflect common usage.
"""
# A collection of (name, shortcut, string) tuples:
#   name     - identifier for the client, e.g. "chrome"
#   shortcut - one-character key; this is what get_by_shortcut() matches on
#   string   - the User-Agent header value
UASTRINGS = [
    (
        "android",
        "a",
        "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02"
    ),
    (
        "blackberry",
        "l",
        "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+"
    ),
    (
        "bingbot",
        "b",
        "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
    ),
    (
        "chrome",
        "c",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"
    ),
    (
        "firefox",
        "f",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1"
    ),
    (
        "googlebot",
        "g",
        "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"
    ),
    (
        "ie9",
        "i",
        # NOTE(review): "WIndows" and the doubled ")" look like typos --
        # confirm whether this string is meant to be reproduced verbatim.
        "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))"
    ),
    (
        "ipad",
        "p",
        # NOTE(review): note the space in "like Gecko )" -- confirm against
        # the intended source string.
        "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko ) Version/5.1 Mobile/9B176 Safari/7534.48.3"
    ),
    (
        "iphone",
        "h",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5",
    ),
    (
        "safari",
        "s",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10"
    )
]
def get_by_shortcut(s):
    """
    Look up a user agent entry by its shortcut.

    Returns the first (name, shortcut, string) tuple in UASTRINGS whose
    shortcut equals s, or None when no entry matches.
    """
    matching = (entry for entry in UASTRINGS if s == entry[1])
    return next(matching, None)

View File

@@ -80,6 +80,12 @@ class ODict:
else: else:
return d return d
def get_first(self, k, d=None):
    """
    Return the first value stored under key k, or d if k is not present.
    """
    return self[k][0] if k in self else d
def items(self): def items(self):
return self.lst[:] return self.lst[:]

View File

@@ -39,27 +39,84 @@ class NetLibDisconnect(Exception): pass
class NetLibTimeout(Exception): pass class NetLibTimeout(Exception): pass
class FileLike: class _FileLike:
BLOCKSIZE = 1024 * 32 BLOCKSIZE = 1024 * 32
def __init__(self, o): def __init__(self, o):
self.o = o self.o = o
self._log = None
def set_descriptor(self, o):
    """
    Replace the wrapped object with o. Nothing else on this instance is
    changed, so an active log carries over to the new object.
    """
    self.o = o
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.o, attr) return getattr(self.o, attr)
def flush(self): def start_log(self):
if hasattr(self.o, "flush"): """
self.o.flush() Starts or resets the log.
This will store all bytes read or written.
"""
self._log = []
def stop_log(self):
    """
    Stops the log. The stored data is dropped and is_logging() returns
    False until start_log() is called again.
    """
    self._log = None
def is_logging(self):
    """
    Returns True while a log is active, i.e. self._log is not None.
    """
    return self._log is not None
def get_log(self):
    """
    Returns everything logged so far, joined into a single string.

    Raises ValueError if logging is not active.
    """
    if self.is_logging():
        return "".join(self._log)
    raise ValueError("Not logging!")
def add_log(self, v):
    """
    Appends v to the log when logging is active; otherwise does nothing.
    """
    if not self.is_logging():
        return
    self._log.append(v)
class Writer(_FileLike):
    """
    Write side of a wrapped file-like or socket-like object. Low-level
    errors are converted to NetLibDisconnect, and written data is recorded
    through add_log().
    """
    def flush(self):
        """
        Flush the wrapped object if it has a flush method.

        Raises NetLibDisconnect if the flush fails with socket.error.
        """
        try:
            if hasattr(self.o, "flush"):
                self.o.flush()
        except socket.error as err:
            raise NetLibDisconnect(str(err))

    def write(self, v):
        """
        Write v to the wrapped object. An empty v is ignored.

        Uses sendall() when the wrapped object has one, otherwise write().
        Returns whatever the underlying call returns.

        Raises NetLibDisconnect on SSL.Error or socket.error.
        """
        if not v:
            return None
        try:
            if hasattr(self.o, "sendall"):
                self.add_log(v)
                return self.o.sendall(v)
            r = self.o.write(v)
            # Log only what write() reports as written; when write()
            # returns None the slice v[:None] keeps all of v.
            self.add_log(v[:r])
            return r
        except (SSL.Error, socket.error) as err:
            # Bound to its own name so the argument v is not clobbered.
            raise NetLibDisconnect(str(err))
class Reader(_FileLike):
def read(self, length): def read(self, length):
""" """
If length is None, we read until connection closes. If length is -1, we read until connection closes.
""" """
result = '' result = ''
start = time.time() start = time.time()
while length == -1 or length > 0: while length == -1 or length > 0:
if length == -1 or length > self.BLOCKSIZE:
rlen = self.BLOCKSIZE
else:
rlen = length
try: try:
data = self.o.read(self.BLOCKSIZE if length == -1 else length) data = self.o.read(rlen)
except SSL.ZeroReturnError: except SSL.ZeroReturnError:
break break
except SSL.WantReadError: except SSL.WantReadError:
@@ -79,19 +136,9 @@ class FileLike:
result += data result += data
if length != -1: if length != -1:
length -= len(data) length -= len(data)
self.add_log(result)
return result return result
def write(self, v):
if v:
try:
if hasattr(self.o, "sendall"):
return self.o.sendall(v)
else:
r = self.o.write(v)
return r
except (SSL.Error, socket.error):
raise NetLibDisconnect()
def readline(self, size = None): def readline(self, size = None):
result = '' result = ''
bytes_read = 0 bytes_read = 0
@@ -137,16 +184,16 @@ class TCPClient:
except SSL.Error, v: except SSL.Error, v:
raise NetLibError("SSL handshake error: %s"%str(v)) raise NetLibError("SSL handshake error: %s"%str(v))
self.cert = certutils.SSLCert(self.connection.get_peer_certificate()) self.cert = certutils.SSLCert(self.connection.get_peer_certificate())
self.rfile = FileLike(self.connection) self.rfile.set_descriptor(self.connection)
self.wfile = FileLike(self.connection) self.wfile.set_descriptor(self.connection)
def connect(self): def connect(self):
try: try:
addr = socket.gethostbyname(self.host) addr = socket.gethostbyname(self.host)
connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
connection.connect((addr, self.port)) connection.connect((addr, self.port))
self.rfile = FileLike(connection.makefile('rb', self.rbufsize)) self.rfile = Reader(connection.makefile('rb', self.rbufsize))
self.wfile = FileLike(connection.makefile('wb', self.wbufsize)) self.wfile = Writer(connection.makefile('wb', self.wbufsize))
except socket.error, err: except socket.error, err:
raise NetLibError('Error connecting to "%s": %s' % (self.host, err)) raise NetLibError('Error connecting to "%s": %s' % (self.host, err))
self.connection = connection self.connection = connection
@@ -180,8 +227,8 @@ class BaseHandler:
wbufsize = -1 wbufsize = -1
def __init__(self, connection, client_address, server): def __init__(self, connection, client_address, server):
self.connection = connection self.connection = connection
self.rfile = FileLike(self.connection.makefile('rb', self.rbufsize)) self.rfile = Reader(self.connection.makefile('rb', self.rbufsize))
self.wfile = FileLike(self.connection.makefile('wb', self.wbufsize)) self.wfile = Writer(self.connection.makefile('wb', self.wbufsize))
self.client_address = client_address self.client_address = client_address
self.server = server self.server = server
@@ -206,8 +253,8 @@ class BaseHandler:
self.connection.do_handshake() self.connection.do_handshake()
except SSL.Error, v: except SSL.Error, v:
raise NetLibError("SSL handshake error: %s"%str(v)) raise NetLibError("SSL handshake error: %s"%str(v))
self.rfile = FileLike(self.connection) self.rfile.set_descriptor(self.connection)
self.wfile = FileLike(self.connection) self.wfile.set_descriptor(self.connection)
def finish(self): def finish(self):
self.finished = True self.finished = True
@@ -241,6 +288,9 @@ class BaseHandler:
def handle(self): # pragma: no cover def handle(self): # pragma: no cover
raise NotImplementedError raise NotImplementedError
def settimeout(self, n):
    """
    Set the timeout on this handler's connection by passing n straight
    through to self.connection.settimeout().
    """
    self.connection.settimeout(n)
def close(self): def close(self):
""" """
Does a hard close of the socket, i.e. a shutdown, followed by a close. Does a hard close of the socket, i.e. a shutdown, followed by a close.

36
netlib/utils.py Normal file
View File

@@ -0,0 +1,36 @@
def cleanBin(s, fixspacing=False):
    """
    Cleans binary data to make it safe to display.

    Characters with codes 32 to 126 are kept as they are. Newlines and tabs
    are kept only when fixspacing is False; when fixspacing is True they
    are replaced as well. Every other character, carriage returns
    included, is replaced with the placeholder ".".
    """
    def _display(ch):
        # Plain printable ASCII passes straight through.
        if 31 < ord(ch) < 127:
            return ch
        if ch in "\n\t" and not fixspacing:
            return ch
        return "."

    return "".join(_display(ch) for ch in s)
def hexdump(s):
    """
    Split s into rows of 16 characters and render each row for display.

    Returns a list of tuples, one per row:
        (offset, hex, str)
    where offset is the row's start position as ten zero-padded hex
    digits, hex is the row as space-separated two-digit hex values, and
    str is the row passed through cleanBin(part, True).

    The hex field of a short final row is padded with spaces to the width
    of a full row, so the columns line up when rows are printed.
    """
    row_len = 16
    # Two hex digits per character plus one separating space between them.
    hex_width = row_len * 3 - 1
    parts = []
    for offset in range(0, len(s), row_len):
        part = s[offset:offset + row_len]
        x = " ".join("%.2x" % ord(c) for c in part)
        parts.append(
            ("%.10x" % offset, x.ljust(hex_width), cleanBin(part, True))
        )
    return parts

View File

@@ -1,4 +1,4 @@
IVERSION = (0, 2) IVERSION = (0, 2, 2)
VERSION = ".".join(str(i) for i in IVERSION) VERSION = ".".join(str(i) for i in IVERSION)
NAME = "netlib" NAME = "netlib"
NAMEVERSION = NAME + " " + VERSION NAMEVERSION = NAME + " " + VERSION

View File

@@ -169,16 +169,20 @@ def test_parse_init_http():
class TestReadHeaders: class TestReadHeaders:
def _read(self, data, verbatim=False):
    """
    Run http.read_headers over data and return its result.

    Unless verbatim is true, data is dedented and stripped first, so tests
    can pass indented triple-quoted strings.
    """
    if verbatim:
        raw = data
    else:
        raw = textwrap.dedent(data).strip()
    return http.read_headers(cStringIO.StringIO(raw))
def test_read_simple(self): def test_read_simple(self):
data = """ data = """
Header: one Header: one
Header2: two Header2: two
\r\n \r\n
""" """
data = textwrap.dedent(data) h = self._read(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
assert h.lst == [["Header", "one"], ["Header2", "two"]] assert h.lst == [["Header", "one"], ["Header2", "two"]]
def test_read_multi(self): def test_read_multi(self):
@@ -187,10 +191,7 @@ class TestReadHeaders:
Header: two Header: two
\r\n \r\n
""" """
data = textwrap.dedent(data) h = self._read(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
assert h.lst == [["Header", "one"], ["Header", "two"]] assert h.lst == [["Header", "one"], ["Header", "two"]]
def test_read_continued(self): def test_read_continued(self):
@@ -200,12 +201,19 @@ class TestReadHeaders:
Header2: three Header2: three
\r\n \r\n
""" """
data = textwrap.dedent(data) h = self._read(data)
data = data.strip()
s = cStringIO.StringIO(data)
h = http.read_headers(s)
assert h.lst == [["Header", "one\r\n two"], ["Header2", "three"]] assert h.lst == [["Header", "one\r\n two"], ["Header2", "three"]]
def test_read_continued_err(self):
    """
    A continuation line with no header before it makes read_headers
    return None.
    """
    # Passed verbatim so the leading tab is not stripped away.
    assert self._read("\tfoo: bar\r\n", True) is None
def test_read_err(self):
    """
    A line that is not a valid header makes read_headers return None.
    """
    # _read() dedents and strips this, leaving just "foo".
    data = """
        foo
    """
    assert self._read(data) is None
def test_read_response(): def test_read_response():
def tst(data, method, limit): def tst(data, method, limit):
@@ -248,6 +256,14 @@ def test_read_response():
assert tst(data, "GET", None)[4] == 'foo' assert tst(data, "GET", None)[4] == 'foo'
assert tst(data, "HEAD", None)[4] == '' assert tst(data, "HEAD", None)[4] == ''
data = """
HTTP/1.1 200 OK
\tContent-Length: 3
foo
"""
tutils.raises("invalid headers", tst, data, "GET", None)
def test_parse_url(): def test_parse_url():
assert not http.parse_url("") assert not http.parse_url("")

View File

@@ -0,0 +1,7 @@
from netlib import http_uastrings
def test_get_shortcut():
    """
    A known shortcut resolves to its entry; an unknown one gives a falsy
    result.
    """
    chrome = http_uastrings.get_by_shortcut("c")
    assert chrome[0] == "chrome"
    missing = http_uastrings.get_by_shortcut("_")
    assert not missing

View File

@@ -85,6 +85,12 @@ class TestODict:
assert self.od.get("one") == ["two"] assert self.od.get("one") == ["two"]
assert self.od.get("two") == None assert self.od.get("two") == None
def test_get_first(self):
    """
    get_first returns the earliest value added under a key, and None for
    a key that was never added.
    """
    for value in ("two", "three"):
        self.od.add("one", value)
    assert self.od.get_first("one") == "two"
    assert self.od.get_first("two") == None
class TestODictCaseless: class TestODictCaseless:
def setUp(self): def setUp(self):

View File

@@ -28,6 +28,11 @@ class ServerTestBase:
cls.server.shutdown() cls.server.shutdown()
@property
def last_handler(self):
    """
    Returns the last_handler attribute of the object at self.server.server.
    TServer sets that attribute to the handler it creates for each
    incoming connection.
    """
    # NOTE(review): presumably self.server wraps the TServer instance in
    # its .server attribute -- confirm against ServerTestBase setup.
    return self.server.server.last_handler
class SNIHandler(tcp.BaseHandler): class SNIHandler(tcp.BaseHandler):
sni = None sni = None
def handle_sni(self, connection): def handle_sni(self, connection):
@@ -63,15 +68,27 @@ class HangHandler(tcp.BaseHandler):
time.sleep(1) time.sleep(1)
class TimeoutHandler(tcp.BaseHandler):
    """
    Handler that sets a very short timeout, attempts a read, and records
    in self.timeout whether the read raised tcp.NetLibTimeout.
    """
    def handle(self):
        # Set the flag before anything else so it exists as soon as
        # handle() has started.
        self.timeout = False
        self.settimeout(0.01)
        try:
            self.rfile.read(10)
        except tcp.NetLibTimeout:
            self.timeout = True
class TServer(tcp.TCPServer): class TServer(tcp.TCPServer):
def __init__(self, addr, ssl, q, handler, v3_only=False): def __init__(self, addr, ssl, q, handler_klass, v3_only=False):
tcp.TCPServer.__init__(self, addr) tcp.TCPServer.__init__(self, addr)
self.ssl, self.q = ssl, q self.ssl, self.q = ssl, q
self.v3_only = v3_only self.v3_only = v3_only
self.handler = handler self.handler_klass = handler_klass
self.last_handler = None
def handle_connection(self, request, client_address): def handle_connection(self, request, client_address):
h = self.handler(request, client_address, self) h = self.handler_klass(request, client_address, self)
self.last_handler = h
if self.ssl: if self.ssl:
if self.v3_only: if self.v3_only:
method = tcp.SSLv3_METHOD method = tcp.SSLv3_METHOD
@@ -194,12 +211,24 @@ class TestDisconnect(ServerTestBase):
c.close() c.close()
class TestServerTimeOut(ServerTestBase):
    """
    Checks that a handler using settimeout() sees its read time out when
    the client connects but never sends anything.
    """
    @classmethod
    def makeserver(cls):
        # Non-SSL server whose connections are handled by TimeoutHandler.
        return TServer(("127.0.0.1", 0), False, cls.q, TimeoutHandler)

    def test_timeout(self):
        c = tcp.TCPClient("127.0.0.1", self.port)
        c.connect()
        # Send nothing; the pause is much longer than the handler's 0.01s
        # timeout, giving it time to record the result.
        time.sleep(0.3)
        assert self.last_handler.timeout
class TestTimeOut(ServerTestBase): class TestTimeOut(ServerTestBase):
@classmethod @classmethod
def makeserver(cls): def makeserver(cls):
return TServer(("127.0.0.1", 0), False, cls.q, HangHandler) return TServer(("127.0.0.1", 0), False, cls.q, HangHandler)
def test_timeout_client(self): def test_timeout(self):
c = tcp.TCPClient("127.0.0.1", self.port) c = tcp.TCPClient("127.0.0.1", self.port)
c.connect() c.connect()
c.settimeout(0.1) c.settimeout(0.1)
@@ -226,10 +255,21 @@ class TestTCPClient:
class TestFileLike: class TestFileLike:
def test_blocksize(self):
    """
    Reads return exactly the requested number of bytes, whether the
    request is smaller than, equal to, or larger than BLOCKSIZE.
    """
    reader = tcp.Reader(
        cStringIO.StringIO("1234567890abcdefghijklmnopqrstuvwxyz")
    )
    reader.BLOCKSIZE = 2
    for length, expected in [(1, "1"), (2, "23"), (3, "456"), (4, "7890")]:
        assert reader.read(length) == expected
    # -1 reads whatever is left.
    d = reader.read(-1)
    assert d.startswith("abc") and d.endswith("xyz")
def test_wrap(self): def test_wrap(self):
s = cStringIO.StringIO("foobar\nfoobar") s = cStringIO.StringIO("foobar\nfoobar")
s = tcp.FileLike(s)
s.flush() s.flush()
s = tcp.Reader(s)
assert s.readline() == "foobar\n" assert s.readline() == "foobar\n"
assert s.readline() == "foobar" assert s.readline() == "foobar"
# Test __getattr__ # Test __getattr__
@@ -237,11 +277,39 @@ class TestFileLike:
def test_limit(self): def test_limit(self):
s = cStringIO.StringIO("foobar\nfoobar") s = cStringIO.StringIO("foobar\nfoobar")
s = tcp.FileLike(s) s = tcp.Reader(s)
assert s.readline(3) == "foo" assert s.readline(3) == "foo"
def test_limitless(self): def test_limitless(self):
s = cStringIO.StringIO("f"*(50*1024)) s = cStringIO.StringIO("f"*(50*1024))
s = tcp.FileLike(s) s = tcp.Reader(s)
ret = s.read(-1) ret = s.read(-1)
assert len(ret) == 50 * 1024 assert len(ret) == 50 * 1024
def test_readlog(self):
    """
    Exercises the Reader log: off by default, accumulates data from
    readline() and read(), is reset by start_log(), and get_log() raises
    ValueError after stop_log().
    """
    reader = tcp.Reader(cStringIO.StringIO("foobar\nfoobar"))
    assert not reader.is_logging()
    reader.start_log()
    assert reader.is_logging()

    reader.readline()
    assert reader.get_log() == "foobar\n"
    reader.read(1)
    assert reader.get_log() == "foobar\nf"

    # A second start_log() resets the log.
    reader.start_log()
    assert reader.get_log() == ""
    reader.read(1)
    assert reader.get_log() == "o"

    reader.stop_log()
    tutils.raises(ValueError, reader.get_log)
def test_writelog(self):
    """
    Each write() on a logging Writer is appended to the log.
    """
    writer = tcp.Writer(cStringIO.StringIO())
    writer.start_log()
    assert writer.is_logging()
    for expected in ("x", "xx"):
        writer.write("x")
        assert writer.get_log() == expected

13
test/test_utils.py Normal file
View File

@@ -0,0 +1,13 @@
from netlib import utils
def test_hexdump():
    """
    Smoke test: hexdump of a 40-character input gives a non-empty result.
    """
    rows = utils.hexdump("one\0" * 10)
    assert rows
def test_cleanBin():
    """
    cleanBin keeps printable text, replaces NUL, and keeps a newline
    unless fixspacing is passed as True.
    """
    cases = [
        (("one",), "one"),
        (("\00ne",), ".ne"),
        (("\nne",), "\nne"),
        (("\nne", True), ".ne"),
    ]
    for args, expected in cases:
        assert utils.cleanBin(*args) == expected