Merge pull request #2291 from cortesi/cuts

Introduce cuts: a flow dissector
This commit is contained in:
Aldo Cortesi
2017-04-30 14:35:06 +12:00
committed by GitHub
13 changed files with 351 additions and 13 deletions

View File

@@ -5,6 +5,7 @@ from mitmproxy.addons import check_ca
from mitmproxy.addons import clientplayback
from mitmproxy.addons import core_option_validation
from mitmproxy.addons import core
from mitmproxy.addons import cut
from mitmproxy.addons import disable_h2c
from mitmproxy.addons import onboarding
from mitmproxy.addons import proxyauth
@@ -28,6 +29,7 @@ def default_addons():
check_alpn.CheckALPN(),
check_ca.CheckCA(),
clientplayback.ClientPlayback(),
cut.Cut(),
disable_h2c.DisableH2C(),
onboarding.Onboarding(),
proxyauth.ProxyAuth(),

126
mitmproxy/addons/cut.py Normal file
View File

@@ -0,0 +1,126 @@
import csv
import typing
from mitmproxy import command
from mitmproxy import exceptions
from mitmproxy import flow
from mitmproxy import ctx
from mitmproxy import certs
from mitmproxy.utils import strutils
def headername(spec: str):
    """
    Return the header name embedded in a "header[name]" cut segment.

    Whitespace around the name is removed. Raises exceptions.CommandError
    if the segment is not of the form "header[...]".
    """
    prefix, suffix = "header[", "]"
    if spec.startswith(prefix) and spec.endswith(suffix):
        return spec[len(prefix):-len(suffix)].strip()
    raise exceptions.CommandError("Invalid header spec: %s" % spec)
# Abbreviations that extract() expands to the real attribute name while the
# object being traversed is a flow.
flow_shortcuts = dict(
    q="request",
    s="response",
    cc="client_conn",
    sc="server_conn",
)
def is_addr(v):
    """
    Return True if v looks like an address: a tuple with at least two items.
    """
    if not isinstance(v, tuple):
        return False
    return len(v) >= 2
def extract(cut: str, f: flow.Flow) -> typing.Union[str, bytes]:
    """
    Resolve a single cut snippet against a flow.

    The snippet is a dot-separated attribute path that is walked starting at
    the flow. While the object being traversed is a flow, the abbreviations
    in flow_shortcuts are expanded. Attributes that do not exist resolve to
    None, so an unknown path yields an empty string rather than an error.

    The final path segment is converted as follows:
      - "port" / "host" on an address tuple return the matching tuple item;
      - "header[name]" returns the named header from the current object's
        headers, or "" if the header or the headers themselves are missing;
      - bytes are returned unchanged;
      - bools become "true" or "false";
      - certificates are returned PEM-encoded;
      - anything else is passed through str(), with falsy values becoming "".

    Raises exceptions.CommandError if a segment starts with an underscore or
    if a header segment is malformed.
    """
    path = cut.split(".")
    last = len(path) - 1
    current = f  # type: typing.Any
    for i, spec in enumerate(path):
        if spec.startswith("_"):
            raise exceptions.CommandError("Can't access internal attribute %s" % spec)
        if isinstance(current, flow.Flow):
            spec = flow_shortcuts.get(spec, spec)

        part = getattr(current, spec, None)
        if i == last:
            if spec == "port" and is_addr(current):
                return str(current[1])
            if spec == "host" and is_addr(current):
                return str(current[0])
            elif spec.startswith("header["):
                # Validate the spec first so that a malformed header segment
                # always surfaces as a CommandError.
                name = headername(spec)
                # The object we are looking at may have no headers at all,
                # e.g. when an earlier segment resolved to None. Treat that
                # like any other missing value instead of crashing.
                headers = getattr(current, "headers", None)
                if headers is None:
                    return ""
                return headers.get(name, "")
            elif isinstance(part, bytes):
                return part
            elif isinstance(part, bool):
                return "true" if part else "false"
            elif isinstance(part, certs.SSLCert):
                return part.to_pem().decode("ascii")
        current = part
    return str(current or "")
def parse_cutspec(s: str) -> typing.Tuple[str, typing.Sequence[str]]:
    """
        Split a cut specification into (flowspec, [cuts]).

        Everything before the first "|" is a comma-separated list of cuts,
        everything after it is the flowspec. Without a "|" the flowspec is
        "@all". Empty cut entries are dropped.

        Raises exceptions.CommandError if no cuts remain.
    """
    cuts, separator, remainder = s.partition("|")
    flowspec = remainder.strip() if separator else "@all"

    stripped = (piece.strip() for piece in cuts.split(","))
    cutparts = [piece for piece in stripped if piece]
    if not cutparts:
        raise exceptions.CommandError("Invalid cut specification.")
    return flowspec, cutparts
class Cut:
    @command.command("cut")
    def cut(self, cutspec: str) -> command.Cuts:
        """
            Resolve a cut specification of the form "cuts|flowspec". The cuts
            are a comma-separated list of cut snippets. Cut snippets are
            attribute paths from the base of the flow object, with a few
            conveniences - "q", "s", "cc" and "sc" are shortcuts for request,
            response, client_conn and server_conn, "port" and "host" retrieve
            parts of an address tuple, ".header[key]" retrieves a header value.
            Return values are converted sensibly: SSL certificates are
            converted to PEM format, bools are "true" or "false", "bytes" are
            preserved, and all other values are converted to strings. The
            flowspec is optional, and if it is not specified, it is assumed to
            be @all.
        """
        flowspec, cuts = parse_cutspec(cutspec)
        flows = ctx.master.commands.call_args("view.resolve", [flowspec])
        # One row per flow, one column per cut snippet.
        return [[extract(c, f) for c in cuts] for f in flows]

    @command.command("cut.save")
    def save(self, cuts: command.Cuts, path: str) -> None:
        """
            Save cuts to file. If there are multiple rows or columns, the
            format is UTF-8 encoded CSV. If there is exactly one row and one
            column, the data is written to file as-is, with raw bytes
            preserved. If the path is prefixed with a "+", values are appended
            if there is an existing file.
        """
        append = path.startswith("+")
        if append:
            path = path[1:]

        single_value = len(cuts) == 1 and len(cuts[0]) == 1
        if not single_value:
            with open(path, "a" if append else "w", newline='', encoding="utf8") as fp:
                csv.writer(fp).writerows(
                    [strutils.always_str(c) or "" for c in row]  # type: ignore
                    for row in cuts
                )
            ctx.log.alert("Saved %s cuts as CSV." % len(cuts))
            return

        with open(path, "ab" if append else "wb") as fp:
            if fp.tell() > 0:
                # The file already has content, so we must be appending:
                # separate the new value from what is there.
                fp.write(b"\n")
            value = cuts[0][0]
            fp.write(value if isinstance(value, bytes) else value.encode("utf8"))
        ctx.log.alert("Saved single cut.")

View File

@@ -3,12 +3,18 @@ import typing
import shlex
import textwrap
import functools
import sys
from mitmproxy.utils import typecheck
from mitmproxy import exceptions
from mitmproxy import flow
# A table of cut values: a sequence of rows, each row a sequence of str or
# bytes cells.
Cuts = typing.Sequence[typing.Sequence[typing.Union[str, bytes]]]
def typename(t: type, ret: bool) -> str:
"""
Translates a type to an explanatory string. If ret is True, we're
@@ -18,6 +24,8 @@ def typename(t: type, ret: bool) -> str:
return t.__name__
elif t == typing.Sequence[flow.Flow]:
return "[flow]" if ret else "flowspec"
elif t == Cuts:
return "[cuts]" if ret else "cutspec"
elif t == flow.Flow:
return "flow"
else: # pragma: no cover
@@ -102,6 +110,15 @@ class CommandManager:
raise exceptions.CommandError("Invalid command: %s" % cmdstr)
return self.call_args(parts[0], parts[1:])
def dump(self, out=None) -> None:
    """
    Print every registered command to a text stream.

    Commands are ordered by their signature help. For each command the
    lines of its help text (if any) are written as "# "-prefixed comments,
    followed by the signature help and a blank line.

    out: a writable text stream. If omitted, sys.stdout is used. The
    stream is looked up when the method is called rather than when it is
    defined, so a redirected sys.stdout is honoured.
    """
    if out is None:
        out = sys.stdout
    cmds = sorted(self.commands.values(), key=lambda x: x.signature_help())
    for c in cmds:
        # Commands without help text have help set to a falsy value.
        for hl in (c.help or "").splitlines():
            print("# " + hl, file=out)
        print(c.signature_help(), file=out)
        print(file=out)
def parsearg(manager: CommandManager, spec: str, argtype: type) -> typing.Any:
"""
@@ -125,6 +142,8 @@ def parsearg(manager: CommandManager, spec: str, argtype: type) -> typing.Any:
raise exceptions.CommandError("Expected an integer, got %s." % spec)
elif argtype == typing.Sequence[flow.Flow]:
return manager.call_args("view.resolve", [spec])
elif argtype == Cuts:
return manager.call_args("cut", [spec])
elif argtype == flow.Flow:
flows = manager.call_args("view.resolve", [spec])
if len(flows) != 1:

View File

@@ -174,7 +174,7 @@ def tserver_conn():
id=str(uuid.uuid4()),
address=("address", 22),
source_address=("address", 22),
ip_address=None,
ip_address=("192.168.0.1", 22),
cert=None,
timestamp_start=1,
timestamp_tcp_setup=2,
@@ -183,7 +183,7 @@ def tserver_conn():
ssl_established=False,
sni="address",
alpn_proto_negotiated=None,
tls_version=None,
tls_version="TLSv1.2",
via=None,
))
c.reply = controller.DummyReply()

View File

@@ -157,8 +157,6 @@ class FlowItem(urwid.WidgetWrap):
# callback = common.export_to_clip_or_file,
# args = (None, self.flow, common.copy_to_clipboard_or_prompt)
# )
elif key == "b":
common.ask_save_body(None, self.flow)
else:
return key

View File

@@ -151,6 +151,7 @@ def default_keymap(km):
km.add("A", "flow.resume @all", context="flowlist")
km.add("a", "flow.resume @focus", context="flowlist")
km.add("b", "console.command 'cut.save s.content|@focus '", context="flowlist")
km.add("d", "view.remove @focus", context="flowlist")
km.add("D", "view.duplicate @focus", context="flowlist")
km.add("e", "set console_eventlog=toggle", context="flowlist")

View File

@@ -85,11 +85,7 @@ def run(MasterKlass, args, extra=None): # pragma: no cover
print(optmanager.dump_defaults(opts))
sys.exit(0)
if args.commands:
cmds = []
for c in master.commands.commands.values():
cmds.append(c.signature_help())
for i in sorted(cmds):
print(i)
master.commands.dump()
sys.exit(0)
opts.set(*args.setoptions)
if extra:

View File

@@ -25,9 +25,10 @@ def always_str(str_or_bytes: Optional[AnyStr], *decode_args) -> Optional[str]:
raise TypeError("Expected str or bytes, but got {}.".format(type(str_or_bytes).__name__))
# Translate control characters to "safe" characters. This implementation initially
# replaced them with the matching control pictures (http://unicode.org/charts/PDF/U2400.pdf),
# but that turned out to render badly with monospace fonts. We are back to "." therefore.
# Translate control characters to "safe" characters. This implementation
# initially replaced them with the matching control pictures
# (http://unicode.org/charts/PDF/U2400.pdf), but that turned out to render badly
# with monospace fonts. We are back to "." therefore.
_control_char_trans = {
x: ord(".") # x + 0x2400 for unicode control group pictures
for x in range(32)

View File

@@ -19,6 +19,16 @@ def check_command_return_type(value: typing.Any, typeinfo: typing.Any) -> bool:
for v in value:
if not check_command_return_type(v, T):
return False
elif typename.startswith("typing.Union"):
try:
types = typeinfo.__args__ # type: ignore
except AttributeError:
# Python 3.5.x
types = typeinfo.__union_params__ # type: ignore
for T in types:
checks = [check_command_return_type(value, T) for T in types]
if not any(checks):
return False
elif value is None and typeinfo is None:
return True
elif not isinstance(value, typeinfo):

View File

@@ -0,0 +1,157 @@
from mitmproxy.addons import cut
from mitmproxy.addons import view
from mitmproxy import exceptions
from mitmproxy import certs
from mitmproxy.test import taddons
from mitmproxy.test import tflow
from mitmproxy.test import tutils
import pytest
def test_extract():
    """Check extract() against a table of cut snippets and a certificate."""
    tf = tflow.tflow(resp=True)
    expectations = [
        ("q.method", "GET"),
        ("q.scheme", "http"),
        ("q.host", "address"),
        ("q.port", "22"),
        ("q.path", "/path"),
        ("q.url", "http://address:22/path"),
        ("q.text", "content"),
        ("q.content", b"content"),
        ("q.raw_content", b"content"),
        ("q.header[header]", "qvalue"),

        ("s.status_code", "200"),
        ("s.reason", "OK"),
        ("s.text", "message"),
        ("s.content", b"message"),
        ("s.raw_content", b"message"),
        ("s.header[header-response]", "svalue"),

        ("cc.address.port", "22"),
        ("cc.address.host", "address"),
        ("cc.tls_version", "TLSv1.2"),
        ("cc.sni", "address"),
        ("cc.ssl_established", "false"),

        ("sc.address.port", "22"),
        ("sc.address.host", "address"),
        ("sc.ip_address.host", "192.168.0.1"),
        ("sc.tls_version", "TLSv1.2"),
        ("sc.sni", "address"),
        ("sc.ssl_established", "false"),
    ]
    for spec, expected in expectations:
        actual = cut.extract(spec, tf)
        if actual != expected:
            raise AssertionError(
                "%s: Expected %s, got %s" % (spec, expected, actual)
            )

    # Certificates are rendered as PEM text.
    cert_path = tutils.test_data.path("mitmproxy/net/data/text_cert")
    with open(cert_path, "rb") as f:
        pem = f.read()
    tf.server_conn.cert = certs.SSLCert.from_pem(pem)
    assert "CERTIFICATE" in cut.extract("sc.cert", tf)
def test_parse_cutspec():
    """Check parse_cutspec() on valid specs and on the empty, invalid spec."""
    two_cuts = ["req.method", "req.host"]
    # Each case is (cutspec, expected result, whether an error is expected).
    cases = [
        ("", None, True),
        ("req.method", ("@all", ["req.method"]), False),
        ("req.method,req.host", ("@all", two_cuts), False),
        ("req.method,req.host|~b foo", ("~b foo", two_cuts), False),
        (
            "req.method,req.host|~b foo | ~b bar",
            ("~b foo | ~b bar", two_cuts),
            False
        ),
        (
            "req.method, req.host | ~b foo | ~b bar",
            ("~b foo | ~b bar", two_cuts),
            False
        ),
    ]
    for cutspec, output, err in cases:
        if err:
            with pytest.raises(exceptions.CommandError):
                cut.parse_cutspec(cutspec)
        else:
            assert cut.parse_cutspec(cutspec) == output
def test_headername():
    """A header spec without its closing bracket is rejected."""
    malformed = "header[foo."
    with pytest.raises(exceptions.CommandError):
        cut.headername(malformed)
def qr(f):
    """Return the complete contents of the file at path f as bytes."""
    with open(f, mode="rb") as handle:
        data = handle.read()
    return data
def test_cut_file(tmpdir):
    """Exercise cut.save: single values, appending, and CSV output."""
    target = str(tmpdir.join("path"))
    flows = view.View()
    cutter = cut.Cut()
    with taddons.context() as tctx:
        tctx.master.addons.add(flows, cutter)

        # One flow and one cut: the raw value is written as-is.
        flows.add([tflow.tflow(resp=True)])

        tctx.command(cutter.save, "q.method|@all", target)
        assert qr(target) == b"GET"
        tctx.command(cutter.save, "q.content|@all", target)
        assert qr(target) == b"content"
        # A leading "+" appends to the existing file.
        tctx.command(cutter.save, "q.content|@all", "+" + target)
        assert qr(target) == b"content\ncontent"

        # With a second flow there are several rows, so CSV is written.
        flows.add([tflow.tflow(resp=True)])
        tctx.command(cutter.save, "q.method|@all", target)
        assert qr(target).splitlines() == [b"GET", b"GET"]
        tctx.command(cutter.save, "q.method,q.content|@all", target)
        assert qr(target).splitlines() == [b"GET,content", b"GET,content"]
def test_cut():
    """Check the cut command against an HTTP flow and a TCP flow."""
    http_cases = [
        ("q.method|@all", "GET"),
        ("q.scheme|@all", "http"),
        ("q.host|@all", "address"),
        ("q.port|@all", "22"),
        ("q.path|@all", "/path"),
        ("q.url|@all", "http://address:22/path"),
        ("q.content|@all", b"content"),
        ("q.header[header]|@all", "qvalue"),
        ("q.header[unknown]|@all", ""),

        ("s.status_code|@all", "200"),
        ("s.reason|@all", "OK"),
        ("s.content|@all", b"message"),
        ("s.header[header-response]|@all", "svalue"),
        # Unknown attributes resolve to an empty string.
        ("moo", ""),
    ]
    v = view.View()
    c = cut.Cut()
    with taddons.context() as tctx:
        v.add([tflow.tflow(resp=True)])
        tctx.master.addons.add(v, c)
        for cutspec, expected in http_cases:
            assert c.cut(cutspec) == [[expected]]
        # Underscore-prefixed attributes must be refused.
        with pytest.raises(exceptions.CommandError):
            c.cut("__dict__")

    # Request/response attributes on a TCP flow resolve to empty strings.
    v = view.View()
    c = cut.Cut()
    with taddons.context() as tctx:
        tctx.master.addons.add(v, c)
        v.add([tflow.ttcpflow()])
        for cutspec in ("q.method|@all", "s.status|@all"):
            assert c.cut(cutspec) == [[""]]

View File

@@ -7,6 +7,7 @@ from mitmproxy import proxy
from mitmproxy import exceptions
from mitmproxy.test import tflow
from mitmproxy.test import taddons
import io
import pytest
@@ -55,22 +56,34 @@ def test_simple():
c.add("empty", a.empty)
c.call("empty")
fp = io.StringIO()
c.dump(fp)
assert fp.getvalue()
def test_typename():
    """Check the display name of each supported type, as argument and return."""
    flow_seq = typing.Sequence[flow.Flow]
    # Each case is (type, ret flag, expected name).
    cases = [
        (str, True, "str"),
        (flow_seq, True, "[flow]"),
        (flow_seq, False, "flowspec"),

        (command.Cuts, False, "cutspec"),
        (command.Cuts, True, "[cuts]"),

        (flow.Flow, False, "flow"),
    ]
    for t, ret, expected in cases:
        assert command.typename(t, ret) == expected
class DummyConsole:
    """Test addon that supplies stub "view.resolve" and "cut" commands."""

    def load(self, l):
        registrations = (
            ("view.resolve", self.resolve),
            ("cut", self.cut),
        )
        for name, handler in registrations:
            l.add_command(name, handler)

    def resolve(self, spec: str) -> typing.Sequence[flow.Flow]:
        # The spec is read as a count: return that many references to one
        # test flow.
        count = int(spec)
        return count * [tflow.tflow(resp=True)]

    def cut(self, spec: str) -> command.Cuts:
        # Fixed single-cell result, whatever the spec.
        return [["test"]]
def test_parsearg():
with taddons.context() as tctx:
@@ -97,6 +110,10 @@ def test_parsearg():
with pytest.raises(exceptions.CommandError):
command.parsearg(tctx.master.commands, "foo", Exception)
assert command.parsearg(
tctx.master.commands, "foo", command.Cuts
) == [["test"]]
class TDec:
@command.command("cmd1")

View File

@@ -99,7 +99,7 @@ class TestServerConnection:
c.alpn_proto_negotiated = b'h2'
assert 'address:22' in repr(c)
assert 'ALPN' in repr(c)
assert 'TLS: foobar' in repr(c)
assert 'TLSv1.2: foobar' in repr(c)
c.sni = None
c.tls_established = True

View File

@@ -4,6 +4,7 @@ from unittest import mock
import pytest
from mitmproxy.utils import typecheck
from mitmproxy import command
class TBase:
@@ -93,9 +94,19 @@ def test_check_command_return_type():
assert(typecheck.check_command_return_type(None, None))
assert(not typecheck.check_command_return_type(["foo"], typing.Sequence[int]))
assert(not typecheck.check_command_return_type("foo", typing.Sequence[int]))
assert(typecheck.check_command_return_type([["foo", b"bar"]], command.Cuts))
assert(not typecheck.check_command_return_type(["foo", b"bar"], command.Cuts))
assert(not typecheck.check_command_return_type([["foo", 22]], command.Cuts))
# Python 3.5 only defines __parameters__
m = mock.Mock()
m.__str__ = lambda self: "typing.Sequence"
m.__parameters__ = (int,)
typecheck.check_command_return_type([10], m)
# Python 3.5 only defines __union_params__
m = mock.Mock()
m.__str__ = lambda self: "typing.Union"
m.__union_params__ = (int,)
assert not typecheck.check_command_return_type([22], m)