message.content -> .raw_content, implement .text

This PR improves our handling of HTTP message body encodings:

- The unaltered message body is now accessible as `.raw_content`
- The "content-encoding"-decoded content (i.e. with gzip removed)
  is now `.content`, as this is what we want in 99% of the cases.
- `.text` now provides the "content-encoding"-decoded and then
  "content-type charset"-decoded message body.
- The decoded values for `.content` and `.text` are cached,
  so that repeated access and `x.text = x.text` are cheap.
- The `decoded()` decorator is now deprecated, as we can now just use
  `.content`. Similarly `HTTPMessage.get_decoded_content()` is
  deprecated.
This commit is contained in:
Maximilian Hils
2016-07-02 01:51:47 -07:00
parent 2c09e0416b
commit 6032c4f235
23 changed files with 379 additions and 270 deletions

View File

@@ -56,8 +56,6 @@ Datastructures
:special-members:
:no-undoc-members:
.. autoclass:: decoded
.. automodule:: netlib.multidict
.. autoclass:: MultiDictView

View File

@@ -7,7 +7,6 @@ import urwid.util
import netlib
from mitmproxy import flow
from mitmproxy import models
from mitmproxy import utils
from mitmproxy.console import signals
from netlib import human
@@ -259,26 +258,24 @@ def copy_flow_format_data(part, scope, flow):
if scope in ("q", "a"):
if flow.request.content is None:
return None, "Request content is missing"
with models.decoded(flow.request):
if part == "h":
data += netlib.http.http1.assemble_request(flow.request)
elif part == "c":
data += flow.request.content
else:
raise ValueError("Unknown part: {}".format(part))
if part == "h":
data += netlib.http.http1.assemble_request(flow.request)
elif part == "c":
data += flow.request.content
else:
raise ValueError("Unknown part: {}".format(part))
if scope == "a" and flow.request.content and flow.response:
# Add padding between request and response
data += "\r\n" * 2
if scope in ("s", "a") and flow.response:
if flow.response.content is None:
return None, "Response content is missing"
with models.decoded(flow.response):
if part == "h":
data += netlib.http.http1.assemble_response(flow.response)
elif part == "c":
data += flow.response.content
else:
raise ValueError("Unknown part: {}".format(part))
if part == "h":
data += netlib.http.http1.assemble_response(flow.response)
elif part == "c":
data += flow.response.content
else:
raise ValueError("Unknown part: {}".format(part))
return data, False
@@ -388,12 +385,12 @@ def ask_save_body(part, master, state, flow):
elif part == "q" and request_has_content:
ask_save_path(
"Save request content",
flow.request.get_decoded_content()
flow.request.content
)
elif part == "s" and response_has_content:
ask_save_path(
"Save response content",
flow.response.get_decoded_content()
flow.response.content
)
else:
signals.status_message.send(message="No content to save.")
@@ -418,9 +415,9 @@ def format_flow(f, focus, extended=False, hostheader=False, marked=False):
marked = marked,
)
if f.response:
if f.response.content:
contentdesc = human.pretty_size(len(f.response.content))
elif f.response.content is None:
if f.response.raw_content:
contentdesc = human.pretty_size(len(f.response.raw_content))
elif f.response.raw_content is None:
contentdesc = "[content missing]"
else:
contentdesc = "[no content]"

View File

@@ -176,7 +176,7 @@ class FlowView(tabs.Tabs):
self.show()
def content_view(self, viewmode, message):
if message.content is None:
if message.raw_content is None:
msg, body = "", [urwid.Text([("error", "[content missing]")])]
return msg, body
else:
@@ -214,6 +214,12 @@ class FlowView(tabs.Tabs):
)
description = description.replace("Raw", "Couldn't parse: falling back to Raw")
if message.content != message.raw_content:
description = "[decoded {enc}] {desc}".format(
enc=message.headers.get("content-encoding"),
desc=description
)
# Give hint that you have to tab for the response.
if description == "No content" and isinstance(message, models.HTTPRequest):
description = "No request content (press tab to view response)"
@@ -407,15 +413,14 @@ class FlowView(tabs.Tabs):
)
)
if part == "r":
with models.decoded(message):
# Fix an issue caused by some editors when editing a
# request/response body. Many editors make it hard to save a
# file without a terminating newline on the last line. When
# editing message bodies, this can cause problems. For now, I
# just strip the newlines off the end of the body when we return
# from an editor.
c = self.master.spawn_editor(message.content or "")
message.content = c.rstrip("\n")
# Fix an issue caused by some editors when editing a
# request/response body. Many editors make it hard to save a
# file without a terminating newline on the last line. When
# editing message bodies, this can cause problems. For now, I
# just strip the newlines off the end of the body when we return
# from an editor.
c = self.master.spawn_editor(message.content or b"")
message.content = c.rstrip(b"\n")
elif part == "f":
if not message.urlencoded_form and message.content:
signals.status_prompt_onekey.send(
@@ -512,14 +517,10 @@ class FlowView(tabs.Tabs):
signals.flow_change.send(self, flow = self.flow)
def delete_body(self, t):
if t == "m":
val = None
else:
val = None
if self.tab_offset == TAB_REQ:
self.flow.request.content = val
self.flow.request.content = None
else:
self.flow.response.content = val
self.flow.response.content = None
signals.flow_change.send(self, flow = self.flow)
def keypress(self, size, key):

View File

@@ -618,15 +618,6 @@ def get_content_view(viewmode, data, **metadata):
Raises:
ContentViewException, if the content view threw an error.
"""
msg = []
headers = metadata.get("headers", {})
enc = headers.get("content-encoding")
if enc and enc != "identity":
decoded = encoding.decode(enc, data)
if decoded:
data = decoded
msg.append("[decoded %s]" % enc)
try:
ret = viewmode(data, **metadata)
# Third-party viewers can fail in unexpected ways...
@@ -637,8 +628,8 @@ def get_content_view(viewmode, data, **metadata):
sys.exc_info()[2]
)
if not ret:
ret = get("Raw")(data, **metadata)
msg.append("Couldn't parse: falling back to Raw")
desc = "Couldn't parse: falling back to Raw"
_, content = get("Raw")(data, **metadata)
else:
msg.append(ret[0])
return " ".join(msg), safe_to_print(ret[1])
desc, content = ret
return desc, safe_to_print(content)

View File

@@ -290,10 +290,10 @@ class DumpMaster(flow.FlowMaster):
code = click.style(str(code), fg=code_color, bold=True, blink=(code == 418))
reason = click.style(strutils.bytes_to_escaped_str(flow.response.reason), fg=code_color, bold=True)
if flow.response.content is None:
if flow.response.raw_content is None:
size = "(content missing)"
else:
size = human.pretty_size(len(flow.response.content))
size = human.pretty_size(len(flow.response.raw_content))
size = click.style(size, bold=True)
arrows = click.style("<<", bold=True)

View File

@@ -194,10 +194,10 @@ class FBod(_Rex):
def __call__(self, f):
if f.request and f.request.content:
if self.re.search(f.request.get_decoded_content()):
if self.re.search(f.request.content):
return True
if f.response and f.response.content:
if self.re.search(f.response.get_decoded_content()):
if self.re.search(f.response.content):
return True
return False
@@ -208,7 +208,7 @@ class FBodRequest(_Rex):
def __call__(self, f):
if f.request and f.request.content:
if self.re.search(f.request.get_decoded_content()):
if self.re.search(f.request.content):
return True
@@ -218,7 +218,7 @@ class FBodResponse(_Rex):
def __call__(self, f):
if f.response and f.response.content:
if self.re.search(f.response.get_decoded_content()):
if self.re.search(f.response.content):
return True

View File

@@ -16,7 +16,6 @@ from mitmproxy.flow import modules
from mitmproxy.onboarding import app
from mitmproxy.protocol import http_replay
from mitmproxy.proxy.config import HostMatcher
from netlib import strutils
class FlowMaster(controller.Master):
@@ -348,13 +347,16 @@ class FlowMaster(controller.Master):
return "Can't replay live request."
if f.intercepted:
return "Can't replay while intercepting..."
if f.request.content is None:
if f.request.raw_content is None:
return "Can't replay request with missing content..."
if f.request:
f.backup()
f.request.is_replay = True
# TODO: We should be able to remove this.
if "Content-Length" in f.request.headers:
f.request.headers["Content-Length"] = str(len(f.request.content))
f.request.headers["Content-Length"] = str(len(f.request.raw_content))
f.response = None
f.error = None
self.process_new_request(f)

View File

@@ -157,7 +157,7 @@ class StreamLargeBodies(object):
expected_size = http1.expected_http_body_size(
flow.request, flow.response if not is_request else None
)
if not r.content and not (0 <= expected_size <= self.max_size):
if not r.raw_content and not (0 <= expected_size <= self.max_size):
# r.stream may already be a callable, which we want to preserve.
r.stream = r.stream or True
@@ -251,7 +251,7 @@ class ServerPlaybackState:
if p[0] not in self.ignore_payload_params
)
else:
key.append(str(r.content))
key.append(str(r.raw_content))
if not self.ignore_host:
key.append(r.host)

View File

@@ -1,9 +1,9 @@
from __future__ import absolute_import, print_function, division
import cgi
import warnings
from mitmproxy.models.flow import Flow
from netlib import encoding
from netlib import version
from netlib.http import Headers
from netlib.http import Request
@@ -20,10 +20,8 @@ class MessageMixin(object):
header.
Doesn't change the message iteself or its headers.
"""
ce = self.headers.get("content-encoding")
if not self.content or ce not in encoding.ENCODINGS:
return self.content
return encoding.decode(ce, self.content)
warnings.warn(".get_decoded_content() is deprecated, please use .content directly instead.", DeprecationWarning)
return self.content
class HTTPRequest(MessageMixin, Request):

View File

@@ -41,10 +41,10 @@ class _HttpTransmissionLayer(base.Layer):
yield "this is a generator" # pragma: no cover
def send_response(self, response):
if response.content is None:
if response.data.content is None:
raise netlib.exceptions.HttpException("Cannot assemble flow with missing content")
self.send_response_headers(response)
self.send_response_body(response, [response.content])
self.send_response_body(response, [response.data.content])
def send_response_headers(self, response):
raise NotImplementedError()

View File

@@ -272,7 +272,7 @@ class FlowContent(RequestHandler):
def get(self, flow_id, message):
message = getattr(self.flow, message)
if not message.content:
if not message.raw_content:
raise APIError(400, "No content.")
content_encoding = message.headers.get("Content-Encoding", None)
@@ -295,7 +295,7 @@ class FlowContent(RequestHandler):
self.set_header("Content-Type", "application/text")
self.set_header("X-Content-Type-Options", "nosniff")
self.set_header("X-Frame-Options", "DENY")
self.write(message.content)
self.write(message.raw_content)
class Events(RequestHandler):

View File

@@ -1,39 +1,62 @@
"""
Utility functions for decoding response bodies.
Utility functions for decoding response bodies.
"""
from __future__ import absolute_import
import codecs
from io import BytesIO
import gzip
import zlib
ENCODINGS = {"identity", "gzip", "deflate"}
from typing import Union # noqa
def decode(e, content):
if not isinstance(content, bytes):
return None
encoding_map = {
"identity": identity,
"gzip": decode_gzip,
"deflate": decode_deflate,
}
if e not in encoding_map:
return None
return encoding_map[e](content)
def decode(obj, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Decode the given input object with the named encoding.

    Encodings registered in ``custom_decode`` are handled by their
    registered function; every other name is handed to ``codecs.decode``.

    Args:
        obj: The value to decode.
        encoding: Name of the encoding to undo.
        errors: Error handling scheme forwarded to ``codecs.decode``.
            It is not passed to the functions in ``custom_decode``.

    Returns:
        The decoded value

    Raises:
        ValueError, if decoding fails.
    """
    try:
        # Look the decoder up separately from calling it, so that a KeyError
        # raised *inside* a custom decoder is reported as a decoding failure
        # instead of being mistaken for "no custom decoder registered".
        decoder = custom_decode.get(encoding)
        if decoder is not None:
            return decoder(obj)
        return codecs.decode(obj, encoding, errors)
    except Exception as e:
        # Only a short prefix of the input's repr goes into the message.
        raise ValueError("{} when decoding {} with {}".format(
            type(e).__name__,
            repr(obj)[:10],
            repr(encoding),
        ))
def encode(e, content):
if not isinstance(content, bytes):
return None
encoding_map = {
"identity": identity,
"gzip": encode_gzip,
"deflate": encode_deflate,
}
if e not in encoding_map:
return None
return encoding_map[e](content)
def encode(obj, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Encode the given input object with the named encoding.

    Encodings registered in ``custom_encode`` are handled by their
    registered function; every other name is handed to ``codecs.encode``.

    Args:
        obj: The value to encode.
        encoding: Name of the encoding to apply.
        errors: Error handling scheme forwarded to ``codecs.encode``.
            It is not passed to the functions in ``custom_encode``.

    Returns:
        The encoded value

    Raises:
        ValueError, if encoding fails.
    """
    try:
        # Look the encoder up separately from calling it, so that a KeyError
        # raised *inside* a custom encoder is reported as an encoding failure
        # instead of being mistaken for "no custom encoder registered".
        encoder = custom_encode.get(encoding)
        if encoder is not None:
            return encoder(obj)
        return codecs.encode(obj, encoding, errors)
    except Exception as e:
        # Only a short prefix of the input's repr goes into the message.
        raise ValueError("{} when encoding {} with {}".format(
            type(e).__name__,
            repr(obj)[:10],
            repr(encoding),
        ))
def identity(content):
@@ -46,10 +69,7 @@ def identity(content):
def decode_gzip(content):
gfile = gzip.GzipFile(fileobj=BytesIO(content))
try:
return gfile.read()
except (IOError, EOFError):
return None
return gfile.read()
def encode_gzip(content):
@@ -70,12 +90,9 @@ def decode_deflate(content):
http://bugs.python.org/issue5784
"""
try:
try:
return zlib.decompress(content)
except zlib.error:
return zlib.decompress(content, -15)
return zlib.decompress(content)
except zlib.error:
return None
return zlib.decompress(content, -15)
def encode_deflate(content):
@@ -84,4 +101,16 @@ def encode_deflate(content):
"""
return zlib.compress(content)
__all__ = ["ENCODINGS", "encode", "decode"]
custom_decode = {
"identity": identity,
"gzip": decode_gzip,
"deflate": decode_deflate,
}
custom_encode = {
"identity": identity,
"gzip": encode_gzip,
"deflate": encode_deflate,
}
__all__ = ["encode", "decode"]

View File

@@ -5,7 +5,7 @@ from netlib import exceptions
def assemble_request(request):
if request.content is None:
if request.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
head = assemble_request_head(request)
body = b"".join(assemble_body(request.data.headers, [request.data.content]))
@@ -19,7 +19,7 @@ def assemble_request_head(request):
def assemble_response(response):
if response.content is None:
if response.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
head = assemble_response_head(response)
body = b"".join(assemble_body(response.data.headers, [response.data.content]))

View File

@@ -52,7 +52,22 @@ class MessageData(basetypes.Serializable):
return cls(**state)
# Record that ties an encoded value to the encoding name and the decoded
# value it corresponds to. Message keeps instances of this class in its
# _content_cache and _text_cache attributes.
class CachedDecode(object):
# Instances carry exactly these three attributes.
__slots__ = ["encoded", "encoding", "decoded"]
# object: the encoded value (stored as .encoded)
# encoding: the encoding name the pair belongs to
# decoded: the decoded counterpart of `object`
# NOTE(review): the parameter name `object` shadows the builtin; it is part
# of the signature, so it is left unchanged here.
def __init__(self, object, encoding, decoded):
self.encoded = object
self.encoding = encoding
self.decoded = decoded
no_cached_decode = CachedDecode(None, None, None)
class Message(basetypes.Serializable):
def __init__(self):
self._content_cache = no_cached_decode # type: CachedDecode
self._text_cache = no_cached_decode # type: CachedDecode
def __eq__(self, other):
if isinstance(other, Message):
return self.data == other.data
@@ -90,19 +105,65 @@ class Message(basetypes.Serializable):
self.data.headers = h
@property
def content(self):
def raw_content(self):
# type: () -> bytes
"""
The raw (encoded) HTTP message body
See also: :py:attr:`text`
See also: :py:attr:`content`, :py:class:`text`
"""
return self.data.content
@content.setter
def content(self, content):
@raw_content.setter
def raw_content(self, content):
self.data.content = content
if isinstance(content, bytes):
self.headers["content-length"] = str(len(content))
@property
def content(self):
# type: () -> bytes
"""
The HTTP message body decoded with the content-encoding header (e.g. gzip)
See also: :py:class:`raw_content`, :py:attr:`text`
"""
ce = self.headers.get("content-encoding")
cached = (
self._content_cache.encoded == self.raw_content and
self._content_cache.encoding == ce
)
if not cached:
try:
if not ce:
raise ValueError()
decoded = encoding.decode(self.raw_content, ce)
except ValueError:
decoded = self.raw_content
self._content_cache = CachedDecode(self.raw_content, ce, decoded)
return self._content_cache.decoded
@content.setter
def content(self, value):
ce = self.headers.get("content-encoding")
cached = (
self._content_cache.decoded == value and
self._content_cache.encoding == ce
)
if not cached:
try:
if not ce:
raise ValueError()
encoded = encoding.encode(value, ce)
except ValueError:
# Do we have an unknown content-encoding?
# If so, we want to remove it.
if value and ce:
self.headers.pop("content-encoding", None)
ce = None
encoded = value
self._content_cache = CachedDecode(encoded, ce, value)
self.raw_content = self._content_cache.encoded
if isinstance(self.raw_content, bytes):
self.headers["content-length"] = str(len(self.raw_content))
@property
def http_version(self):
@@ -137,56 +198,81 @@ class Message(basetypes.Serializable):
def timestamp_end(self, timestamp_end):
self.data.timestamp_end = timestamp_end
# Return the "charset" parameter of the content-type header. Falls through
# (returning None implicitly) when parse_content_type() gives a falsy result.
def _get_content_type_charset(self):
# type: () -> Optional[str]
# A missing content-type header is parsed as the empty string.
ct = headers.parse_content_type(self.headers.get("content-type", ""))
if ct:
# presumably ct[2] is the dict of content-type parameters - confirm
# against headers.parse_content_type
return ct[2].get("charset")
@property
def text(self):
# type: () -> six.text_type
"""
The decoded HTTP message body.
Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive.
The HTTP message body decoded with both content-encoding header (e.g. gzip)
and content-type header charset.
.. note::
This is not implemented yet.
See also: :py:attr:`content`, :py:class:`decoded`
See also: :py:attr:`content`, :py:class:`raw_content`
"""
# This attribute should be called text, because that's what requests does.
raise NotImplementedError()
enc = self._get_content_type_charset()
# We may also want to check for HTML meta tags here at some point.
cached = (
self._text_cache.encoded == self.content and
self._text_cache.encoding == enc
)
if not cached:
try:
if not enc:
raise ValueError()
decoded = encoding.decode(self.content, enc)
except ValueError:
decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(self.content, enc, decoded)
return self._text_cache.decoded
@text.setter
def text(self, text):
raise NotImplementedError()
enc = self._get_content_type_charset()
cached = (
self._text_cache.decoded == text and
self._text_cache.encoding == enc
)
if not cached:
try:
if not enc:
raise ValueError()
encoded = encoding.encode(text, enc)
except ValueError:
# Do we have an unknown content-type charset?
# If so, we want to replace it with utf8.
if text and enc:
self.headers["content-type"] = re.sub(
"charset=[^;]+",
"charset=utf-8",
self.headers["content-type"]
)
encoded = text.encode("utf8", "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(encoded, enc, text)
self.content = self._text_cache.encoded
def decode(self):
"""
Decodes body based on the current Content-Encoding header, then
removes the header. If there is no Content-Encoding header, no
action is taken.
Returns:
True, if decoding succeeded.
False, otherwise.
Decodes body based on the current Content-Encoding header, then
removes the header. If there is no Content-Encoding header, no
action is taken.
"""
ce = self.headers.get("content-encoding")
data = encoding.decode(ce, self.content)
if data is None:
return False
self.content = data
self.raw_content = self.content
self.headers.pop("content-encoding", None)
return True
def encode(self, e):
"""
Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
Returns:
True, if decoding succeeded.
False, otherwise.
Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
"""
data = encoding.encode(e, self.content)
if data is None:
return False
self.content = data
self.decode() # remove the current encoding
self.headers["content-encoding"] = e
return True
self.content = self.raw_content
def replace(self, pattern, repl, flags=0):
"""
@@ -203,10 +289,9 @@ class Message(basetypes.Serializable):
repl = strutils.escaped_str_to_bytes(repl)
replacements = 0
if self.content:
with decoded(self):
self.content, replacements = re.subn(
pattern, repl, self.content, flags=flags
)
self.content, replacements = re.subn(
pattern, repl, self.content, flags=flags
)
replacements += self.headers.replace(pattern, repl, flags)
return replacements
@@ -225,29 +310,16 @@ class Message(basetypes.Serializable):
class decoded(object):
"""
A context manager that decodes a request or response, and then
re-encodes it with the same encoding after execution of the block.
Example:
.. code-block:: python
with decoded(request):
request.content = request.content.replace("foo", "bar")
Deprecated: You can now directly use :py:attr:`content`.
:py:attr:`raw_content` has the encoded content.
"""
def __init__(self, message):
self.message = message
ce = message.headers.get("content-encoding")
if ce in encoding.ENCODINGS:
self.ce = ce
else:
self.ce = None
warnings.warn("decoded() is deprecated, you can now directly use .content instead. "
".raw_content has the encoded content.", DeprecationWarning)
def __enter__(self):
if self.ce:
self.message.decode()
pass
def __exit__(self, type, value, tb):
if self.ce:
self.message.encode(self.ce)
pass

View File

@@ -5,7 +5,6 @@ import re
import six
from six.moves import urllib
from netlib import encoding
from netlib import multidict
from netlib import strutils
from netlib.http import multipart
@@ -44,6 +43,7 @@ class Request(message.Message):
An HTTP request.
"""
def __init__(self, *args, **kwargs):
super(Request, self).__init__()
self.data = RequestData(*args, **kwargs)
def __repr__(self):
@@ -327,7 +327,7 @@ class Request(message.Message):
self.headers["accept-encoding"] = (
', '.join(
e
for e in encoding.ENCODINGS
for e in {"gzip", "identity", "deflate"}
if e in accept_encoding
)
)

View File

@@ -30,13 +30,14 @@ class Response(message.Message):
An HTTP response.
"""
def __init__(self, *args, **kwargs):
super(Response, self).__init__()
self.data = ResponseData(*args, **kwargs)
def __repr__(self):
if self.content:
if self.raw_content:
details = "{}, {}".format(
self.headers.get("content-type", "unknown content type"),
human.pretty_size(len(self.content))
human.pretty_size(len(self.raw_content))
)
else:
details = "no content"

View File

@@ -209,28 +209,6 @@ Larry
headers=Headers()
)
r = cv.get_content_view(
cv.get("Auto"),
encoding.encode('gzip', b"[1, 2, 3]"),
headers=Headers(
content_type="application/json",
content_encoding="gzip"
)
)
assert "decoded gzip" in r[0]
assert "JSON" in r[0]
r = cv.get_content_view(
cv.get("XML"),
encoding.encode('gzip', b"[1, 2, 3]"),
headers=Headers(
content_type="application/json",
content_encoding="gzip"
)
)
assert "decoded gzip" in r[0]
assert "Raw" in r[0]
def test_add_cv(self):
class TestContentView(cv.View):
name = "test"

View File

@@ -73,9 +73,9 @@ def test_add_header():
def test_custom_contentviews():
with example("custom_contentviews.py") as ex:
pig = ex.ctx.contentview
_, fmt = pig("<html>test!</html>")
assert any('esttay!' in val[0][1] for val in fmt)
assert not pig("gobbledygook")
_, fmt = pig(b"<html>test!</html>")
assert any(b'esttay!' in val[0][1] for val in fmt)
assert not pig(b"gobbledygook")
def test_iframe_injector():
@@ -103,7 +103,7 @@ def test_modify_form():
def test_modify_querystring():
flow = tutils.tflow(req=netutils.treq(path="/search?q=term"))
flow = tutils.tflow(req=netutils.treq(path=b"/search?q=term"))
with example("modify_querystring.py") as ex:
ex.run("request", flow)
assert flow.request.query["mitmproxy"] == "rocks"
@@ -126,7 +126,7 @@ def test_modify_response_body():
def test_redirect_requests():
flow = tutils.tflow(req=netutils.treq(host="example.org"))
flow = tutils.tflow(req=netutils.treq(host=b"example.org"))
with example("redirect_requests.py") as ex:
ex.run("request", flow)
assert flow.request.host == "mitmproxy.org"

View File

@@ -518,13 +518,13 @@ class TestFlow(object):
f.replace("foo", "bar")
assert f.request.content != "abarb"
assert f.request.raw_content != "abarb"
f.request.decode()
assert f.request.content == "abarb"
assert f.request.raw_content == "abarb"
assert f.response.content != "abarb"
assert f.response.raw_content != "abarb"
f.response.decode()
assert f.response.content == "abarb"
assert f.response.raw_content == "abarb"
class TestState:
@@ -1102,16 +1102,6 @@ class TestRequest:
r.constrain_encoding()
assert "oink" not in r.headers["accept-encoding"]
def test_get_decoded_content(self):
r = HTTPRequest.wrap(netlib.tutils.treq())
r.content = None
r.headers["content-encoding"] = "identity"
assert r.get_decoded_content() is None
r.content = "falafel"
r.encode("gzip")
assert r.get_decoded_content() == "falafel"
def test_get_content_type(self):
resp = HTTPResponse.wrap(netlib.tutils.tresp())
resp.headers = Headers(content_type="text/plain")

View File

@@ -120,7 +120,7 @@ class _Http2TestBase(object):
client.wfile.flush()
# read CONNECT response
while client.rfile.readline() != "\r\n":
while client.rfile.readline() != b"\r\n":
pass
client.convert_to_ssl(alpn_protos=[b'h2'])
@@ -197,7 +197,7 @@ class TestSimple(_Http2TestBase, _Http2ServerBase):
(':path', '/'),
('ClIeNt-FoO', 'client-bar-1'),
('ClIeNt-FoO', 'client-bar-2'),
], body='my request body echoed back to me')
], body=b'my request body echoed back to me')
done = False
while not done:
@@ -269,7 +269,7 @@ class TestWithBodies(_Http2TestBase, _Http2ServerBase):
(':scheme', 'https'),
(':path', '/'),
],
body='foobar with request body',
body=b'foobar with request body',
)
done = False

View File

@@ -11,7 +11,6 @@ import pathod.pathoc
from mitmproxy import flow, controller
from mitmproxy.cmdline import APP_HOST, APP_PORT
from netlib import strutils
testapp = flask.Flask(__name__)

View File

@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
from netlib.http import decoded
import six
from netlib.tutils import tresp
@@ -76,6 +77,9 @@ class TestMessage(object):
resp.content = b""
assert resp.data.content == b""
assert resp.headers["content-length"] == "0"
resp.raw_content = b"bar"
assert resp.data.content == b"bar"
assert resp.headers["content-length"] == "0"
def test_content_basic(self):
_test_passthrough_attr(tresp(), "content")
@@ -93,61 +97,108 @@ class TestMessage(object):
_test_decoded_attr(tresp(), "http_version")
class TestDecodedDecorator(object):
class TestMessageContentEncoding(object):
def test_simple(self):
r = tresp()
assert r.content == b"message"
assert r.raw_content == b"message"
assert "content-encoding" not in r.headers
assert r.encode("gzip")
r.encode("gzip")
assert r.headers["content-encoding"]
assert r.content != b"message"
with decoded(r):
assert "content-encoding" not in r.headers
assert r.content == b"message"
assert r.headers["content-encoding"]
assert r.content != b"message"
assert r.raw_content != b"message"
assert r.content == b"message"
assert r.raw_content != b"message"
def test_modify(self):
r = tresp()
assert "content-encoding" not in r.headers
assert r.encode("gzip")
r.encode("gzip")
with decoded(r):
r.content = b"foo"
assert r.content != b"foo"
r.content = b"foo"
assert r.raw_content != b"foo"
r.decode()
assert r.content == b"foo"
assert r.raw_content == b"foo"
def test_unknown_ce(self):
r = tresp()
r.headers["content-encoding"] = "zopfli"
r.content = b"foo"
with decoded(r):
assert r.headers["content-encoding"]
assert r.content == b"foo"
assert r.headers["content-encoding"]
r.raw_content = b"foo"
assert r.content == b"foo"
assert r.headers["content-encoding"]
def test_cannot_decode(self):
r = tresp()
assert r.encode("gzip")
r.content = b"foo"
with decoded(r):
assert r.headers["content-encoding"]
assert r.content == b"foo"
assert r.headers["content-encoding"]
assert r.content != b"foo"
r.decode()
r.encode("gzip")
r.raw_content = b"foo"
assert r.content == b"foo"
assert r.headers["content-encoding"]
r.decode()
assert r.raw_content == b"foo"
assert "content-encoding" not in r.headers
def test_cannot_encode(self):
r = tresp()
assert r.encode("gzip")
with decoded(r):
r.content = None
r.encode("gzip")
r.content = None
assert r.headers["content-encoding"]
assert r.raw_content is None
r.headers["content-encoding"] = "zopfli"
r.content = b"foo"
assert "content-encoding" not in r.headers
assert r.content is None
assert r.raw_content == b"foo"
class TestMessageText(object):
    """Tests for ``Message.text``: charset handling on top of content-encoding."""

    def test_simple(self):
        """Reading .text is unaffected by gzip and follows the declared charset."""
        r = tresp(content=b'\xc3\xbc')
        assert r.raw_content == b"\xc3\xbc"
        assert r.content == b"\xc3\xbc"
        assert r.text == u"ü"

        r.encode("gzip")
        assert r.text == u"ü"
        r.decode()
        assert r.text == u"ü"

        r.headers["content-type"] = "text/html; charset=latin1"
        assert r.content == b"\xc3\xbc"
        # Under latin1 every byte is one character, so the two bytes decode
        # to two characters (U+00C3, U+00BC) rather than to a single u-umlaut.
        assert r.text == u"\u00c3\u00bc"

    def test_modify(self):
        """Assigning .text encodes with the declared charset and updates the length."""
        r = tresp()

        r.text = u"ü"
        assert r.raw_content == b"\xc3\xbc"

        r.headers["content-type"] = "text/html; charset=latin1"
        r.text = u"ü"
        assert r.raw_content == b"\xfc"
        assert r.headers["content-length"] == "1"

    def test_unknown_ce(self):
        """An unknown charset falls back to the utf8 decode."""
        r = tresp()
        r.headers["content-type"] = "text/html; charset=wtf"
        r.raw_content = b"foo"
        assert r.text == u"foo"

    def test_cannot_decode(self):
        """Undecodable bytes are replaced (PY2) or surrogate-escaped (PY3)."""
        r = tresp()
        r.raw_content = b"\xFF"
        # Parenthesised on purpose: without the parentheses the conditional
        # expression binds looser than ==, and on Python 3 the assert would
        # only check that a non-empty string literal is truthy.
        assert r.text == (u'\ufffd' if six.PY2 else u'\udcff')

    def test_cannot_encode(self):
        """Text that does not fit the declared charset switches it to utf-8."""
        r = tresp()
        r.content = None
        assert "content-type" not in r.headers
        assert r.raw_content is None

        r.headers["content-type"] = "text/html; charset=latin1"
        # U+2603 (snowman) cannot be represented in latin1; its UTF-8 form
        # is the three bytes asserted below.
        r.text = u"\u2603"
        assert r.headers["content-type"] == "text/html; charset=utf-8"
        assert r.raw_content == b'\xe2\x98\x83'

        r.headers["content-type"] = "text/html; charset=latin1"
        r.text = u'\udcff'
        assert r.headers["content-type"] == "text/html; charset=utf-8"
        # Parenthesised for the same precedence reason as in test_cannot_decode.
        assert r.raw_content == (b'\xed\xb3\xbf' if six.PY2 else b"\xFF")

View File

@@ -1,37 +1,39 @@
from netlib import encoding
from netlib import encoding, tutils
def test_identity():
assert b"string" == encoding.decode("identity", b"string")
assert b"string" == encoding.encode("identity", b"string")
assert not encoding.encode("nonexistent", b"string")
assert not encoding.decode("nonexistent encoding", b"string")
assert b"string" == encoding.decode(b"string", "identity")
assert b"string" == encoding.encode(b"string", "identity")
with tutils.raises(ValueError):
encoding.encode(b"string", "nonexistent encoding")
def test_gzip():
assert b"string" == encoding.decode(
"gzip",
encoding.encode(
"gzip",
b"string"
)
b"string",
"gzip"
),
"gzip"
)
assert encoding.decode("gzip", b"bogus") is None
with tutils.raises(ValueError):
encoding.decode(b"bogus", "gzip")
def test_deflate():
assert b"string" == encoding.decode(
"deflate",
encoding.encode(
"deflate",
b"string"
)
b"string",
"deflate"
),
"deflate"
)
assert b"string" == encoding.decode(
"deflate",
encoding.encode(
"deflate",
b"string"
)[2:-4]
b"string",
"deflate"
)[2:-4],
"deflate"
)
assert encoding.decode("deflate", b"bogus") is None
with tutils.raises(ValueError):
encoding.decode(b"bogus", "deflate")