mirror of
https://github.com/zhigang1992/mitmproxy.git
synced 2026-01-12 22:48:54 +08:00
message.content -> .raw_content, implement .text
This PR improves our handling of HTTP message body encodings:
- The unaltered message body is now accessible as `.raw_content`.
- The "content-encoding"-decoded content (i.e. with gzip removed) is now `.content`, as this is what we want in 99% of the cases.
- `.text` now provides the "content-encoding"-decoded and then "content-type charset"-decoded message body.
- The decoded values for `.content` and `.text` are cached, so that repeated access and `x.text = x.text` are cheap.
- The `decoded()` context manager is now deprecated, as we can now just use `.content`. Similarly, `HTTPMessage.get_decoded_content()` is deprecated.
This commit is contained in:
@@ -56,8 +56,6 @@ Datastructures
|
||||
:special-members:
|
||||
:no-undoc-members:
|
||||
|
||||
.. autoclass:: decoded
|
||||
|
||||
.. automodule:: netlib.multidict
|
||||
|
||||
.. autoclass:: MultiDictView
|
||||
|
||||
@@ -7,7 +7,6 @@ import urwid.util
|
||||
|
||||
import netlib
|
||||
from mitmproxy import flow
|
||||
from mitmproxy import models
|
||||
from mitmproxy import utils
|
||||
from mitmproxy.console import signals
|
||||
from netlib import human
|
||||
@@ -259,26 +258,24 @@ def copy_flow_format_data(part, scope, flow):
|
||||
if scope in ("q", "a"):
|
||||
if flow.request.content is None:
|
||||
return None, "Request content is missing"
|
||||
with models.decoded(flow.request):
|
||||
if part == "h":
|
||||
data += netlib.http.http1.assemble_request(flow.request)
|
||||
elif part == "c":
|
||||
data += flow.request.content
|
||||
else:
|
||||
raise ValueError("Unknown part: {}".format(part))
|
||||
if part == "h":
|
||||
data += netlib.http.http1.assemble_request(flow.request)
|
||||
elif part == "c":
|
||||
data += flow.request.content
|
||||
else:
|
||||
raise ValueError("Unknown part: {}".format(part))
|
||||
if scope == "a" and flow.request.content and flow.response:
|
||||
# Add padding between request and response
|
||||
data += "\r\n" * 2
|
||||
if scope in ("s", "a") and flow.response:
|
||||
if flow.response.content is None:
|
||||
return None, "Response content is missing"
|
||||
with models.decoded(flow.response):
|
||||
if part == "h":
|
||||
data += netlib.http.http1.assemble_response(flow.response)
|
||||
elif part == "c":
|
||||
data += flow.response.content
|
||||
else:
|
||||
raise ValueError("Unknown part: {}".format(part))
|
||||
if part == "h":
|
||||
data += netlib.http.http1.assemble_response(flow.response)
|
||||
elif part == "c":
|
||||
data += flow.response.content
|
||||
else:
|
||||
raise ValueError("Unknown part: {}".format(part))
|
||||
return data, False
|
||||
|
||||
|
||||
@@ -388,12 +385,12 @@ def ask_save_body(part, master, state, flow):
|
||||
elif part == "q" and request_has_content:
|
||||
ask_save_path(
|
||||
"Save request content",
|
||||
flow.request.get_decoded_content()
|
||||
flow.request.content
|
||||
)
|
||||
elif part == "s" and response_has_content:
|
||||
ask_save_path(
|
||||
"Save response content",
|
||||
flow.response.get_decoded_content()
|
||||
flow.response.content
|
||||
)
|
||||
else:
|
||||
signals.status_message.send(message="No content to save.")
|
||||
@@ -418,9 +415,9 @@ def format_flow(f, focus, extended=False, hostheader=False, marked=False):
|
||||
marked = marked,
|
||||
)
|
||||
if f.response:
|
||||
if f.response.content:
|
||||
contentdesc = human.pretty_size(len(f.response.content))
|
||||
elif f.response.content is None:
|
||||
if f.response.raw_content:
|
||||
contentdesc = human.pretty_size(len(f.response.raw_content))
|
||||
elif f.response.raw_content is None:
|
||||
contentdesc = "[content missing]"
|
||||
else:
|
||||
contentdesc = "[no content]"
|
||||
|
||||
@@ -176,7 +176,7 @@ class FlowView(tabs.Tabs):
|
||||
self.show()
|
||||
|
||||
def content_view(self, viewmode, message):
|
||||
if message.content is None:
|
||||
if message.raw_content is None:
|
||||
msg, body = "", [urwid.Text([("error", "[content missing]")])]
|
||||
return msg, body
|
||||
else:
|
||||
@@ -214,6 +214,12 @@ class FlowView(tabs.Tabs):
|
||||
)
|
||||
description = description.replace("Raw", "Couldn't parse: falling back to Raw")
|
||||
|
||||
if message.content != message.raw_content:
|
||||
description = "[decoded {enc}] {desc}".format(
|
||||
enc=message.headers.get("content-encoding"),
|
||||
desc=description
|
||||
)
|
||||
|
||||
# Give hint that you have to tab for the response.
|
||||
if description == "No content" and isinstance(message, models.HTTPRequest):
|
||||
description = "No request content (press tab to view response)"
|
||||
@@ -407,15 +413,14 @@ class FlowView(tabs.Tabs):
|
||||
)
|
||||
)
|
||||
if part == "r":
|
||||
with models.decoded(message):
|
||||
# Fix an issue caused by some editors when editing a
|
||||
# request/response body. Many editors make it hard to save a
|
||||
# file without a terminating newline on the last line. When
|
||||
# editing message bodies, this can cause problems. For now, I
|
||||
# just strip the newlines off the end of the body when we return
|
||||
# from an editor.
|
||||
c = self.master.spawn_editor(message.content or "")
|
||||
message.content = c.rstrip("\n")
|
||||
# Fix an issue caused by some editors when editing a
|
||||
# request/response body. Many editors make it hard to save a
|
||||
# file without a terminating newline on the last line. When
|
||||
# editing message bodies, this can cause problems. For now, I
|
||||
# just strip the newlines off the end of the body when we return
|
||||
# from an editor.
|
||||
c = self.master.spawn_editor(message.content or b"")
|
||||
message.content = c.rstrip(b"\n")
|
||||
elif part == "f":
|
||||
if not message.urlencoded_form and message.content:
|
||||
signals.status_prompt_onekey.send(
|
||||
@@ -512,14 +517,10 @@ class FlowView(tabs.Tabs):
|
||||
signals.flow_change.send(self, flow = self.flow)
|
||||
|
||||
def delete_body(self, t):
|
||||
if t == "m":
|
||||
val = None
|
||||
else:
|
||||
val = None
|
||||
if self.tab_offset == TAB_REQ:
|
||||
self.flow.request.content = val
|
||||
self.flow.request.content = None
|
||||
else:
|
||||
self.flow.response.content = val
|
||||
self.flow.response.content = None
|
||||
signals.flow_change.send(self, flow = self.flow)
|
||||
|
||||
def keypress(self, size, key):
|
||||
|
||||
@@ -618,15 +618,6 @@ def get_content_view(viewmode, data, **metadata):
|
||||
Raises:
|
||||
ContentViewException, if the content view threw an error.
|
||||
"""
|
||||
msg = []
|
||||
|
||||
headers = metadata.get("headers", {})
|
||||
enc = headers.get("content-encoding")
|
||||
if enc and enc != "identity":
|
||||
decoded = encoding.decode(enc, data)
|
||||
if decoded:
|
||||
data = decoded
|
||||
msg.append("[decoded %s]" % enc)
|
||||
try:
|
||||
ret = viewmode(data, **metadata)
|
||||
# Third-party viewers can fail in unexpected ways...
|
||||
@@ -637,8 +628,8 @@ def get_content_view(viewmode, data, **metadata):
|
||||
sys.exc_info()[2]
|
||||
)
|
||||
if not ret:
|
||||
ret = get("Raw")(data, **metadata)
|
||||
msg.append("Couldn't parse: falling back to Raw")
|
||||
desc = "Couldn't parse: falling back to Raw"
|
||||
_, content = get("Raw")(data, **metadata)
|
||||
else:
|
||||
msg.append(ret[0])
|
||||
return " ".join(msg), safe_to_print(ret[1])
|
||||
desc, content = ret
|
||||
return desc, safe_to_print(content)
|
||||
|
||||
@@ -290,10 +290,10 @@ class DumpMaster(flow.FlowMaster):
|
||||
code = click.style(str(code), fg=code_color, bold=True, blink=(code == 418))
|
||||
reason = click.style(strutils.bytes_to_escaped_str(flow.response.reason), fg=code_color, bold=True)
|
||||
|
||||
if flow.response.content is None:
|
||||
if flow.response.raw_content is None:
|
||||
size = "(content missing)"
|
||||
else:
|
||||
size = human.pretty_size(len(flow.response.content))
|
||||
size = human.pretty_size(len(flow.response.raw_content))
|
||||
size = click.style(size, bold=True)
|
||||
|
||||
arrows = click.style("<<", bold=True)
|
||||
|
||||
@@ -194,10 +194,10 @@ class FBod(_Rex):
|
||||
|
||||
def __call__(self, f):
|
||||
if f.request and f.request.content:
|
||||
if self.re.search(f.request.get_decoded_content()):
|
||||
if self.re.search(f.request.content):
|
||||
return True
|
||||
if f.response and f.response.content:
|
||||
if self.re.search(f.response.get_decoded_content()):
|
||||
if self.re.search(f.response.content):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -208,7 +208,7 @@ class FBodRequest(_Rex):
|
||||
|
||||
def __call__(self, f):
|
||||
if f.request and f.request.content:
|
||||
if self.re.search(f.request.get_decoded_content()):
|
||||
if self.re.search(f.request.content):
|
||||
return True
|
||||
|
||||
|
||||
@@ -218,7 +218,7 @@ class FBodResponse(_Rex):
|
||||
|
||||
def __call__(self, f):
|
||||
if f.response and f.response.content:
|
||||
if self.re.search(f.response.get_decoded_content()):
|
||||
if self.re.search(f.response.content):
|
||||
return True
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from mitmproxy.flow import modules
|
||||
from mitmproxy.onboarding import app
|
||||
from mitmproxy.protocol import http_replay
|
||||
from mitmproxy.proxy.config import HostMatcher
|
||||
from netlib import strutils
|
||||
|
||||
|
||||
class FlowMaster(controller.Master):
|
||||
@@ -348,13 +347,16 @@ class FlowMaster(controller.Master):
|
||||
return "Can't replay live request."
|
||||
if f.intercepted:
|
||||
return "Can't replay while intercepting..."
|
||||
if f.request.content is None:
|
||||
if f.request.raw_content is None:
|
||||
return "Can't replay request with missing content..."
|
||||
if f.request:
|
||||
f.backup()
|
||||
f.request.is_replay = True
|
||||
|
||||
# TODO: We should be able to remove this.
|
||||
if "Content-Length" in f.request.headers:
|
||||
f.request.headers["Content-Length"] = str(len(f.request.content))
|
||||
f.request.headers["Content-Length"] = str(len(f.request.raw_content))
|
||||
|
||||
f.response = None
|
||||
f.error = None
|
||||
self.process_new_request(f)
|
||||
|
||||
@@ -157,7 +157,7 @@ class StreamLargeBodies(object):
|
||||
expected_size = http1.expected_http_body_size(
|
||||
flow.request, flow.response if not is_request else None
|
||||
)
|
||||
if not r.content and not (0 <= expected_size <= self.max_size):
|
||||
if not r.raw_content and not (0 <= expected_size <= self.max_size):
|
||||
# r.stream may already be a callable, which we want to preserve.
|
||||
r.stream = r.stream or True
|
||||
|
||||
@@ -251,7 +251,7 @@ class ServerPlaybackState:
|
||||
if p[0] not in self.ignore_payload_params
|
||||
)
|
||||
else:
|
||||
key.append(str(r.content))
|
||||
key.append(str(r.raw_content))
|
||||
|
||||
if not self.ignore_host:
|
||||
key.append(r.host)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from __future__ import absolute_import, print_function, division
|
||||
|
||||
import cgi
|
||||
import warnings
|
||||
|
||||
from mitmproxy.models.flow import Flow
|
||||
from netlib import encoding
|
||||
from netlib import version
|
||||
from netlib.http import Headers
|
||||
from netlib.http import Request
|
||||
@@ -20,10 +20,8 @@ class MessageMixin(object):
|
||||
header.
|
||||
Doesn't change the message itself or its headers.
|
||||
"""
|
||||
ce = self.headers.get("content-encoding")
|
||||
if not self.content or ce not in encoding.ENCODINGS:
|
||||
return self.content
|
||||
return encoding.decode(ce, self.content)
|
||||
warnings.warn(".get_decoded_content() is deprecated, please use .content directly instead.", DeprecationWarning)
|
||||
return self.content
|
||||
|
||||
|
||||
class HTTPRequest(MessageMixin, Request):
|
||||
|
||||
@@ -41,10 +41,10 @@ class _HttpTransmissionLayer(base.Layer):
|
||||
yield "this is a generator" # pragma: no cover
|
||||
|
||||
def send_response(self, response):
|
||||
if response.content is None:
|
||||
if response.data.content is None:
|
||||
raise netlib.exceptions.HttpException("Cannot assemble flow with missing content")
|
||||
self.send_response_headers(response)
|
||||
self.send_response_body(response, [response.content])
|
||||
self.send_response_body(response, [response.data.content])
|
||||
|
||||
def send_response_headers(self, response):
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -272,7 +272,7 @@ class FlowContent(RequestHandler):
|
||||
def get(self, flow_id, message):
|
||||
message = getattr(self.flow, message)
|
||||
|
||||
if not message.content:
|
||||
if not message.raw_content:
|
||||
raise APIError(400, "No content.")
|
||||
|
||||
content_encoding = message.headers.get("Content-Encoding", None)
|
||||
@@ -295,7 +295,7 @@ class FlowContent(RequestHandler):
|
||||
self.set_header("Content-Type", "application/text")
|
||||
self.set_header("X-Content-Type-Options", "nosniff")
|
||||
self.set_header("X-Frame-Options", "DENY")
|
||||
self.write(message.content)
|
||||
self.write(message.raw_content)
|
||||
|
||||
|
||||
class Events(RequestHandler):
|
||||
|
||||
@@ -1,39 +1,62 @@
|
||||
"""
|
||||
Utility functions for decoding response bodies.
|
||||
Utility functions for decoding response bodies.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import codecs
|
||||
from io import BytesIO
|
||||
import gzip
|
||||
import zlib
|
||||
|
||||
|
||||
ENCODINGS = {"identity", "gzip", "deflate"}
|
||||
from typing import Union # noqa
|
||||
|
||||
|
||||
def decode(e, content):
|
||||
if not isinstance(content, bytes):
|
||||
return None
|
||||
encoding_map = {
|
||||
"identity": identity,
|
||||
"gzip": decode_gzip,
|
||||
"deflate": decode_deflate,
|
||||
}
|
||||
if e not in encoding_map:
|
||||
return None
|
||||
return encoding_map[e](content)
|
||||
def decode(obj, encoding, errors='strict'):
|
||||
# type: (Union[str, bytes], str) -> Union[str, bytes]
|
||||
"""
|
||||
Decode the given input object
|
||||
|
||||
Returns:
|
||||
The decoded value
|
||||
|
||||
Raises:
|
||||
ValueError, if decoding fails.
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
return custom_decode[encoding](obj)
|
||||
except KeyError:
|
||||
return codecs.decode(obj, encoding, errors)
|
||||
except Exception as e:
|
||||
raise ValueError("{} when decoding {} with {}".format(
|
||||
type(e).__name__,
|
||||
repr(obj)[:10],
|
||||
repr(encoding),
|
||||
))
|
||||
|
||||
|
||||
def encode(e, content):
|
||||
if not isinstance(content, bytes):
|
||||
return None
|
||||
encoding_map = {
|
||||
"identity": identity,
|
||||
"gzip": encode_gzip,
|
||||
"deflate": encode_deflate,
|
||||
}
|
||||
if e not in encoding_map:
|
||||
return None
|
||||
return encoding_map[e](content)
|
||||
def encode(obj, encoding, errors='strict'):
|
||||
# type: (Union[str, bytes], str) -> Union[str, bytes]
|
||||
"""
|
||||
Encode the given input object
|
||||
|
||||
Returns:
|
||||
The encoded value
|
||||
|
||||
Raises:
|
||||
ValueError, if encoding fails.
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
return custom_encode[encoding](obj)
|
||||
except KeyError:
|
||||
return codecs.encode(obj, encoding, errors)
|
||||
except Exception as e:
|
||||
raise ValueError("{} when encoding {} with {}".format(
|
||||
type(e).__name__,
|
||||
repr(obj)[:10],
|
||||
repr(encoding),
|
||||
))
|
||||
|
||||
|
||||
def identity(content):
|
||||
@@ -46,10 +69,7 @@ def identity(content):
|
||||
|
||||
def decode_gzip(content):
|
||||
gfile = gzip.GzipFile(fileobj=BytesIO(content))
|
||||
try:
|
||||
return gfile.read()
|
||||
except (IOError, EOFError):
|
||||
return None
|
||||
return gfile.read()
|
||||
|
||||
|
||||
def encode_gzip(content):
|
||||
@@ -70,12 +90,9 @@ def decode_deflate(content):
|
||||
http://bugs.python.org/issue5784
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
return zlib.decompress(content)
|
||||
except zlib.error:
|
||||
return zlib.decompress(content, -15)
|
||||
return zlib.decompress(content)
|
||||
except zlib.error:
|
||||
return None
|
||||
return zlib.decompress(content, -15)
|
||||
|
||||
|
||||
def encode_deflate(content):
|
||||
@@ -84,4 +101,16 @@ def encode_deflate(content):
|
||||
"""
|
||||
return zlib.compress(content)
|
||||
|
||||
__all__ = ["ENCODINGS", "encode", "decode"]
|
||||
|
||||
custom_decode = {
|
||||
"identity": identity,
|
||||
"gzip": decode_gzip,
|
||||
"deflate": decode_deflate,
|
||||
}
|
||||
custom_encode = {
|
||||
"identity": identity,
|
||||
"gzip": encode_gzip,
|
||||
"deflate": encode_deflate,
|
||||
}
|
||||
|
||||
__all__ = ["encode", "decode"]
|
||||
|
||||
@@ -5,7 +5,7 @@ from netlib import exceptions
|
||||
|
||||
|
||||
def assemble_request(request):
|
||||
if request.content is None:
|
||||
if request.data.content is None:
|
||||
raise exceptions.HttpException("Cannot assemble flow with missing content")
|
||||
head = assemble_request_head(request)
|
||||
body = b"".join(assemble_body(request.data.headers, [request.data.content]))
|
||||
@@ -19,7 +19,7 @@ def assemble_request_head(request):
|
||||
|
||||
|
||||
def assemble_response(response):
|
||||
if response.content is None:
|
||||
if response.data.content is None:
|
||||
raise exceptions.HttpException("Cannot assemble flow with missing content")
|
||||
head = assemble_response_head(response)
|
||||
body = b"".join(assemble_body(response.data.headers, [response.data.content]))
|
||||
|
||||
@@ -52,7 +52,22 @@ class MessageData(basetypes.Serializable):
|
||||
return cls(**state)
|
||||
|
||||
|
||||
class CachedDecode(object):
|
||||
__slots__ = ["encoded", "encoding", "decoded"]
|
||||
|
||||
def __init__(self, object, encoding, decoded):
|
||||
self.encoded = object
|
||||
self.encoding = encoding
|
||||
self.decoded = decoded
|
||||
|
||||
no_cached_decode = CachedDecode(None, None, None)
|
||||
|
||||
|
||||
class Message(basetypes.Serializable):
|
||||
def __init__(self):
|
||||
self._content_cache = no_cached_decode # type: CachedDecode
|
||||
self._text_cache = no_cached_decode # type: CachedDecode
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Message):
|
||||
return self.data == other.data
|
||||
@@ -90,19 +105,65 @@ class Message(basetypes.Serializable):
|
||||
self.data.headers = h
|
||||
|
||||
@property
|
||||
def content(self):
|
||||
def raw_content(self):
|
||||
# type: () -> bytes
|
||||
"""
|
||||
The raw (encoded) HTTP message body
|
||||
|
||||
See also: :py:attr:`text`
|
||||
See also: :py:attr:`content`, :py:attr:`text`
|
||||
"""
|
||||
return self.data.content
|
||||
|
||||
@content.setter
|
||||
def content(self, content):
|
||||
@raw_content.setter
|
||||
def raw_content(self, content):
|
||||
self.data.content = content
|
||||
if isinstance(content, bytes):
|
||||
self.headers["content-length"] = str(len(content))
|
||||
|
||||
@property
|
||||
def content(self):
|
||||
# type: () -> bytes
|
||||
"""
|
||||
The HTTP message body decoded with the content-encoding header (e.g. gzip)
|
||||
|
||||
See also: :py:attr:`raw_content`, :py:attr:`text`
|
||||
"""
|
||||
ce = self.headers.get("content-encoding")
|
||||
cached = (
|
||||
self._content_cache.encoded == self.raw_content and
|
||||
self._content_cache.encoding == ce
|
||||
)
|
||||
if not cached:
|
||||
try:
|
||||
if not ce:
|
||||
raise ValueError()
|
||||
decoded = encoding.decode(self.raw_content, ce)
|
||||
except ValueError:
|
||||
decoded = self.raw_content
|
||||
self._content_cache = CachedDecode(self.raw_content, ce, decoded)
|
||||
return self._content_cache.decoded
|
||||
|
||||
@content.setter
|
||||
def content(self, value):
|
||||
ce = self.headers.get("content-encoding")
|
||||
cached = (
|
||||
self._content_cache.decoded == value and
|
||||
self._content_cache.encoding == ce
|
||||
)
|
||||
if not cached:
|
||||
try:
|
||||
if not ce:
|
||||
raise ValueError()
|
||||
encoded = encoding.encode(value, ce)
|
||||
except ValueError:
|
||||
# Do we have an unknown content-encoding?
|
||||
# If so, we want to remove it.
|
||||
if value and ce:
|
||||
self.headers.pop("content-encoding", None)
|
||||
ce = None
|
||||
encoded = value
|
||||
self._content_cache = CachedDecode(encoded, ce, value)
|
||||
self.raw_content = self._content_cache.encoded
|
||||
if isinstance(self.raw_content, bytes):
|
||||
self.headers["content-length"] = str(len(self.raw_content))
|
||||
|
||||
@property
|
||||
def http_version(self):
|
||||
@@ -137,56 +198,81 @@ class Message(basetypes.Serializable):
|
||||
def timestamp_end(self, timestamp_end):
|
||||
self.data.timestamp_end = timestamp_end
|
||||
|
||||
def _get_content_type_charset(self):
|
||||
# type: () -> Optional[str]
|
||||
ct = headers.parse_content_type(self.headers.get("content-type", ""))
|
||||
if ct:
|
||||
return ct[2].get("charset")
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
# type: () -> six.text_type
|
||||
"""
|
||||
The decoded HTTP message body.
|
||||
Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive.
|
||||
The HTTP message body decoded with both content-encoding header (e.g. gzip)
|
||||
and content-type header charset.
|
||||
|
||||
.. note::
|
||||
This is not implemented yet.
|
||||
|
||||
See also: :py:attr:`content`, :py:class:`decoded`
|
||||
See also: :py:attr:`content`, :py:attr:`raw_content`
|
||||
"""
|
||||
# This attribute should be called text, because that's what requests does.
|
||||
raise NotImplementedError()
|
||||
enc = self._get_content_type_charset()
|
||||
|
||||
# We may also want to check for HTML meta tags here at some point.
|
||||
|
||||
cached = (
|
||||
self._text_cache.encoded == self.content and
|
||||
self._text_cache.encoding == enc
|
||||
)
|
||||
if not cached:
|
||||
try:
|
||||
if not enc:
|
||||
raise ValueError()
|
||||
decoded = encoding.decode(self.content, enc)
|
||||
except ValueError:
|
||||
decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
|
||||
self._text_cache = CachedDecode(self.content, enc, decoded)
|
||||
return self._text_cache.decoded
|
||||
|
||||
@text.setter
|
||||
def text(self, text):
|
||||
raise NotImplementedError()
|
||||
enc = self._get_content_type_charset()
|
||||
cached = (
|
||||
self._text_cache.decoded == text and
|
||||
self._text_cache.encoding == enc
|
||||
)
|
||||
if not cached:
|
||||
try:
|
||||
if not enc:
|
||||
raise ValueError()
|
||||
encoded = encoding.encode(text, enc)
|
||||
except ValueError:
|
||||
# Do we have an unknown content-type charset?
|
||||
# If so, we want to replace it with utf8.
|
||||
if text and enc:
|
||||
self.headers["content-type"] = re.sub(
|
||||
"charset=[^;]+",
|
||||
"charset=utf-8",
|
||||
self.headers["content-type"]
|
||||
)
|
||||
encoded = text.encode("utf8", "replace" if six.PY2 else "surrogateescape")
|
||||
self._text_cache = CachedDecode(encoded, enc, text)
|
||||
self.content = self._text_cache.encoded
|
||||
|
||||
def decode(self):
|
||||
"""
|
||||
Decodes body based on the current Content-Encoding header, then
|
||||
removes the header. If there is no Content-Encoding header, no
|
||||
action is taken.
|
||||
|
||||
Returns:
|
||||
True, if decoding succeeded.
|
||||
False, otherwise.
|
||||
Decodes body based on the current Content-Encoding header, then
|
||||
removes the header. If there is no Content-Encoding header, no
|
||||
action is taken.
|
||||
"""
|
||||
ce = self.headers.get("content-encoding")
|
||||
data = encoding.decode(ce, self.content)
|
||||
if data is None:
|
||||
return False
|
||||
self.content = data
|
||||
self.raw_content = self.content
|
||||
self.headers.pop("content-encoding", None)
|
||||
return True
|
||||
|
||||
def encode(self, e):
|
||||
"""
|
||||
Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
|
||||
|
||||
Returns:
|
||||
True, if encoding succeeded.
|
||||
False, otherwise.
|
||||
Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
|
||||
"""
|
||||
data = encoding.encode(e, self.content)
|
||||
if data is None:
|
||||
return False
|
||||
self.content = data
|
||||
self.decode() # remove the current encoding
|
||||
self.headers["content-encoding"] = e
|
||||
return True
|
||||
self.content = self.raw_content
|
||||
|
||||
def replace(self, pattern, repl, flags=0):
|
||||
"""
|
||||
@@ -203,10 +289,9 @@ class Message(basetypes.Serializable):
|
||||
repl = strutils.escaped_str_to_bytes(repl)
|
||||
replacements = 0
|
||||
if self.content:
|
||||
with decoded(self):
|
||||
self.content, replacements = re.subn(
|
||||
pattern, repl, self.content, flags=flags
|
||||
)
|
||||
self.content, replacements = re.subn(
|
||||
pattern, repl, self.content, flags=flags
|
||||
)
|
||||
replacements += self.headers.replace(pattern, repl, flags)
|
||||
return replacements
|
||||
|
||||
@@ -225,29 +310,16 @@ class Message(basetypes.Serializable):
|
||||
|
||||
class decoded(object):
|
||||
"""
|
||||
A context manager that decodes a request or response, and then
|
||||
re-encodes it with the same encoding after execution of the block.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
with decoded(request):
|
||||
request.content = request.content.replace("foo", "bar")
|
||||
Deprecated: You can now directly use :py:attr:`content`.
|
||||
:py:attr:`raw_content` has the encoded content.
|
||||
"""
|
||||
|
||||
def __init__(self, message):
|
||||
self.message = message
|
||||
ce = message.headers.get("content-encoding")
|
||||
if ce in encoding.ENCODINGS:
|
||||
self.ce = ce
|
||||
else:
|
||||
self.ce = None
|
||||
warnings.warn("decoded() is deprecated, you can now directly use .content instead. "
|
||||
".raw_content has the encoded content.", DeprecationWarning)
|
||||
|
||||
def __enter__(self):
|
||||
if self.ce:
|
||||
self.message.decode()
|
||||
pass
|
||||
|
||||
def __exit__(self, type, value, tb):
|
||||
if self.ce:
|
||||
self.message.encode(self.ce)
|
||||
pass
|
||||
@@ -5,7 +5,6 @@ import re
|
||||
import six
|
||||
from six.moves import urllib
|
||||
|
||||
from netlib import encoding
|
||||
from netlib import multidict
|
||||
from netlib import strutils
|
||||
from netlib.http import multipart
|
||||
@@ -44,6 +43,7 @@ class Request(message.Message):
|
||||
An HTTP request.
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Request, self).__init__()
|
||||
self.data = RequestData(*args, **kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -327,7 +327,7 @@ class Request(message.Message):
|
||||
self.headers["accept-encoding"] = (
|
||||
', '.join(
|
||||
e
|
||||
for e in encoding.ENCODINGS
|
||||
for e in {"gzip", "identity", "deflate"}
|
||||
if e in accept_encoding
|
||||
)
|
||||
)
|
||||
|
||||
@@ -30,13 +30,14 @@ class Response(message.Message):
|
||||
An HTTP response.
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Response, self).__init__()
|
||||
self.data = ResponseData(*args, **kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
if self.content:
|
||||
if self.raw_content:
|
||||
details = "{}, {}".format(
|
||||
self.headers.get("content-type", "unknown content type"),
|
||||
human.pretty_size(len(self.content))
|
||||
human.pretty_size(len(self.raw_content))
|
||||
)
|
||||
else:
|
||||
details = "no content"
|
||||
|
||||
@@ -209,28 +209,6 @@ Larry
|
||||
headers=Headers()
|
||||
)
|
||||
|
||||
r = cv.get_content_view(
|
||||
cv.get("Auto"),
|
||||
encoding.encode('gzip', b"[1, 2, 3]"),
|
||||
headers=Headers(
|
||||
content_type="application/json",
|
||||
content_encoding="gzip"
|
||||
)
|
||||
)
|
||||
assert "decoded gzip" in r[0]
|
||||
assert "JSON" in r[0]
|
||||
|
||||
r = cv.get_content_view(
|
||||
cv.get("XML"),
|
||||
encoding.encode('gzip', b"[1, 2, 3]"),
|
||||
headers=Headers(
|
||||
content_type="application/json",
|
||||
content_encoding="gzip"
|
||||
)
|
||||
)
|
||||
assert "decoded gzip" in r[0]
|
||||
assert "Raw" in r[0]
|
||||
|
||||
def test_add_cv(self):
|
||||
class TestContentView(cv.View):
|
||||
name = "test"
|
||||
|
||||
@@ -73,9 +73,9 @@ def test_add_header():
|
||||
def test_custom_contentviews():
|
||||
with example("custom_contentviews.py") as ex:
|
||||
pig = ex.ctx.contentview
|
||||
_, fmt = pig("<html>test!</html>")
|
||||
assert any('esttay!' in val[0][1] for val in fmt)
|
||||
assert not pig("gobbledygook")
|
||||
_, fmt = pig(b"<html>test!</html>")
|
||||
assert any(b'esttay!' in val[0][1] for val in fmt)
|
||||
assert not pig(b"gobbledygook")
|
||||
|
||||
|
||||
def test_iframe_injector():
|
||||
@@ -103,7 +103,7 @@ def test_modify_form():
|
||||
|
||||
|
||||
def test_modify_querystring():
|
||||
flow = tutils.tflow(req=netutils.treq(path="/search?q=term"))
|
||||
flow = tutils.tflow(req=netutils.treq(path=b"/search?q=term"))
|
||||
with example("modify_querystring.py") as ex:
|
||||
ex.run("request", flow)
|
||||
assert flow.request.query["mitmproxy"] == "rocks"
|
||||
@@ -126,7 +126,7 @@ def test_modify_response_body():
|
||||
|
||||
|
||||
def test_redirect_requests():
|
||||
flow = tutils.tflow(req=netutils.treq(host="example.org"))
|
||||
flow = tutils.tflow(req=netutils.treq(host=b"example.org"))
|
||||
with example("redirect_requests.py") as ex:
|
||||
ex.run("request", flow)
|
||||
assert flow.request.host == "mitmproxy.org"
|
||||
|
||||
@@ -518,13 +518,13 @@ class TestFlow(object):
|
||||
|
||||
f.replace("foo", "bar")
|
||||
|
||||
assert f.request.content != "abarb"
|
||||
assert f.request.raw_content != "abarb"
|
||||
f.request.decode()
|
||||
assert f.request.content == "abarb"
|
||||
assert f.request.raw_content == "abarb"
|
||||
|
||||
assert f.response.content != "abarb"
|
||||
assert f.response.raw_content != "abarb"
|
||||
f.response.decode()
|
||||
assert f.response.content == "abarb"
|
||||
assert f.response.raw_content == "abarb"
|
||||
|
||||
|
||||
class TestState:
|
||||
@@ -1102,16 +1102,6 @@ class TestRequest:
|
||||
r.constrain_encoding()
|
||||
assert "oink" not in r.headers["accept-encoding"]
|
||||
|
||||
def test_get_decoded_content(self):
|
||||
r = HTTPRequest.wrap(netlib.tutils.treq())
|
||||
r.content = None
|
||||
r.headers["content-encoding"] = "identity"
|
||||
assert r.get_decoded_content() is None
|
||||
|
||||
r.content = "falafel"
|
||||
r.encode("gzip")
|
||||
assert r.get_decoded_content() == "falafel"
|
||||
|
||||
def test_get_content_type(self):
|
||||
resp = HTTPResponse.wrap(netlib.tutils.tresp())
|
||||
resp.headers = Headers(content_type="text/plain")
|
||||
|
||||
@@ -120,7 +120,7 @@ class _Http2TestBase(object):
|
||||
client.wfile.flush()
|
||||
|
||||
# read CONNECT response
|
||||
while client.rfile.readline() != "\r\n":
|
||||
while client.rfile.readline() != b"\r\n":
|
||||
pass
|
||||
|
||||
client.convert_to_ssl(alpn_protos=[b'h2'])
|
||||
@@ -197,7 +197,7 @@ class TestSimple(_Http2TestBase, _Http2ServerBase):
|
||||
(':path', '/'),
|
||||
('ClIeNt-FoO', 'client-bar-1'),
|
||||
('ClIeNt-FoO', 'client-bar-2'),
|
||||
], body='my request body echoed back to me')
|
||||
], body=b'my request body echoed back to me')
|
||||
|
||||
done = False
|
||||
while not done:
|
||||
@@ -269,7 +269,7 @@ class TestWithBodies(_Http2TestBase, _Http2ServerBase):
|
||||
(':scheme', 'https'),
|
||||
(':path', '/'),
|
||||
],
|
||||
body='foobar with request body',
|
||||
body=b'foobar with request body',
|
||||
)
|
||||
|
||||
done = False
|
||||
|
||||
@@ -11,7 +11,6 @@ import pathod.pathoc
|
||||
from mitmproxy import flow, controller
|
||||
from mitmproxy.cmdline import APP_HOST, APP_PORT
|
||||
|
||||
from netlib import strutils
|
||||
|
||||
testapp = flask.Flask(__name__)
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, print_function, division
|
||||
|
||||
from netlib.http import decoded
|
||||
import six
|
||||
|
||||
from netlib.tutils import tresp
|
||||
|
||||
|
||||
@@ -76,6 +77,9 @@ class TestMessage(object):
|
||||
resp.content = b""
|
||||
assert resp.data.content == b""
|
||||
assert resp.headers["content-length"] == "0"
|
||||
resp.raw_content = b"bar"
|
||||
assert resp.data.content == b"bar"
|
||||
assert resp.headers["content-length"] == "0"
|
||||
|
||||
def test_content_basic(self):
|
||||
_test_passthrough_attr(tresp(), "content")
|
||||
@@ -93,61 +97,108 @@ class TestMessage(object):
|
||||
_test_decoded_attr(tresp(), "http_version")
|
||||
|
||||
|
||||
class TestDecodedDecorator(object):
|
||||
|
||||
class TestMessageContentEncoding(object):
|
||||
def test_simple(self):
|
||||
r = tresp()
|
||||
assert r.content == b"message"
|
||||
assert r.raw_content == b"message"
|
||||
assert "content-encoding" not in r.headers
|
||||
assert r.encode("gzip")
|
||||
r.encode("gzip")
|
||||
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.content != b"message"
|
||||
with decoded(r):
|
||||
assert "content-encoding" not in r.headers
|
||||
assert r.content == b"message"
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.content != b"message"
|
||||
assert r.raw_content != b"message"
|
||||
assert r.content == b"message"
|
||||
assert r.raw_content != b"message"
|
||||
|
||||
def test_modify(self):
|
||||
r = tresp()
|
||||
assert "content-encoding" not in r.headers
|
||||
assert r.encode("gzip")
|
||||
r.encode("gzip")
|
||||
|
||||
with decoded(r):
|
||||
r.content = b"foo"
|
||||
|
||||
assert r.content != b"foo"
|
||||
r.content = b"foo"
|
||||
assert r.raw_content != b"foo"
|
||||
r.decode()
|
||||
assert r.content == b"foo"
|
||||
assert r.raw_content == b"foo"
|
||||
|
||||
def test_unknown_ce(self):
|
||||
r = tresp()
|
||||
r.headers["content-encoding"] = "zopfli"
|
||||
r.content = b"foo"
|
||||
with decoded(r):
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.content == b"foo"
|
||||
assert r.headers["content-encoding"]
|
||||
r.raw_content = b"foo"
|
||||
assert r.content == b"foo"
|
||||
assert r.headers["content-encoding"]
|
||||
|
||||
def test_cannot_decode(self):
|
||||
r = tresp()
|
||||
assert r.encode("gzip")
|
||||
r.content = b"foo"
|
||||
with decoded(r):
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.content == b"foo"
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.content != b"foo"
|
||||
r.decode()
|
||||
r.encode("gzip")
|
||||
r.raw_content = b"foo"
|
||||
assert r.content == b"foo"
|
||||
assert r.headers["content-encoding"]
|
||||
r.decode()
|
||||
assert r.raw_content == b"foo"
|
||||
assert "content-encoding" not in r.headers
|
||||
|
||||
def test_cannot_encode(self):
|
||||
r = tresp()
|
||||
assert r.encode("gzip")
|
||||
with decoded(r):
|
||||
r.content = None
|
||||
r.encode("gzip")
|
||||
r.content = None
|
||||
assert r.headers["content-encoding"]
|
||||
assert r.raw_content is None
|
||||
|
||||
r.headers["content-encoding"] = "zopfli"
|
||||
r.content = b"foo"
|
||||
assert "content-encoding" not in r.headers
|
||||
assert r.content is None
|
||||
assert r.raw_content == b"foo"
|
||||
|
||||
|
||||
class TestMessageText(object):
|
||||
def test_simple(self):
|
||||
r = tresp(content=b'\xc3\xbc')
|
||||
assert r.raw_content == b"\xc3\xbc"
|
||||
assert r.content == b"\xc3\xbc"
|
||||
assert r.text == u"ü"
|
||||
|
||||
r.encode("gzip")
|
||||
assert r.text == u"ü"
|
||||
r.decode()
|
||||
assert r.text == u"ü"
|
||||
|
||||
r.headers["content-type"] = "text/html; charset=latin1"
|
||||
assert r.content == b"\xc3\xbc"
|
||||
assert r.text == u"ü"
|
||||
|
||||
def test_modify(self):
|
||||
r = tresp()
|
||||
|
||||
r.text = u"ü"
|
||||
assert r.raw_content == b"\xc3\xbc"
|
||||
|
||||
r.headers["content-type"] = "text/html; charset=latin1"
|
||||
r.text = u"ü"
|
||||
assert r.raw_content == b"\xfc"
|
||||
assert r.headers["content-length"] == "1"
|
||||
|
||||
def test_unknown_ce(self):
|
||||
r = tresp()
|
||||
r.headers["content-type"] = "text/html; charset=wtf"
|
||||
r.raw_content = b"foo"
|
||||
assert r.text == u"foo"
|
||||
|
||||
def test_cannot_decode(self):
|
||||
r = tresp()
|
||||
r.raw_content = b"\xFF"
|
||||
assert r.text == u'\ufffd' if six.PY2 else '\udcff'
|
||||
|
||||
def test_cannot_encode(self):
|
||||
r = tresp()
|
||||
r.content = None
|
||||
assert "content-type" not in r.headers
|
||||
assert r.raw_content is None
|
||||
|
||||
r.headers["content-type"] = "text/html; charset=latin1"
|
||||
r.text = u"☃"
|
||||
assert r.headers["content-type"] == "text/html; charset=utf-8"
|
||||
assert r.raw_content == b'\xe2\x98\x83'
|
||||
|
||||
r.headers["content-type"] = "text/html; charset=latin1"
|
||||
r.text = u'\udcff'
|
||||
assert r.headers["content-type"] == "text/html; charset=utf-8"
|
||||
assert r.raw_content == b'\xed\xb3\xbf' if six.PY2 else b"\xFF"
|
||||
|
||||
@@ -1,37 +1,39 @@
|
||||
from netlib import encoding
|
||||
from netlib import encoding, tutils
|
||||
|
||||
|
||||
def test_identity():
|
||||
assert b"string" == encoding.decode("identity", b"string")
|
||||
assert b"string" == encoding.encode("identity", b"string")
|
||||
assert not encoding.encode("nonexistent", b"string")
|
||||
assert not encoding.decode("nonexistent encoding", b"string")
|
||||
assert b"string" == encoding.decode(b"string", "identity")
|
||||
assert b"string" == encoding.encode(b"string", "identity")
|
||||
with tutils.raises(ValueError):
|
||||
encoding.encode(b"string", "nonexistent encoding")
|
||||
|
||||
|
||||
def test_gzip():
|
||||
assert b"string" == encoding.decode(
|
||||
"gzip",
|
||||
encoding.encode(
|
||||
"gzip",
|
||||
b"string"
|
||||
)
|
||||
b"string",
|
||||
"gzip"
|
||||
),
|
||||
"gzip"
|
||||
)
|
||||
assert encoding.decode("gzip", b"bogus") is None
|
||||
with tutils.raises(ValueError):
|
||||
encoding.decode(b"bogus", "gzip")
|
||||
|
||||
|
||||
def test_deflate():
|
||||
assert b"string" == encoding.decode(
|
||||
"deflate",
|
||||
encoding.encode(
|
||||
"deflate",
|
||||
b"string"
|
||||
)
|
||||
b"string",
|
||||
"deflate"
|
||||
),
|
||||
"deflate"
|
||||
)
|
||||
assert b"string" == encoding.decode(
|
||||
"deflate",
|
||||
encoding.encode(
|
||||
"deflate",
|
||||
b"string"
|
||||
)[2:-4]
|
||||
b"string",
|
||||
"deflate"
|
||||
)[2:-4],
|
||||
"deflate"
|
||||
)
|
||||
assert encoding.decode("deflate", b"bogus") is None
|
||||
with tutils.raises(ValueError):
|
||||
encoding.decode(b"bogus", "deflate")
|
||||
|
||||
Reference in New Issue
Block a user