Files
mitmproxy/mitmproxy/proxy/protocol/http.py
Aldo Cortesi 4eb2b56dec Let's not over-ride __bool__ on connection objects
If I had a thousand years and every thesaurus in the world, I still couldn't
adequately express how much I dislike this piece of interface design.
2016-11-04 10:59:41 +13:00

465 lines
18 KiB
Python

import h2.exceptions
import time
import traceback
from mitmproxy import exceptions
from mitmproxy import http
from mitmproxy import flow
from mitmproxy.proxy.protocol import base
from mitmproxy.proxy.protocol import websockets as pwebsockets
import mitmproxy.net.http
from mitmproxy.net import tcp
from mitmproxy.net import websockets
class _HttpTransmissionLayer(base.Layer):
def read_request_headers(self, flow):
raise NotImplementedError()
def read_request_body(self, request):
raise NotImplementedError()
def read_request(self, f):
request = self.read_request_headers(f)
request.data.content = b"".join(
self.read_request_body(request)
)
request.timestamp_end = time.time()
return request
def send_request(self, request):
raise NotImplementedError()
def read_response_headers(self):
raise NotImplementedError()
def read_response_body(self, request, response):
raise NotImplementedError()
yield "this is a generator" # pragma: no cover
def read_response(self, request):
response = self.read_response_headers()
response.data.content = b"".join(
self.read_response_body(request, response)
)
return response
def send_response(self, response):
if response.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
self.send_response_headers(response)
self.send_response_body(response, [response.data.content])
def send_response_headers(self, response):
raise NotImplementedError()
def send_response_body(self, response, chunks):
raise NotImplementedError()
def check_close_connection(self, f):
raise NotImplementedError()
class ConnectServerConnection:
"""
"Fake" ServerConnection to represent state after a CONNECT request to an upstream proxy.
"""
def __init__(self, address, ctx):
self.address = tcp.Address.wrap(address)
self._ctx = ctx
@property
def via(self):
return self._ctx.server_conn
def __getattr__(self, item):
return getattr(self.via, item)
def connected(self):
return self.via.connected()
class UpstreamConnectLayer(base.Layer):
def __init__(self, ctx, connect_request):
super().__init__(ctx)
self.connect_request = connect_request
self.server_conn = ConnectServerConnection(
(connect_request.host, connect_request.port),
self.ctx
)
def __call__(self):
layer = self.ctx.next_layer(self)
layer()
def _send_connect_request(self):
self.send_request(self.connect_request)
resp = self.read_response(self.connect_request)
if resp.status_code != 200:
raise exceptions.ProtocolException("Reconnect: Upstream server refuses CONNECT request")
def connect(self):
if not self.server_conn.connected():
self.ctx.connect()
self._send_connect_request()
else:
pass # swallow the message
def change_upstream_proxy_server(self, address):
if address != self.server_conn.via.address:
self.ctx.set_server(address)
def set_server(self, address):
if self.ctx.server_conn.connected():
self.ctx.disconnect()
address = tcp.Address.wrap(address)
self.connect_request.host = address.host
self.connect_request.port = address.port
self.server_conn.address = address
class HttpLayer(base.Layer):
def __init__(self, ctx, mode):
super().__init__(ctx)
self.mode = mode
self.flow = None # type: http.HTTPFlow
self.__initial_server_conn = None
"Contains the original destination in transparent mode, which needs to be restored"
"if an inline script modified the target server for a single http request"
# We cannot rely on server_conn.tls_established,
# see https://github.com/mitmproxy/mitmproxy/issues/925
self.__initial_server_tls = None
# Requests happening after CONNECT do not need Proxy-Authorization headers.
self.http_authenticated = False
def __call__(self):
if self.mode == "transparent":
self.__initial_server_tls = self.server_tls
self.__initial_server_conn = self.server_conn
while True:
self.flow = http.HTTPFlow(self.client_conn, self.server_conn, live=self)
if not self._process_flow(self.flow):
return
def _process_flow(self, f):
try:
request = self.get_request_from_client(f)
# Make sure that the incoming request matches our expectations
self.validate_request(request)
except exceptions.HttpReadDisconnect:
# don't throw an error for disconnects that happen before/between requests.
return False
except exceptions.HttpException as e:
# We optimistically guess there might be an HTTP client on the
# other end
self.send_error_response(400, repr(e))
raise exceptions.ProtocolException(
"HTTP protocol error in client request: {}".format(e)
)
self.log("request", "debug", [repr(request)])
# Handle Proxy Authentication
# Proxy Authentication conceptually does not work in transparent mode.
# We catch this misconfiguration on startup. Here, we sort out requests
# after a successful CONNECT request (which do not need to be validated anymore)
if not (self.http_authenticated or self.authenticate(request)):
return False
f.request = request
try:
# Regular Proxy Mode: Handle CONNECT
if self.mode == "regular" and request.first_line_format == "authority":
self.handle_regular_mode_connect(request)
return False
except (exceptions.ProtocolException, exceptions.NetlibException) as e:
# HTTPS tasting means that ordinary errors like resolution and
# connection errors can happen here.
self.send_error_response(502, repr(e))
f.error = flow.Error(str(e))
self.channel.ask("error", f)
return False
# update host header in reverse proxy mode
if self.config.options.mode == "reverse":
f.request.headers["Host"] = self.config.upstream_server.address.host
# set upstream auth
if self.mode == "upstream" and self.config.upstream_auth is not None:
f.request.headers["Proxy-Authorization"] = self.config.upstream_auth
self.process_request_hook(f)
try:
if websockets.check_handshake(request.headers) and websockets.check_client_version(request.headers):
# We only support RFC6455 with WebSockets version 13
# allow inline scripts to manipulate the client handshake
self.channel.ask("websocket_handshake", f)
if not f.response:
self.establish_server_connection(
f.request.host,
f.request.port,
f.request.scheme
)
self.get_response_from_server(f)
else:
# response was set by an inline script.
# we now need to emulate the responseheaders hook.
self.channel.ask("responseheaders", f)
self.log("response", "debug", [repr(f.response)])
self.channel.ask("response", f)
self.send_response_to_client(f)
if self.check_close_connection(f):
return False
# Handle 101 Switching Protocols
if f.response.status_code == 101:
self.handle_101_switching_protocols(f)
return False # should never be reached
# Upstream Proxy Mode: Handle CONNECT
if f.request.first_line_format == "authority" and f.response.status_code == 200:
self.handle_upstream_mode_connect(f.request.copy())
return False
except (exceptions.ProtocolException, exceptions.NetlibException) as e:
self.send_error_response(502, repr(e))
if not f.response:
f.error = flow.Error(str(e))
self.channel.ask("error", f)
return False
else:
raise exceptions.ProtocolException(
"Error in HTTP connection: %s" % repr(e)
)
finally:
if f:
f.live = False
return True
def get_request_from_client(self, f):
request = self.read_request(f)
f.request = request
self.channel.ask("requestheaders", f)
if request.headers.get("expect", "").lower() == "100-continue":
# TODO: We may have to use send_response_headers for HTTP2 here.
self.send_response(http.expect_continue_response)
request.headers.pop("expect")
request.body = b"".join(self.read_request_body(request))
request.timestamp_end = time.time()
return request
def send_error_response(self, code, message, headers=None):
try:
response = http.make_error_response(code, message, headers)
self.send_response(response)
except (exceptions.NetlibException, h2.exceptions.H2Error, exceptions.Http2ProtocolException):
self.log(traceback.format_exc(), "debug")
def change_upstream_proxy_server(self, address):
# Make set_upstream_proxy_server always available,
# even if there's no UpstreamConnectLayer
if address != self.server_conn.address:
return self.set_server(address)
def handle_regular_mode_connect(self, request):
self.http_authenticated = True
self.set_server((request.host, request.port))
self.send_response(http.make_connect_response(request.data.http_version))
layer = self.ctx.next_layer(self)
layer()
def handle_upstream_mode_connect(self, connect_request):
layer = UpstreamConnectLayer(self, connect_request)
layer()
def send_response_to_client(self, f):
if not f.response.stream:
# no streaming:
# we already received the full response from the server and can
# send it to the client straight away.
self.send_response(f.response)
else:
# streaming:
# First send the headers and then transfer the response incrementally
self.send_response_headers(f.response)
chunks = self.read_response_body(
f.request,
f.response
)
if callable(f.response.stream):
chunks = f.response.stream(chunks)
self.send_response_body(f.response, chunks)
f.response.timestamp_end = time.time()
def get_response_from_server(self, f):
def get_response():
self.send_request(f.request)
f.response = self.read_response_headers()
try:
get_response()
except exceptions.NetlibException as e:
self.log(
"server communication error: %s" % repr(e),
level="debug"
)
# In any case, we try to reconnect at least once. This is
# necessary because it might be possible that we already
# initiated an upstream connection after clientconnect that
# has already been expired, e.g consider the following event
# log:
# > clientconnect (transparent mode destination known)
# > serverconnect (required for client tls handshake)
# > read n% of large request
# > server detects timeout, disconnects
# > read (100-n)% of large request
# > send large request upstream
if isinstance(e, exceptions.Http2ProtocolException):
# do not try to reconnect for HTTP2
raise exceptions.ProtocolException("First and only attempt to get response via HTTP2 failed.")
self.disconnect()
self.connect()
get_response()
# call the appropriate script hook - this is an opportunity for an
# inline script to set f.stream = True
self.channel.ask("responseheaders", f)
if f.response.stream:
f.response.data.content = None
else:
f.response.data.content = b"".join(self.read_response_body(
f.request,
f.response
))
f.response.timestamp_end = time.time()
# no further manipulation of self.server_conn beyond this point
# we can safely set it as the final attribute value here.
f.server_conn = self.server_conn
def process_request_hook(self, f):
# Determine .scheme, .host and .port attributes for inline scripts.
# For absolute-form requests, they are directly given in the request.
# For authority-form requests, we only need to determine the request scheme.
# For relative-form requests, we need to determine host and port as
# well.
if self.mode == "regular":
pass # only absolute-form at this point, nothing to do here.
elif self.mode == "upstream":
pass
else:
# Setting request.host also updates the host header, which we want to preserve
host_header = f.request.headers.get("host", None)
f.request.host = self.__initial_server_conn.address.host
f.request.port = self.__initial_server_conn.address.port
if host_header:
f.request.headers["host"] = host_header
f.request.scheme = "https" if self.__initial_server_tls else "http"
self.channel.ask("request", f)
def establish_server_connection(self, host, port, scheme):
address = tcp.Address((host, port))
tls = (scheme == "https")
if self.mode == "regular" or self.mode == "transparent":
# If there's an existing connection that doesn't match our expectations, kill it.
if address != self.server_conn.address or tls != self.server_tls:
self.set_server(address)
self.set_server_tls(tls, address.host)
# Establish connection is neccessary.
if not self.server_conn.connected():
self.connect()
else:
if not self.server_conn.connected():
self.connect()
if tls:
raise exceptions.HttpProtocolException("Cannot change scheme in upstream proxy mode.")
"""
# This is a very ugly (untested) workaround to solve a very ugly problem.
if self.server_conn and self.server_conn.tls_established and not ssl:
self.disconnect()
self.connect()
elif ssl and not hasattr(self, "connected_to") or self.connected_to != address:
if self.server_conn.tls_established:
self.disconnect()
self.connect()
self.send_request(make_connect_request(address))
tls_layer = TlsLayer(self, False, True)
tls_layer._establish_tls_with_server()
"""
def validate_request(self, request):
if request.first_line_format == "absolute" and request.scheme != "http":
raise exceptions.HttpException("Invalid request scheme: %s" % request.scheme)
expected_request_forms = {
"regular": ("authority", "absolute",),
"upstream": ("authority", "absolute"),
"transparent": ("relative",)
}
allowed_request_forms = expected_request_forms[self.mode]
if request.first_line_format not in allowed_request_forms:
err_message = "Invalid HTTP request form (expected: %s, got: %s)" % (
" or ".join(allowed_request_forms), request.first_line_format
)
raise exceptions.HttpException(err_message)
if self.mode == "regular" and request.first_line_format == "absolute":
request.first_line_format = "relative"
def authenticate(self, request):
if self.config.authenticator:
if self.config.authenticator.authenticate(request.headers):
self.config.authenticator.clean(request.headers)
else:
if self.mode == "transparent":
self.send_response(http.make_error_response(
401,
"Authentication Required",
mitmproxy.net.http.Headers(**self.config.authenticator.auth_challenge_headers())
))
else:
self.send_response(http.make_error_response(
407,
"Proxy Authentication Required",
mitmproxy.net.http.Headers(**self.config.authenticator.auth_challenge_headers())
))
return False
return True
def handle_101_switching_protocols(self, f):
"""
Handle a successful HTTP 101 Switching Protocols Response, received after e.g. a WebSocket upgrade request.
"""
# Check for WebSockets handshake
is_websockets = (
f and
websockets.check_handshake(f.request.headers) and
websockets.check_handshake(f.response.headers)
)
if is_websockets and not self.config.options.websockets:
self.log(
"Client requested WebSocket connection, but the protocol is currently disabled in mitmproxy.",
"info"
)
if is_websockets and self.config.options.websockets:
layer = pwebsockets.WebSocketsLayer(self, f)
else:
layer = self.ctx.next_layer(self)
layer()