import io import os import socket import collections from urllib.parse import urlsplit import warnings __all__ = ["HTTPResponse", "HTTPConnection", "HTTPException", "NotConnected", "UnknownProtocol", "UnknownTransferEncoding", "UnimplementedFileMode", "IncompleteRead", "InvalidURL", "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", "BadStatusLine", "error", "responses"] HTTP_PORT = 80 HTTPS_PORT = 443 _UNKNOWN = 'UNKNOWN' # connection states _CS_IDLE = 'Idle' _CS_REQ_STARTED = 'Request-started' _CS_REQ_SENT = 'Request-sent' # status codes # informational CONTINUE = 100 SWITCHING_PROTOCOLS = 101 PROCESSING = 102 # successful OK = 200 CREATED = 201 ACCEPTED = 202 NON_AUTHORITATIVE_INFORMATION = 203 NO_CONTENT = 204 RESET_CONTENT = 205 PARTIAL_CONTENT = 206 MULTI_STATUS = 207 IM_USED = 226 # redirection MULTIPLE_CHOICES = 300 MOVED_PERMANENTLY = 301 FOUND = 302 SEE_OTHER = 303 NOT_MODIFIED = 304 USE_PROXY = 305 TEMPORARY_REDIRECT = 307 # client error BAD_REQUEST = 400 UNAUTHORIZED = 401 PAYMENT_REQUIRED = 402 FORBIDDEN = 403 NOT_FOUND = 404 METHOD_NOT_ALLOWED = 405 NOT_ACCEPTABLE = 406 PROXY_AUTHENTICATION_REQUIRED = 407 REQUEST_TIMEOUT = 408 CONFLICT = 409 GONE = 410 LENGTH_REQUIRED = 411 PRECONDITION_FAILED = 412 REQUEST_ENTITY_TOO_LARGE = 413 REQUEST_URI_TOO_LONG = 414 UNSUPPORTED_MEDIA_TYPE = 415 REQUESTED_RANGE_NOT_SATISFIABLE = 416 EXPECTATION_FAILED = 417 UNPROCESSABLE_ENTITY = 422 LOCKED = 423 FAILED_DEPENDENCY = 424 UPGRADE_REQUIRED = 426 PRECONDITION_REQUIRED = 428 TOO_MANY_REQUESTS = 429 REQUEST_HEADER_FIELDS_TOO_LARGE = 431 # server error INTERNAL_SERVER_ERROR = 500 NOT_IMPLEMENTED = 501 BAD_GATEWAY = 502 SERVICE_UNAVAILABLE = 503 GATEWAY_TIMEOUT = 504 HTTP_VERSION_NOT_SUPPORTED = 505 INSUFFICIENT_STORAGE = 507 NOT_EXTENDED = 510 NETWORK_AUTHENTICATION_REQUIRED = 511 # Mapping status codes to official W3C names responses = { 100: 'Continue', 101: 'Switching Protocols', 200: 'OK', 201: 'Created', 202: 'Accepted', 203: 'Non-Authoritative Information', 204: 'No Content', 205: 'Reset Content', 206: 'Partial Content', 300: 'Multiple Choices', 301: 'Moved Permanently', 302: 'Found', 303: 'See Other', 304: 'Not Modified', 305: 'Use Proxy', 306: '(Unused)', 307: 'Temporary Redirect', 400: 'Bad Request', 401: 'Unauthorized', 402: 'Payment Required', 403: 'Forbidden', 404: 'Not Found', 405: 'Method Not Allowed', 406: 'Not Acceptable', 407: 'Proxy Authentication Required', 408: 'Request Timeout', 409: 'Conflict', 410: 'Gone', 411: 'Length Required', 412: 'Precondition Failed', 413: 'Request Entity Too Large', 414: 'Request-URI Too Long', 415: 'Unsupported Media Type', 416: 'Requested Range Not Satisfiable', 417: 'Expectation Failed', 428: 'Precondition Required', 429: 'Too Many Requests', 431: 'Request Header Fields Too Large', 500: 'Internal Server Error', 501: 'Not Implemented', 502: 'Bad Gateway', 503: 'Service Unavailable', 504: 'Gateway Timeout', 505: 'HTTP Version Not Supported', 511: 'Network Authentication Required', } # maximal amount of data to read at one time in _safe_read MAXAMOUNT = 1048576 # maximal line length when calling readline(). _MAXLINE = 65536 _MAXHEADERS = 100 _strict_sentinel = object() class HTTPResponse: #class HTTPResponse(io.RawIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. # The bytes from the socket object are iso-8859-1 strings. # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded # text following RFC 2047. The basic status line parsing only # accepts iso-8859-1. def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): self.fp = sock.makefile("rb") self.debuglevel = debuglevel if strict is not _strict_sentinel: warnings.warn("the 'strict' argument isn't supported anymore; " "http.client now always assumes HTTP/1.x compliant servers.", DeprecationWarning, 2) self._method = method # The HTTPResponse object is returned via urllib. The clients # of http and urllib expect different attributes for the # headers. headers is used here and supports urllib. msg is # provided as a backwards compatibility layer for http # clients. self.headers = self.msg = None # from the Status-Line of the response self.version = _UNKNOWN # HTTP-Version self.status = _UNKNOWN # Status-Code self.reason = _UNKNOWN # Reason-Phrase self.chunked = _UNKNOWN # is "chunked" being used? self.chunk_left = _UNKNOWN # bytes left to read in current chunk self.length = _UNKNOWN # number of bytes left in response self.will_close = _UNKNOWN # conn will close at end of response def _read_status(self): line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") if len(line) > _MAXLINE: raise LineTooLong("status line") if self.debuglevel > 0: print("reply:", repr(line)) if not line: # Presumably, the server closed the connection before # sending a valid response. raise BadStatusLine(line) try: version, status, reason = line.split(None, 2) except ValueError: try: version, status = line.split(None, 1) reason = "" except ValueError: # empty version will cause next test to fail. version = "" if not version.startswith("HTTP/"): self._close_conn() raise BadStatusLine(line) # The status code is a three-digit number try: status = int(status) if status < 100 or status > 999: raise BadStatusLine(line) except ValueError: raise BadStatusLine(line) return version, status, reason def begin(self): if self.headers is not None: # we've already started reading the response return # read until we get a non-100 response while True: version, status, reason = self._read_status() if status != CONTINUE: break # skip the header from the 100 response while True: skip = self.fp.readline(_MAXLINE + 1) if len(skip) > _MAXLINE: raise LineTooLong("header line") skip = skip.strip() if not skip: break if self.debuglevel > 0: print("header:", skip) self.code = self.status = status self.reason = reason.strip() if version in ("HTTP/1.0", "HTTP/0.9"): # Some servers might still return "0.9", treat it as 1.0 anyway self.version = 10 elif version.startswith("HTTP/1."): self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 else: raise UnknownProtocol(version) #self.headers = self.msg = parse_headers(self.fp) self.headers = { 'Content-Length': '27', 'Server': 'WSGIServer/0.1 Python/2.7.6', 'Content-Type': 'text/html', 'X-Frame-Options': 'SAMEORIGIN', 'Vary': 'Cookie'} if self.debuglevel > 0: for hdr in self.headers: print("header:", hdr, self.headers[hdr]) # are we using the chunked-style of transfer encoding? tr_enc = self.headers.get("transfer-encoding") if tr_enc and tr_enc.lower() == "chunked": self.chunked = True self.chunk_left = None else: self.chunked = False # will the connection close at the end of the response? self.will_close = self._check_close() # do we have a Content-Length? # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" self.length = None length = self.headers.get("content-length") # are we using the chunked-style of transfer encoding? tr_enc = self.headers.get("transfer-encoding") if length and not self.chunked: try: self.length = int(length) except ValueError: self.length = None else: if self.length < 0: # ignore nonsensical negative lengths self.length = None else: self.length = None # does the body have a fixed length? (of zero) if (status == NO_CONTENT or status == NOT_MODIFIED or 100 <= status < 200 or # 1xx codes self._method == "HEAD"): self.length = 0 # if the connection remains open, and we aren't using chunked, and # a content-length was not provided, then assume that the connection # WILL close. if (not self.will_close and not self.chunked and self.length is None): self.will_close = True def _check_close(self): conn = self.headers.get("connection") if self.version == 11: # An HTTP/1.1 proxy is assumed to stay open unless # explicitly closed. conn = self.headers.get("connection") if conn and "close" in conn.lower(): return True return False # Some HTTP/1.0 implementations have support for persistent # connections, using rules different than HTTP/1.1. # For older HTTP, Keep-Alive indicates persistent connection. if self.headers.get("keep-alive"): return False # At least Akamai returns a "Connection: Keep-Alive" header, # which was supposed to be sent by the client. if conn and "keep-alive" in conn.lower(): return False # Proxy-Connection is a netscape hack. pconn = self.headers.get("proxy-connection") if pconn and "keep-alive" in pconn.lower(): return False # otherwise, assume it will close return True def _close_conn(self): fp = self.fp self.fp = None fp.close() def close(self): super().close() # set "closed" flag if self.fp: self._close_conn() # These implementations are for the benefit of io.BufferedReader. # XXX This class should probably be revised to act more like # the "raw stream" that BufferedReader expects. def flush(self): super().flush() if self.fp: self.fp.flush() def readable(self): return True def isclosed(self): return self.fp is None def read(self, amt=None): if self.fp is None: return b"" if self._method == "HEAD": self._close_conn() return b"" if amt is not None: # Amount is given, so call base class version # (which is implemented in terms of self.readinto) return super(HTTPResponse, self).read(amt) else: # Amount is not given (unbounded read) so we must check self.length # and self.chunked if self.chunked: return self._readall_chunked() if self.length is None: s = self.fp.read() else: try: s = self._safe_read(self.length) except IncompleteRead: self._close_conn() raise self.length = 0 self._close_conn() # we read everything return s def readinto(self, b): if self.fp is None: return 0 if self._method == "HEAD": self._close_conn() return 0 if self.chunked: return self._readinto_chunked(b) if self.length is not None: if len(b) > self.length: # clip the read to the "end of response" b = memoryview(b)[0:self.length] # we do not use _safe_read() here because this may be a .will_close # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) n = self.fp.readinto(b) if not n: # Ideally, we would raise IncompleteRead if the content-length # wasn't satisfied, but it might break compatibility. self._close_conn() elif self.length is not None: self.length -= n if not self.length: self._close_conn() return n def _read_next_chunk_size(self): # Read the next chunk size from the file line = self.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: raise LineTooLong("chunk size") i = line.find(b";") if i >= 0: line = line[:i] # strip chunk-extensions try: return int(line, 16) except ValueError: # close the connection as protocol synchronisation is # probably lost self._close_conn() raise def _read_and_discard_trailer(self): # read and discard trailer up to the CRLF terminator ### note: we shouldn't have any trailers! while True: line = self.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: raise LineTooLong("trailer line") if not line: # a vanishingly small number of sites EOF without # sending the trailer break if line in (b'\r\n', b'\n', b''): break def _readall_chunked(self): assert self.chunked != _UNKNOWN chunk_left = self.chunk_left value = [] while True: if chunk_left is None: try: chunk_left = self._read_next_chunk_size() if chunk_left == 0: break except ValueError: raise IncompleteRead(b''.join(value)) value.append(self._safe_read(chunk_left)) # we read the whole chunk, get another self._safe_read(2) # toss the CRLF at the end of the chunk chunk_left = None self._read_and_discard_trailer() # we read everything; close the "file" self._close_conn() return b''.join(value) def _readinto_chunked(self, b): assert self.chunked != _UNKNOWN chunk_left = self.chunk_left total_bytes = 0 mvb = memoryview(b) while True: if chunk_left is None: try: chunk_left = self._read_next_chunk_size() if chunk_left == 0: break except ValueError: raise IncompleteRead(bytes(b[0:total_bytes])) if len(mvb) < chunk_left: n = self._safe_readinto(mvb) self.chunk_left = chunk_left - n return total_bytes + n elif len(mvb) == chunk_left: n = self._safe_readinto(mvb) self._safe_read(2) # toss the CRLF at the end of the chunk self.chunk_left = None return total_bytes + n else: temp_mvb = mvb[0:chunk_left] n = self._safe_readinto(temp_mvb) mvb = mvb[n:] total_bytes += n # we read the whole chunk, get another self._safe_read(2) # toss the CRLF at the end of the chunk chunk_left = None self._read_and_discard_trailer() # we read everything; close the "file" self._close_conn() return total_bytes def _safe_read(self, amt): s = [] while amt > 0: chunk = self.fp.read(min(amt, MAXAMOUNT)) if not chunk: raise IncompleteRead(b''.join(s), amt) s.append(chunk) amt -= len(chunk) return b"".join(s) def _safe_readinto(self, b): total_bytes = 0 mvb = memoryview(b) while total_bytes < len(b): if MAXAMOUNT < len(mvb): temp_mvb = mvb[0:MAXAMOUNT] n = self.fp.readinto(temp_mvb) else: n = self.fp.readinto(mvb) if not n: raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) mvb = mvb[n:] total_bytes += n return total_bytes def fileno(self): return self.fp.fileno() def getheader(self, name, default=None): if self.headers is None: raise ResponseNotReady() headers = self.headers.get_all(name) or default if isinstance(headers, str) or not hasattr(headers, '__iter__'): return headers else: return ', '.join(headers) def getheaders(self): if self.headers is None: raise ResponseNotReady() return list(self.headers.items()) def __iter__(self): return self # For compatibility with old-style urllib responses. def info(self): return self.headers def geturl(self): return self.url def getcode(self): return self.status class HTTPConnection: _http_vsn = 11 _http_vsn_str = 'HTTP/1.1' response_class = HTTPResponse default_port = HTTP_PORT auto_open = 1 debuglevel = 0 def __init__(self, host, port=None, strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): if strict is not _strict_sentinel: warnings.warn("the 'strict' argument isn't supported anymore; " "http.client now always assumes HTTP/1.x compliant servers.", DeprecationWarning, 2) self.timeout = timeout self.source_address = source_address self.sock = None self._buffer = [] self.__response = None self.__state = _CS_IDLE self._method = None self._tunnel_host = None self._tunnel_port = None self._tunnel_headers = {} self._set_hostport(host, port) def set_tunnel(self, host, port=None, headers=None): self._tunnel_host = host self._tunnel_port = port if headers: self._tunnel_headers = headers else: self._tunnel_headers.clear() def _set_hostport(self, host, port): if port is None: i = host.rfind(':') j = host.rfind(']') # ipv6 addresses have [...] if i > j: try: port = int(host[i+1:]) except ValueError: if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ port = self.default_port else: raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) host = host[:i] else: port = self.default_port if host and host[0] == '[' and host[-1] == ']': host = host[1:-1] self.host = host self.port = port def set_debuglevel(self, level): self.debuglevel = level def _tunnel(self): self._set_hostport(self._tunnel_host, self._tunnel_port) connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) connect_bytes = connect_str.encode("ascii") self.send(connect_bytes) for header, value in self._tunnel_headers.items(): header_str = "%s: %s\r\n" % (header, value) header_bytes = header_str.encode("latin-1") self.send(header_bytes) self.send(b'\r\n') response = self.response_class(self.sock, method=self._method) (version, code, message) = response._read_status() if code != 200: self.close() raise socket.error("Tunnel connection failed: %d %s" % (code, message.strip())) while True: line = response.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: raise LineTooLong("header line") if not line: break if line in (b'\r\n', b'\n', b''): break def connect(self): self.sock = socket.create_connection((self.host, self.port), self.timeout, self.source_address) if self._tunnel_host: self._tunnel() def close(self): if self.sock: self.sock.close() self.sock = None if self.__response: self.__response.close() self.__response = None self.__state = _CS_IDLE def send(self, data): if self.sock is None: if self.auto_open: self.connect() else: raise NotConnected() if self.debuglevel > 0: print("send:", repr(data)) blocksize = 8192 if hasattr(data, "read") : if self.debuglevel > 0: print("sendIng a read()able") encode = False try: mode = data.mode except AttributeError: # io.BytesIO and other file-like objects don't have a `mode` # attribute. pass else: if "b" not in mode: encode = True if self.debuglevel > 0: print("encoding file using iso-8859-1") while 1: datablock = data.read(blocksize) if not datablock: break if encode: datablock = datablock.encode("iso-8859-1") self.sock.sendall(datablock) return try: self.sock.sendall(data) except TypeError: if isinstance(data, collections.Iterable): for d in data: self.sock.sendall(d) else: raise TypeError("data should be a bytes-like object " "or an iterable, got %r" % type(data)) def _output(self, s): self._buffer.append(s) def _send_output(self, message_body=None): self._buffer.extend((b"", b"")) msg = b"\r\n".join(self._buffer) del self._buffer[:] if isinstance(message_body, bytes): msg += message_body message_body = None self.send(msg) if message_body is not None: self.send(message_body) def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): if self.__response and self.__response.isclosed(): self.__response = None if self.__state == _CS_IDLE: self.__state = _CS_REQ_STARTED else: raise CannotSendRequest(self.__state) self._method = method if not url: url = '/' request = '%s %s %s' % (method, url, self._http_vsn_str) self._output(request.encode('ascii')) if self._http_vsn == 11: if not skip_host: netloc = '' if url.startswith('http'): nil, netloc, nil, nil, nil = urlsplit(url) if netloc: try: netloc_enc = netloc.encode("ascii") except UnicodeEncodeError: netloc_enc = netloc.encode("idna") self.putheader('Host', netloc_enc) else: try: host_enc = self.host.encode("ascii") except UnicodeEncodeError: host_enc = self.host.encode("idna") # As per RFC 273, IPv6 address should be wrapped with [] # when used as Host header if self.host.find(':') >= 0: host_enc = b'[' + host_enc + b']' if self.port == self.default_port: self.putheader('Host', host_enc) else: host_enc = host_enc.decode("ascii") self.putheader('Host', "%s:%s" % (host_enc, self.port)) if not skip_accept_encoding: self.putheader('Accept-Encoding', 'identity') else: pass def putheader(self, header, *values): if self.__state != _CS_REQ_STARTED: raise CannotSendHeader() if hasattr(header, 'encode'): header = header.encode('ascii') values = list(values) for i, one_value in enumerate(values): if isinstance(one_value, str): values[i] = one_value.encode('latin-1') elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') value = b'\r\n\t'.join(values) header = header + b': ' + value self._output(header) def endheaders(self, message_body=None): if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT else: raise CannotSendHeader() self._send_output(message_body) def request(self, method, url, body=None, headers={}): self._send_request(method, url, body, headers) def _set_content_length(self, body): thelen = None try: thelen = str(len(body)) except TypeError as te: # If this is a file-like object, try to # fstat its file descriptor try: thelen = str(os.fstat(body.fileno()).st_size) except (AttributeError, OSError): # Don't send a length if this failed if self.debuglevel > 0: print("Cannot stat!!") if thelen is not None: self.putheader('Content-Length', thelen) def _send_request(self, method, url, body, headers): # Honor explicitly requested Host: and Accept-Encoding: headers. header_names = dict.fromkeys([k.lower() for k in headers]) skips = {} if 'host' in header_names: skips['skip_host'] = 1 if 'accept-encoding' in header_names: skips['skip_accept_encoding'] = 1 self.putrequest(method, url, **skips) if body is not None and ('content-length' not in header_names): self._set_content_length(body) for hdr, value in headers.items(): self.putheader(hdr, value) if isinstance(body, str): # RFC 2616 Section 3.7.1 says that text default has a # default charset of iso-8859-1. body = body.encode('iso-8859-1') self.endheaders(body) def getresponse(self): # if a prior response has been completed, then forget about it. if self.__response and self.__response.isclosed(): self.__response = None if self.__state != _CS_REQ_SENT or self.__response: raise ResponseNotReady(self.__state) if self.debuglevel > 0: response = self.response_class(self.sock, self.debuglevel, method=self._method) else: response = self.response_class(self.sock, method=self._method) response.begin() assert response.will_close != _UNKNOWN self.__state = _CS_IDLE if response.will_close: # this effectively passes the connection to the response self.close() else: # remember this, so we can tell when it is complete self.__response = response return response try: import ssl except ImportError: pass else: class HTTPSConnection(HTTPConnection): "This class allows communication via SSL." default_port = HTTPS_PORT # XXX Should key_file and cert_file be deprecated in favour of context? def __init__(self, host, port=None, key_file=None, cert_file=None, strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None, *, context=None, check_hostname=None): super(HTTPSConnection, self).__init__(host, port, strict, timeout, source_address) self.key_file = key_file self.cert_file = cert_file if context is None: # Some reasonable defaults context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 will_verify = context.verify_mode != ssl.CERT_NONE if check_hostname is None: check_hostname = will_verify elif check_hostname and not will_verify: raise ValueError("check_hostname needs a SSL context with " "either CERT_OPTIONAL or CERT_REQUIRED") if key_file or cert_file: context.load_cert_chain(cert_file, key_file) self._context = context self._check_hostname = check_hostname def connect(self): "Connect to a host on a given (SSL) port." sock = socket.create_connection((self.host, self.port), self.timeout, self.source_address) if self._tunnel_host: self.sock = sock self._tunnel() server_hostname = self.host if ssl.HAS_SNI else None self.sock = self._context.wrap_socket(sock, server_hostname=server_hostname) try: if self._check_hostname: ssl.match_hostname(self.sock.getpeercert(), self.host) except Exception: self.sock.shutdown(socket.SHUT_RDWR) self.sock.close() raise __all__.append("HTTPSConnection") class HTTPException(Exception): # Subclasses that define an __init__ must call Exception.__init__ # or define self.args. Otherwise, str() will fail. pass class NotConnected(HTTPException): pass class InvalidURL(HTTPException): pass class UnknownProtocol(HTTPException): def __init__(self, version): self.args = version, self.version = version class UnknownTransferEncoding(HTTPException): pass class UnimplementedFileMode(HTTPException): pass class IncompleteRead(HTTPException): def __init__(self, partial, expected=None): self.args = partial, self.partial = partial self.expected = expected def __repr__(self): if self.expected is not None: e = ', %i more expected' % self.expected else: e = '' return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) def __str__(self): return repr(self) class ImproperConnectionState(HTTPException): pass class CannotSendRequest(ImproperConnectionState): pass class CannotSendHeader(ImproperConnectionState): pass class ResponseNotReady(ImproperConnectionState): pass class BadStatusLine(HTTPException): def __init__(self, line): if not line: line = repr(line) self.args = line, self.line = line class LineTooLong(HTTPException): def __init__(self, line_type): HTTPException.__init__(self, "got more than %d bytes when reading %s" % (_MAXLINE, line_type)) # for backwards compatibility error = HTTPException