client.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338
  1. """HTTP/1.1 client library
  2. <intro stuff goes here>
  3. <other stuff, too>
  4. HTTPConnection goes through a number of "states", which define when a client
  5. may legally make another request or fetch the response for a particular
  6. request. This diagram details these state transitions:
  7. (null)
  8. |
  9. | HTTPConnection()
  10. v
  11. Idle
  12. |
  13. | putrequest()
  14. v
  15. Request-started
  16. |
  17. | ( putheader() )* endheaders()
  18. v
  19. Request-sent
  20. |\_____________________________
  21. | | getresponse() raises
  22. | response = getresponse() | ConnectionError
  23. v v
  24. Unread-response Idle
  25. [Response-headers-read]
  26. |\____________________
  27. | |
  28. | response.read() | putrequest()
  29. v v
  30. Idle Req-started-unread-response
  31. ______/|
  32. / |
  33. response.read() | | ( putheader() )* endheaders()
  34. v v
  35. Request-started Req-sent-unread-response
  36. |
  37. | response.read()
  38. v
  39. Request-sent
  40. This diagram presents the following rules:
  41. -- a second request may not be started until {response-headers-read}
  42. -- a response [object] cannot be retrieved until {request-sent}
  43. -- there is no differentiation between an unread response body and a
  44. partially read response body
  45. Note: this enforcement is applied by the HTTPConnection class. The
  46. HTTPResponse class does not enforce this state machine, which
  47. implies sophisticated clients may accelerate the request/response
  48. pipeline. Caution should be taken, though: accelerating the states
  49. beyond the above pattern may imply knowledge of the server's
  50. connection-close behavior for certain requests. For example, it
  51. is impossible to tell whether the server will close the connection
  52. UNTIL the response headers have been read; this means that further
  53. requests cannot be placed into the pipeline until it is known that
  54. the server will NOT be closing the connection.
  55. Logical State __state __response
  56. ------------- ------- ----------
  57. Idle _CS_IDLE None
  58. Request-started _CS_REQ_STARTED None
  59. Request-sent _CS_REQ_SENT None
  60. Unread-response _CS_IDLE <response_class>
  61. Req-started-unread-response _CS_REQ_STARTED <response_class>
  62. Req-sent-unread-response _CS_REQ_SENT <response_class>
  63. """
  64. import email.parser
  65. import email.message
  66. import http
  67. import io
  68. import os
  69. import re
  70. import socket
  71. import collections
  72. from urllib.parse import urlsplit
  73. # HTTPMessage, parse_headers(), and the HTTP status code constants are
  74. # intentionally omitted for simplicity
# Names exported by ``from <this module> import *``.
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Default TCP ports; HTTPConnection.default_port is HTTP_PORT.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse fields until the response has been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# hack to maintain backwards compatibility
# Every http.HTTPStatus member becomes a module-level name; HTTPResponse.begin()
# relies on this for CONTINUE, NO_CONTENT and NOT_MODIFIED.
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of lines parse_headers() collects before raising HTTPException
_MAXHEADERS = 100
  99. # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
  100. #
  101. # VCHAR = %x21-7E
  102. # obs-text = %x80-FF
  103. # header-field = field-name ":" OWS field-value OWS
  104. # field-name = token
  105. # field-value = *( field-content / obs-fold )
  106. # field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  107. # field-vchar = VCHAR / obs-text
  108. #
  109. # obs-fold = CRLF 1*( SP / HTAB )
  110. # ; obsolete line folding
  111. # ; see Section 3.2.4
  112. # token = 1*tchar
  113. #
  114. # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
  115. # / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
  116. # / DIGIT / ALPHA
  117. # ; any VCHAR, except delimiters
  118. #
  119. # VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
  120. # the patterns for both name and value are more lenient than RFC
  121. # definitions to allow for backwards compatibility
# fullmatch() on a bytes header name: the first byte may not be ':' or
# whitespace, and no later byte may be ':', CR or LF.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# search() on a bytes header value: finds a LF not followed by SP/HTAB, or a
# CR not followed by SP/HTAB/LF.  A match means the value is illegal.
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
  127. def _encode(data, name='data'):
  128. """Call data.encode("latin-1") but show a better error message."""
  129. try:
  130. return data.encode("latin-1")
  131. except UnicodeEncodeError as err:
  132. raise UnicodeEncodeError(
  133. err.encoding,
  134. err.object,
  135. err.start,
  136. err.end,
  137. "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
  138. "if you want to send it encoded in UTF-8." %
  139. (name.title(), data[err.start:err.end], name)) from None
  140. class HTTPMessage(email.message.Message):
  141. # XXX The only usage of this method is in
  142. # http.server.CGIHTTPRequestHandler. Maybe move the code there so
  143. # that it doesn't need to be part of the public API. The API has
  144. # never been defined so this could cause backwards compatibility
  145. # issues.
  146. def getallmatchingheaders(self, name):
  147. """Find all header lines matching a given header name.
  148. Look through the list of headers and find all lines matching a given
  149. header name (and their continuation lines). A list of the lines is
  150. returned, without interpretation. If the header does not occur, an
  151. empty list is returned. If the header occurs multiple times, all
  152. occurrences are returned. Case is not important in the header name.
  153. """
  154. name = name.lower() + ':'
  155. n = len(name)
  156. lst = []
  157. hit = 0
  158. for line in self.keys():
  159. if line[:n].lower() == name:
  160. hit = 1
  161. elif not line[:1].isspace():
  162. hit = 0
  163. if hit:
  164. lst.append(line)
  165. return lst
  166. def parse_headers(fp, _class=HTTPMessage):
  167. """Parses only RFC2822 headers from a file pointer.
  168. email Parser wants to see strings rather than bytes.
  169. But a TextIOWrapper around self.rfile would buffer too many bytes
  170. from the stream, bytes which we later need to read as bytes.
  171. So we read the correct bytes here, as bytes, for email Parser
  172. to parse.
  173. """
  174. headers = []
  175. while True:
  176. line = fp.readline(_MAXLINE + 1)
  177. if len(line) > _MAXLINE:
  178. raise LineTooLong("header line")
  179. headers.append(line)
  180. if len(headers) > _MAXHEADERS:
  181. raise HTTPException("got more than %d headers" % _MAXHEADERS)
  182. if line in (b'\r\n', b'\n', b''):
  183. break
  184. hstring = b''.join(headers).decode('iso-8859-1')
  185. return email.parser.Parser(_class=_class).parsestr(hstring)
  186. class HTTPResponse(io.BufferedIOBase):
  187. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
  188. # The bytes from the socket object are iso-8859-1 strings.
  189. # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
  190. # text following RFC 2047. The basic status line parsing only
  191. # accepts iso-8859-1.
  192. def __init__(self, sock, debuglevel=0, method=None, url=None):
  193. # If the response includes a content-length header, we need to
  194. # make sure that the client doesn't read more than the
  195. # specified number of bytes. If it does, it will block until
  196. # the server times out and closes the connection. This will
  197. # happen if a self.fp.read() is done (without a size) whether
  198. # self.fp is buffered or not. So, no self.fp.read() by
  199. # clients unless they know what they are doing.
  200. self.fp = sock.makefile("rb")
  201. self.debuglevel = debuglevel
  202. self._method = method
  203. # The HTTPResponse object is returned via urllib. The clients
  204. # of http and urllib expect different attributes for the
  205. # headers. headers is used here and supports urllib. msg is
  206. # provided as a backwards compatibility layer for http
  207. # clients.
  208. self.headers = self.msg = None
  209. # from the Status-Line of the response
  210. self.version = _UNKNOWN # HTTP-Version
  211. self.status = _UNKNOWN # Status-Code
  212. self.reason = _UNKNOWN # Reason-Phrase
  213. self.chunked = _UNKNOWN # is "chunked" being used?
  214. self.chunk_left = _UNKNOWN # bytes left to read in current chunk
  215. self.length = _UNKNOWN # number of bytes left in response
  216. self.will_close = _UNKNOWN # conn will close at end of response
  217. def _read_status(self):
  218. line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  219. if len(line) > _MAXLINE:
  220. raise LineTooLong("status line")
  221. if self.debuglevel > 0:
  222. print("reply:", repr(line))
  223. if not line:
  224. # Presumably, the server closed the connection before
  225. # sending a valid response.
  226. raise RemoteDisconnected("Remote end closed connection without"
  227. " response")
  228. try:
  229. version, status, reason = line.split(None, 2)
  230. except ValueError:
  231. try:
  232. version, status = line.split(None, 1)
  233. reason = ""
  234. except ValueError:
  235. # empty version will cause next test to fail.
  236. version = ""
  237. if not version.startswith("HTTP/"):
  238. self._close_conn()
  239. raise BadStatusLine(line)
  240. # The status code is a three-digit number
  241. try:
  242. status = int(status)
  243. if status < 100 or status > 999:
  244. raise BadStatusLine(line)
  245. except ValueError:
  246. raise BadStatusLine(line)
  247. return version, status, reason
  248. def begin(self):
  249. if self.headers is not None:
  250. # we've already started reading the response
  251. return
  252. # read until we get a non-100 response
  253. while True:
  254. version, status, reason = self._read_status()
  255. if status != CONTINUE:
  256. break
  257. # skip the header from the 100 response
  258. while True:
  259. skip = self.fp.readline(_MAXLINE + 1)
  260. if len(skip) > _MAXLINE:
  261. raise LineTooLong("header line")
  262. skip = skip.strip()
  263. if not skip:
  264. break
  265. if self.debuglevel > 0:
  266. print("header:", skip)
  267. self.code = self.status = status
  268. self.reason = reason.strip()
  269. if version in ("HTTP/1.0", "HTTP/0.9"):
  270. # Some servers might still return "0.9", treat it as 1.0 anyway
  271. self.version = 10
  272. elif version.startswith("HTTP/1."):
  273. self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
  274. else:
  275. raise UnknownProtocol(version)
  276. self.headers = self.msg = parse_headers(self.fp)
  277. if self.debuglevel > 0:
  278. for hdr in self.headers:
  279. print("header:", hdr, end=" ")
  280. # are we using the chunked-style of transfer encoding?
  281. tr_enc = self.headers.get("transfer-encoding")
  282. if tr_enc and tr_enc.lower() == "chunked":
  283. self.chunked = True
  284. self.chunk_left = None
  285. else:
  286. self.chunked = False
  287. # will the connection close at the end of the response?
  288. self.will_close = self._check_close()
  289. # do we have a Content-Length?
  290. # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
  291. self.length = None
  292. length = self.headers.get("content-length")
  293. # are we using the chunked-style of transfer encoding?
  294. tr_enc = self.headers.get("transfer-encoding")
  295. if length and not self.chunked:
  296. try:
  297. self.length = int(length)
  298. except ValueError:
  299. self.length = None
  300. else:
  301. if self.length < 0: # ignore nonsensical negative lengths
  302. self.length = None
  303. else:
  304. self.length = None
  305. # does the body have a fixed length? (of zero)
  306. if (status == NO_CONTENT or status == NOT_MODIFIED or
  307. 100 <= status < 200 or # 1xx codes
  308. self._method == "HEAD"):
  309. self.length = 0
  310. # if the connection remains open, and we aren't using chunked, and
  311. # a content-length was not provided, then assume that the connection
  312. # WILL close.
  313. if (not self.will_close and
  314. not self.chunked and
  315. self.length is None):
  316. self.will_close = True
  317. def _check_close(self):
  318. conn = self.headers.get("connection")
  319. if self.version == 11:
  320. # An HTTP/1.1 proxy is assumed to stay open unless
  321. # explicitly closed.
  322. conn = self.headers.get("connection")
  323. if conn and "close" in conn.lower():
  324. return True
  325. return False
  326. # Some HTTP/1.0 implementations have support for persistent
  327. # connections, using rules different than HTTP/1.1.
  328. # For older HTTP, Keep-Alive indicates persistent connection.
  329. if self.headers.get("keep-alive"):
  330. return False
  331. # At least Akamai returns a "Connection: Keep-Alive" header,
  332. # which was supposed to be sent by the client.
  333. if conn and "keep-alive" in conn.lower():
  334. return False
  335. # Proxy-Connection is a netscape hack.
  336. pconn = self.headers.get("proxy-connection")
  337. if pconn and "keep-alive" in pconn.lower():
  338. return False
  339. # otherwise, assume it will close
  340. return True
  341. def _close_conn(self):
  342. fp = self.fp
  343. self.fp = None
  344. fp.close()
    def close(self):
        """Mark this object closed and close the underlying stream, if any."""
        try:
            super().close() # set "closed" flag
        finally:
            # Release the stream even if the base-class close() raised.
            if self.fp:
                self._close_conn()
  351. # These implementations are for the benefit of io.BufferedReader.
  352. # XXX This class should probably be revised to act more like
  353. # the "raw stream" that BufferedReader expects.
    def flush(self):
        """Flush this object and, while it is still attached, self.fp."""
        super().flush()
        if self.fp:
            self.fp.flush()
    def readable(self):
        """Always return True."""
        return True
  360. # End of "raw stream" methods
    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        # _close_conn() sets self.fp to None, so that is the "closed" marker.
        return self.fp is None
  370. def read(self, amt=None):
  371. if self.fp is None:
  372. return b""
  373. if self._method == "HEAD":
  374. self._close_conn()
  375. return b""
  376. if amt is not None:
  377. # Amount is given, implement using readinto
  378. b = bytearray(amt)
  379. n = self.readinto(b)
  380. return memoryview(b)[:n].tobytes()
  381. else:
  382. # Amount is not given (unbounded read) so we must check self.length
  383. # and self.chunked
  384. if self.chunked:
  385. return self._readall_chunked()
  386. if self.length is None:
  387. s = self.fp.read()
  388. else:
  389. try:
  390. s = self._safe_read(self.length)
  391. except IncompleteRead:
  392. self._close_conn()
  393. raise
  394. self.length = 0
  395. self._close_conn() # we read everything
  396. return s
  397. def readinto(self, b):
  398. if self.fp is None:
  399. return 0
  400. if self._method == "HEAD":
  401. self._close_conn()
  402. return 0
  403. if self.chunked:
  404. return self._readinto_chunked(b)
  405. if self.length is not None:
  406. if len(b) > self.length:
  407. # clip the read to the "end of response"
  408. b = memoryview(b)[0:self.length]
  409. # we do not use _safe_read() here because this may be a .will_close
  410. # connection, and the user is reading more bytes than will be provided
  411. # (for example, reading in 1k chunks)
  412. n = self.fp.readinto(b)
  413. if not n and b:
  414. # Ideally, we would raise IncompleteRead if the content-length
  415. # wasn't satisfied, but it might break compatibility.
  416. self._close_conn()
  417. elif self.length is not None:
  418. self.length -= n
  419. if not self.length:
  420. self._close_conn()
  421. return n
  422. def _read_next_chunk_size(self):
  423. # Read the next chunk size from the file
  424. line = self.fp.readline(_MAXLINE + 1)
  425. if len(line) > _MAXLINE:
  426. raise LineTooLong("chunk size")
  427. i = line.find(b";")
  428. if i >= 0:
  429. line = line[:i] # strip chunk-extensions
  430. try:
  431. return int(line, 16)
  432. except ValueError:
  433. # close the connection as protocol synchronisation is
  434. # probably lost
  435. self._close_conn()
  436. raise
  437. def _read_and_discard_trailer(self):
  438. # read and discard trailer up to the CRLF terminator
  439. ### note: we shouldn't have any trailers!
  440. while True:
  441. line = self.fp.readline(_MAXLINE + 1)
  442. if len(line) > _MAXLINE:
  443. raise LineTooLong("trailer line")
  444. if not line:
  445. # a vanishingly small number of sites EOF without
  446. # sending the trailer
  447. break
  448. if line in (b'\r\n', b'\n', b''):
  449. break
    def _get_chunk_left(self):
        """Return the bytes left in the current chunk, or None at end of body.

        Reads the next chunk header when the current chunk is exhausted
        (self.chunk_left == 0) or none has been started (None).  On the
        terminating zero-size chunk the trailer is consumed, the stream is
        closed and None is returned.  Raises IncompleteRead if the chunk
        size cannot be parsed.
        """
        # return self.chunk_left, reading a new chunk if necessary.
        # chunk_left == 0: at the end of the current chunk, need to close it
        # chunk_left == None: No current chunk, should read next.
        # This function returns non-zero or None if the last chunk has
        # been read.
        chunk_left = self.chunk_left
        if not chunk_left: # Can be 0 or None
            if chunk_left is not None:
                # We are at the end of chunk. discard chunk end
                self._safe_read(2)  # toss the CRLF at the end of the chunk
            try:
                chunk_left = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b'')
            if chunk_left == 0:
                # last chunk: 1*("0") [ chunk-extension ] CRLF
                self._read_and_discard_trailer()
                # we read everything; close the "file"
                self._close_conn()
                chunk_left = None
            self.chunk_left = chunk_left
        return chunk_left
  473. def _readall_chunked(self):
  474. assert self.chunked != _UNKNOWN
  475. value = []
  476. try:
  477. while True:
  478. chunk_left = self._get_chunk_left()
  479. if chunk_left is None:
  480. break
  481. value.append(self._safe_read(chunk_left))
  482. self.chunk_left = 0
  483. return b''.join(value)
  484. except IncompleteRead:
  485. raise IncompleteRead(b''.join(value))
    def _readinto_chunked(self, b):
        """Fill buffer *b* from a chunked body; return the bytes stored.

        Keeps reading across chunk boundaries until *b* is full or the body
        ends.  If a chunk is cut short, IncompleteRead is re-raised carrying
        the bytes already placed in *b*.
        """
        assert self.chunked != _UNKNOWN
        total_bytes = 0
        mvb = memoryview(b)  # unfilled tail of b
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    # end of body
                    return total_bytes

                if len(mvb) <= chunk_left:
                    # the current chunk can fill the rest of the buffer
                    n = self._safe_readinto(mvb)
                    self.chunk_left = chunk_left - n
                    return total_bytes + n

                # drain the whole chunk, then loop for the next one
                temp_mvb = mvb[:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n
                self.chunk_left = 0

        except IncompleteRead:
            raise IncompleteRead(bytes(b[0:total_bytes]))
  506. def _safe_read(self, amt):
  507. """Read the number of bytes requested, compensating for partial reads.
  508. Normally, we have a blocking socket, but a read() can be interrupted
  509. by a signal (resulting in a partial read).
  510. Note that we cannot distinguish between EOF and an interrupt when zero
  511. bytes have been read. IncompleteRead() will be raised in this
  512. situation.
  513. This function should be used when <amt> bytes "should" be present for
  514. reading. If the bytes are truly not available (due to EOF), then the
  515. IncompleteRead exception can be used to detect the problem.
  516. """
  517. s = []
  518. while amt > 0:
  519. chunk = self.fp.read(min(amt, MAXAMOUNT))
  520. if not chunk:
  521. raise IncompleteRead(b''.join(s), amt)
  522. s.append(chunk)
  523. amt -= len(chunk)
  524. return b"".join(s)
  525. def _safe_readinto(self, b):
  526. """Same as _safe_read, but for reading into a buffer."""
  527. total_bytes = 0
  528. mvb = memoryview(b)
  529. while total_bytes < len(b):
  530. if MAXAMOUNT < len(mvb):
  531. temp_mvb = mvb[0:MAXAMOUNT]
  532. n = self.fp.readinto(temp_mvb)
  533. else:
  534. n = self.fp.readinto(mvb)
  535. if not n:
  536. raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
  537. mvb = mvb[n:]
  538. total_bytes += n
  539. return total_bytes
  540. def read1(self, n=-1):
  541. """Read with at most one underlying system call. If at least one
  542. byte is buffered, return that instead.
  543. """
  544. if self.fp is None or self._method == "HEAD":
  545. return b""
  546. if self.chunked:
  547. return self._read1_chunked(n)
  548. if self.length is not None and (n < 0 or n > self.length):
  549. n = self.length
  550. try:
  551. result = self.fp.read1(n)
  552. except ValueError:
  553. if n >= 0:
  554. raise
  555. # some implementations, like BufferedReader, don't support -1
  556. # Read an arbitrarily selected largeish chunk.
  557. result = self.fp.read1(16*1024)
  558. if not result and n:
  559. self._close_conn()
  560. elif self.length is not None:
  561. self.length -= len(result)
  562. return result
  563. def peek(self, n=-1):
  564. # Having this enables IOBase.readline() to read more than one
  565. # byte at a time
  566. if self.fp is None or self._method == "HEAD":
  567. return b""
  568. if self.chunked:
  569. return self._peek_chunked(n)
  570. return self.fp.peek(n)
  571. def readline(self, limit=-1):
  572. if self.fp is None or self._method == "HEAD":
  573. return b""
  574. if self.chunked:
  575. # Fallback to IOBase readline which uses peek() and read()
  576. return super().readline(limit)
  577. if self.length is not None and (limit < 0 or limit > self.length):
  578. limit = self.length
  579. result = self.fp.readline(limit)
  580. if not result and limit:
  581. self._close_conn()
  582. elif self.length is not None:
  583. self.length -= len(result)
  584. return result
  585. def _read1_chunked(self, n):
  586. # Strictly speaking, _get_chunk_left() may cause more than one read,
  587. # but that is ok, since that is to satisfy the chunked protocol.
  588. chunk_left = self._get_chunk_left()
  589. if chunk_left is None or n == 0:
  590. return b''
  591. if not (0 <= n <= chunk_left):
  592. n = chunk_left # if n is negative or larger than chunk_left
  593. read = self.fp.read1(n)
  594. self.chunk_left -= len(read)
  595. if not read:
  596. raise IncompleteRead(b"")
  597. return read
  598. def _peek_chunked(self, n):
  599. # Strictly speaking, _get_chunk_left() may cause more than one read,
  600. # but that is ok, since that is to satisfy the chunked protocol.
  601. try:
  602. chunk_left = self._get_chunk_left()
  603. except IncompleteRead:
  604. return b'' # peek doesn't worry about protocol
  605. if chunk_left is None:
  606. return b'' # eof
  607. # peek is allowed to return more than requested. Just request the
  608. # entire chunk, and truncate what we get.
  609. return self.fp.peek(chunk_left)[:chunk_left]
    def fileno(self):
        """Return self.fp.fileno(); fails once self.fp has been set to None."""
        return self.fp.fileno()
  612. def getheader(self, name, default=None):
  613. if self.headers is None:
  614. raise ResponseNotReady()
  615. headers = self.headers.get_all(name) or default
  616. if isinstance(headers, str) or not hasattr(headers, '__iter__'):
  617. return headers
  618. else:
  619. return ', '.join(headers)
  620. def getheaders(self):
  621. """Return list of (header, value) tuples."""
  622. if self.headers is None:
  623. raise ResponseNotReady()
  624. return list(self.headers.items())
  625. # We override IOBase.__iter__ so that it doesn't check for closed-ness
    def __iter__(self):
        """Return self, without the closed-ness check IOBase would do."""
        return self
  628. # For compatibility with old-style urllib responses.
    def info(self):
        """Return self.headers (None until begin() has parsed them)."""
        return self.headers
    def geturl(self):
        """Return the ``url`` attribute of this response."""
        return self.url
    def getcode(self):
        """Return self.status (the integer status code once begin() has run)."""
        return self.status
  635. class HTTPConnection:
  636. _http_vsn = 11
  637. _http_vsn_str = 'HTTP/1.1'
  638. response_class = HTTPResponse
  639. default_port = HTTP_PORT
  640. auto_open = 1
  641. debuglevel = 0
    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 source_address=None):
        """Record connection parameters; no socket is opened here.

        host           -- host name, optionally with ":port" (see
                          _get_hostport for the accepted forms).
        port           -- port number; when None it is taken from *host* or
                          falls back to default_port.
        timeout        -- passed to the connection factory in connect().
        source_address -- passed to the connection factory in connect().
        """
        self.timeout = timeout
        self.source_address = source_address
        self.sock = None          # set by connect(), reset by close()
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        # CONNECT-tunnel settings, filled in by set_tunnel()
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        (self.host, self.port) = self._get_hostport(host, port)

        # This is stored as an instance variable to allow unit
        # tests to replace it with a suitable mockup
        self._create_connection = socket.create_connection
  658. def set_tunnel(self, host, port=None, headers=None):
  659. """Set up host and port for HTTP CONNECT tunnelling.
  660. In a connection that uses HTTP CONNECT tunneling, the host passed to the
  661. constructor is used as a proxy server that relays all communication to
  662. the endpoint passed to `set_tunnel`. This done by sending an HTTP
  663. CONNECT request to the proxy server when the connection is established.
  664. This method must be called before the HTML connection has been
  665. established.
  666. The headers argument should be a mapping of extra HTTP headers to send
  667. with the CONNECT request.
  668. """
  669. if self.sock:
  670. raise RuntimeError("Can't set up tunnel for established connection")
  671. self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
  672. if headers:
  673. self._tunnel_headers = headers
  674. else:
  675. self._tunnel_headers.clear()
  676. def _get_hostport(self, host, port):
  677. if port is None:
  678. i = host.rfind(':')
  679. j = host.rfind(']') # ipv6 addresses have [...]
  680. if i > j:
  681. try:
  682. port = int(host[i+1:])
  683. except ValueError:
  684. if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
  685. port = self.default_port
  686. else:
  687. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
  688. host = host[:i]
  689. else:
  690. port = self.default_port
  691. if host and host[0] == '[' and host[-1] == ']':
  692. host = host[1:-1]
  693. return (host, port)
    def set_debuglevel(self, level):
        """Set self.debuglevel; values > 0 enable the debug print() calls."""
        self.debuglevel = level
    def _tunnel(self):
        """Send a CONNECT request for the tunnel endpoint and read the reply.

        Sends the request line, the headers from self._tunnel_headers and a
        blank line, then reads the status line and discards the reply
        headers.

        Raises OSError (after closing the connection) if the status is not
        200 OK, and LineTooLong if a reply header line exceeds _MAXLINE.
        """
        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
            self._tunnel_port)
        connect_bytes = connect_str.encode("ascii")
        self.send(connect_bytes)
        for header, value in self._tunnel_headers.items():
            header_str = "%s: %s\r\n" % (header, value)
            header_bytes = header_str.encode("latin-1")
            self.send(header_bytes)
        self.send(b'\r\n')  # blank line ends the request headers

        response = self.response_class(self.sock, method=self._method)
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError("Tunnel connection failed: %d %s" % (code,
                                                               message.strip()))
        # Consume the reply headers up to the blank line (or EOF).
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
  723. def connect(self):
  724. """Connect to the host and port specified in __init__."""
  725. self.sock = self._create_connection(
  726. (self.host,self.port), self.timeout, self.source_address)
  727. self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  728. if self._tunnel_host:
  729. self._tunnel()
  730. def close(self):
  731. """Close the connection to the HTTP server."""
  732. self.__state = _CS_IDLE
  733. try:
  734. sock = self.sock
  735. if sock:
  736. self.sock = None
  737. sock.close() # close it manually... there may be other refs
  738. finally:
  739. response = self.__response
  740. if response:
  741. self.__response = None
  742. response.close()
  743. def send(self, data):
  744. """Send `data' to the server.
  745. ``data`` can be a string object, a bytes object, an array object, a
  746. file-like object that supports a .read() method, or an iterable object.
  747. """
  748. if self.sock is None:
  749. if self.auto_open:
  750. self.connect()
  751. else:
  752. raise NotConnected()
  753. if self.debuglevel > 0:
  754. print("send:", repr(data))
  755. blocksize = 8192
  756. if hasattr(data, "read") :
  757. if self.debuglevel > 0:
  758. print("sendIng a read()able")
  759. encode = False
  760. try:
  761. mode = data.mode
  762. except AttributeError:
  763. # io.BytesIO and other file-like objects don't have a `mode`
  764. # attribute.
  765. pass
  766. else:
  767. if "b" not in mode:
  768. encode = True
  769. if self.debuglevel > 0:
  770. print("encoding file using iso-8859-1")
  771. while 1:
  772. datablock = data.read(blocksize)
  773. if not datablock:
  774. break
  775. if encode:
  776. datablock = datablock.encode("iso-8859-1")
  777. self.sock.sendall(datablock)
  778. return
  779. try:
  780. self.sock.sendall(data)
  781. except TypeError:
  782. if isinstance(data, collections.Iterable):
  783. for d in data:
  784. self.sock.sendall(d)
  785. else:
  786. raise TypeError("data should be a bytes-like object "
  787. "or an iterable, got %r" % type(data))
  788. def _output(self, s):
  789. """Add a line of output to the current request buffer.
  790. Assumes that the line does *not* end with \\r\\n.
  791. """
  792. self._buffer.append(s)
  793. def _send_output(self, message_body=None):
  794. """Send the currently buffered request and clear the buffer.
  795. Appends an extra \\r\\n to the buffer.
  796. A message_body may be specified, to be appended to the request.
  797. """
  798. self._buffer.extend((b"", b""))
  799. msg = b"\r\n".join(self._buffer)
  800. del self._buffer[:]
  801. self.send(msg)
  802. if message_body is not None:
  803. self.send(message_body)
  804. def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
  805. """Send a request to the server.
  806. `method' specifies an HTTP request method, e.g. 'GET'.
  807. `url' specifies the object being requested, e.g. '/index.html'.
  808. `skip_host' if True does not add automatically a 'Host:' header
  809. `skip_accept_encoding' if True does not add automatically an
  810. 'Accept-Encoding:' header
  811. """
  812. # if a prior response has been completed, then forget about it.
  813. if self.__response and self.__response.isclosed():
  814. self.__response = None
  815. # in certain cases, we cannot issue another request on this connection.
  816. # this occurs when:
  817. # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
  818. # 2) a response to a previous request has signalled that it is going
  819. # to close the connection upon completion.
  820. # 3) the headers for the previous response have not been read, thus
  821. # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
  822. #
  823. # if there is no prior response, then we can request at will.
  824. #
  825. # if point (2) is true, then we will have passed the socket to the
  826. # response (effectively meaning, "there is no prior response"), and
  827. # will open a new one when a new request is made.
  828. #
  829. # Note: if a prior response exists, then we *can* start a new request.
  830. # We are not allowed to begin fetching the response to this new
  831. # request, however, until that prior response is complete.
  832. #
  833. if self.__state == _CS_IDLE:
  834. self.__state = _CS_REQ_STARTED
  835. else:
  836. raise CannotSendRequest(self.__state)
  837. # Save the method we use, we need it later in the response phase
  838. self._method = method
  839. if not url:
  840. url = '/'
  841. request = '%s %s %s' % (method, url, self._http_vsn_str)
  842. # Non-ASCII characters should have been eliminated earlier
  843. self._output(request.encode('ascii'))
  844. if self._http_vsn == 11:
  845. # Issue some standard headers for better HTTP/1.1 compliance
  846. if not skip_host:
  847. # this header is issued *only* for HTTP/1.1
  848. # connections. more specifically, this means it is
  849. # only issued when the client uses the new
  850. # HTTPConnection() class. backwards-compat clients
  851. # will be using HTTP/1.0 and those clients may be
  852. # issuing this header themselves. we should NOT issue
  853. # it twice; some web servers (such as Apache) barf
  854. # when they see two Host: headers
  855. # If we need a non-standard port,include it in the
  856. # header. If the request is going through a proxy,
  857. # but the host of the actual URL, not the host of the
  858. # proxy.
  859. netloc = ''
  860. if url.startswith('http'):
  861. nil, netloc, nil, nil, nil = urlsplit(url)
  862. if netloc:
  863. try:
  864. netloc_enc = netloc.encode("ascii")
  865. except UnicodeEncodeError:
  866. netloc_enc = netloc.encode("idna")
  867. self.putheader('Host', netloc_enc)
  868. else:
  869. if self._tunnel_host:
  870. host = self._tunnel_host
  871. port = self._tunnel_port
  872. else:
  873. host = self.host
  874. port = self.port
  875. try:
  876. host_enc = host.encode("ascii")
  877. except UnicodeEncodeError:
  878. host_enc = host.encode("idna")
  879. # As per RFC 273, IPv6 address should be wrapped with []
  880. # when used as Host header
  881. if host.find(':') >= 0:
  882. host_enc = b'[' + host_enc + b']'
  883. if port == self.default_port:
  884. self.putheader('Host', host_enc)
  885. else:
  886. host_enc = host_enc.decode("ascii")
  887. self.putheader('Host', "%s:%s" % (host_enc, port))
  888. # note: we are assuming that clients will not attempt to set these
  889. # headers since *this* library must deal with the
  890. # consequences. this also means that when the supporting
  891. # libraries are updated to recognize other forms, then this
  892. # code should be changed (removed or updated).
  893. # we only want a Content-Encoding of "identity" since we don't
  894. # support encodings such as x-gzip or x-deflate.
  895. if not skip_accept_encoding:
  896. self.putheader('Accept-Encoding', 'identity')
  897. # we can accept "chunked" Transfer-Encodings, but no others
  898. # NOTE: no TE header implies *only* "chunked"
  899. #self.putheader('TE', 'chunked')
  900. # if TE is supplied in the header, then it must appear in a
  901. # Connection header.
  902. #self.putheader('Connection', 'TE')
  903. else:
  904. # For HTTP/1.0, the server will assume "not chunked"
  905. pass
  906. def putheader(self, header, *values):
  907. """Send a request header line to the server.
  908. For example: h.putheader('Accept', 'text/html')
  909. """
  910. if self.__state != _CS_REQ_STARTED:
  911. raise CannotSendHeader()
  912. if hasattr(header, 'encode'):
  913. header = header.encode('ascii')
  914. if not _is_legal_header_name(header):
  915. raise ValueError('Invalid header name %r' % (header,))
  916. values = list(values)
  917. for i, one_value in enumerate(values):
  918. if hasattr(one_value, 'encode'):
  919. values[i] = one_value.encode('latin-1')
  920. elif isinstance(one_value, int):
  921. values[i] = str(one_value).encode('ascii')
  922. if _is_illegal_header_value(values[i]):
  923. raise ValueError('Invalid header value %r' % (values[i],))
  924. value = b'\r\n\t'.join(values)
  925. header = header + b': ' + value
  926. self._output(header)
  927. def endheaders(self, message_body=None):
  928. """Indicate that the last header line has been sent to the server.
  929. This method sends the request to the server. The optional message_body
  930. argument can be used to pass a message body associated with the
  931. request. The message body will be sent in the same packet as the
  932. message headers if it is a string, otherwise it is sent as a separate
  933. packet.
  934. """
  935. if self.__state == _CS_REQ_STARTED:
  936. self.__state = _CS_REQ_SENT
  937. else:
  938. raise CannotSendHeader()
  939. self._send_output(message_body)
  940. def request(self, method, url, body=None, headers={}):
  941. """Send a complete request to the server."""
  942. self._send_request(method, url, body, headers)
  943. def _set_content_length(self, body, method):
  944. # Set the content-length based on the body. If the body is "empty", we
  945. # set Content-Length: 0 for methods that expect a body (RFC 7230,
  946. # Section 3.3.2). If the body is set for other methods, we set the
  947. # header provided we can figure out what the length is.
  948. thelen = None
  949. method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
  950. if body is None and method_expects_body:
  951. thelen = '0'
  952. elif body is not None:
  953. try:
  954. thelen = str(len(body))
  955. except TypeError:
  956. # If this is a file-like object, try to
  957. # fstat its file descriptor
  958. try:
  959. thelen = str(os.fstat(body.fileno()).st_size)
  960. except (AttributeError, OSError):
  961. # Don't send a length if this failed
  962. if self.debuglevel > 0: print("Cannot stat!!")
  963. if thelen is not None:
  964. self.putheader('Content-Length', thelen)
  965. def _send_request(self, method, url, body, headers):
  966. # Honor explicitly requested Host: and Accept-Encoding: headers.
  967. header_names = dict.fromkeys([k.lower() for k in headers])
  968. skips = {}
  969. if 'host' in header_names:
  970. skips['skip_host'] = 1
  971. if 'accept-encoding' in header_names:
  972. skips['skip_accept_encoding'] = 1
  973. self.putrequest(method, url, **skips)
  974. if 'content-length' not in header_names:
  975. self._set_content_length(body, method)
  976. for hdr, value in headers.items():
  977. self.putheader(hdr, value)
  978. if isinstance(body, str):
  979. # RFC 2616 Section 3.7.1 says that text default has a
  980. # default charset of iso-8859-1.
  981. body = _encode(body, 'body')
  982. self.endheaders(body)
  983. def getresponse(self):
  984. """Get the response from the server.
  985. If the HTTPConnection is in the correct state, returns an
  986. instance of HTTPResponse or of whatever object is returned by
  987. the response_class variable.
  988. If a request has not been sent or if a previous response has
  989. not be handled, ResponseNotReady is raised. If the HTTP
  990. response indicates that the connection should be closed, then
  991. it will be closed before the response is returned. When the
  992. connection is closed, the underlying socket is closed.
  993. """
  994. # if a prior response has been completed, then forget about it.
  995. if self.__response and self.__response.isclosed():
  996. self.__response = None
  997. # if a prior response exists, then it must be completed (otherwise, we
  998. # cannot read this response's header to determine the connection-close
  999. # behavior)
  1000. #
  1001. # note: if a prior response existed, but was connection-close, then the
  1002. # socket and response were made independent of this HTTPConnection
  1003. # object since a new request requires that we open a whole new
  1004. # connection
  1005. #
  1006. # this means the prior response had one of two states:
  1007. # 1) will_close: this connection was reset and the prior socket and
  1008. # response operate independently
  1009. # 2) persistent: the response was retained and we await its
  1010. # isclosed() status to become true.
  1011. #
  1012. if self.__state != _CS_REQ_SENT or self.__response:
  1013. raise ResponseNotReady(self.__state)
  1014. if self.debuglevel > 0:
  1015. response = self.response_class(self.sock, self.debuglevel,
  1016. method=self._method)
  1017. else:
  1018. response = self.response_class(self.sock, method=self._method)
  1019. try:
  1020. try:
  1021. response.begin()
  1022. except ConnectionError:
  1023. self.close()
  1024. raise
  1025. assert response.will_close != _UNKNOWN
  1026. self.__state = _CS_IDLE
  1027. if response.will_close:
  1028. # this effectively passes the connection to the response
  1029. self.close()
  1030. else:
  1031. # remember this, so we can tell when it is complete
  1032. self.__response = response
  1033. return response
  1034. except:
  1035. response.close()
  1036. raise
  1037. try:
  1038. import ssl
  1039. except ImportError:
  1040. pass
  1041. else:
  1042. class HTTPSConnection(HTTPConnection):
  1043. "This class allows communication via SSL."
  1044. default_port = HTTPS_PORT
  1045. # XXX Should key_file and cert_file be deprecated in favour of context?
  1046. def __init__(self, host, port=None, key_file=None, cert_file=None,
  1047. timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
  1048. source_address=None, *, context=None,
  1049. check_hostname=None):
  1050. super(HTTPSConnection, self).__init__(host, port, timeout,
  1051. source_address)
  1052. self.key_file = key_file
  1053. self.cert_file = cert_file
  1054. if context is None:
  1055. context = ssl._create_default_https_context()
  1056. will_verify = context.verify_mode != ssl.CERT_NONE
  1057. if check_hostname is None:
  1058. check_hostname = context.check_hostname
  1059. if check_hostname and not will_verify:
  1060. raise ValueError("check_hostname needs a SSL context with "
  1061. "either CERT_OPTIONAL or CERT_REQUIRED")
  1062. if key_file or cert_file:
  1063. context.load_cert_chain(cert_file, key_file)
  1064. self._context = context
  1065. self._check_hostname = check_hostname
  1066. def connect(self):
  1067. "Connect to a host on a given (SSL) port."
  1068. super().connect()
  1069. if self._tunnel_host:
  1070. server_hostname = self._tunnel_host
  1071. else:
  1072. server_hostname = self.host
  1073. self.sock = self._context.wrap_socket(self.sock,
  1074. server_hostname=server_hostname)
  1075. if not self._context.check_hostname and self._check_hostname:
  1076. try:
  1077. ssl.match_hostname(self.sock.getpeercert(), server_hostname)
  1078. except Exception:
  1079. self.sock.shutdown(socket.SHUT_RDWR)
  1080. self.sock.close()
  1081. raise
  1082. __all__.append("HTTPSConnection")
  1083. class HTTPException(Exception):
  1084. # Subclasses that define an __init__ must call Exception.__init__
  1085. # or define self.args. Otherwise, str() will fail.
  1086. pass
  1087. class NotConnected(HTTPException):
  1088. pass
  1089. class InvalidURL(HTTPException):
  1090. pass
  1091. class UnknownProtocol(HTTPException):
  1092. def __init__(self, version):
  1093. self.args = version,
  1094. self.version = version
  1095. class UnknownTransferEncoding(HTTPException):
  1096. pass
  1097. class UnimplementedFileMode(HTTPException):
  1098. pass
  1099. class IncompleteRead(HTTPException):
  1100. def __init__(self, partial, expected=None):
  1101. self.args = partial,
  1102. self.partial = partial
  1103. self.expected = expected
  1104. def __repr__(self):
  1105. if self.expected is not None:
  1106. e = ', %i more expected' % self.expected
  1107. else:
  1108. e = ''
  1109. return '%s(%i bytes read%s)' % (self.__class__.__name__,
  1110. len(self.partial), e)
  1111. def __str__(self):
  1112. return repr(self)
  1113. class ImproperConnectionState(HTTPException):
  1114. pass
  1115. class CannotSendRequest(ImproperConnectionState):
  1116. pass
  1117. class CannotSendHeader(ImproperConnectionState):
  1118. pass
  1119. class ResponseNotReady(ImproperConnectionState):
  1120. pass
  1121. class BadStatusLine(HTTPException):
  1122. def __init__(self, line):
  1123. if not line:
  1124. line = repr(line)
  1125. self.args = line,
  1126. self.line = line
  1127. class LineTooLong(HTTPException):
  1128. def __init__(self, line_type):
  1129. HTTPException.__init__(self, "got more than %d bytes when reading %s"
  1130. % (_MAXLINE, line_type))
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Exception that is both a ConnectionResetError and a BadStatusLine."""

    def __init__(self, *pos, **kw):
        # Initialise the BadStatusLine part with an empty status line, then
        # let ConnectionResetError process the caller's arguments.  The
        # order matters: the second call determines the final args.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
# Alias kept for backwards compatibility: `error` is another name for
# HTTPException.
error = HTTPException