Including the venv

This commit is contained in:
2020-07-18 20:14:39 +02:00
parent 822398bc37
commit 7dbbde5028
849 changed files with 146952 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
from __future__ import absolute_import
from . import ssl_match_hostname
# Public names of this package: only the ``ssl_match_hostname`` name that is
# imported from the package itself.
__all__ = ('ssl_match_hostname', )

View File

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
"""
backports.makefile
~~~~~~~~~~~~~~~~~~
Backports the Python 3 ``socket.makefile`` method for use with anything that
wants to create a "fake" socket object.
"""
import io
from socket import SocketIO
def backport_makefile(self, mode="r", buffering=None, encoding=None,
                      errors=None, newline=None):
    """Return a file object layered over the socket-like object ``self``.

    Backport of ``socket.makefile`` from Python 3.5.

    :param str mode: Any combination of ``"r"``, ``"w"`` and ``"b"``.
        Reading is assumed when ``"w"`` is absent.
    :param buffering: ``None`` or a negative number selects
        ``io.DEFAULT_BUFFER_SIZE``; ``0`` asks for the raw, unbuffered
        stream (binary mode only).
    :param encoding: Passed to ``io.TextIOWrapper`` in text mode.
    :param errors: Passed to ``io.TextIOWrapper`` in text mode.
    :param newline: Passed to ``io.TextIOWrapper`` in text mode.
    :returns: The raw ``SocketIO``, a buffered ``io`` object, or an
        ``io.TextIOWrapper``, depending on ``mode`` and ``buffering``.
    :raises ValueError: For an unknown mode character, or for an
        unbuffered text stream.
    """
    if not set(mode) <= {"r", "w", "b"}:
        raise ValueError(
            "invalid mode %r (only r, w, b allowed)" % (mode,)
        )

    want_write = "w" in mode
    # A mode without "w" (for example just "b") still means "read".
    want_read = "r" in mode or not want_write
    assert want_read or want_write
    is_binary = "b" in mode

    raw_mode = ("r" if want_read else "") + ("w" if want_write else "")
    raw = SocketIO(self, raw_mode)
    # Record on the socket object that one more file object refers to it.
    self._makefile_refs += 1

    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE
    if buffering == 0:
        if is_binary:
            return raw
        raise ValueError("unbuffered streams must be binary")

    if want_read and want_write:
        buffered = io.BufferedRWPair(raw, raw, buffering)
    elif want_read:
        buffered = io.BufferedReader(raw, buffering)
    else:
        assert want_write
        buffered = io.BufferedWriter(raw, buffering)

    if is_binary:
        return buffered

    wrapper = io.TextIOWrapper(buffered, encoding, errors, newline)
    wrapper.mode = mode
    return wrapper

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
An implementation of semantics and validations described in RFC 3986.
See http://rfc3986.readthedocs.io/ for detailed documentation.
:copyright: (c) 2014 Rackspace
:license: Apache v2.0, see LICENSE for details
"""
from .api import iri_reference
from .api import IRIReference
from .api import is_valid_uri
from .api import normalize_uri
from .api import uri_reference
from .api import URIReference
from .api import urlparse
from .parseresult import ParseResult
# Package metadata.
__title__ = "rfc3986"
__author__ = "Ian Stapleton Cordasco"
__author_email__ = "graffatcolmingov@gmail.com"
__license__ = "Apache v2.0"
__copyright__ = "Copyright 2014 Rackspace"
__version__ = "1.3.2"

# Names exported by ``from rfc3986 import *``: the classes and helper
# functions imported into this module, followed by the metadata above.
__all__ = (
    "ParseResult",
    "URIReference",
    "IRIReference",
    "is_valid_uri",
    "normalize_uri",
    "uri_reference",
    "iri_reference",
    "urlparse",
    "__title__",
    "__author__",
    "__author_email__",
    "__license__",
    "__copyright__",
    "__version__",
)

View File

@@ -0,0 +1,353 @@
"""Module containing the implementation of the URIMixin class."""
import warnings
from . import exceptions as exc
from . import misc
from . import normalizers
from . import validators
class URIMixin(object):
    """Mixin with all shared methods for URIs and IRIs.

    The methods read ``scheme``, ``authority``, ``path``, ``query``,
    ``fragment`` and ``encoding`` from ``self`` and call ``self._replace``,
    ``self.normalize`` and ``type(self).from_string``, so the class this is
    mixed into has to provide all of those.
    """

    __hash__ = tuple.__hash__

    def authority_info(self):
        """Split the authority into ``userinfo``, ``host`` and ``port``.

        :returns:
            ``{'userinfo': 'username:password', 'host': 'www.example.com',
            'port': '80'}``. All three values are ``None`` when the
            authority is empty or ``None``.
        :rtype: dict
        :raises rfc3986.exceptions.InvalidAuthority:
            If the authority is present but cannot be parsed, or if its
            host has the shape of an IPv4 address without being a valid one.
        """
        if not self.authority:
            return {'userinfo': None, 'host': None, 'port': None}

        parsed = self._match_subauthority()
        if parsed is None:
            # The reference parser accepted this authority, yet the stricter
            # sub-authority matcher rejects it: it cannot be valid.
            raise exc.InvalidAuthority(self.authority.encode(self.encoding))

        components = parsed.groupdict()
        host = components.get('host')
        # A host that looks like a dotted quad must also carry valid values.
        looks_like_ipv4 = host and misc.IPv4_MATCHER.match(host)
        if looks_like_ipv4 and not validators.valid_ipv4_host_address(host):
            raise exc.InvalidAuthority(self.authority.encode(self.encoding))

        return components

    def _match_subauthority(self):
        """Match the authority against ``misc.SUBAUTHORITY_MATCHER``."""
        return misc.SUBAUTHORITY_MATCHER.match(self.authority)

    @property
    def host(self):
        """If present, a string representing the host.

        ``None`` when the authority is invalid.
        """
        try:
            info = self.authority_info()
        except exc.InvalidAuthority:
            return None
        else:
            return info['host']

    @property
    def port(self):
        """If present, the port extracted from the authority.

        ``None`` when the authority is invalid.
        """
        try:
            info = self.authority_info()
        except exc.InvalidAuthority:
            return None
        else:
            return info['port']

    @property
    def userinfo(self):
        """If present, the userinfo extracted from the authority.

        ``None`` when the authority is invalid.
        """
        try:
            info = self.authority_info()
        except exc.InvalidAuthority:
            return None
        else:
            return info['userinfo']

    def is_absolute(self):
        """Determine if this URI Reference is an absolute URI.

        See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.

        :returns: ``True`` if it is an absolute URI, ``False`` otherwise.
        :rtype: bool
        """
        matched = misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())
        return bool(matched)

    def is_valid(self, **kwargs):
        """Determine if the URI is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require_scheme: Set to ``True`` if you wish to require the
            presence of the scheme component.
        :param bool require_authority: Set to ``True`` if you wish to require
            the presence of the authority component.
        :param bool require_path: Set to ``True`` if you wish to require the
            presence of the path component.
        :param bool require_query: Set to ``True`` if you wish to require the
            presence of the query component.
        :param bool require_fragment: Set to ``True`` if you wish to require
            the presence of the fragment component.
        :returns: ``True`` if the URI is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        # Each per-component check is paired with the keyword that decides
        # whether that component is mandatory. Checking stops at the first
        # failure.
        checks = (
            (self.scheme_is_valid, 'require_scheme'),
            (self.authority_is_valid, 'require_authority'),
            (self.path_is_valid, 'require_path'),
            (self.query_is_valid, 'require_query'),
            (self.fragment_is_valid, 'require_fragment'),
        )
        return all(
            check(kwargs.get(flag, False)) for check, flag in checks
        )

    def authority_is_valid(self, require=False):
        """Determine if the authority component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require:
            Set to ``True`` to require the presence of this component.
        :returns:
            ``True`` if the authority is valid. ``False`` otherwise.
        :rtype:
            bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        try:
            self.authority_info()
        except exc.InvalidAuthority:
            return False

        return validators.authority_is_valid(
            self.authority,
            host=self.host,
            require=require,
        )

    def scheme_is_valid(self, require=False):
        """Determine if the scheme component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the scheme is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        return validators.scheme_is_valid(self.scheme, require)

    def path_is_valid(self, require=False):
        """Determine if the path component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the path is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        return validators.path_is_valid(self.path, require)

    def query_is_valid(self, require=False):
        """Determine if the query component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the query is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        return validators.query_is_valid(self.query, require)

    def fragment_is_valid(self, require=False):
        """Determine if the fragment component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the fragment is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn("Please use rfc3986.validators.Validator instead. "
                      "This method will be eventually removed.",
                      DeprecationWarning)
        return validators.fragment_is_valid(self.fragment, require)

    def normalized_equality(self, other_ref):
        """Compare this URIReference to another URIReference.

        Both references are normalized first and then compared as tuples.

        :param URIReference other_ref: (required), The reference with which
            we're comparing.
        :returns: ``True`` if the references are equal, ``False`` otherwise.
        :rtype: bool
        """
        mine = tuple(self.normalize())
        theirs = tuple(other_ref.normalize())
        return mine == theirs

    def resolve_with(self, base_uri, strict=False):
        """Use an absolute URI Reference to resolve this relative reference.

        Assuming this is a relative reference that you would like to resolve,
        use the provided base URI to resolve it.

        See http://tools.ietf.org/html/rfc3986#section-5 for more information.

        :param base_uri: Either a string or URIReference. It must be an
            absolute URI or it will raise an exception.
        :param bool strict: When ``False`` (the default), a scheme equal to
            the base URI's scheme is dropped from this reference before
            resolving.
        :returns: A new URIReference which is the result of resolving this
            reference using ``base_uri``.
        :rtype: :class:`URIReference`
        :raises rfc3986.exceptions.ResolutionError:
            If the ``base_uri`` is not an absolute URI.
        """
        if not isinstance(base_uri, URIMixin):
            base_uri = type(self).from_string(base_uri)

        if not base_uri.is_absolute():
            raise exc.ResolutionError(base_uri)

        # This is optional per
        # http://tools.ietf.org/html/rfc3986#section-5.2.1
        base_uri = base_uri.normalize()

        ref = self
        if not strict and ref.scheme == base_uri.scheme:
            ref = ref.copy_with(scheme=None)

        # http://tools.ietf.org/html/rfc3986#page-32
        if ref.scheme is not None:
            # The reference carries its own scheme: only tidy its path.
            return ref.copy_with(
                path=normalizers.normalize_path(ref.path)
            )

        if ref.authority is not None:
            # Own authority: borrow just the scheme from the base.
            return ref.copy_with(
                scheme=base_uri.scheme,
                path=normalizers.normalize_path(ref.path)
            )

        if ref.path is None:
            # No path: reuse the base path, and the base query unless the
            # reference brings its own.
            query = ref.query if ref.query is not None else base_uri.query
            return ref.copy_with(
                scheme=base_uri.scheme,
                authority=base_uri.authority,
                path=base_uri.path,
                query=query
            )

        if ref.path.startswith('/'):
            path = normalizers.normalize_path(ref.path)
        else:
            path = normalizers.normalize_path(
                misc.merge_paths(base_uri, ref.path)
            )
        return ref.copy_with(
            scheme=base_uri.scheme,
            authority=base_uri.authority,
            path=path,
            query=ref.query
        )

    def unsplit(self):
        """Create a URI string from the components.

        :returns: The URI Reference reconstituted as a string.
        :rtype: str
        """
        # See http://tools.ietf.org/html/rfc3986#section-5.3
        pieces = []
        if self.scheme:
            pieces += [self.scheme, ':']
        if self.authority:
            pieces += ['//', self.authority]
        if self.path:
            pieces.append(self.path)
        # An empty query or fragment still emits its delimiter; only
        # ``None`` suppresses it.
        if self.query is not None:
            pieces += ['?', self.query]
        if self.fragment is not None:
            pieces += ['#', self.fragment]
        return ''.join(pieces)

    def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting,
                  path=misc.UseExisting, query=misc.UseExisting,
                  fragment=misc.UseExisting):
        """Create a copy of this reference with the new components.

        Arguments left at ``misc.UseExisting`` keep their current value;
        ``None`` is a real value and clears the component.

        :param str scheme:
            (optional) The scheme to use for the new reference.
        :param str authority:
            (optional) The authority to use for the new reference.
        :param str path:
            (optional) The path to use for the new reference.
        :param str query:
            (optional) The query to use for the new reference.
        :param str fragment:
            (optional) The fragment to use for the new reference.
        :returns:
            New URIReference with provided components.
        :rtype:
            URIReference
        """
        candidates = (
            ('scheme', scheme),
            ('authority', authority),
            ('path', path),
            ('query', query),
            ('fragment', fragment),
        )
        overrides = {
            name: value
            for name, value in candidates
            if value is not misc.UseExisting
        }
        clone = self._replace(**overrides)
        clone.encoding = self.encoding
        return clone

View File

@@ -0,0 +1,267 @@
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for the regular expressions crafted from ABNF."""
import sys
# https://tools.ietf.org/html/rfc3986#page-13
GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
# https://tools.ietf.org/html/rfc3986#page-13
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
# Escape the '*' for use in regular expressions
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
DIGIT = '0123456789'
# https://tools.ietf.org/html/rfc3986#section-2.3
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
# The "~" used to be missing here, which disagreed with UNRESERVED_RE below
# and left "~" out of UNRESERVED_CHARS_SET and NON_PCT_ENCODED_SET.
# "!" is a sub-delim rather than an unreserved character; it is kept only so
# that the previous value of these names remains a subset of the new one.
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r'._!-~'
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
# Every character that may appear in a URI without being percent-encoded.
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
# We need to escape the '-' in this case:
UNRESERVED_RE = r'A-Za-z0-9._~\-'
# Percent encoded character values
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED
# NOTE(sigmavirus24): We're going to use more strict regular expressions
# than appear in Appendix B for scheme. This will prevent over-eager
# consuming of items that aren't schemes.
SCHEME_RE = '[a-zA-Z][a-zA-Z0-9+.-]*'
# The remaining components are matched loosely: each pattern only excludes
# the delimiter characters listed in its negated character class.
_AUTHORITY_RE = '[^/?#]*'
_PATH_RE = '[^?#]*'
_QUERY_RE = '[^#]*'
_FRAGMENT_RE = '.*'
# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B
# The keys are the placeholder names used by URL_PARSING_RE below.
COMPONENT_PATTERN_DICT = {
    'scheme': SCHEME_RE,
    'authority': _AUTHORITY_RE,
    'path': _PATH_RE,
    'query': _QUERY_RE,
    'fragment': _FRAGMENT_RE,
}
# See http://tools.ietf.org/html/rfc3986#appendix-B
# In this case, we name each of the important matches so we can use
# SRE_Match#groupdict to parse the values out if we so choose. This is also
# modified to ignore other matches that are not important to the parsing of
# the reference so we can also simply use SRE_Match#groups.
# Only the path group is mandatory; scheme, authority, query and fragment are
# each wrapped in an optional non-capturing group together with their
# delimiter.
URL_PARSING_RE = (
    r'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
    r'(?P<path>{path})(?:\?(?P<query>{query}))?'
    r'(?:#(?P<fragment>{fragment}))?'
).format(**COMPONENT_PATTERN_DICT)
# #########################
# Authority Matcher Section
# #########################
# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# The pattern for a regular name, e.g., www.google.com, api.github.com
# It is built from percent-encoded octets, sub-delims and unreserved
# characters, and the trailing ``*`` means it also matches the empty string.
REGULAR_NAME_RE = REG_NAME = '((?:{0}|[{1}])*)'.format(
    '%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + UNRESERVED_RE
)
# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1.
# NOTE: only the dotted-quad shape is checked here; every group accepts any
# one to three digits (so "999" matches) and the value range is not enforced
# by this pattern.
IPv4_RE = r'([0-9]{1,3}\.){3}[0-9]{1,3}'
# Hexadecimal characters used in each piece of an IPv6 address
HEXDIG_RE = '[0-9A-Fa-f]{1,4}'
# Least-significant 32 bits of an IPv6 address
LS32_RE = '({hex}:{hex}|{ipv4})'.format(hex=HEXDIG_RE, ipv4=IPv4_RE)
# Substitutions into the following patterns for IPv6 patterns defined
# http://tools.ietf.org/html/rfc3986#page-20
_subs = {'hex': HEXDIG_RE, 'ls32': LS32_RE}
# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
# about ABNF (Augmented Backus-Naur Form) use in the comments
# Each entry is one alternative of the IPv6address rule; the comment above it
# is the ABNF it implements.
variations = [
    # 6( h16 ":" ) ls32
    '(%(hex)s:){6}%(ls32)s' % _subs,
    # "::" 5( h16 ":" ) ls32
    '::(%(hex)s:){5}%(ls32)s' % _subs,
    # [ h16 ] "::" 4( h16 ":" ) ls32
    '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % _subs,
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % _subs,
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % _subs,
    # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
    '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % _subs,
    # [ *4( h16 ":" ) h16 ] "::" ls32
    '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % _subs,
    # [ *5( h16 ":" ) h16 ] "::" h16
    '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % _subs,
    # [ *6( h16 ":" ) h16 ] "::"
    '((%(hex)s:){0,6}%(hex)s)?::' % _subs,
]
# All nine alternatives joined into a single alternation.
IPv6_RE = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7})|({8}))'.format(
    *variations
)
# "v" + hex digits + "." + one or more unreserved / sub-delim / ":" characters
IPv_FUTURE_RE = r'v[0-9A-Fa-f]+\.[%s]+' % (
    UNRESERVED_RE + SUB_DELIMITERS_RE + ':'
)
# RFC 6874 Zone ID ABNF
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
# Two spellings of "IPv6 address with optional zone": the RFC4007 variant
# accepts a bare "%" as well as "%25" before the zone id, the other one only
# "%25". IP_LITERAL_RE below uses the RFC4007 variant.
IPv6_ADDRZ_RFC4007_RE = IPv6_RE + '(?:(?:%25|%)' + ZONE_ID + ')?'
IPv6_ADDRZ_RE = IPv6_RE + '(?:%25' + ZONE_ID + ')?'
# A bracketed IPv6 (optionally zoned) or IPvFuture address.
IP_LITERAL_RE = r'\[({0}|{1})\]'.format(
    IPv6_ADDRZ_RFC4007_RE,
    IPv_FUTURE_RE,
)
# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format(
    REG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)
# One or more unreserved / sub-delim / ":" characters or percent-encoded
# octets. Note that this pattern is anchored at the start with "^".
USERINFO_RE = '^([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':]|%s)+' % (
    PCT_ENCODED
)
# One to five decimal digits; the numeric range is not checked here.
PORT_RE = '[0-9]{1,5}'
# ####################
# Path Matcher Section
# ####################
# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
# about the path patterns defined below.
# The keys are used as %-format placeholders by the PATH_* patterns.
segments = {
    'segment': PCHAR + '*',
    # Non-zero length segment
    'segment-nz': PCHAR + '+',
    # Non-zero length segment without ":"
    # (derived by deleting the ":" from PCHAR's character class)
    'segment-nz-nc': PCHAR.replace(':', '') + '+'
}
# Path types taken from Section 3.3 (linked above)
PATH_EMPTY = '^$'
PATH_ROOTLESS = '%(segment-nz)s(/%(segment)s)*' % segments
PATH_NOSCHEME = '%(segment-nz-nc)s(/%(segment)s)*' % segments
PATH_ABSOLUTE = '/(%s)?' % PATH_ROOTLESS
PATH_ABEMPTY = '(/%(segment)s)*' % segments
# Anchored alternation of all five path forms.
PATH_RE = '^(%s|%s|%s|%s|%s)$' % (
    PATH_ABEMPTY, PATH_ABSOLUTE, PATH_NOSCHEME, PATH_ROOTLESS, PATH_EMPTY
)
# Query and fragment share one pattern: any run of "/", "?", ":", "@",
# unreserved or sub-delim characters, or percent-encoded octets.
FRAGMENT_RE = QUERY_RE = (
    '^([/?:@' + UNRESERVED_RE + SUB_DELIMITERS_RE + ']|%s)*$' % PCT_ENCODED
)
# ##########################
# Relative reference matcher
# ##########################
# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
# Either "//" + authority + path-abempty, or one of the path-absolute,
# path-noscheme or empty path forms.
RELATIVE_PART_RE = '(//%s%s|%s|%s|%s)' % (
    COMPONENT_PATTERN_DICT['authority'],
    PATH_ABEMPTY,
    PATH_ABSOLUTE,
    PATH_NOSCHEME,
    PATH_EMPTY,
)
# See http://tools.ietf.org/html/rfc3986#section-3 for definition
# Same shape as RELATIVE_PART_RE, but with path-rootless in place of
# path-noscheme.
HIER_PART_RE = '(//%s%s|%s|%s|%s)' % (
    COMPONENT_PATTERN_DICT['authority'],
    PATH_ABEMPTY,
    PATH_ABSOLUTE,
    PATH_ROOTLESS,
    PATH_EMPTY,
)
# ###############
# IRIs / RFC 3987
# ###############
# IPRIVATE and UCSCHAR_RE are character ranges meant to be embedded inside a
# regular-expression character class. The ranges below U+FFFF are always
# present.
IPRIVATE = u'\uE000-\uF8FF'
UCSCHAR_RE = u'\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
# Only wide-unicode gets the high-ranges of UCSCHAR
if sys.maxunicode > 0xFFFF:  # pragma: no cover
    IPRIVATE += u'\U000F0000-\U000FFFFD\U00100000-\U0010FFFD'
    UCSCHAR_RE += (
        u'\U00010000-\U0001FFFD\U00020000-\U0002FFFD'
        u'\U00030000-\U0003FFFD\U00040000-\U0004FFFD'
        u'\U00050000-\U0005FFFD\U00060000-\U0006FFFD'
        u'\U00070000-\U0007FFFD\U00080000-\U0008FFFD'
        u'\U00090000-\U0009FFFD\U000A0000-\U000AFFFD'
        u'\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD'
        u'\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD'
    )
# IRI counterparts of the URI patterns: the unreserved character class is
# widened with UCSCHAR_RE, everything else follows the same construction.
IUNRESERVED_RE = u'A-Za-z0-9\\._~\\-' + UCSCHAR_RE
IPCHAR = u'([' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':@]|%s)' % PCT_ENCODED
# The keys are used as %-format placeholders by the IPATH_* patterns.
isegments = {
    'isegment': IPCHAR + u'*',
    # Non-zero length segment
    'isegment-nz': IPCHAR + u'+',
    # Non-zero length segment without ":"
    'isegment-nz-nc': IPCHAR.replace(':', '') + u'+'
}
IPATH_ROOTLESS = u'%(isegment-nz)s(/%(isegment)s)*' % isegments
IPATH_NOSCHEME = u'%(isegment-nz-nc)s(/%(isegment)s)*' % isegments
IPATH_ABSOLUTE = u'/(?:%s)?' % IPATH_ROOTLESS
IPATH_ABEMPTY = u'(?:/%(isegment)s)*' % isegments
# Anchored alternation of all path forms; the empty-path alternative reuses
# PATH_EMPTY.
IPATH_RE = u'^(?:%s|%s|%s|%s|%s)$' % (
    IPATH_ABEMPTY, IPATH_ABSOLUTE, IPATH_NOSCHEME, IPATH_ROOTLESS, PATH_EMPTY
)
IREGULAR_NAME_RE = IREG_NAME = u'(?:{0}|[{1}])*'.format(
    u'%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + IUNRESERVED_RE
)
# Host: an IRI regular name, an IPv4 address or a bracketed IP literal (the
# last two are the same patterns the URI host pattern uses).
IHOST_RE = IHOST_PATTERN = u'({0}|{1}|{2})'.format(
    IREG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)
IUSERINFO_RE = u'^(?:[' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':]|%s)+' % (
    PCT_ENCODED
)
IFRAGMENT_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE
                + u']|%s)*$' % PCT_ENCODED)
# Unlike IFRAGMENT_RE, the query class additionally includes IPRIVATE.
IQUERY_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE
             + IPRIVATE + u']|%s)*$' % PCT_ENCODED)
IRELATIVE_PART_RE = u'(//%s%s|%s|%s|%s)' % (
    COMPONENT_PATTERN_DICT['authority'],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_NOSCHEME,
    PATH_EMPTY,
)
IHIER_PART_RE = u'(//%s%s|%s|%s|%s)' % (
    COMPONENT_PATTERN_DICT['authority'],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_ROOTLESS,
    PATH_EMPTY,
)

View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing the simple and functional API for rfc3986.
This module defines functions and provides access to the public attributes
and classes of rfc3986.
"""
from .iri import IRIReference
from .parseresult import ParseResult
from .uri import URIReference
def uri_reference(uri, encoding='utf-8'):
    """Parse a URI string into a URIReference.

    Convenience wrapper around ``URIReference.from_string(uri, encoding)``.

    :param str uri: The URI which needs to be parsed into a reference.
    :param str encoding: The encoding of the string provided
    :returns: A parsed URI
    :rtype: :class:`URIReference`
    """
    reference = URIReference.from_string(uri, encoding)
    return reference
def iri_reference(iri, encoding='utf-8'):
    """Parse an IRI string into an IRIReference.

    Convenience wrapper around ``IRIReference.from_string(iri, encoding)``.

    :param str iri: The IRI which needs to be parsed into a reference.
    :param str encoding: The encoding of the string provided
    :returns: A parsed IRI
    :rtype: :class:`IRIReference`
    """
    reference = IRIReference.from_string(iri, encoding)
    return reference
def is_valid_uri(uri, encoding='utf-8', **kwargs):
    """Determine if the URI given is valid.

    Convenience wrapper: the string is parsed with
    ``URIReference.from_string(uri, encoding)`` and the keyword arguments
    are forwarded unchanged to the reference's ``is_valid`` method.

    :param str uri: The URI to be validated.
    :param str encoding: The encoding of the string provided
    :param bool require_scheme: Set to ``True`` if you wish to require the
        presence of the scheme component.
    :param bool require_authority: Set to ``True`` if you wish to require the
        presence of the authority component.
    :param bool require_path: Set to ``True`` if you wish to require the
        presence of the path component.
    :param bool require_query: Set to ``True`` if you wish to require the
        presence of the query component.
    :param bool require_fragment: Set to ``True`` if you wish to require the
        presence of the fragment component.
    :returns: ``True`` if the URI is valid, ``False`` otherwise.
    :rtype: bool
    """
    reference = URIReference.from_string(uri, encoding)
    return reference.is_valid(**kwargs)
def normalize_uri(uri, encoding='utf-8'):
    """Normalize the given URI.

    Convenience wrapper: parse the string, normalize the resulting
    reference and join it back into a string, i.e.
    ``URIReference.from_string(uri).normalize().unsplit()``.

    :param str uri: The URI to be normalized.
    :param str encoding: The encoding of the string provided
    :returns: The normalized URI.
    :rtype: str
    """
    return URIReference.from_string(uri, encoding).normalize().unsplit()
def urlparse(uri, encoding='utf-8'):
    """Parse a given URI and return a ParseResult.

    This is a partial replacement of the standard library's urlparse
    function. Parsing is delegated to ``ParseResult.from_string`` with
    ``strict=False``.

    :param str uri: The URI to be parsed.
    :param str encoding: The encoding of the string provided.
    :returns: A parsed URI
    :rtype: :class:`~rfc3986.parseresult.ParseResult`
    """
    result = ParseResult.from_string(uri, encoding, strict=False)
    return result

View File

@@ -0,0 +1,298 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the logic for the URIBuilder object."""
from . import compat
from . import normalizers
from . import uri
class URIBuilder(object):
"""Object to aid in building up a URI Reference from parts.
.. note::
This object should be instantiated by the user, but it's recommended
that it is not provided with arguments. Instead, use the available
method to populate the fields.
"""
def __init__(self, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None):
"""Initialize our URI builder.
:param str scheme:
(optional)
:param str userinfo:
(optional)
:param str host:
(optional)
:param int port:
(optional)
:param str path:
(optional)
:param str query:
(optional)
:param str fragment:
(optional)
"""
self.scheme = scheme
self.userinfo = userinfo
self.host = host
self.port = port
self.path = path
self.query = query
self.fragment = fragment
def __repr__(self):
"""Provide a convenient view of our builder object."""
formatstr = ('URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, '
'host={b.host}, port={b.port}, path={b.path}, '
'query={b.query}, fragment={b.fragment})')
return formatstr.format(b=self)
def add_scheme(self, scheme):
"""Add a scheme to our builder object.
After normalizing, this will generate a new URIBuilder instance with
the specified scheme and all other attributes the same.
.. code-block:: python
>>> URIBuilder().add_scheme('HTTPS')
URIBuilder(scheme='https', userinfo=None, host=None, port=None,
path=None, query=None, fragment=None)
"""
scheme = normalizers.normalize_scheme(scheme)
return URIBuilder(
scheme=scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_credentials(self, username, password):
"""Add credentials as the userinfo portion of the URI.
.. code-block:: python
>>> URIBuilder().add_credentials('root', 's3crete')
URIBuilder(scheme=None, userinfo='root:s3crete', host=None,
port=None, path=None, query=None, fragment=None)
>>> URIBuilder().add_credentials('root', None)
URIBuilder(scheme=None, userinfo='root', host=None,
port=None, path=None, query=None, fragment=None)
"""
if username is None:
raise ValueError('Username cannot be None')
userinfo = normalizers.normalize_username(username)
if password is not None:
userinfo = '{}:{}'.format(
userinfo,
normalizers.normalize_password(password),
)
return URIBuilder(
scheme=self.scheme,
userinfo=userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_host(self, host):
"""Add hostname to the URI.
.. code-block:: python
>>> URIBuilder().add_host('google.com')
URIBuilder(scheme=None, userinfo=None, host='google.com',
port=None, path=None, query=None, fragment=None)
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=normalizers.normalize_host(host),
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_port(self, port):
"""Add port to the URI.
.. code-block:: python
>>> URIBuilder().add_port(80)
URIBuilder(scheme=None, userinfo=None, host=None, port='80',
path=None, query=None, fragment=None)
>>> URIBuilder().add_port(443)
URIBuilder(scheme=None, userinfo=None, host=None, port='443',
path=None, query=None, fragment=None)
"""
port_int = int(port)
if port_int < 0:
raise ValueError(
'ports are not allowed to be negative. You provided {}'.format(
port_int,
)
)
if port_int > 65535:
raise ValueError(
'ports are not allowed to be larger than 65535. '
'You provided {}'.format(
port_int,
)
)
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port='{}'.format(port_int),
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_path(self, path):
"""Add a path to the URI.
.. code-block:: python
>>> URIBuilder().add_path('sigmavirus24/rfc3985')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/sigmavirus24/rfc3986', query=None, fragment=None)
>>> URIBuilder().add_path('/checkout.php')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/checkout.php', query=None, fragment=None)
"""
if not path.startswith('/'):
path = '/{}'.format(path)
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=normalizers.normalize_path(path),
query=self.query,
fragment=self.fragment,
)
def add_query_from(self, query_items):
"""Generate and add a query a dictionary or list of tuples.
.. code-block:: python
>>> URIBuilder().add_query_from({'a': 'b c'})
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
>>> URIBuilder().add_query_from([('a', 'b c')])
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
"""
query = normalizers.normalize_query(compat.urlencode(query_items))
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=query,
fragment=self.fragment,
)
def add_query(self, query):
"""Add a pre-formated query string to the URI.
.. code-block:: python
>>> URIBuilder().add_query('a=b&c=d')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b&c=d', fragment=None)
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=normalizers.normalize_query(query),
fragment=self.fragment,
)
def add_fragment(self, fragment):
"""Add a fragment to the URI.
.. code-block:: python
>>> URIBuilder().add_fragment('section-2.6.1')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment='section-2.6.1')
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=normalizers.normalize_fragment(fragment),
)
def finalize(self):
    """Turn the accumulated components into a ``uri.URIReference``.

    The userinfo, host and port are combined into a single authority
    string by ``normalizers.normalize_authority``; scheme, path, query and
    fragment are passed through as stored.

    .. code-block:: python

        >>> URIBuilder().add_scheme('https').add_host('github.com'
        ...     ).add_path('sigmavirus24/rfc3986').finalize().unsplit()
        'https://github.com/sigmavirus24/rfc3986'

        >>> URIBuilder().add_scheme('https').add_host('github.com'
        ...     ).add_path('sigmavirus24/rfc3986').add_credentials(
        ...     'sigmavirus24', 'not-re@l').finalize().unsplit()
        'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986'

    :returns: A ``uri.URIReference`` built from this builder.
    """
    authority_parts = (self.userinfo, self.host, self.port)
    authority = normalizers.normalize_authority(authority_parts)
    return uri.URIReference(
        self.scheme,
        authority,
        self.path,
        self.query,
        self.fragment,
    )

View File

@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compatibility module for Python 2 and 3 support."""
import sys
try:
from urllib.parse import quote as urlquote
except ImportError: # Python 2.x
from urllib import quote as urlquote
try:
from urllib.parse import urlencode
except ImportError: # Python 2.x
from urllib import urlencode
# Public names of this compatibility module: the two text/bytes coercion
# helpers defined below plus the urllib functions imported above.
__all__ = (
'to_bytes',
'to_str',
'urlquote',
'urlencode',
)
# Interpreter-version flags derived from ``sys.version_info``.
PY3 = (3, 0) <= sys.version_info < (4, 0)
PY2 = (2, 6) <= sys.version_info < (2, 8)

if PY3:
    # Python 3 has no ``unicode`` builtin; alias it so the isinstance check
    # in ``to_str`` works on both major versions.
    unicode = str  # Python 3.x


def to_str(b, encoding='utf-8'):
    """Return ``b`` as text, decoding it with ``encoding`` when needed.

    Objects that expose ``decode`` and are not already text are decoded;
    anything else (text, ``None``, numbers, ...) is returned unchanged.

    :param b: The value to coerce.
    :param str encoding: Codec used when decoding is required.
    :returns: The decoded text, or ``b`` itself when no decoding applies.
    """
    needs_decoding = hasattr(b, 'decode') and not isinstance(b, unicode)
    return b.decode(encoding) if needs_decoding else b
def to_bytes(s, encoding='utf-8'):
    """Return ``s`` as bytes, encoding it with ``encoding`` when needed.

    Values that are already ``bytes``, or that have no ``encode`` method
    (``None``, numbers, ...), are returned unchanged.

    :param s: The value to coerce.
    :param str encoding: Codec used when encoding is required.
    :returns: The encoded bytes, or ``s`` itself when no encoding applies.
    """
    if isinstance(s, bytes) or not hasattr(s, 'encode'):
        return s
    return s.encode(encoding)

View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""Exceptions module for rfc3986."""
from . import compat
class RFC3986Exception(Exception):
    """Root of the rfc3986 exception hierarchy.

    The other exception classes in this module derive from this class,
    directly or through ``ValidationError``.
    """
class InvalidAuthority(RFC3986Exception):
    """Raised when an authority string is not valid."""

    def __init__(self, authority):
        """Build the error message from the offending authority.

        :param authority: The invalid authority; converted to text with
            ``compat.to_str`` before being embedded in the message.
        """
        authority_text = compat.to_str(authority)
        message = u"The authority ({0}) is not valid.".format(authority_text)
        super(InvalidAuthority, self).__init__(message)
class InvalidPort(RFC3986Exception):
    """Raised when a port value is not valid."""

    def __init__(self, port):
        """Build the error message from the offending port.

        :param port: The invalid port value, embedded in the message.
        """
        message = 'The port ("{0}") is not valid.'.format(port)
        super(InvalidPort, self).__init__(message)
class ResolutionError(RFC3986Exception):
    """Raised when a URI cannot be resolved."""

    def __init__(self, uri):
        """Build the error message from the URI that failed to resolve.

        :param uri: Object exposing ``unsplit()``; its result is embedded
            in the message.
        """
        rendered = uri.unsplit()
        message = "{0} is not an absolute URI.".format(rendered)
        super(ResolutionError, self).__init__(message)
class ValidationError(RFC3986Exception):
    """Base class for errors raised while validating a URI."""
class MissingComponentError(ValidationError):
    """Raised when one or more required components are absent."""

    def __init__(self, uri, *component_names):
        """Record the URI and the names of the missing components.

        :param uri: The URI that failed validation; stored as ``self.uri``.
        :param component_names: Names of the missing components; stored
            sorted as ``self.components``.
        """
        self.uri = uri
        self.components = sorted(component_names)
        # Singular/plural agreement follows the number of names passed in.
        verb = 'were' if len(component_names) > 1 else 'was'
        joined = ', '.join(self.components)
        message = "{} {} required but missing".format(joined, verb)
        super(MissingComponentError, self).__init__(
            message,
            uri,
            self.components,
        )
class UnpermittedComponentError(ValidationError):
    """Raised when a component's value is outside the permitted set."""

    def __init__(self, component_name, component_value, allowed_values):
        """Record the component, its value and the permitted values.

        :param component_name: Name of the offending component.
        :param component_value: The value that was found.
        :param allowed_values: Iterable of permitted values; shown sorted
            in the message and stored as given.
        """
        permitted = sorted(allowed_values)
        message = "{} was required to be one of {!r} but was {!r}".format(
            component_name, permitted, component_value,
        )
        super(UnpermittedComponentError, self).__init__(
            message,
            component_name,
            component_value,
            allowed_values,
        )
        self.component_name = component_name
        self.component_value = component_value
        self.allowed_values = allowed_values
class PasswordForbidden(ValidationError):
    """Raised when the userinfo of a URL contains a password."""

    def __init__(self, uri):
        """Record the URI that failed validation.

        :param uri: Either an object exposing ``unsplit()`` or a value
            that is embedded in the message as-is; stored as ``self.uri``.
        """
        # Fall back to the raw value when there is no ``unsplit`` method.
        render = getattr(uri, 'unsplit', lambda: uri)
        message = '"{}" contained a password when validation forbade it'.format(
            render()
        )
        super(PasswordForbidden, self).__init__(message)
        self.uri = uri
class InvalidComponentsError(ValidationError):
    """Raised when one or more components fail validation."""

    def __init__(self, uri, *component_names):
        """Record the URI and the names of the invalid components.

        :param uri: The URI that failed validation; stored as ``self.uri``.
        :param component_names: Names of the invalid components; stored
            sorted as ``self.components``.
        """
        self.uri = uri
        self.components = sorted(component_names)
        # Singular/plural agreement follows the number of names passed in.
        verb = 'were' if len(component_names) > 1 else 'was'
        joined = ', '.join(self.components)
        message = "{} {} found to be invalid".format(joined, verb)
        super(InvalidComponentsError, self).__init__(
            message,
            uri,
            self.components,
        )
class MissingDependencyError(RFC3986Exception):
    """Raised when an IRI needs encoding but the 'idna' module is missing."""

View File

@@ -0,0 +1,147 @@
"""Module containing the implementation of the IRIReference class."""
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
try:
from pip._vendor import idna
except ImportError: # pragma: no cover
idna = None
class IRIReference(namedtuple('IRIReference', misc.URI_COMPONENTS),
                   uri.URIMixin):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    slots = ()

    def __new__(cls, scheme, authority, path, query, fragment,
                encoding='utf-8'):
        """Create a new IRIReference.

        Empty ``scheme``, ``authority`` and ``path`` values are stored as
        ``None``; ``query`` and ``fragment`` are stored as given. The
        encoding is kept on the instance as ``encoding``.
        """
        ref = super(IRIReference, cls).__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment)
        ref.encoding = encoding
        return ref

    # Defining __eq__ clears the inherited hash on Python 3. Restore the
    # tuple hash (URIReference does the same) so instances remain usable
    # in sets and as dictionary keys.
    __hash__ = tuple.__hash__

    def __eq__(self, other):
        """Compare this reference to another.

        Tuples are converted with the class constructor and other
        non-IRIReference values are parsed with ``from_string``; the
        comparison itself is a plain component-wise tuple comparison.

        :raises TypeError: If ``other`` cannot be parsed into a reference.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError:
                raise TypeError(
                    'Unable to compare {0}() to {1}()'.format(
                        type(self).__name__, type(other).__name__))

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match the authority against the IRI sub-authority pattern."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding='utf-8'):
        """Parse a IRI reference from the given unicode IRI string.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        return cls(
            split_iri['scheme'], split_iri['authority'],
            normalizers.encode_component(split_iri['path'], encoding),
            normalizers.encode_component(split_iri['query'], encoding),
            normalizers.encode_component(split_iri['fragment'], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder based on the
            ``idna`` module is used for host labels containing non-ASCII
            characters.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If the IRI has an authority, no ``idna_encoder`` was given and
            the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            If the default encoder fails to IDNA-encode a host label.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # ASCII ends at code point 127, so anything above that
                    # (including U+0080) must go through IDNA encoding.
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(name.lower(),
                                               strict=True,
                                               std3_rules=True)
                        except idna.IDNAError:
                            raise exceptions.InvalidAuthority(self.authority)
                    return name

            # Rebuild the authority from its encoded sub-components.
            authority = ""
            if self.host:
                authority = ".".join([compat.to_str(idna_encoder(part))
                                      for part in self.host.split(".")])

            if self.userinfo is not None:
                authority = (normalizers.encode_component(
                    self.userinfo, self.encoding) + '@' + authority)

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(self.scheme,
                                authority,
                                path=self.path,
                                query=self.query,
                                fragment=self.fragment,
                                encoding=self.encoding)

View File

@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing compiled regular expressions and constants.
This module contains important constants, patterns, and compiled regular
expressions for parsing and validating URIs and their components.
"""
import re
from . import abnf_regexp
# Field names, in order, of the named tuple used as a superclass of
# URIReference (and IRIReference).
URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment']
# Character classes re-exported from abnf_regexp under descriptive keys; the
# 're_' entries are the regex-escaped forms of the plain entries.
important_characters = {
'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS,
'sub_delimiters': abnf_regexp.SUB_DELIMITERS,
# We need to escape the '*' in this case
're_sub_delimiters': abnf_regexp.SUB_DELIMITERS_RE,
'unreserved_chars': abnf_regexp.UNRESERVED_CHARS,
# We need to escape the '-' in this case:
're_unreserved': abnf_regexp.UNRESERVED_RE,
}
# For details about delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
# For details about unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
# Characters that normalizers.encode_component copies through without
# percent-encoding.
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
# Splits a URI string into the named groups consumed by
# URIReference.from_string (scheme, authority, path, query, fragment).
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
# Splits an authority into optional userinfo, host and optional port.
SUBAUTHORITY_MATCHER = re.compile((
'^(?:(?P<userinfo>{0})@)?' # userinfo
'(?P<host>{1})' # host
':?(?P<port>{2})?$' # port
).format(abnf_regexp.USERINFO_RE,
abnf_regexp.HOST_PATTERN,
abnf_regexp.PORT_RE))
# Whole-string matchers for a host, an IPv4 address and a bracketed IPv6
# address (the latter built from the RFC 4007 variant of the pattern).
HOST_MATCHER = re.compile('^' + abnf_regexp.HOST_RE + '$')
IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(r'^\[%s\]$' % (
abnf_regexp.IPv6_ADDRZ_RE
))
# Matcher used to validate path components
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
# ##################################
# Query and Fragment Matcher Section
# ##################################
QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)
# Fragments are validated with the very same compiled pattern as queries.
FRAGMENT_MATCHER = QUERY_MATCHER
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
# Whole-string matcher for a relative reference: relative part, optional
# "?query" and optional "#fragment".
RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
abnf_regexp.RELATIVE_PART_RE,
abnf_regexp.QUERY_RE,
abnf_regexp.FRAGMENT_RE,
))
# See http://tools.ietf.org/html/rfc3986#section-4.3
# NOTE(review): QUERY_RE[1:-1] drops the first and last character of the
# pattern, presumably its '^'/'$' anchors; confirm against abnf_regexp.
ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
abnf_regexp.HIER_PART_RE,
abnf_regexp.QUERY_RE[1:-1],
))
# ###############
# IRIs / RFC 3987
# ###############
# Same parsing pattern as URI_MATCHER, compiled with re.UNICODE.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)
# IRI counterpart of SUBAUTHORITY_MATCHER, built from the IRI userinfo and
# host patterns.
ISUBAUTHORITY_MATCHER = re.compile((
u'^(?:(?P<userinfo>{0})@)?' # iuserinfo
u'(?P<host>{1})' # ihost
u':?(?P<port>{2})?$' # port
).format(abnf_regexp.IUSERINFO_RE,
abnf_regexp.IHOST_RE,
abnf_regexp.PORT_RE), re.UNICODE)
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Implements the "Merge Paths" routine of RFC 3986, Section 5.2.3:

    * If the base URI has an authority and no path, the result is ``"/"``
      followed by the relative path.
    * Otherwise the result is the base path up to and including its last
      ``"/"``, followed by the relative path. When the base path contains
      no ``"/"`` at all, the whole base path is dropped and the relative
      path is returned on its own.

    :param base_uri: Object exposing ``path`` and ``authority`` attributes.
    :param str relative_path: The path of the relative reference.
    :returns: The merged path (dot segments are not removed here).
    :rtype: str
    """
    if base_uri.path is None and base_uri.authority is not None:
        return '/' + relative_path

    base_path = base_uri.path or ''
    last_slash = base_path.rfind('/')
    if last_slash == -1:
        # No "/" in the base path: slicing with -1 would wrongly keep all
        # but the last character, so exclude the base path entirely.
        return relative_path
    return base_path[:last_slash + 1] + relative_path
# Unique sentinel used as the default for ``copy_with`` keyword arguments;
# it is compared by identity to tell "not supplied" apart from ``None``.
UseExisting = object()

View File

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module with functions to normalize components."""
import re
from . import compat
from . import misc
def normalize_scheme(scheme):
    """Return the scheme component converted to lower case.

    :param str scheme: The scheme to normalize.
    :returns: The lower-cased scheme.
    """
    lowered = scheme.lower()
    return lowered
def normalize_authority(authority):
    """Normalize an authority tuple to a string.

    Falsy parts are omitted. The userinfo has its percent-encodings
    upper-cased, the host is passed through ``normalize_host`` and the
    port is appended after a ``":"``.

    :param authority: A ``(userinfo, host, port)`` triple. The port may be
        a string or an integer.
    :returns: The assembled authority; an empty string when every part is
        falsy.
    :rtype: str
    """
    userinfo, host, port = authority
    result = ''
    if userinfo:
        result += normalize_percent_characters(userinfo) + '@'
    if host:
        result += normalize_host(host)
    if port:
        # Format rather than concatenate so an integer port does not raise
        # TypeError.
        result += ':' + '{0}'.format(port)
    return result
def normalize_username(username):
    """Percent-quote a username so it can be embedded in userinfo.

    :param str username: The raw username.
    :returns: The username as quoted by ``compat.urlquote``.
    """
    quoted_username = compat.urlquote(username)
    return quoted_username
def normalize_password(password):
    """Percent-quote a password so it can be embedded in userinfo.

    :param str password: The raw password.
    :returns: The password as quoted by ``compat.urlquote``.
    """
    quoted_password = compat.urlquote(password)
    return quoted_password
def normalize_host(host):
    """Normalize a host string.

    Hosts are lower-cased. For a bracketed IPv6 literal carrying a zone
    identifier, only the part before the ``%`` is lower-cased and a bare
    RFC 4007 ``%`` delimiter is rewritten to the RFC 6874 form ``%25``.

    :param str host: The host to normalize.
    :returns: The normalized host.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_at = host.find('%')
    if zone_at == -1:
        # IPv6 literal without a zone identifier.
        return host.lower()

    encoded_at = host.find('%25')
    # Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25'
    # from RFC 6874. If the host is '[<IPv6 addr>%25]' then we
    # assume RFC 4007 and normalize to '[<IPV6 addr>%2525]'
    uses_bare_delimiter = (
        encoded_at == -1
        or zone_at < encoded_at
        or (zone_at == encoded_at and encoded_at == len(host) - 4)
    )
    if uses_bare_delimiter:
        host = host.replace('%', '%25', 1)

    # Don't normalize the casing of the Zone ID
    address, zone = host[:zone_at], host[zone_at:]
    return address.lower() + zone
def normalize_path(path):
    """Normalize the path string.

    Percent-encodings are upper-cased and dot segments are removed. Falsy
    values (``None``, ``''``) are returned unchanged.

    :param path: The path to normalize.
    :returns: The normalized path, or ``path`` itself when it is falsy.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path
def normalize_query(query):
    """Normalize the query string.

    Percent-encodings are upper-cased. Falsy values (``None``, ``''``) are
    returned unchanged.

    :param query: The query to normalize.
    :returns: The normalized query, or ``query`` itself when it is falsy.
    """
    if query:
        return normalize_percent_characters(query)
    return query
def normalize_fragment(fragment):
    """Normalize the fragment string.

    Percent-encodings are upper-cased. Falsy values (``None``, ``''``) are
    returned unchanged.

    :param fragment: The fragment to normalize.
    :returns: The normalized fragment, or ``fragment`` itself when falsy.
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment
# Matches one percent-encoded octet: '%' followed by two hex digits.
PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}')


def normalize_percent_characters(s):
    """Upper-case the hex digits of every percent-encoded octet in ``s``.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
    Text that is not a complete ``%XX`` triplet is left untouched.

    :param str s: The string to normalize.
    :returns: The string with upper-cased percent-encodings.
    """
    def _upper(match):
        return match.group(0).upper()

    return PERCENT_MATCHER.sub(_upper, s)
def remove_dot_segments(s):
    """Remove ``.`` and ``..`` segments from a path.

    See also Section 5.2.4 of :rfc:`3986`.

    :param str s: The path to clean up.
    :returns: The path with its dot segments resolved.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept = []
    for part in s.split('/'):
        if part == '.':
            # The current directory adds nothing to the path.
            continue
        if part == '..':
            # Step back one level when there is something to drop.
            if kept:
                kept.pop()
            continue
        kept.append(part)

    # Restore the leading '/' when the segments that remain no longer
    # begin with the empty string that represents it.
    if s.startswith('/') and (not kept or kept[0]):
        kept.insert(0, '')

    # A path that ends with '/.' or '/..' must keep a trailing '/'.
    if s.endswith(('/.', '/..')):
        kept.append('')

    return '/'.join(kept)
def encode_component(uri_component, encoding):
    """Percent-encode a URI component using the given encoding.

    ``None`` is returned unchanged. Otherwise the component is converted
    to bytes and every byte that is not in ``misc.NON_PCT_ENCODED`` is
    replaced by its upper-cased ``%XX`` form.

    :param uri_component: The component to encode, or ``None``.
    :param str encoding: Codec used to convert between text and bytes.
    :returns: The encoded component as text, or ``None``.
    """
    if uri_component is None:
        return uri_component

    # Try to see if the component we're encoding is already percent-encoded
    # so we can skip all '%' characters but still encode all others.
    component_text = compat.to_str(uri_component, encoding)
    triplet_count = len(PERCENT_MATCHER.findall(component_text))
    raw = compat.to_bytes(uri_component, encoding)
    keep_percent = triplet_count == raw.count(b'%')

    output = bytearray()
    for position in range(len(raw)):
        # Will return a single character bytestring on both Python 2 & 3
        char = raw[position:position + 1]
        code = ord(char)
        if keep_percent and char == b'%':
            output.extend(char)
        elif code < 128 and char.decode() in misc.NON_PCT_ENCODED:
            output.extend(char)
        else:
            output.extend('%{0:02x}'.format(code).encode().upper())

    return output.decode(encoding)

View File

@@ -0,0 +1,385 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the urlparse compatibility logic."""
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
# Public names of this module.
__all__ = ('ParseResult', 'ParseResultBytes')
# Field names, in order, of the named tuples that ParseResult and
# ParseResultBytes subclass.
PARSED_COMPONENTS = ('scheme', 'userinfo', 'host', 'port', 'path', 'query',
'fragment')
class ParseResultMixin(object):
    """Helpers shared by ``ParseResult`` and ``ParseResultBytes``.

    The methods rely on attributes supplied by the concrete class:
    ``userinfo``, ``host``, ``port``, ``query``, ``authority``,
    ``encoding`` and ``unsplit()``.
    """

    def _generate_authority(self, attributes):
        """Return the authority matching the given component mapping.

        :param dict attributes: Mapping with at least the keys
            ``userinfo``, ``host`` and ``port``.
        :returns: ``self.authority`` when none of the three values differ
            from this instance, otherwise a freshly normalized authority
            string.
        """
        userinfo = attributes['userinfo']
        host = attributes['host']
        port = attributes['port']

        unchanged = (self.userinfo == userinfo and
                     self.host == host and
                     self.port == port)
        if unchanged:
            return self.authority

        if port:
            port = '{0}'.format(port)
        return normalizers.normalize_authority(
            (compat.to_str(userinfo, self.encoding),
             compat.to_str(host, self.encoding),
             port)
        )

    def geturl(self):
        """Shim to match the standard library method."""
        return self.unsplit()

    @property
    def hostname(self):
        """Shim to match the standard library; returns ``host``."""
        return self.host

    @property
    def netloc(self):
        """Shim to match the standard library; returns ``authority``."""
        return self.authority

    @property
    def params(self):
        """Shim to match the standard library; returns ``query``."""
        # NOTE(review): the stdlib exposes ``params`` as a field distinct
        # from the query; here it is simply an alias of ``query``.
        return self.query
class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS),
                  ParseResultMixin):
    """Implementation of urlparse compatibility class.

    This uses the URIReference logic to handle compatibility with the
    urlparse.ParseResult class.
    """

    slots = ()

    def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
                uri_ref, encoding='utf-8'):
        """Create a new ParseResult.

        Empty ``scheme``, ``userinfo``, ``port`` and ``path`` values are
        stored as ``None``. The backing reference and the encoding are kept
        on the instance as ``reference`` and ``encoding``.
        """
        instance = super(ParseResult, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query,
            fragment)
        instance.encoding = encoding
        instance.reference = uri_ref
        return instance

    @classmethod
    def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
                   path=None, query=None, fragment=None, encoding='utf-8'):
        """Create a ParseResult instance from its parts.

        The parts are assembled into a ``uri.URIReference`` which is
        normalized; the result is populated from that normalized reference
        and its authority is parsed strictly.
        """
        pieces = []
        if userinfo is not None:
            pieces.append(userinfo + '@')
        if host is not None:
            pieces.append(host)
        if port is not None:
            pieces.append(':{0}'.format(port))
        authority = ''.join(pieces)

        normalized_ref = uri.URIReference(scheme=scheme,
                                          authority=authority,
                                          path=path,
                                          query=query,
                                          fragment=fragment,
                                          encoding=encoding).normalize()
        userinfo, host, port = authority_from(normalized_ref, strict=True)
        return cls(scheme=normalized_ref.scheme,
                   userinfo=userinfo,
                   host=host,
                   port=port,
                   path=normalized_ref.path,
                   query=normalized_ref.query,
                   fragment=normalized_ref.fragment,
                   uri_ref=normalized_ref,
                   encoding=encoding)

    @classmethod
    def from_string(cls, uri_string, encoding='utf-8', strict=True,
                    lazy_normalize=True):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: When False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResult` or subclass thereof
        """
        parsed_ref = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            parsed_ref = parsed_ref.normalize()
        userinfo, host, port = authority_from(parsed_ref, strict)

        return cls(scheme=parsed_ref.scheme,
                   userinfo=userinfo,
                   host=host,
                   port=port,
                   path=parsed_ref.path,
                   query=parsed_ref.query,
                   fragment=parsed_ref.fragment,
                   uri_ref=parsed_ref,
                   encoding=encoding)

    @property
    def authority(self):
        """Return the authority of the backing reference."""
        return self.reference.authority

    def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
                  host=misc.UseExisting, port=misc.UseExisting,
                  path=misc.UseExisting, query=misc.UseExisting,
                  fragment=misc.UseExisting):
        """Create a copy of this instance replacing with specified parts.

        Arguments left at ``misc.UseExisting`` keep this instance's value.
        """
        supplied = dict(zip(
            PARSED_COMPONENTS,
            (scheme, userinfo, host, port, path, query, fragment)))
        attrs_dict = {}
        for name in PARSED_COMPONENTS:
            value = supplied[name]
            attrs_dict[name] = (
                getattr(self, name) if value is misc.UseExisting else value)

        authority = self._generate_authority(attrs_dict)
        new_ref = self.reference.copy_with(scheme=attrs_dict['scheme'],
                                           authority=authority,
                                           path=attrs_dict['path'],
                                           query=attrs_dict['query'],
                                           fragment=attrs_dict['fragment'])
        return ParseResult(uri_ref=new_ref, encoding=self.encoding,
                           **attrs_dict)

    def encode(self, encoding=None):
        """Convert to an instance of ParseResultBytes.

        :param str encoding: Codec to use; defaults to ``self.encoding``.
        """
        encoding = encoding or self.encoding
        attrs = {}
        for name, attr in zip(PARSED_COMPONENTS, self):
            # Components without ``encode`` (None, integer port) pass through.
            attrs[name] = (
                attr.encode(encoding) if hasattr(attr, 'encode') else attr)
        return ParseResultBytes(
            uri_ref=self.reference,
            encoding=encoding,
            **attrs
        )

    def unsplit(self, use_idna=False):
        """Create a URI string from the components.

        :param bool use_idna: When true and a host is present, the host is
            encoded with the ``idna`` codec before the URI is rebuilt.
        :returns: The parsed URI reconstituted as a string.
        :rtype: str
        """
        result = self
        if use_idna and self.host:
            idna_host = self.host.encode('idna').decode(self.encoding)
            result = self.copy_with(host=idna_host)
        return result.reference.unsplit()
class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS),
                       ParseResultMixin):
    """Compatibility shim for the urlparse.ParseResultBytes object."""

    def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
                uri_ref, encoding='utf-8', lazy_normalize=True):
        """Create a new ParseResultBytes instance.

        Empty ``scheme``, ``userinfo``, ``port``, ``path``, ``query`` and
        ``fragment`` values are stored as ``None``. The backing reference,
        the encoding and the ``lazy_normalize`` flag are kept on the
        instance.
        """
        parse_result = super(ParseResultBytes, cls).__new__(
            cls,
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query or None,
            fragment or None)
        parse_result.encoding = encoding
        parse_result.reference = uri_ref
        parse_result.lazy_normalize = lazy_normalize
        return parse_result

    @classmethod
    def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
                   path=None, query=None, fragment=None, encoding='utf-8',
                   lazy_normalize=True):
        """Create a ParseResultBytes instance from its parts.

        The parts are assembled into a ``uri.URIReference`` (normalized
        unless ``lazy_normalize`` is true) whose authority is parsed
        strictly; the textual parts are stored encoded to bytes.
        """
        authority = ''
        if userinfo is not None:
            authority += userinfo + '@'
        if host is not None:
            authority += host
        if port is not None:
            authority += ':{0}'.format(int(port))
        uri_ref = uri.URIReference(scheme=scheme,
                                   authority=authority,
                                   path=path,
                                   query=query,
                                   fragment=fragment,
                                   encoding=encoding)
        if not lazy_normalize:
            uri_ref = uri_ref.normalize()
        to_bytes = compat.to_bytes
        userinfo, host, port = authority_from(uri_ref, strict=True)
        return cls(scheme=to_bytes(scheme, encoding),
                   userinfo=to_bytes(userinfo, encoding),
                   host=to_bytes(host, encoding),
                   port=port,
                   path=to_bytes(path, encoding),
                   query=to_bytes(query, encoding),
                   fragment=to_bytes(fragment, encoding),
                   uri_ref=uri_ref,
                   encoding=encoding,
                   lazy_normalize=lazy_normalize)

    @classmethod
    def from_string(cls, uri_string, encoding='utf-8', strict=True,
                    lazy_normalize=True):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize: When False, the parsed reference is
            normalized before its components are extracted.
        :returns: :class:`ParseResultBytes` or subclass thereof
        """
        reference = uri.URIReference.from_string(uri_string, encoding)
        if not lazy_normalize:
            reference = reference.normalize()
        userinfo, host, port = authority_from(reference, strict)

        to_bytes = compat.to_bytes
        return cls(scheme=to_bytes(reference.scheme, encoding),
                   userinfo=to_bytes(userinfo, encoding),
                   host=to_bytes(host, encoding),
                   port=port,
                   path=to_bytes(reference.path, encoding),
                   query=to_bytes(reference.query, encoding),
                   fragment=to_bytes(reference.fragment, encoding),
                   uri_ref=reference,
                   encoding=encoding,
                   lazy_normalize=lazy_normalize)

    @property
    def authority(self):
        """Return the authority of the backing reference as bytes.

        Returns ``None`` when the reference has no authority.
        """
        # The reference stores a missing authority as None; to_bytes passes
        # None through instead of failing on ``None.encode``.
        return compat.to_bytes(self.reference.authority, self.encoding)

    def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
                  host=misc.UseExisting, port=misc.UseExisting,
                  path=misc.UseExisting, query=misc.UseExisting,
                  fragment=misc.UseExisting, lazy_normalize=True):
        """Create a copy of this instance replacing with specified parts.

        Arguments left at ``misc.UseExisting`` keep this instance's value;
        supplied text values are encoded with ``self.encoding``. The new
        backing reference is normalized unless ``lazy_normalize`` is true.
        """
        attributes = zip(PARSED_COMPONENTS,
                         (scheme, userinfo, host, port, path, query, fragment))
        attrs_dict = {}
        for name, value in attributes:
            if value is misc.UseExisting:
                value = getattr(self, name)
            if not isinstance(value, bytes) and hasattr(value, 'encode'):
                value = value.encode(self.encoding)
            attrs_dict[name] = value
        authority = self._generate_authority(attrs_dict)
        to_str = compat.to_str
        # The backing reference works on text, so decode the components.
        ref = self.reference.copy_with(
            scheme=to_str(attrs_dict['scheme'], self.encoding),
            authority=to_str(authority, self.encoding),
            path=to_str(attrs_dict['path'], self.encoding),
            query=to_str(attrs_dict['query'], self.encoding),
            fragment=to_str(attrs_dict['fragment'], self.encoding)
        )
        if not lazy_normalize:
            ref = ref.normalize()
        return ParseResultBytes(
            uri_ref=ref,
            encoding=self.encoding,
            lazy_normalize=lazy_normalize,
            **attrs_dict
        )

    def unsplit(self, use_idna=False):
        """Create a URI bytes object from the components.

        :param bool use_idna: When true and a host is present, the host is
            encoded with the ``idna`` codec before the URI is rebuilt.
        :returns: The parsed URI reconstituted as bytes.
        :rtype: bytes
        """
        parse_result = self
        if use_idna and self.host:
            # self.host is bytes, to encode to idna, we need to decode it
            # first
            host = self.host.decode(self.encoding)
            hostbytes = host.encode('idna')
            parse_result = self.copy_with(host=hostbytes)
        if self.lazy_normalize:
            # Normalization was deferred at construction; apply it now.
            parse_result = parse_result.copy_with(lazy_normalize=False)
        uri_string = parse_result.reference.unsplit()
        return uri_string.encode(self.encoding)
def split_authority(authority):
    """Leniently split an authority into ``(userinfo, host, port)``.

    The userinfo is everything before the last ``"@"``. A bracketed
    (IPv6-style) host is taken up to and including the closing ``"]"``.
    The port is whatever follows the first ``":"`` in what remains. Parts
    that are absent are returned as ``None``.

    :param str authority: The authority string to split.
    :returns: A ``(userinfo, host, port)`` tuple of strings or ``None``.
    """
    userinfo = None
    host = None
    port = None
    # Host text found in front of a ":" when no bracketed host was seen.
    host_before_port = None

    remainder = authority
    if '@' in authority:
        userinfo, remainder = authority.rsplit('@', 1)

    # Handle IPv6 host addresses
    if remainder.startswith('['):
        host, remainder = remainder.split(']', 1)
        host += ']'

    if ':' in remainder:
        host_before_port, port = remainder.split(':', 1)
    elif not host and remainder:
        host = remainder

    if not host and host_before_port:
        host = host_before_port

    return userinfo, host, port
def authority_from(reference, strict):
    """Extract ``(userinfo, host, port)`` from a reference's authority.

    ``reference.authority_info()`` is tried first. If it raises
    ``exceptions.InvalidAuthority`` the error propagates when ``strict``
    is true; otherwise the authority is split leniently with
    ``split_authority``. A truthy port is converted to ``int``.

    :param reference: Object exposing ``authority_info()`` and
        ``authority``.
    :param bool strict: Whether an invalid authority is an error.
    :returns: A ``(userinfo, host, port)`` tuple.
    :raises exceptions.InvalidAuthority: In strict mode, when the
        authority is invalid.
    :raises exceptions.InvalidPort: When the port is not an integer.
    """
    try:
        parsed = reference.authority_info()
    except exceptions.InvalidAuthority:
        if strict:
            raise
        userinfo, host, port = split_authority(reference.authority)
    else:
        # Thanks to Richard Barrell for this idea:
        # https://twitter.com/0x2ba22e11/status/617338811975139328
        userinfo = parsed.get('userinfo')
        host = parsed.get('host')
        port = parsed.get('port')

    if not port:
        return userinfo, host, port

    try:
        port = int(port)
    except ValueError:
        raise exceptions.InvalidPort(port)
    return userinfo, host, port

View File

@@ -0,0 +1,153 @@
"""Module containing the implementation of the URIReference class."""
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import misc
from . import normalizers
from ._mixin import URIMixin
class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS), URIMixin):
"""Immutable object representing a parsed URI Reference.
.. note::
This class is not intended to be directly instantiated by the user.
This object exposes attributes for the following components of a
URI:
- scheme
- authority
- path
- query
- fragment
.. attribute:: scheme
The scheme that was parsed for the URI Reference. For example,
``http``, ``https``, ``smtp``, ``imap``, etc.
.. attribute:: authority
Component of the URI that contains the user information, host,
and port sub-components. For example,
``google.com``, ``127.0.0.1:5000``, ``username@[::1]``,
``username:password@example.com:443``, etc.
.. attribute:: path
The path that was parsed for the given URI Reference. For example,
``/``, ``/index.php``, etc.
.. attribute:: query
The query component for a given URI Reference. For example, ``a=b``,
``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc.
.. attribute:: fragment
The fragment component of a URI. For example, ``section-3.1``.
This class also provides extra attributes for easier access to information
like the subcomponents of the authority component.
.. attribute:: userinfo
The user information parsed from the authority.
.. attribute:: host
The hostname, IPv4, or IPv6 adddres parsed from the authority.
.. attribute:: port
The port parsed from the authority.
"""
slots = ()
def __new__(cls, scheme, authority, path, query, fragment,
encoding='utf-8'):
"""Create a new URIReference."""
ref = super(URIReference, cls).__new__(
cls,
scheme or None,
authority or None,
path or None,
query,
fragment)
ref.encoding = encoding
return ref
__hash__ = tuple.__hash__
def __eq__(self, other):
"""Compare this reference to another."""
other_ref = other
if isinstance(other, tuple):
other_ref = URIReference(*other)
elif not isinstance(other, URIReference):
try:
other_ref = URIReference.from_string(other)
except TypeError:
raise TypeError(
'Unable to compare URIReference() to {0}()'.format(
type(other).__name__))
# See http://tools.ietf.org/html/rfc3986#section-6.2
naive_equality = tuple(self) == tuple(other_ref)
return naive_equality or self.normalized_equality(other_ref)
def normalize(self):
    """Return a normalized copy of this reference (RFC 3986, 6.2.2).

    The current object is left untouched; every component is passed
    through the matching ``normalizers`` helper and a new
    ``URIReference`` carrying the same encoding is built from the
    results.

    :returns: A new reference object with normalized components.
    :rtype: URIReference
    """
    # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
    # this method.
    scheme = normalizers.normalize_scheme(self.scheme or '')
    authority = normalizers.normalize_authority(
        (self.userinfo, self.host, self.port))
    path = normalizers.normalize_path(self.path or '')
    query = normalizers.normalize_query(self.query)
    fragment = normalizers.normalize_fragment(self.fragment)
    return URIReference(
        scheme, authority, path, query, fragment, self.encoding)
@classmethod
def from_string(cls, uri_string, encoding='utf-8'):
    """Build a reference by parsing a URI string.

    The input is first converted with ``compat.to_str`` and then split
    with ``misc.URI_MATCHER``. The path, query and fragment groups are
    passed through ``normalizers.encode_component``; scheme and
    authority are used as matched.

    :param str uri_string: Unicode URI to be parsed into a reference.
    :param str encoding: The encoding of the string provided
    :returns: :class:`URIReference` or subclass thereof
    """
    text = compat.to_str(uri_string, encoding)
    parts = misc.URI_MATCHER.match(text).groupdict()
    encode = normalizers.encode_component
    return cls(
        parts['scheme'],
        parts['authority'],
        encode(parts['path'], encoding),
        encode(parts['query'], encoding),
        encode(parts['fragment'], encoding),
        encoding,
    )

View File

@@ -0,0 +1,450 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the validation logic for rfc3986."""
from . import exceptions
from . import misc
from . import normalizers
class Validator(object):
    """Configurable set of checks applied to a parsed URI reference.

    Every configuration method returns the validator itself so calls can
    be chained before finally calling :meth:`validate`.

    .. versionadded:: 1.0

    Example usage::

         >>> from rfc3986 import api, validators
         >>> uri = api.uri_reference('https://github.com/')
         >>> validator = validators.Validator().require_presence_of(
         ...    'scheme', 'host', 'path',
         ... ).allow_schemes(
         ...    'http', 'https',
         ... ).allow_hosts(
         ...    '127.0.0.1', 'github.com',
         ... )
         >>> validator.validate(uri)
         >>> invalid_uri = api.uri_reference('imap://mail.google.com')
         >>> validator.validate(invalid_uri)
         Traceback (most recent call last):
         ...
         rfc3986.exceptions.MissingComponentError: ('path was required but
         missing', URIReference(scheme=u'imap', authority=u'mail.google.com',
         path=None, query=None, fragment=None), ['path'])

    """

    # Names accepted by ``require_presence_of`` and ``check_validity_of``.
    COMPONENT_NAMES = frozenset([
        'scheme',
        'userinfo',
        'host',
        'port',
        'path',
        'query',
        'fragment',
    ])

    def __init__(self):
        """Start with no restrictions: nothing required, nothing checked."""
        self.allowed_schemes = set()
        self.allowed_hosts = set()
        self.allowed_ports = set()
        self.allow_password = True
        # Both maps go from component name to an on/off flag.
        self.required_components = dict.fromkeys(
            ('scheme', 'userinfo', 'host', 'port',
             'path', 'query', 'fragment'),
            False,
        )
        self.validated_components = dict(self.required_components)

    def _lowered_known_components(self, components):
        """Lower-case the given names and reject any unknown one.

        :raises ValueError:
            When a name is not in :attr:`Validator.COMPONENT_NAMES`.
        """
        lowered = [name.lower() for name in components]
        for name in lowered:
            if name not in self.COMPONENT_NAMES:
                raise ValueError(
                    '"{}" is not a valid component'.format(name)
                )
        return lowered

    def allow_schemes(self, *schemes):
        """Restrict the scheme to one of the given values.

        Each scheme is stored after ``normalizers.normalize_scheme``.

        .. versionadded:: 1.0

        :param schemes:
            Schemes, without ``://`` that are allowed.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allowed_schemes.update(
            normalizers.normalize_scheme(scheme) for scheme in schemes
        )
        return self

    def allow_hosts(self, *hosts):
        """Restrict the host to one of the given values.

        Each host is stored after ``normalizers.normalize_host``.

        .. versionadded:: 1.0

        :param hosts:
            Hosts that are allowed.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allowed_hosts.update(
            normalizers.normalize_host(host) for host in hosts
        )
        return self

    def allow_ports(self, *ports):
        """Restrict the port to one of the given values.

        A port is stored, unchanged, only when ``int(port, base=10)``
        falls between 0 and 65535 inclusive; other values are skipped.

        .. versionadded:: 1.0

        :param ports:
            Ports that are allowed.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allowed_ports.update(
            port for port in ports
            if 0 <= int(port, base=10) <= 65535
        )
        return self

    def allow_use_of_password(self):
        """Permit a password in the URI.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = True
        return self

    def forbid_use_of_password(self):
        """Reject URIs that carry a password.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = False
        return self

    def check_validity_of(self, *components):
        """Turn on validity checking for the named components.

        This can be specified repeatedly.

        .. versionadded:: 1.1

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`.
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component.
        """
        names = self._lowered_known_components(components)
        self.validated_components.update(dict.fromkeys(names, True))
        return self

    def require_presence_of(self, *components):
        """Mark the named components as required.

        This can be specified repeatedly.

        .. versionadded:: 1.0

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`.
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component.
        """
        names = self._lowered_known_components(components)
        self.required_components.update(dict.fromkeys(names, True))
        return self

    def validate(self, uri):
        """Run every configured check against ``uri``.

        .. versionadded:: 1.0

        :param uri:
            Parsed URI to validate.
        :type uri:
            rfc3986.uri.URIReference
        :raises MissingComponentError:
            When a required component is missing.
        :raises UnpermittedComponentError:
            When a component is not one of those allowed.
        :raises PasswordForbidden:
            When a password is present in the userinfo component but is
            not permitted by configuration.
        :raises InvalidComponentsError:
            When a component was found to be invalid.
        """
        if not self.allow_password:
            check_password(uri)

        required = [
            name
            for name, flag in self.required_components.items()
            if flag
        ]
        validated = [
            name
            for name, flag in self.validated_components.items()
            if flag
        ]

        if required:
            ensure_required_components_exist(uri, required)
        if validated:
            ensure_components_are_valid(uri, validated)

        # Allow-lists are checked last, in scheme/host/port order.
        allow_lists = (
            ('scheme', self.allowed_schemes),
            ('host', self.allowed_hosts),
            ('port', self.allowed_ports),
        )
        for attribute, allowed in allow_lists:
            ensure_one_of(allowed, uri, attribute)
def check_password(uri):
    """Raise ``PasswordForbidden`` when the userinfo contains a ``:``.

    A missing or empty userinfo, or one without a ``:`` separator, is
    accepted silently.
    """
    userinfo = uri.userinfo
    if userinfo and len(userinfo.split(':', 1)) > 1:
        raise exceptions.PasswordForbidden(uri)
def ensure_one_of(allowed_values, uri, attribute):
    """Raise ``UnpermittedComponentError`` for a value outside the allow-list.

    Nothing is raised when the attribute is ``None``, when
    ``allowed_values`` is empty, or when the value is in it.
    """
    value = getattr(uri, attribute)
    if value is None or not allowed_values or value in allowed_values:
        return
    raise exceptions.UnpermittedComponentError(
        attribute, value, allowed_values,
    )
def ensure_required_components_exist(uri, required_components):
    """Raise ``MissingComponentError`` listing every absent component.

    A component counts as absent when the matching attribute of ``uri``
    is ``None``. The missing names are reported in sorted order.
    """
    missing = [
        name
        for name in required_components
        if getattr(uri, name) is None
    ]
    if missing:
        missing.sort()
        raise exceptions.MissingComponentError(uri, *missing)
def is_valid(value, matcher, require):
    """Determine if a value is valid based on the provided matcher.

    :param str value:
        Value to validate. ``None`` means the component is absent.
    :param matcher:
        Compiled regular expression to use to validate the value.
    :param require:
        Whether or not the value is required.
    :returns:
        ``False`` for a missing but required value, ``True`` for a
        missing optional value, otherwise whether ``matcher`` matches
        ``value``.
    :rtype:
        bool
    """
    if value is None:
        # An absent component is only acceptable when it is optional.
        return not require
    # Return a real bool rather than leaking the match object (or None),
    # so the ``*_is_valid`` helpers built on this honour their bool contract.
    return matcher.match(value) is not None
def authority_is_valid(authority, host=None, require=False):
    """Check the authority string and, optionally, its host.

    The authority is checked against ``misc.SUBAUTHORITY_MATCHER``. When
    that succeeds and a ``host`` was supplied, the final answer is the
    result of :func:`host_is_valid` for that host.

    :param str authority:
        The authority to validate.
    :param str host:
        (optional) The host portion of the authority to validate.
    :param bool require:
        (optional) Specify if authority must not be None.
    :returns:
        ``True`` if valid, ``False`` otherwise
    :rtype:
        bool
    """
    validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
    if host is None or not validated:
        return validated
    return host_is_valid(host, require)
def host_is_valid(host, require=False):
    """Check the host string, with extra checks for IP literals.

    The host must first satisfy ``misc.HOST_MATCHER``. A host matching
    ``misc.IPv4_MATCHER`` is then checked by
    :func:`valid_ipv4_host_address`, and one matching
    ``misc.IPv6_MATCHER`` must also match
    ``misc.IPv6_NO_RFC4007_MATCHER``.

    :param str host:
        The host to validate.
    :param bool require:
        (optional) Specify if host must not be None.
    :returns:
        ``True`` if valid, ``False`` otherwise
    :rtype:
        bool
    """
    validated = is_valid(host, misc.HOST_MATCHER, require)
    if not validated or host is None:
        return validated
    if misc.IPv4_MATCHER.match(host):
        return valid_ipv4_host_address(host)
    if misc.IPv6_MATCHER.match(host):
        return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None
    return validated
def scheme_is_valid(scheme, require=False):
    """Check the scheme against ``misc.SCHEME_MATCHER``.

    :param str scheme:
        The scheme string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a scheme.
    :returns:
        ``True`` if the scheme is valid. ``False`` otherwise.
    :rtype:
        bool
    """
    pattern = misc.SCHEME_MATCHER
    return is_valid(scheme, pattern, require)
def path_is_valid(path, require=False):
    """Check the path component against ``misc.PATH_MATCHER``.

    :param str path:
        The path string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a path.
    :returns:
        ``True`` if the path is valid. ``False`` otherwise.
    :rtype:
        bool
    """
    pattern = misc.PATH_MATCHER
    return is_valid(path, pattern, require)
def query_is_valid(query, require=False):
    """Check the query component against ``misc.QUERY_MATCHER``.

    :param str query:
        The query string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a query.
    :returns:
        ``True`` if the query is valid. ``False`` otherwise.
    :rtype:
        bool
    """
    pattern = misc.QUERY_MATCHER
    return is_valid(query, pattern, require)
def fragment_is_valid(fragment, require=False):
    """Check the fragment component against ``misc.FRAGMENT_MATCHER``.

    :param str fragment:
        The fragment string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a fragment.
    :returns:
        ``True`` if the fragment is valid. ``False`` otherwise.
    :rtype:
        bool
    """
    pattern = misc.FRAGMENT_MATCHER
    return is_valid(fragment, pattern, require)
def valid_ipv4_host_address(host):
    """Determine if the given host is a valid IPv4 address.

    Every dot-separated piece must parse as a base-10 integer in the
    range 0-255. The number of pieces is not checked here.

    :param str host:
        Candidate dotted host string.
    :returns:
        ``True`` when every piece is in range, ``False`` otherwise,
        including when a piece is empty or not numeric.
    :rtype:
        bool
    """
    try:
        # A generator lets ``all`` stop at the first out-of-range octet.
        return all(
            0 <= int(octet, base=10) <= 255 for octet in host.split('.')
        )
    except ValueError:
        # A non-numeric or empty octet simply means "not a valid address".
        return False
# Validators for components that are read straight off the URI object;
# ``ensure_components_are_valid`` calls them with ``getattr(uri, name)``.
_COMPONENT_VALIDATORS = {
    'scheme': scheme_is_valid,
    'path': path_is_valid,
    'query': query_is_valid,
    'fragment': fragment_is_valid,
}
# Components that live inside the authority; these are routed to
# ``subauthority_component_is_valid`` instead of the table above.
_SUBAUTHORITY_VALIDATORS = set(['userinfo', 'host', 'port'])
def subauthority_component_is_valid(uri, component):
    """Check one of the userinfo, host or port parts of the authority.

    :returns:
        ``False`` when ``uri.authority_info()`` raises
        ``InvalidAuthority``. For ``'host'`` the result of
        :func:`host_is_valid`; for ``'port'`` whether the port is absent
        or within 0-65535; ``True`` for any other component.
    """
    try:
        parts = uri.authority_info()
    except exceptions.InvalidAuthority:
        return False

    if component == 'host':
        return host_is_valid(parts['host'])

    if component == 'port':
        try:
            number = int(parts['port'])
        except TypeError:
            # A missing port is None, and int(None) raises TypeError;
            # an absent port is considered valid.
            return True
        return 0 <= number <= 65535

    # The authority parsed and this is neither host nor port, so there
    # is nothing further to check.
    return True
def ensure_components_are_valid(uri, validated_components):
    """Raise ``InvalidComponentsError`` naming every invalid component.

    Names found in ``_SUBAUTHORITY_VALIDATORS`` are checked with
    :func:`subauthority_component_is_valid`; all others use the function
    registered in ``_COMPONENT_VALIDATORS``.
    """
    failed = set()
    for name in validated_components:
        if name in _SUBAUTHORITY_VALIDATORS:
            passed = subauthority_component_is_valid(uri, name)
        else:
            passed = _COMPONENT_VALIDATORS[name](getattr(uri, name))
        if not passed:
            failed.add(name)

    if failed:
        raise exceptions.InvalidComponentsError(uri, *failed)

View File

@@ -0,0 +1,868 @@
"""Utilities for writing code that runs on Python 2 and 3"""
# Copyright (c) 2010-2015 Benjamin Peterson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import
import functools
import itertools
import operator
import sys
import types
__author__ = "Benjamin Peterson <benjamin@python.org>"
__version__ = "1.10.0"
# Useful for very coarse version differentiation.
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
PY34 = sys.version_info[0:2] >= (3, 4)

# Type aliases and MAXSIZE. The trailing commas below build one-element
# tuples.
if PY3:
    string_types = str,
    integer_types = int,
    class_types = type,
    text_type = str
    binary_type = bytes

    MAXSIZE = sys.maxsize
else:
    # These names (basestring, long, unicode, types.ClassType) are only
    # evaluated when PY3 is false.
    string_types = basestring,
    integer_types = (int, long)
    class_types = (type, types.ClassType)
    text_type = unicode
    binary_type = str

    if sys.platform.startswith("java"):
        # Jython always uses 32 bits.
        MAXSIZE = int((1 << 31) - 1)
    else:
        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
        # Probe by asking len() to return 1 << 31 and seeing whether
        # that overflows.
        class X(object):

            def __len__(self):
                return 1 << 31
        try:
            len(X())
        except OverflowError:
            # 32-bit
            MAXSIZE = int((1 << 31) - 1)
        else:
            # 64-bit
            MAXSIZE = int((1 << 63) - 1)
        # The probe class is only needed for the check above.
        del X
def _add_doc(func, doc):
    """Attach ``doc`` to ``func`` as its ``__doc__``."""
    setattr(func, "__doc__", doc)
def _import_module(name):
    """Import ``name`` and return the module registered under that name.

    ``__import__`` is called only for its side effect; the module is
    then read from ``sys.modules`` under the full dotted name.
    """
    __import__(name)
    loaded_modules = sys.modules
    return loaded_modules[name]
class _LazyDescr(object):
    """Descriptor that resolves its value on first access and caches it.

    Subclasses provide ``_resolve()``, which produces the real value.
    """

    def __init__(self, name):
        # Attribute name under which the resolved value is stored.
        self.name = name

    def __get__(self, obj, tp):
        resolved = self._resolve()
        # Store the value on the object itself ...
        setattr(obj, self.name, resolved)  # Invokes __set__.
        try:
            # ... and remove this descriptor from the class so the
            # resolution does not run again. A bit ugly, but effective.
            delattr(obj.__class__, self.name)
        except AttributeError:
            pass
        return resolved
class MovedModule(_LazyDescr):
    """Lazy reference to a module whose name differs between Python 2 and 3.

    ``mod`` holds the module name to import: ``old`` when ``PY3`` is
    false, otherwise ``new`` (falling back to ``name`` when ``new`` is
    ``None``).
    """

    def __init__(self, name, old, new=None):
        super(MovedModule, self).__init__(name)
        if not PY3:
            self.mod = old
        elif new is None:
            self.mod = name
        else:
            self.mod = new

    def _resolve(self):
        return _import_module(self.mod)

    def __getattr__(self, attr):
        # Only reached for attributes not already set on the instance:
        # fetch from the real module and cache the result on self.
        value = getattr(self._resolve(), attr)
        setattr(self, attr, value)
        return value
# Module type whose attributes are described by ``_moved_attributes``.
# Deliberately given no class docstring: ``__init__`` copies the class
# docstring onto every instance.
class _LazyModule(types.ModuleType):

    # Subclasses should override this
    _moved_attributes = []

    def __init__(self, name):
        super(_LazyModule, self).__init__(name)
        self.__doc__ = self.__class__.__doc__

    def __dir__(self):
        # The two fixed names first, then every registered move.
        return ["__doc__", "__name__"] + [
            moved.name for moved in self._moved_attributes
        ]
class MovedAttribute(_LazyDescr):
    """Lazy reference to an attribute that moved between Python 2 and 3.

    ``mod`` and ``attr`` hold the module and attribute name to use for
    the running interpreter, chosen according to ``PY3``.
    """

    def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
        super(MovedAttribute, self).__init__(name)
        if not PY3:
            self.mod = old_mod
            self.attr = name if old_attr is None else old_attr
            return

        self.mod = name if new_mod is None else new_mod
        if new_attr is None:
            # Fall back to the old attribute name, then to ``name``.
            new_attr = name if old_attr is None else old_attr
        self.attr = new_attr

    def _resolve(self):
        return getattr(_import_module(self.mod), self.attr)
class _SixMetaPathImporter(object):

    """
    Meta path importer used for six.moves and its submodules.

    Implements the PEP302 finder and loader interface. It should be
    compatible with Python 2.5 and all existing versions of Python3
    """

    def __init__(self, six_module_name):
        self.name = six_module_name
        # Maps fully qualified module names to module-like objects.
        self.known_modules = {}

    def _add_module(self, mod, *fullnames):
        prefix = self.name + "."
        for relative_name in fullnames:
            self.known_modules[prefix + relative_name] = mod

    def _get_module(self, fullname):
        key = self.name + "." + fullname
        return self.known_modules[key]

    def find_module(self, fullname, path=None):
        return self if fullname in self.known_modules else None

    def __get_module(self, fullname):
        # Like a plain lookup, but reports unknown names as ImportError.
        try:
            return self.known_modules[fullname]
        except KeyError:
            raise ImportError("This loader does not know module " + fullname)

    def load_module(self, fullname):
        try:
            # in case of a reload
            return sys.modules[fullname]
        except KeyError:
            pass
        module = self.__get_module(fullname)
        if isinstance(module, MovedModule):
            # Lazy placeholder: swap in the real module.
            module = module._resolve()
        else:
            module.__loader__ = self
        sys.modules[fullname] = module
        return module

    def is_package(self, fullname):
        """
        Return true, if the named module is a package.

        This method is needed to get correct spec objects with
        Python 3.4 (see PEP451)
        """
        module = self.__get_module(fullname)
        return hasattr(module, "__path__")

    def get_code(self, fullname):
        """Return None

        Required, if is_package is implemented"""
        self.__get_module(fullname)  # eventually raises ImportError
        return None

    get_source = get_code  # same as get_code
_importer = _SixMetaPathImporter(__name__)
class _MovedItems(_LazyModule):

    """Lazy loading of moved objects"""
    __path__ = []  # mark as package


# Table of everything exposed under the ``moves`` namespace. Each entry
# gives the exposed name first, followed by where to find it on Python 2
# and (when given) on Python 3; see MovedAttribute and MovedModule.
_moved_attributes = [
    MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
    MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
    MovedAttribute("intern", "__builtin__", "sys"),
    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
    MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
    MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
    MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
    MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
    MovedAttribute("reduce", "__builtin__", "functools"),
    MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
    MovedAttribute("StringIO", "StringIO", "io"),
    MovedAttribute("UserDict", "UserDict", "collections"),
    MovedAttribute("UserList", "UserList", "collections"),
    MovedAttribute("UserString", "UserString", "collections"),
    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
    MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
    MovedModule("builtins", "__builtin__"),
    MovedModule("configparser", "ConfigParser"),
    MovedModule("copyreg", "copy_reg"),
    MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
    MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
    MovedModule("http_cookies", "Cookie", "http.cookies"),
    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
    MovedModule("html_parser", "HTMLParser", "html.parser"),
    MovedModule("http_client", "httplib", "http.client"),
    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
    MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
    MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
    MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
    MovedModule("cPickle", "cPickle", "pickle"),
    MovedModule("queue", "Queue"),
    MovedModule("reprlib", "repr"),
    MovedModule("socketserver", "SocketServer"),
    MovedModule("_thread", "thread", "_thread"),
    MovedModule("tkinter", "Tkinter"),
    MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
    MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
    MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
    MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
    MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"),
    MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
    MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
    MovedModule("tkinter_colorchooser", "tkColorChooser",
                "tkinter.colorchooser"),
    MovedModule("tkinter_commondialog", "tkCommonDialog",
                "tkinter.commondialog"),
    MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
    MovedModule("tkinter_font", "tkFont", "tkinter.font"),
    MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
    MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
                "tkinter.simpledialog"),
    MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
    MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
    MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
    MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
    MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
]
# Add windows specific modules.
if sys.platform == "win32":
    _moved_attributes += [
        MovedModule("winreg", "_winreg"),
    ]

# Install every entry as a lazy descriptor on the class; moved modules
# are additionally registered with the importer under "moves.<name>".
for attr in _moved_attributes:
    setattr(_MovedItems, attr.name, attr)
    if isinstance(attr, MovedModule):
        _importer._add_module(attr, "moves." + attr.name)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
_MovedItems._moved_attributes = _moved_attributes

# The module object behind the "moves" name, registered with the importer.
moves = _MovedItems(__name__ + ".moves")
_importer._add_module(moves, "moves")
class Module_six_moves_urllib_parse(_LazyModule):

    """Lazy loading of moved objects in six.moves.urllib_parse"""


# Names exposed by the lazy urllib_parse module: exposed name, Python 2
# module, Python 3 module.
_urllib_parse_moved_attributes = [
    MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
    MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
    MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
    MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
    MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
    MovedAttribute("urljoin", "urlparse", "urllib.parse"),
    MovedAttribute("urlparse", "urlparse", "urllib.parse"),
    MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
    MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
    MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
    MovedAttribute("quote", "urllib", "urllib.parse"),
    MovedAttribute("quote_plus", "urllib", "urllib.parse"),
    MovedAttribute("unquote", "urllib", "urllib.parse"),
    MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
    MovedAttribute("urlencode", "urllib", "urllib.parse"),
    MovedAttribute("splitquery", "urllib", "urllib.parse"),
    MovedAttribute("splittag", "urllib", "urllib.parse"),
    MovedAttribute("splituser", "urllib", "urllib.parse"),
    MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
    MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
    MovedAttribute("uses_params", "urlparse", "urllib.parse"),
    MovedAttribute("uses_query", "urlparse", "urllib.parse"),
    MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
]
# Install each entry as a lazy descriptor on the class.
for attr in _urllib_parse_moved_attributes:
    setattr(Module_six_moves_urllib_parse, attr.name, attr)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes

# One instance is registered with the importer under both names.
_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
                      "moves.urllib_parse", "moves.urllib.parse")
class Module_six_moves_urllib_error(_LazyModule):

    """Lazy loading of moved objects in six.moves.urllib_error"""


# Names exposed by the lazy urllib_error module: exposed name, Python 2
# module, Python 3 module.
_urllib_error_moved_attributes = [
    MovedAttribute("URLError", "urllib2", "urllib.error"),
    MovedAttribute("HTTPError", "urllib2", "urllib.error"),
    MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
]
# Install each entry as a lazy descriptor on the class.
for attr in _urllib_error_moved_attributes:
    setattr(Module_six_moves_urllib_error, attr.name, attr)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes

# One instance is registered with the importer under both names.
_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
                      "moves.urllib_error", "moves.urllib.error")
class Module_six_moves_urllib_request(_LazyModule):

    """Lazy loading of moved objects in six.moves.urllib_request"""


# Names exposed by the lazy urllib_request module: exposed name,
# Python 2 module, Python 3 module.
_urllib_request_moved_attributes = [
    MovedAttribute("urlopen", "urllib2", "urllib.request"),
    MovedAttribute("install_opener", "urllib2", "urllib.request"),
    MovedAttribute("build_opener", "urllib2", "urllib.request"),
    MovedAttribute("pathname2url", "urllib", "urllib.request"),
    MovedAttribute("url2pathname", "urllib", "urllib.request"),
    MovedAttribute("getproxies", "urllib", "urllib.request"),
    MovedAttribute("Request", "urllib2", "urllib.request"),
    MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
    MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
    MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
    MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
    MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
    MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
    MovedAttribute("FileHandler", "urllib2", "urllib.request"),
    MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
    MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
    MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
    MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
    MovedAttribute("urlretrieve", "urllib", "urllib.request"),
    MovedAttribute("urlcleanup", "urllib", "urllib.request"),
    MovedAttribute("URLopener", "urllib", "urllib.request"),
    MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
    MovedAttribute("proxy_bypass", "urllib", "urllib.request"),
]
# Install each entry as a lazy descriptor on the class.
for attr in _urllib_request_moved_attributes:
    setattr(Module_six_moves_urllib_request, attr.name, attr)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes

# One instance is registered with the importer under both names.
_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
                      "moves.urllib_request", "moves.urllib.request")
class Module_six_moves_urllib_response(_LazyModule):

    """Lazy loading of moved objects in six.moves.urllib_response"""


# Names exposed by the lazy urllib_response module: exposed name,
# Python 2 module, Python 3 module.
_urllib_response_moved_attributes = [
    MovedAttribute("addbase", "urllib", "urllib.response"),
    MovedAttribute("addclosehook", "urllib", "urllib.response"),
    MovedAttribute("addinfo", "urllib", "urllib.response"),
    MovedAttribute("addinfourl", "urllib", "urllib.response"),
]
# Install each entry as a lazy descriptor on the class.
for attr in _urllib_response_moved_attributes:
    setattr(Module_six_moves_urllib_response, attr.name, attr)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes

# One instance is registered with the importer under both names.
_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
                      "moves.urllib_response", "moves.urllib.response")
class Module_six_moves_urllib_robotparser(_LazyModule):

    """Lazy loading of moved objects in six.moves.urllib_robotparser"""


# Names exposed by the lazy urllib_robotparser module: exposed name,
# Python 2 module, Python 3 module.
_urllib_robotparser_moved_attributes = [
    MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
]
# Install each entry as a lazy descriptor on the class.
for attr in _urllib_robotparser_moved_attributes:
    setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
# Remove the loop variable from the module namespace.
del attr

# Keep the table on the class; _LazyModule.__dir__ reads it.
Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes

# One instance is registered with the importer under both names.
_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
                      "moves.urllib_robotparser", "moves.urllib.robotparser")
class Module_six_moves_urllib(types.ModuleType):

    """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
    __path__ = []  # mark as package
    # Each attribute is the lazy module object already registered with
    # the importer under the corresponding "moves.urllib_*" name.
    parse = _importer._get_module("moves.urllib_parse")
    error = _importer._get_module("moves.urllib_error")
    request = _importer._get_module("moves.urllib_request")
    response = _importer._get_module("moves.urllib_response")
    robotparser = _importer._get_module("moves.urllib_robotparser")

    def __dir__(self):
        # Advertise only the five submodule attributes.
        return ['parse', 'error', 'request', 'response', 'robotparser']

# Register one instance of the namespace module under "moves.urllib".
_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
                      "moves.urllib")
def add_move(move):
    """Register ``move`` on ``six.moves`` under its own ``name``."""
    attribute_name = move.name
    setattr(_MovedItems, attribute_name, move)
def remove_move(name):
    """Delete the item called ``name`` from ``six.moves``.

    The class-level descriptor is removed first; if there is none, the
    entry is removed from the ``moves`` module's own ``__dict__``.

    :raises AttributeError: when neither location holds ``name``.
    """
    try:
        delattr(_MovedItems, name)
    except AttributeError:
        module_namespace = moves.__dict__
        try:
            del module_namespace[name]
        except KeyError:
            raise AttributeError("no such move, %r" % (name,))
# Attribute names for method and function internals. They differ
# between Python 3 and Python 2 and are used to build the
# operator.attrgetter accessors (get_method_function and friends).
if PY3:
    _meth_func = "__func__"
    _meth_self = "__self__"

    _func_closure = "__closure__"
    _func_code = "__code__"
    _func_defaults = "__defaults__"
    _func_globals = "__globals__"
else:
    _meth_func = "im_func"
    _meth_self = "im_self"

    _func_closure = "func_closure"
    _func_code = "func_code"
    _func_defaults = "func_defaults"
    _func_globals = "func_globals"
# Use the builtin ``next`` when it exists; a NameError means it does not,
# so fall back to calling the iterator's ``next()`` method.
try:
    advance_iterator = next
except NameError:
    def advance_iterator(it):
        return it.next()
# Expose the chosen implementation under the name ``next`` as well.
next = advance_iterator
# Use the builtin ``callable`` when it exists; a NameError means it does
# not, so emulate it by looking for ``__call__`` anywhere in the type's
# MRO.
try:
    callable = callable
except NameError:
    def callable(obj):
        return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
# Helpers for bound/unbound methods plus an ``Iterator`` base class,
# defined differently depending on PY3.
if PY3:
    def get_unbound_function(unbound):
        # Returned unchanged on Python 3.
        return unbound

    create_bound_method = types.MethodType

    def create_unbound_method(func, cls):
        # Returned unchanged on Python 3; ``cls`` is ignored.
        return func

    Iterator = object
else:
    def get_unbound_function(unbound):
        return unbound.im_func

    def create_bound_method(func, obj):
        return types.MethodType(func, obj, obj.__class__)

    def create_unbound_method(func, cls):
        return types.MethodType(func, None, cls)

    class Iterator(object):

        # Forward the old-style ``next()`` call to the ``__next__``
        # method defined on the instance's type.
        def next(self):
            return type(self).__next__(self)

    callable = callable
_add_doc(get_unbound_function,
         """Get the function out of a possibly unbound function""")
# Accessors built from the version-specific attribute names
# (_meth_func, _meth_self, _func_closure, ...).
get_method_function = operator.attrgetter(_meth_func)
get_method_self = operator.attrgetter(_meth_self)
get_function_closure = operator.attrgetter(_func_closure)
get_function_code = operator.attrgetter(_func_code)
get_function_defaults = operator.attrgetter(_func_defaults)
get_function_globals = operator.attrgetter(_func_globals)
# Dictionary iteration helpers. On Python 3 the keys()/values()/items()
# (and lists()) results are wrapped in iter(); otherwise the iter*
# methods are called directly. Extra keyword arguments are passed
# through to the underlying method.
if PY3:
    def iterkeys(d, **kw):
        return iter(d.keys(**kw))

    def itervalues(d, **kw):
        return iter(d.values(**kw))

    def iteritems(d, **kw):
        return iter(d.items(**kw))

    def iterlists(d, **kw):
        return iter(d.lists(**kw))

    viewkeys = operator.methodcaller("keys")

    viewvalues = operator.methodcaller("values")

    viewitems = operator.methodcaller("items")
else:
    def iterkeys(d, **kw):
        return d.iterkeys(**kw)

    def itervalues(d, **kw):
        return d.itervalues(**kw)

    def iteritems(d, **kw):
        return d.iteritems(**kw)

    def iterlists(d, **kw):
        return d.iterlists(**kw)

    viewkeys = operator.methodcaller("viewkeys")

    viewvalues = operator.methodcaller("viewvalues")

    viewitems = operator.methodcaller("viewitems")

# Docstrings are attached afterwards so both branches share them.
_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
_add_doc(iteritems,
         "Return an iterator over the (key, value) pairs of a dictionary.")
_add_doc(iterlists,
         "Return an iterator over the (key, [values]) pairs of a dictionary.")
# Byte/text literal helpers, byte indexing helpers, StringIO/BytesIO
# aliases and the names of the unittest assertion methods, chosen
# according to PY3.
if PY3:
    def b(s):
        return s.encode("latin-1")

    def u(s):
        return s
    unichr = chr
    # struct is imported only to build int2byte and then removed from
    # the namespace again.
    import struct
    int2byte = struct.Struct(">B").pack
    del struct
    byte2int = operator.itemgetter(0)
    indexbytes = operator.getitem
    iterbytes = iter
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
    _assertCountEqual = "assertCountEqual"
    # Minor version 1 or lower uses the older assertion method names.
    if sys.version_info[1] <= 1:
        _assertRaisesRegex = "assertRaisesRegexp"
        _assertRegex = "assertRegexpMatches"
    else:
        _assertRaisesRegex = "assertRaisesRegex"
        _assertRegex = "assertRegex"
else:
    def b(s):
        return s
    # Workaround for standalone backslash

    def u(s):
        return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
    unichr = unichr
    int2byte = chr

    def byte2int(bs):
        return ord(bs[0])

    def indexbytes(buf, i):
        return ord(buf[i])
    iterbytes = functools.partial(itertools.imap, ord)
    import StringIO
    # One class serves as both StringIO and BytesIO in this branch.
    StringIO = BytesIO = StringIO.StringIO
    _assertCountEqual = "assertItemsEqual"
    _assertRaisesRegex = "assertRaisesRegexp"
    _assertRegex = "assertRegexpMatches"
_add_doc(b, """Byte literal""")
_add_doc(u, """Text literal""")
def assertCountEqual(self, *args, **kwargs):
    # Dispatch to the method of *self* whose name is stored in the
    # module-level ``_assertCountEqual`` variable.
    method = getattr(self, _assertCountEqual)
    return method(*args, **kwargs)
def assertRaisesRegex(self, *args, **kwargs):
    # Dispatch to the method of *self* whose name is stored in the
    # module-level ``_assertRaisesRegex`` variable.
    method = getattr(self, _assertRaisesRegex)
    return method(*args, **kwargs)
def assertRegex(self, *args, **kwargs):
    # Dispatch to the method of *self* whose name is stored in the
    # module-level ``_assertRegex`` variable.
    method = getattr(self, _assertRegex)
    return method(*args, **kwargs)
# ``exec_`` executes code in a namespace; ``reraise`` re-raises an exception
# with an explicit traceback. Both need version-specific syntax, so the
# Python 2 forms are hidden inside strings to keep this file parseable.
if PY3:
    exec_ = getattr(moves.builtins, "exec")

    def reraise(tp, value, tb=None):
        # tp:    exception class, instantiated only when *value* is None.
        # value: exception instance to raise, or None.
        # tb:    traceback to attach, or None.
        try:
            if value is None:
                value = tp()
            if value.__traceback__ is not tb:
                raise value.with_traceback(tb)
            raise value
        finally:
            # The raised exception's traceback references this frame, and
            # this frame's locals reference the exception and traceback.
            # Clearing the locals breaks that reference cycle.
            value = None
            tb = None
else:
    def exec_(_code_, _globs_=None, _locs_=None):
        """Execute code in a namespace."""
        if _globs_ is None:
            # Default to the caller's namespaces.
            frame = sys._getframe(1)
            _globs_ = frame.f_globals
            if _locs_ is None:
                _locs_ = frame.f_locals
            del frame
        elif _locs_ is None:
            _locs_ = _globs_
        exec("""exec _code_ in _globs_, _locs_""")

    # The three-expression ``raise`` is a syntax error on Python 3, so the
    # definition has to be compiled from a string.
    exec_("""def reraise(tp, value, tb=None):
    try:
        raise tp, value, tb
    finally:
        tb = None
""")
# ``raise_from(value, from_value)`` raises *value* chained from *from_value*
# where ``raise ... from ...`` is available. The syntax is invalid on
# Python 2, so those variants are compiled from strings via ``exec_``.
if sys.version_info[:2] == (3, 2):
    # On 3.2 a None cause is handled by raising *value* without chaining.
    exec_("""def raise_from(value, from_value):
    try:
        if from_value is None:
            raise value
        raise value from from_value
    finally:
        value = None
""")
elif sys.version_info[:2] > (3, 2):
    # Clearing ``value`` in ``finally`` avoids keeping the exception alive
    # through this frame's locals after it has propagated.
    exec_("""def raise_from(value, from_value):
    try:
        raise value from from_value
    finally:
        value = None
""")
else:
    def raise_from(value, from_value):
        # No chaining on this branch: *from_value* is ignored.
        raise value
# Use the builtin ``print`` function when the builtins module has one;
# otherwise fall back to the pure-Python implementation below.
print_ = getattr(moves.builtins, "print", None)
if print_ is None:
    def print_(*args, **kwargs):
        """The new-style print function for Python 2.4 and 2.5."""
        fp = kwargs.pop("file", sys.stdout)
        if fp is None:
            return

        def write(data):
            # Anything that is not already a string is converted with str().
            if not isinstance(data, basestring):
                data = str(data)
            # If the file has an encoding, encode unicode with it.
            if isinstance(fp, file) and isinstance(data, unicode):
                if fp.encoding is not None:
                    errors = getattr(fp, "errors", None)
                    if errors is None:
                        errors = "strict"
                    data = data.encode(fp.encoding, errors)
            fp.write(data)

        # A unicode separator, terminator or argument switches the default
        # separator and terminator to unicode as well.
        want_unicode = False

        sep = kwargs.pop("sep", None)
        if isinstance(sep, unicode):
            want_unicode = True
        elif sep is not None and not isinstance(sep, str):
            raise TypeError("sep must be None or a string")

        end = kwargs.pop("end", None)
        if isinstance(end, unicode):
            want_unicode = True
        elif end is not None and not isinstance(end, str):
            raise TypeError("end must be None or a string")

        # Every recognised keyword has been popped by now.
        if kwargs:
            raise TypeError("invalid keyword arguments to print()")

        if not want_unicode:
            for candidate in args:
                if isinstance(candidate, unicode):
                    want_unicode = True
                    break

        if want_unicode:
            newline, space = unicode("\n"), unicode(" ")
        else:
            newline, space = "\n", " "

        if sep is None:
            sep = space
        if end is None:
            end = newline

        for position, item in enumerate(args):
            if position:
                write(sep)
            write(item)
        write(end)
# Before 3.3 the wrapped ``print_`` is not given a ``flush`` keyword, so
# wrap it and perform the flush by hand.
if sys.version_info[:2] < (3, 3):
    _print = print_

    def print_(*args, **kwargs):
        # ``flush`` is removed before delegating; ``file`` is only peeked at
        # so that it is still forwarded to the wrapped function.
        flush = kwargs.pop("flush", False)
        fp = kwargs.get("file", sys.stdout)
        _print(*args, **kwargs)
        if not flush:
            return
        if fp is not None:
            fp.flush()
# Attach documentation to ``reraise`` through the module's ``_add_doc``
# helper (defined earlier in this file; presumably it sets ``__doc__``).
_add_doc(reraise, """Reraise an exception.""")
# ``wraps`` behaves like ``functools.wraps`` and guarantees that the decorated
# function carries a ``__wrapped__`` attribute pointing at the original.
if sys.version_info[0:2] >= (3, 4):
    wraps = functools.wraps
else:
    def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
              updated=functools.WRAPPER_UPDATES):
        def wrapper(f):
            decorated = functools.wraps(wrapped, assigned, updated)(f)
            # Set explicitly on this branch.
            decorated.__wrapped__ = wrapped
            return decorated
        return wrapper
def with_metaclass(meta, *bases):
    """Create a base class with a metaclass.

    Intended use is ``class Foo(with_metaclass(Meta, Base1, Base2)): ...``.

    :param meta: the metaclass the final class should have.
    :param bases: the real base classes of the final class.
    :returns: a placeholder class. Subclassing it triggers the temporary
        metaclass below, which builds the class as ``meta(name, bases, d)``.
    """
    # This requires a bit of explanation: the basic idea is to make a dummy
    # metaclass for one level of class instantiation that replaces itself with
    # the actual metaclass.
    class metaclass(meta):

        def __new__(cls, name, this_bases, d):
            # ``this_bases`` only contains the placeholder class; discard it
            # in favour of the bases the caller asked for.
            return meta(name, bases, d)

        @classmethod
        def __prepare__(cls, name, this_bases):
            # Without this override the inherited ``meta.__prepare__`` would
            # be called with the placeholder base instead of the real bases.
            return meta.__prepare__(name, bases)

    return type.__new__(metaclass, 'temporary_class', (), {})
def add_metaclass(metaclass):
    """Class decorator for creating a class with a metaclass.

    :param metaclass: the metaclass to apply.
    :returns: a decorator that rebuilds the decorated class as
        ``metaclass(name, bases, namespace)`` from a copy of its ``__dict__``.
    """
    def wrapper(cls):
        orig_vars = cls.__dict__.copy()
        slots = orig_vars.get('__slots__')
        if slots is not None:
            if isinstance(slots, str):
                slots = [slots]
            # Drop the entries named by ``__slots__`` from the copied
            # namespace before the class is rebuilt.
            for slots_var in slots:
                orig_vars.pop(slots_var)
        orig_vars.pop('__dict__', None)
        orig_vars.pop('__weakref__', None)
        # ``__qualname__`` is not stored in ``cls.__dict__``; without copying
        # it a nested class would be rebuilt with a top-level qualified name.
        if hasattr(cls, '__qualname__'):
            orig_vars['__qualname__'] = cls.__qualname__
        return metaclass(cls.__name__, cls.__bases__, orig_vars)
    return wrapper
def python_2_unicode_compatible(klass):
    """
    A decorator that defines __unicode__ and __str__ methods under Python 2.
    Under Python 3 it does nothing.

    To support Python 2 and 3 with a single code base, define a __str__ method
    returning text and apply this decorator to the class.

    Raises ValueError under Python 2 when *klass* does not define ``__str__``
    in its own ``__dict__``.
    """
    if not PY2:
        return klass

    defines_str = '__str__' in klass.__dict__
    if not defines_str:
        raise ValueError("@python_2_unicode_compatible cannot be applied "
                         "to %s because it doesn't define __str__()." %
                         klass.__name__)

    # The text-returning method becomes __unicode__; __str__ is replaced by a
    # wrapper returning that text encoded as UTF-8.
    klass.__unicode__ = klass.__str__
    klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
    return klass
# Complete the moves implementation.
# This code is at the end of this module to speed up module loading.
# Turn this module into a package.
__path__ = []  # required for PEP 302 and PEP 451
__package__ = __name__  # see PEP 366 @ReservedAssignment
if globals().get("__spec__") is not None:
    __spec__.submodule_search_locations = []  # PEP 451 @UndefinedVariable

# Remove other six meta path importers, since they cause problems. This can
# happen if six is removed from sys.modules and then reloaded. (Setuptools does
# this for some reason.)
if sys.meta_path:
    for i, importer in enumerate(sys.meta_path):
        # Here's some real nastiness: Another "instance" of the six module might
        # be floating around. Therefore, we can't use isinstance() to check for
        # the six meta path importer, since the other six instance will have
        # inserted an importer with different class.
        if type(importer).__name__ != "_SixMetaPathImporter":
            continue
        # ``name`` is only read once the class name has matched.
        if importer.name == __name__:
            del sys.meta_path[i]
            break
    # Do not leave the loop variables behind in the module namespace.
    del i, importer

# Finally, add the importer to the meta path import hook.
sys.meta_path.append(_importer)

View File

@@ -0,0 +1,19 @@
import sys

# Resolve ``CertificateError`` and ``match_hostname`` from the best available
# provider: the standard library, then the PyPI backport, then the vendored
# implementation.
try:
    # Our match_hostname function is the same as 3.5's, so we only want to
    # import the match_hostname function if it's at least that good.
    if sys.version_info < (3, 5):
        raise ImportError("Fallback to vendored code")
    from ssl import CertificateError, match_hostname
except ImportError:
    try:
        # Backport of the function from a pypi module
        from backports.ssl_match_hostname import (
            CertificateError,
            match_hostname,
        )
    except ImportError:
        # Our vendored copy
        from ._implementation import CertificateError, match_hostname

# Not needed, but documenting what we provide.
__all__ = ('CertificateError', 'match_hostname')

View File

@@ -0,0 +1,156 @@
"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
# Note: This file is under the PSF license as the code comes from the python
# stdlib. http://docs.python.org/3/license.html
import re
import sys
# ipaddress has been backported to 2.6+ in pypi. If it is installed on the
# system, use it to handle IPAddress ServerAltnames (this was added in
# python-3.5) otherwise only do DNS matching. This allows
# backports.ssl_match_hostname to continue to be used in Python 2.7.
try:
from pip._vendor import ipaddress
except ImportError:
ipaddress = None
__version__ = '3.5.0.1'
class CertificateError(ValueError):
    """Raised when a certificate does not match the expected hostname."""
def _dnsname_match(dn, hostname, max_wildcards=1):
"""Matching according to RFC 6125, section 6.4.3
http://tools.ietf.org/html/rfc6125#section-6.4.3
"""
pats = []
if not dn:
return False
# Ported from python3-syntax:
# leftmost, *remainder = dn.split(r'.')
parts = dn.split(r'.')
leftmost = parts[0]
remainder = parts[1:]
wildcards = leftmost.count('*')
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
# than one wildcard per fragment. A survey of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn))
# speed up common case w/o wildcards
if not wildcards:
return dn.lower() == hostname.lower()
# RFC 6125, section 6.4.3, subitem 1.
# The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
# add the remaining fragments, ignore any wildcards
for frag in remainder:
pats.append(re.escape(frag))
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
return pat.match(hostname)
def _to_unicode(obj):
if isinstance(obj, str) and sys.version_info < (3,):
obj = unicode(obj, encoding='ascii', errors='strict')
return obj
def _ipaddress_match(ipname, host_ip):
    """Exact matching of IP addresses.

    RFC 6125 explicitly doesn't define an algorithm for this
    (section 1.7.2 - "Out of Scope").

    *ipname* is the address text taken from the certificate; *host_ip* is the
    already-parsed address to compare against.
    """
    parse_address = ipaddress.ip_address
    # OpenSSL may add a trailing newline to a subjectAltName's IP address
    # Divergence from upstream: ipaddress can't handle byte str
    cert_ip = parse_address(_to_unicode(ipname).rstrip())
    return cert_ip == host_ip
def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*.

    ``subjectAltName`` entries are consulted first: ``DNS`` entries are
    compared with ``_dnsname_match`` when *hostname* is not an IP address,
    and ``IP Address`` entries are compared with ``_ipaddress_match`` when it
    is. The subject's ``commonName`` is only checked when the certificate
    carries no ``DNS`` or ``IP Address`` entry at all. When the ``ipaddress``
    module is unavailable, *hostname* is never treated as an IP address.

    Returns nothing on success.

    Raises ValueError when *cert* is empty or missing, and CertificateError
    when no candidate name matches.
    """
    if not cert:
        raise ValueError("empty or no certificate, match_hostname needs a "
                         "SSL socket or SSL context with either "
                         "CERT_OPTIONAL or CERT_REQUIRED")
    try:
        # Divergence from upstream: ipaddress can't handle byte str
        host_ip = ipaddress.ip_address(_to_unicode(hostname))
    except ValueError:
        # Not an IP address (common case). UnicodeError is a subclass of
        # ValueError, so this also covers the divergence from upstream where
        # a non-ASCII byte string cannot be decoded: addresses should be all
        # ASCII, so such a hostname is not considered an IP address.
        host_ip = None
    except AttributeError:
        # Divergence from upstream: Make ipaddress library optional
        if ipaddress is not None:
            raise
        host_ip = None

    # Every candidate name seen, kept for the error message below.
    dnsnames = []
    for key, value in cert.get('subjectAltName', ()):
        if key == 'DNS':
            if host_ip is None and _dnsname_match(value, hostname):
                return
            dnsnames.append(value)
        elif key == 'IP Address':
            if host_ip is not None and _ipaddress_match(value, host_ip):
                return
            dnsnames.append(value)

    if not dnsnames:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for sub in cert.get('subject', ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == 'commonName':
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)

    if len(dnsnames) > 1:
        raise CertificateError("hostname %r "
                               "doesn't match either of %s"
                               % (hostname, ', '.join(map(repr, dnsnames))))
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r "
                               "doesn't match %r"
                               % (hostname, dnsnames[0]))
    else:
        raise CertificateError("no appropriate commonName or "
                               "subjectAltName fields were found")