This commit is contained in:
mofixx
2025-08-08 10:41:30 +02:00
parent 4444be3799
commit a5df3861fd
1674 changed files with 234266 additions and 0 deletions

View File

@ -0,0 +1,117 @@
"""
pip._vendor holds vendored copies of pip's dependencies so that pip does not
need to depend on anything external.
Files inside of pip._vendor should be considered immutable and should only be
updated to versions from upstream.
"""
from __future__ import absolute_import
import glob
import os.path
import sys
# Downstream redistributors which have debundled our dependencies should also
# patch this value to be true. This will trigger the additional patching
# to cause things like "six" to be available as pip._vendor.six.
DEBUNDLED = False
# By default, look in this directory for a bunch of .whl files which we will
# add to the beginning of sys.path before attempting to import anything. This
# is done to support downstream re-distributors like Debian and Fedora who
# wish to create their own Wheels for our dependencies to aid in debundling.
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
# Define a small helper function to alias our vendored modules to the real ones
# if the vendored ones do not exist. The idea for this was taken from
# https://github.com/kennethreitz/requests/pull/2567.
def vendored(modulename):
vendored_name = "{0}.{1}".format(__name__, modulename)
try:
__import__(modulename, globals(), locals(), level=0)
except ImportError:
# We can just silently allow import failures to pass here. If we
# got to this point it means that ``import pip._vendor.whatever``
# failed and so did ``import whatever``. Since we're importing this
# upfront in an attempt to alias imports, not erroring here will
# just mean we get a regular import error whenever pip *actually*
# tries to import one of these modules to use it, which actually
# gives us a better error message than we would have otherwise
# gotten.
pass
else:
sys.modules[vendored_name] = sys.modules[modulename]
base, head = vendored_name.rsplit(".", 1)
setattr(sys.modules[base], head, sys.modules[modulename])
# If we're operating in a debundled setup, then we want to go ahead and trigger
# the aliasing of our vendored libraries as well as looking for wheels to add
# to our sys.path. This will typically cause all of this code to be a no-op;
# however, downstream redistributors can enable it in a consistent way across
# all platforms.
if DEBUNDLED:
# Actually look inside of WHEEL_DIR to find .whl files and add them to the
# front of our sys.path.
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
# Actually alias all of our vendored dependencies.
vendored("cachecontrol")
vendored("certifi")
vendored("dependency-groups")
vendored("distlib")
vendored("distro")
vendored("packaging")
vendored("packaging.version")
vendored("packaging.specifiers")
vendored("pkg_resources")
vendored("platformdirs")
vendored("progress")
vendored("pyproject_hooks")
vendored("requests")
vendored("requests.exceptions")
vendored("requests.packages")
vendored("requests.packages.urllib3")
vendored("requests.packages.urllib3._collections")
vendored("requests.packages.urllib3.connection")
vendored("requests.packages.urllib3.connectionpool")
vendored("requests.packages.urllib3.contrib")
vendored("requests.packages.urllib3.contrib.ntlmpool")
vendored("requests.packages.urllib3.contrib.pyopenssl")
vendored("requests.packages.urllib3.exceptions")
vendored("requests.packages.urllib3.fields")
vendored("requests.packages.urllib3.filepost")
vendored("requests.packages.urllib3.packages")
vendored("requests.packages.urllib3.packages.ordered_dict")
vendored("requests.packages.urllib3.packages.six")
vendored("requests.packages.urllib3.packages.ssl_match_hostname")
vendored("requests.packages.urllib3.packages.ssl_match_hostname."
"_implementation")
vendored("requests.packages.urllib3.poolmanager")
vendored("requests.packages.urllib3.request")
vendored("requests.packages.urllib3.response")
vendored("requests.packages.urllib3.util")
vendored("requests.packages.urllib3.util.connection")
vendored("requests.packages.urllib3.util.request")
vendored("requests.packages.urllib3.util.response")
vendored("requests.packages.urllib3.util.retry")
vendored("requests.packages.urllib3.util.ssl_")
vendored("requests.packages.urllib3.util.timeout")
vendored("requests.packages.urllib3.util.url")
vendored("resolvelib")
vendored("rich")
vendored("rich.console")
vendored("rich.highlighter")
vendored("rich.logging")
vendored("rich.markup")
vendored("rich.progress")
vendored("rich.segment")
vendored("rich.style")
vendored("rich.text")
vendored("rich.traceback")
if sys.version_info < (3, 11):
vendored("tomli")
vendored("truststore")
vendored("urllib3")

View File

@ -0,0 +1,29 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.14.3"
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.wrapper import CacheControl
__all__ = [
"__author__",
"__email__",
"__version__",
"CacheControlAdapter",
"CacheController",
"CacheControl",
]
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())
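A minimal usage sketch for the public interface above, shown with the upstream package name (inside pip these imports go through pip._vendor):

import requests

from cachecontrol import CacheControl

sess = CacheControl(requests.Session())
resp = sess.get("https://example.com/")  # first request goes to the network
resp = sess.get("https://example.com/")  # may now be served from the default in-memory DictCache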

View File

@ -0,0 +1,70 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING
from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger
if TYPE_CHECKING:
from argparse import Namespace
from pip._vendor.cachecontrol.controller import CacheController
def setup_logging() -> None:
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)
def get_session() -> requests.Session:
adapter = CacheControlAdapter(
DictCache(), cache_etags=True, serializer=None, heuristic=None
)
sess = requests.Session()
sess.mount("http://", adapter)
sess.mount("https://", adapter)
sess.cache_controller = adapter.controller # type: ignore[attr-defined]
return sess
def get_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args()
def main() -> None:
args = get_args()
sess = get_session()
# Make a request to get a response
resp = sess.get(args.url)
# Turn on logging
setup_logging()
# try setting the cache
cache_controller: CacheController = (
sess.cache_controller # type: ignore[attr-defined]
)
cache_controller.cache_response(resp.request, resp.raw)
# Now try to get it
if cache_controller.cached_request(resp.request):
print("Cached!")
else:
print("Not cached :(")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,168 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import functools
import types
import weakref
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping
from pip._vendor.requests.adapters import HTTPAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController
from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper
if TYPE_CHECKING:
from pip._vendor.requests import PreparedRequest, Response
from pip._vendor.urllib3 import HTTPResponse
from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.heuristics import BaseHeuristic
from pip._vendor.cachecontrol.serialize import Serializer
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = {"PUT", "PATCH", "DELETE"}
def __init__(
self,
cache: BaseCache | None = None,
cache_etags: bool = True,
controller_class: type[CacheController] | None = None,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
cacheable_methods: Collection[str] | None = None,
*args: Any,
**kw: Any,
) -> None:
super().__init__(*args, **kw)
self.cache = DictCache() if cache is None else cache
self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ("GET",)
controller_factory = controller_class or CacheController
self.controller = controller_factory(
self.cache, cache_etags=cache_etags, serializer=serializer
)
def send(
self,
request: PreparedRequest,
stream: bool = False,
timeout: None | float | tuple[float, float] | tuple[float, None] = None,
verify: bool | str = True,
cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None,
proxies: Mapping[str, str] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
"""
cacheable = cacheable_methods or self.cacheable_methods
if request.method in cacheable:
try:
cached_response = self.controller.cached_request(request)
except zlib.error:
cached_response = None
if cached_response:
return self.build_response(request, cached_response, from_cache=True)
# check for etags and add headers if appropriate
request.headers.update(self.controller.conditional_headers(request))
resp = super().send(request, stream, timeout, verify, cert, proxies)
return resp
def build_response( # type: ignore[override]
self,
request: PreparedRequest,
response: HTTPResponse,
from_cache: bool = False,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response.
"""
cacheable = cacheable_methods or self.cacheable_methods
if not from_cache and request.method in cacheable:
# Check for any heuristics that might update headers
# before trying to cache.
if self.heuristic:
response = self.heuristic.apply(response)
# apply any expiration heuristics
if response.status == 304:
# We must have sent an ETag request. This could mean that
# we've already expired or that we simply have an etag. In
# either case, we want to try to update the cache.
cached_response = self.controller.update_cached_response(
request, response
)
if cached_response is not response:
from_cache = True
# We are done with the server response, read a
# possible response body (compliant servers will
# not return one, but we cannot be 100% sure) and
# release the connection back to the pool.
response.read(decode_content=False)
response.release_conn()
response = cached_response
# We always cache the 301 responses
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
self.controller.cache_response(request, response)
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper( # type: ignore[assignment]
response._fp, # type: ignore[arg-type]
functools.partial(
self.controller.cache_response, request, weakref.ref(response)
),
)
if response.chunked:
super_update_chunk_length = response.__class__._update_chunk_length
def _update_chunk_length(
weak_self: weakref.ReferenceType[HTTPResponse],
) -> None:
self = weak_self()
if self is None:
return
super_update_chunk_length(self)
if self.chunk_left == 0:
self._fp._close() # type: ignore[union-attr]
response._update_chunk_length = functools.partial( # type: ignore[method-assign]
_update_chunk_length, weakref.ref(response)
)
resp: Response = super().build_response(request, response)
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
assert request.url is not None
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
resp.from_cache = from_cache # type: ignore[attr-defined]
return resp
def close(self) -> None:
self.cache.close()
super().close() # type: ignore[no-untyped-call]
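The adapter can also be mounted directly instead of going through the CacheControl() wrapper; a sketch assuming the upstream package names and the optional filelock dependency (required by FileCache) are available:

import requests

from cachecontrol import CacheControlAdapter
from cachecontrol.caches import FileCache

sess = requests.Session()
adapter = CacheControlAdapter(cache=FileCache(".webcache"), cache_etags=True)
sess.mount("http://", adapter)
sess.mount("https://", adapter)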

View File

@ -0,0 +1,75 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The cache object API for implementing caches. The default is a
thread-safe in-memory dictionary.
"""
from __future__ import annotations
from threading import Lock
from typing import IO, TYPE_CHECKING, MutableMapping
if TYPE_CHECKING:
from datetime import datetime
class BaseCache:
def get(self, key: str) -> bytes | None:
raise NotImplementedError()
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
raise NotImplementedError()
def delete(self, key: str) -> None:
raise NotImplementedError()
def close(self) -> None:
pass
class DictCache(BaseCache):
def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None:
self.lock = Lock()
self.data = init_dict or {}
def get(self, key: str) -> bytes | None:
return self.data.get(key, None)
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
with self.lock:
self.data.update({key: value})
def delete(self, key: str) -> None:
with self.lock:
if key in self.data:
self.data.pop(key)
class SeparateBodyBaseCache(BaseCache):
"""
In this variant, the body is not stored mixed in with the metadata, but is
passed in (as a bytes-like object) in a separate call to ``set_body()``.
That is, the expected interaction pattern is::
cache.set(key, serialized_metadata)
cache.set_body(key, body)
Similarly, the body should be loaded separately via ``get_body()``.
"""
def set_body(self, key: str, body: bytes) -> None:
raise NotImplementedError()
def get_body(self, key: str) -> IO[bytes] | None:
"""
Return the body as file-like object.
"""
raise NotImplementedError()
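A backend only has to implement get/set/delete. As a hypothetical sketch (names invented for illustration), a subclass that logs writes while delegating storage to DictCache:

from __future__ import annotations

from datetime import datetime

from cachecontrol.cache import DictCache


class LoggingDictCache(DictCache):
    """Hypothetical backend: log each write, delegate storage to DictCache."""

    def set(self, key: str, value: bytes, expires: int | datetime | None = None) -> None:
        print(f"caching {key!r}: {len(value)} bytes (expires={expires})")
        super().set(key, value, expires=expires)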

View File

@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from pip._vendor.cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache
from pip._vendor.cachecontrol.caches.redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]

View File

@ -0,0 +1,145 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import hashlib
import os
import tempfile
from textwrap import dedent
from typing import IO, TYPE_CHECKING
from pathlib import Path
from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.controller import CacheController
if TYPE_CHECKING:
from datetime import datetime
from filelock import BaseFileLock
class _FileCacheMixin:
"""Shared implementation for both FileCache variants."""
def __init__(
self,
directory: str | Path,
forever: bool = False,
filemode: int = 0o0600,
dirmode: int = 0o0700,
lock_class: type[BaseFileLock] | None = None,
) -> None:
try:
if lock_class is None:
from filelock import FileLock
lock_class = FileLock
except ImportError:
notice = dedent(
"""
NOTE: In order to use the FileCache you must have
filelock installed. You can install it via pip:
pip install cachecontrol[filecache]
"""
)
raise ImportError(notice)
self.directory = directory
self.forever = forever
self.filemode = filemode
self.dirmode = dirmode
self.lock_class = lock_class
@staticmethod
def encode(x: str) -> str:
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name: str) -> str:
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key: str) -> bytes | None:
name = self._fn(key)
try:
with open(name, "rb") as fh:
return fh.read()
except FileNotFoundError:
return None
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
name = self._fn(key)
self._write(name, value)
def _write(self, path: str, data: bytes) -> None:
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
dirname = os.path.dirname(path)
os.makedirs(dirname, self.dirmode, exist_ok=True)
with self.lock_class(path + ".lock"):
# Write our actual file
(fd, name) = tempfile.mkstemp(dir=dirname)
try:
os.write(fd, data)
finally:
os.close(fd)
os.chmod(name, self.filemode)
os.replace(name, path)
def _delete(self, key: str, suffix: str) -> None:
name = self._fn(key) + suffix
if not self.forever:
try:
os.remove(name)
except FileNotFoundError:
pass
class FileCache(_FileCacheMixin, BaseCache):
"""
Traditional FileCache: body is stored in memory, so not suitable for large
downloads.
"""
def delete(self, key: str) -> None:
self._delete(key, "")
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
"""
Memory-efficient FileCache: body is stored in a separate file, reducing
peak memory usage.
"""
def get_body(self, key: str) -> IO[bytes] | None:
name = self._fn(key) + ".body"
try:
return open(name, "rb")
except FileNotFoundError:
return None
def set_body(self, key: str, body: bytes) -> None:
name = self._fn(key) + ".body"
self._write(name, body)
def delete(self, key: str) -> None:
self._delete(key, "")
self._delete(key, ".body")
def url_to_file_path(url: str, filecache: FileCache) -> str:
"""Return the file cache path based on the URL.
This does not ensure the file exists!
"""
key = CacheController.cache_url(url)
return filecache._fn(key)
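To see the fan-out scheme that encode() and _fn() produce (illustration; the digest depends only on hashlib):

import hashlib
import os

key = "http://example.com/"
hashed = hashlib.sha224(key.encode()).hexdigest()
parts = list(hashed[:5]) + [hashed]
print(os.path.join("cache-dir", *parts))
# cache-dir/<h0>/<h1>/<h2>/<h3>/<h4>/<full sha224 hex digest>
# i.e. five single-character directory levels, then the full digest as filename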

View File

@ -0,0 +1,48 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from pip._vendor.cachecontrol.cache import BaseCache
if TYPE_CHECKING:
from redis import Redis
class RedisCache(BaseCache):
def __init__(self, conn: Redis[bytes]) -> None:
self.conn = conn
def get(self, key: str) -> bytes | None:
return self.conn.get(key)
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
if not expires:
self.conn.set(key, value)
elif isinstance(expires, datetime):
now_utc = datetime.now(timezone.utc)
if expires.tzinfo is None:
now_utc = now_utc.replace(tzinfo=None)
delta = expires - now_utc
self.conn.setex(key, int(delta.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
def delete(self, key: str) -> None:
self.conn.delete(key)
def clear(self) -> None:
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self) -> None:
"""Redis uses connection pooling, no need to close the connection."""
pass
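A usage sketch, assuming the upstream package names, a redis server on localhost, and the redis-py client installed:

import redis
import requests

from cachecontrol import CacheControl
from cachecontrol.caches import RedisCache

conn = redis.Redis(host="localhost", port=6379)
sess = CacheControl(requests.Session(), cache=RedisCache(conn))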

View File

@ -0,0 +1,511 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The httplib2 algorithms ported for use with requests.
"""
from __future__ import annotations
import calendar
import logging
import re
import time
import weakref
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.serialize import Serializer
if TYPE_CHECKING:
from typing import Literal
from pip._vendor.requests import PreparedRequest
from pip._vendor.urllib3 import HTTPResponse
from pip._vendor.cachecontrol.cache import BaseCache
logger = logging.getLogger(__name__)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
PERMANENT_REDIRECT_STATUSES = (301, 308)
def parse_uri(uri: str) -> tuple[str, str, str, str, str]:
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
match = URI.match(uri)
assert match is not None
groups = match.groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController:
"""An interface to see if request should cached or not."""
def __init__(
self,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
status_codes: Collection[int] | None = None,
):
self.cache = DictCache() if cache is None else cache
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
@classmethod
def _urlnorm(cls, uri: str) -> str:
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
scheme = scheme.lower()
authority = authority.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
@classmethod
def cache_url(cls, uri: str) -> str:
return cls._urlnorm(uri)
def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]:
known_directives = {
# https://tools.ietf.org/html/rfc7234#section-5.2
"max-age": (int, True),
"max-stale": (int, False),
"min-fresh": (int, True),
"no-cache": (None, False),
"no-store": (None, False),
"no-transform": (None, False),
"only-if-cached": (None, False),
"must-revalidate": (None, False),
"public": (None, False),
"private": (None, False),
"proxy-revalidate": (None, False),
"s-maxage": (int, True),
}
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
retval: dict[str, int | None] = {}
for cc_directive in cc_headers.split(","):
if not cc_directive.strip():
continue
parts = cc_directive.split("=", 1)
directive = parts[0].strip()
try:
typ, required = known_directives[directive]
except KeyError:
logger.debug("Ignoring unknown cache-control directive: %s", directive)
continue
if not typ or not required:
retval[directive] = None
if typ:
try:
retval[directive] = typ(parts[1].strip())
except IndexError:
if required:
logger.debug(
"Missing value for cache-control " "directive: %s",
directive,
)
except ValueError:
logger.debug(
"Invalid value for cache-control directive " "%s, must be %s",
directive,
typ.__name__,
)
return retval
def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
"""
Load a cached response, or return None if it's not available.
"""
# We do not support caching of partial content: so if the request contains a
# Range header then we don't want to load anything from the cache.
if "Range" in request.headers:
return None
cache_url = request.url
assert cache_url is not None
cache_data = self.cache.get(cache_url)
if cache_data is None:
logger.debug("No cache entry available")
return None
if isinstance(self.cache, SeparateBodyBaseCache):
body_file = self.cache.get_body(cache_url)
else:
body_file = None
result = self.serializer.loads(request, cache_data, body_file)
if result is None:
logger.warning("Cache entry deserialization failed, entry ignored")
return result
def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
# Bail out if the request insists on fresh data
if "no-cache" in cc:
logger.debug('Request header has "no-cache", cache bypassed')
return False
if "max-age" in cc and cc["max-age"] == 0:
logger.debug('Request header has "max_age" as 0, cache bypassed')
return False
# Check whether we can load the response from the cache:
resp = self._load_from_cache(request)
if not resp:
return False
# If we have a cached permanent redirect, return it immediately. We
# don't need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
#
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# A client can try to refresh the value by repeating the request
# with cache-busting headers as usual (i.e. no-cache).
if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
msg = (
"Returning cached permanent redirect response "
"(ignoring date and etag information)"
)
logger.debug(msg)
return resp
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if not headers or "date" not in headers:
if "etag" not in headers:
# Without date or etag, the cached response can never be used
# and should be deleted.
logger.debug("Purging cached response: no date or etag")
self.cache.delete(cache_url)
logger.debug("Ignoring cached response: no date")
return False
now = time.time()
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, now - date)
logger.debug("Current age based on date: %i", current_age)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(headers)
# determine freshness
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
max_age = resp_cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif "expires" in headers:
expires = parsedate_tz(headers["expires"])
if expires is not None:
expire_time = calendar.timegm(expires[:6]) - date
freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
# Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
max_age = cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug(
"Freshness lifetime from request max-age: %i", freshness_lifetime
)
min_fresh = cc.get("min-fresh")
if min_fresh is not None:
# adjust our current age by our min fresh
current_age += min_fresh
logger.debug("Adjusted current age from min-fresh: %i", current_age)
# Return entry if it is fresh enough
if freshness_lifetime > current_age:
logger.debug('The response is "fresh", returning cached response')
logger.debug("%i > %i", freshness_lifetime, current_age)
return resp
# we're not fresh. If we don't have an Etag, clear it out
if "etag" not in headers:
logger.debug('The cached response is "stale" with no etag, purging')
self.cache.delete(cache_url)
# return the original handler
return False
def conditional_headers(self, request: PreparedRequest) -> dict[str, str]:
resp = self._load_from_cache(request)
new_headers = {}
if resp:
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if "etag" in headers:
new_headers["If-None-Match"] = headers["ETag"]
if "last-modified" in headers:
new_headers["If-Modified-Since"] = headers["Last-Modified"]
return new_headers
def _cache_set(
self,
cache_url: str,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
expires_time: int | None = None,
) -> None:
"""
Store the data in the cache.
"""
if isinstance(self.cache, SeparateBodyBaseCache):
# We pass in the body separately; just put a placeholder empty
# string in the metadata.
self.cache.set(
cache_url,
self.serializer.dumps(request, response, b""),
expires=expires_time,
)
# body is None can happen when, for example, we're only updating
# headers, as is the case in update_cached_response().
if body is not None:
self.cache.set_body(cache_url, body)
else:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body),
expires=expires_time,
)
def cache_response(
self,
request: PreparedRequest,
response_or_ref: HTTPResponse | weakref.ReferenceType[HTTPResponse],
body: bytes | None = None,
status_codes: Collection[int] | None = None,
) -> None:
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
if isinstance(response_or_ref, weakref.ReferenceType):
response = response_or_ref()
if response is None:
# The weakref can be None only if the user made a streamed request,
# did not consume or close it, and holds no reference to the
# requests.Response. In that case, we don't want to cache the response.
return
else:
response = response_or_ref
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
cacheable_status_codes = status_codes or self.cacheable_status_codes
if response.status not in cacheable_status_codes:
logger.debug(
"Status code %s not in %s", response.status, cacheable_status_codes
)
return
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if "date" in response_headers:
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
else:
date = 0
# If we've been given a body, and our response has a Content-Length
# header whose value is valid, then we can check whether the body
# we've been given matches the expected size; if it doesn't, we just
# skip trying to cache it.
if (
body is not None
and "content-length" in response_headers
and response_headers["content-length"].isdigit()
and int(response_headers["content-length"]) != len(body)
):
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
# Delete it from the cache if we happen to have it stored there
no_store = False
if "no-store" in cc:
no_store = True
logger.debug('Response header has "no-store"')
if "no-store" in cc_req:
no_store = True
logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url):
logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url)
if no_store:
return
# https://tools.ietf.org/html/rfc7234#section-4.1:
# A Vary header field-value of "*" always fails to match.
# Storing such a response leads to a deserialization warning
# during cache lookup and is not allowed to ever be served,
# so storing it can be avoided.
if "*" in response_headers.get("vary", ""):
logger.debug('Response header has "Vary: *"')
return
# If we've been given an etag, then keep the response
if self.cache_etags and "etag" in response_headers:
expires_time = 0
if response_headers.get("expires"):
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires[:6]) - date
expires_time = max(expires_time, 14 * 86400)
logger.debug(f"etag object cached for {expires_time} seconds")
logger.debug("Caching due to etag")
self._cache_set(cache_url, request, response, body, expires_time)
# Add to the cache any permanent redirects. We do this before
# looking at the Date headers.
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
logger.debug("Caching permanent redirect")
self._cache_set(cache_url, request, response, b"")
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif "date" in response_headers:
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
# cache when there is a max-age > 0
max_age = cc.get("max-age")
if max_age is not None and max_age > 0:
logger.debug("Caching b/c date exists and max-age > 0")
expires_time = max_age
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
# If the request can expire, it means we should cache it
# in the meantime.
elif "expires" in response_headers:
if response_headers["expires"]:
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires[:6]) - date
else:
expires_time = None
logger.debug(
"Caching b/c of expires header. expires in {} seconds".format(
expires_time
)
)
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
def update_cached_response(
self, request: PreparedRequest, response: HTTPResponse
) -> HTTPResponse:
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
cached_response = self._load_from_cache(request)
if not cached_response:
# we didn't have a cached response
return response
# Let's update our headers with the headers from the new response:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
#
# The server isn't supposed to send headers that would make
# the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know might be problematic due to
# typical assumptions.
excluded_headers = ["content-length"]
cached_response.headers.update(
{
k: v
for k, v in response.headers.items()
if k.lower() not in excluded_headers
}
)
# we want a 200 b/c we have content via the cache
cached_response.status = 200
# update our cache
self._cache_set(cache_url, request, cached_response)
return cached_response
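The freshness check in cached_request() reduces to comparing current_age against freshness_lifetime; a standalone sketch of that arithmetic with hypothetical header values:

import calendar
import time
from email.utils import parsedate_tz

headers = {  # hypothetical cached-response headers
    "date": "Tue, 05 Aug 2025 10:00:00 GMT",
    "cache-control": "max-age=3600",
}

time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, time.time() - date)
freshness_lifetime = 3600  # as parsed from the max-age directive

if freshness_lifetime > current_age:
    print("fresh: serve the cached response")
else:
    print("stale: revalidate or refetch")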

View File

@ -0,0 +1,119 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import mmap
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Callable
if TYPE_CHECKING:
from http.client import HTTPResponse
class CallbackFileWrapper:
"""
Small wrapper around a fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
contents of that buffer.
All attributes are proxied to the underlying file object.
This class uses members with a double underscore (__) leading prefix so as
not to accidentally shadow an attribute.
The data is stored in a temporary file until it is all available. As long
as the temporary files directory is disk-based (sometimes it's a
memory-backed ``tmpfs`` on Linux), data will be offloaded to disk if memory
pressure is high. For small files the disk usually won't be used at all,
it'll all be in the filesystem memory cache, so there should be no
performance impact.
"""
def __init__(
self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
) -> None:
self.__buf = NamedTemporaryFile("rb+", delete=True)
self.__fp = fp
self.__callback = callback
def __getattr__(self, name: str) -> Any:
# The vagaries of garbage collection mean that self.__fp is
# not always set. Using __getattribute__ with the mangled
# private name [0] lets us look up the attribute value and
# raise an AttributeError when it doesn't exist. This stops
# things from infinitely recursing into getattr in the case
# where self.__fp hasn't been set.
#
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
fp = self.__getattribute__("_CallbackFileWrapper__fp")
return getattr(fp, name)
def __is_fp_closed(self) -> bool:
try:
return self.__fp.fp is None
except AttributeError:
pass
try:
closed: bool = self.__fp.closed
return closed
except AttributeError:
pass
# We just don't cache it then.
# TODO: Add some logging here...
return False
def _close(self) -> None:
if self.__callback:
if self.__buf.tell() == 0:
# Empty file:
result = b""
else:
# Return the data without actually loading it into memory,
# relying on Python's buffer API and mmap(). mmap() just gives
# a view directly into the filesystem's memory cache, so it
# doesn't result in duplicate memory use.
self.__buf.seek(0, 0)
result = memoryview(
mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
)
self.__callback(result)
# We assign this to None here, because otherwise we can get into
# really tricky problems where the CPython interpreter deadlocks
# because the callback is holding a reference to something which
# has a __del__ method. Setting this to None breaks the cycle
# and allows the garbage collector to do its thing normally.
self.__callback = None
# Closing the temporary file releases memory and frees disk space.
# Important when caching big files.
self.__buf.close()
def read(self, amt: int | None = None) -> bytes:
data: bytes = self.__fp.read(amt)
if data:
# We may be dealing with b'', a sign that things are over:
# it's passed e.g. after we've already closed self.__buf.
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
def _safe_read(self, amt: int) -> bytes:
data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined]
if amt == 2 and data == b"\r\n":
# urllib executes this read to toss the CRLF at the end
# of the chunk.
return data
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
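A toy demonstration of the tee-then-callback behavior. The fake response below is a hypothetical stand-in for the http.client response object, which sets fp to None at EOF (upstream package name shown):

from __future__ import annotations

import io

from cachecontrol.filewrapper import CallbackFileWrapper


class FakeRawResponse:
    """Hypothetical stand-in mimicking http.client: fp becomes None at EOF."""

    def __init__(self, data: bytes) -> None:
        self._buf = io.BytesIO(data)
        self.fp = self

    def read(self, amt: int | None = None) -> bytes:
        data = self._buf.read(amt)
        if not data:
            self.fp = None
        return data


captured = []
wrapped = CallbackFileWrapper(FakeRawResponse(b"hello world"), captured.append)
while wrapped.read(4):
    pass
print(bytes(captured[0]))  # b'hello world' -- the callback fired at EOF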

View File

@ -0,0 +1,157 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Mapping
if TYPE_CHECKING:
from pip._vendor.urllib3 import HTTPResponse
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
date = date or datetime.now(timezone.utc)
return date + delta
def datetime_to_header(dt: datetime) -> str:
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic:
def warning(self, response: HTTPResponse) -> str | None:
"""
Return a valid 1xx warning header value describing the cache
adjustments.
The response is provided to allow warnings like 113
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
to explicitly say the response is over 24 hours old.
"""
return '110 - "Response is Stale"'
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
signify that the response was cached by the client, not
by way of the provided headers.
"""
return {}
def apply(self, response: HTTPResponse) -> HTTPResponse:
updated_headers = self.update_headers(response)
if updated_headers:
response.headers.update(updated_headers)
warning_header_value = self.warning(response)
if warning_header_value is not None:
response.headers.update({"Warning": warning_header_value})
return response
class OneDayCache(BaseHeuristic):
"""
Cache the response by providing an expires 1 day in the
future.
"""
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
headers = {}
if "expires" not in response.headers:
date = parsedate(response.headers["date"])
expires = expire_after(
timedelta(days=1),
date=datetime(*date[:6], tzinfo=timezone.utc), # type: ignore[index,misc]
)
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
class ExpiresAfter(BaseHeuristic):
"""
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw: Any) -> None:
self.delta = timedelta(**kw)
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
expires = expire_after(self.delta)
return {"expires": datetime_to_header(expires), "cache-control": "public"}
def warning(self, response: HTTPResponse) -> str | None:
tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta
class LastModified(BaseHeuristic):
"""
If there is no Expires header already, fall back on Last-Modified
using the heuristic from
http://tools.ietf.org/html/rfc7234#section-4.2.2
to calculate a reasonable value.
Firefox also does something like this per
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike Mozilla, we limit this to 24 hours.
"""
cacheable_by_default_statuses = {
200,
203,
204,
206,
300,
301,
404,
405,
410,
414,
501,
}
def update_headers(self, resp: HTTPResponse) -> dict[str, str]:
headers: Mapping[str, str] = resp.headers
if "expires" in headers:
return {}
if "cache-control" in headers and headers["cache-control"] != "public":
return {}
if resp.status not in self.cacheable_by_default_statuses:
return {}
if "date" not in headers or "last-modified" not in headers:
return {}
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
last_modified = parsedate(headers["last-modified"])
if last_modified is None:
return {}
now = time.time()
current_age = max(0, now - date)
delta = date - calendar.timegm(last_modified)
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
if freshness_lifetime <= current_age:
return {}
expires = date + freshness_lifetime
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp: HTTPResponse) -> str | None:
return None
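Heuristics plug into the session wrapper. For example, to cache every response for one hour regardless of server headers (sketch with upstream package names):

import requests

from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter

sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(hours=1))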

View File

@ -0,0 +1,146 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import io
from typing import IO, TYPE_CHECKING, Any, Mapping, cast
from pip._vendor import msgpack
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.urllib3 import HTTPResponse
if TYPE_CHECKING:
from pip._vendor.requests import PreparedRequest
class Serializer:
serde_version = "4"
def dumps(
self,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
) -> bytes:
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if body is None:
# When a body isn't passed in, we'll read the response. We
# also update the response with a new file handler to be
# sure it acts as though it was never read.
body = response.read(decode_content=False)
response._fp = io.BytesIO(body) # type: ignore[assignment]
response.length_remaining = len(body)
data = {
"response": {
"body": body, # Empty bytestring if body is stored separately
"headers": {str(k): str(v) for k, v in response.headers.items()},
"status": response.status,
"version": response.version,
"reason": str(response.reason),
"decode_content": response.decode_content,
}
}
# Construct our vary headers
data["vary"] = {}
if "vary" in response_headers:
varied_headers = response_headers["vary"].split(",")
for header in varied_headers:
header = str(header).strip()
header_value = request.headers.get(header, None)
if header_value is not None:
header_value = str(header_value)
data["vary"][header] = header_value
return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)])
def serialize(self, data: dict[str, Any]) -> bytes:
return cast(bytes, msgpack.dumps(data, use_bin_type=True))
def loads(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
# Short circuit if we've been given an empty set of data
if not data:
return None
# Previous versions of this library supported other serialization
# formats, but these have all been removed.
if not data.startswith(f"cc={self.serde_version},".encode()):
return None
data = data[5:]
return self._loads_v4(request, data, body_file)
def prepare_response(
self,
request: PreparedRequest,
cached: Mapping[str, Any],
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
# This case is also handled in the controller code when creating
# a cache entry, but is left here for backwards compatibility.
if "*" in cached.get("vary", {}):
return None
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return None
body_raw = cached["response"].pop("body")
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
data=cached["response"]["headers"]
)
if headers.get("transfer-encoding", "") == "chunked":
headers.pop("transfer-encoding")
cached["response"]["headers"] = headers
try:
body: IO[bytes]
if body_file is None:
body = io.BytesIO(body_raw)
else:
body = body_file
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
# a Python 3 str (unicode string), which will cause the above to
# fail with:
#
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode("utf8"))
# Discard any `strict` parameter serialized by older version of cachecontrol.
cached["response"].pop("strict", None)
return HTTPResponse(body=body, preload_content=False, **cached["response"])
def _loads_v4(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
return None
return self.prepare_response(request, cached, body_file)
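The wire format is simply the version tag b"cc=4," followed by a msgpack map. A roundtrip sketch using hand-built request/response objects (upstream package names shown):

import io

from requests import Request
from urllib3 import HTTPResponse

from cachecontrol.serialize import Serializer

req = Request("GET", "http://example.com/").prepare()
resp = HTTPResponse(body=io.BytesIO(b"hi"), status=200, headers={}, preload_content=False)

s = Serializer()
blob = s.dumps(req, resp, body=b"hi")
assert blob.startswith(b"cc=4,")  # version framing checked by loads()

restored = s.loads(req, blob)
assert restored is not None and restored.read() == b"hi"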

View File

@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import TYPE_CHECKING, Collection
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
if TYPE_CHECKING:
from pip._vendor import requests
from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.heuristics import BaseHeuristic
from pip._vendor.cachecontrol.serialize import Serializer
def CacheControl(
sess: requests.Session,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
controller_class: type[CacheController] | None = None,
adapter_class: type[CacheControlAdapter] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> requests.Session:
cache = DictCache() if cache is None else cache
adapter_class = adapter_class or CacheControlAdapter
adapter = adapter_class(
cache,
cache_etags=cache_etags,
serializer=serializer,
heuristic=heuristic,
controller_class=controller_class,
cacheable_methods=cacheable_methods,
)
sess.mount("http://", adapter)
sess.mount("https://", adapter)
return sess

View File

@ -0,0 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2025.07.14"

View File

@ -0,0 +1,12 @@
import argparse
from pip._vendor.certifi import contents, where
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()
if args.contents:
print(contents())
else:
print(where())

File diff suppressed because it is too large

View File

@ -0,0 +1,83 @@
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit
def exit_cacert_ctx() -> None:
_CACERT_CTX.__exit__(None, None, None) # type: ignore[union-attr]
if sys.version_info >= (3, 11):
from importlib.resources import as_file, files
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the file
# in cases where we're inside of a zipimport situation until someone
# actually calls where(), but we don't want to re-extract the file
# on every call of where(), so we'll do it once then store it in a
# global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you to
# manage the cleanup of this file, so it doesn't actually return a
# path, it returns a context manager that will give you the path
# when you enter it and will do any cleanup when you leave it. In
# the common case of not needing a temporary file, it will just
# return the file system location and the __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = as_file(files("pip._vendor.certifi").joinpath("cacert.pem"))
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return files("pip._vendor.certifi").joinpath("cacert.pem").read_text(encoding="ascii")
else:
from importlib.resources import path as get_path, read_text
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the
# file in cases where we're inside of a zipimport situation until
# someone actually calls where(), but we don't want to re-extract
# the file on every call of where(), so we'll do it once then store
# it in a global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you
# to manage the cleanup of this file, so it doesn't actually
# return a path, it returns a context manager that will give
# you the path when you enter it and will do any cleanup when
# you leave it. In the common case of not needing a temporary
# file, it will just return the file system location and the
# __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = get_path("pip._vendor.certifi", "cacert.pem")
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return read_text("pip._vendor.certifi", "cacert.pem", encoding="ascii")
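Typical consumption is to hand the bundle path to an SSL context; a sketch using the upstream package name:

import ssl

import certifi

ctx = ssl.create_default_context(cafile=certifi.where())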

View File

@ -0,0 +1,13 @@
from ._implementation import (
CyclicDependencyError,
DependencyGroupInclude,
DependencyGroupResolver,
resolve,
)
__all__ = (
"CyclicDependencyError",
"DependencyGroupInclude",
"DependencyGroupResolver",
"resolve",
)

View File

@ -0,0 +1,65 @@
import argparse
import sys
from ._implementation import resolve
from ._toml_compat import tomllib
def main() -> None:
if tomllib is None:
print(
"Usage error: dependency-groups CLI requires tomli or Python 3.11+",
file=sys.stderr,
)
raise SystemExit(2)
parser = argparse.ArgumentParser(
description=(
"A dependency-groups CLI. Prints out a resolved group, newline-delimited."
)
)
parser.add_argument(
"GROUP_NAME", nargs="*", help="The dependency group(s) to resolve."
)
parser.add_argument(
"-f",
"--pyproject-file",
default="pyproject.toml",
help="The pyproject.toml file. Defaults to trying in the current directory.",
)
parser.add_argument(
"-o",
"--output",
help="An output file. Defaults to stdout.",
)
parser.add_argument(
"-l",
"--list",
action="store_true",
help="List the available dependency groups",
)
args = parser.parse_args()
with open(args.pyproject_file, "rb") as fp:
pyproject = tomllib.load(fp)
dependency_groups_raw = pyproject.get("dependency-groups", {})
if args.list:
print(*dependency_groups_raw.keys())
return
if not args.GROUP_NAME:
print("A GROUP_NAME is required", file=sys.stderr)
raise SystemExit(3)
content = "\n".join(resolve(dependency_groups_raw, *args.GROUP_NAME))
if args.output is None or args.output == "-":
print(content)
else:
with open(args.output, "w", encoding="utf-8") as fp:
print(content, file=fp)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,209 @@
from __future__ import annotations
import dataclasses
import re
from collections.abc import Mapping
from pip._vendor.packaging.requirements import Requirement
def _normalize_name(name: str) -> str:
return re.sub(r"[-_.]+", "-", name).lower()
def _normalize_group_names(
dependency_groups: Mapping[str, str | Mapping[str, str]],
) -> Mapping[str, str | Mapping[str, str]]:
original_names: dict[str, list[str]] = {}
normalized_groups = {}
for group_name, value in dependency_groups.items():
normed_group_name = _normalize_name(group_name)
original_names.setdefault(normed_group_name, []).append(group_name)
normalized_groups[normed_group_name] = value
errors = []
for normed_name, names in original_names.items():
if len(names) > 1:
errors.append(f"{normed_name} ({', '.join(names)})")
if errors:
raise ValueError(f"Duplicate dependency group names: {', '.join(errors)}")
return normalized_groups
@dataclasses.dataclass
class DependencyGroupInclude:
include_group: str
class CyclicDependencyError(ValueError):
"""
An error representing the detection of a cycle.
"""
def __init__(self, requested_group: str, group: str, include_group: str) -> None:
self.requested_group = requested_group
self.group = group
self.include_group = include_group
if include_group == group:
reason = f"{group} includes itself"
else:
reason = f"{include_group} -> {group}, {group} -> {include_group}"
super().__init__(
"Cyclic dependency group include while resolving "
f"{requested_group}: {reason}"
)
class DependencyGroupResolver:
"""
A resolver for Dependency Group data.
This class handles caching, name normalization, cycle detection, and other
parsing requirements. There are only two public methods for exploring the data:
``lookup()`` and ``resolve()``.
:param dependency_groups: A mapping, as provided via pyproject
``[dependency-groups]``.
"""
def __init__(
self,
dependency_groups: Mapping[str, str | Mapping[str, str]],
) -> None:
if not isinstance(dependency_groups, Mapping):
raise TypeError("Dependency Groups table is not a mapping")
self.dependency_groups = _normalize_group_names(dependency_groups)
# a map of group names to parsed data
self._parsed_groups: dict[
str, tuple[Requirement | DependencyGroupInclude, ...]
] = {}
# a map of group names to their ancestors, used for cycle detection
self._include_graph_ancestors: dict[str, tuple[str, ...]] = {}
# a cache of completed resolutions to Requirement lists
self._resolve_cache: dict[str, tuple[Requirement, ...]] = {}
def lookup(self, group: str) -> tuple[Requirement | DependencyGroupInclude, ...]:
"""
Lookup a group name, returning the parsed dependency data for that group.
This will not resolve includes.
:param group: the name of the group to lookup
:raises ValueError: if the data does not appear to be valid dependency group
data
:raises TypeError: if the data is not a string
:raises LookupError: if group name is absent
:raises packaging.requirements.InvalidRequirement: if a specifier is not valid
"""
if not isinstance(group, str):
raise TypeError("Dependency group name is not a str")
group = _normalize_name(group)
return self._parse_group(group)
def resolve(self, group: str) -> tuple[Requirement, ...]:
"""
Resolve a dependency group to a list of requirements.
:param group: the name of the group to resolve
:raises TypeError: if the inputs appear to be the wrong types
:raises ValueError: if the data does not appear to be valid dependency group
data
:raises LookupError: if group name is absent
:raises packaging.requirements.InvalidRequirement: if a specifier is not valid
"""
if not isinstance(group, str):
raise TypeError("Dependency group name is not a str")
group = _normalize_name(group)
return self._resolve(group, group)
def _parse_group(
self, group: str
) -> tuple[Requirement | DependencyGroupInclude, ...]:
# short circuit -- never do the work twice
if group in self._parsed_groups:
return self._parsed_groups[group]
if group not in self.dependency_groups:
raise LookupError(f"Dependency group '{group}' not found")
raw_group = self.dependency_groups[group]
if not isinstance(raw_group, list):
raise TypeError(f"Dependency group '{group}' is not a list")
elements: list[Requirement | DependencyGroupInclude] = []
for item in raw_group:
if isinstance(item, str):
# packaging.requirements.Requirement parsing ensures that this is a
# valid PEP 508 Dependency Specifier
# raises InvalidRequirement on failure
elements.append(Requirement(item))
elif isinstance(item, dict):
if tuple(item.keys()) != ("include-group",):
raise ValueError(f"Invalid dependency group item: {item}")
include_group = next(iter(item.values()))
elements.append(DependencyGroupInclude(include_group=include_group))
else:
raise ValueError(f"Invalid dependency group item: {item}")
self._parsed_groups[group] = tuple(elements)
return self._parsed_groups[group]
def _resolve(self, group: str, requested_group: str) -> tuple[Requirement, ...]:
"""
This is a helper for cached resolution to strings.
:param group: The name of the group to resolve.
:param requested_group: The group which was used in the original, user-facing
request.
"""
if group in self._resolve_cache:
return self._resolve_cache[group]
parsed = self._parse_group(group)
resolved_group = []
for item in parsed:
if isinstance(item, Requirement):
resolved_group.append(item)
elif isinstance(item, DependencyGroupInclude):
include_group = _normalize_name(item.include_group)
if include_group in self._include_graph_ancestors.get(group, ()):
raise CyclicDependencyError(
requested_group, group, item.include_group
)
self._include_graph_ancestors[include_group] = (
*self._include_graph_ancestors.get(group, ()),
group,
)
resolved_group.extend(self._resolve(include_group, requested_group))
else: # unreachable
raise NotImplementedError(
f"Invalid dependency group item after parse: {item}"
)
self._resolve_cache[group] = tuple(resolved_group)
return self._resolve_cache[group]
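# Usage sketch for DependencyGroupResolver (the group data below is
# hypothetical, not from the source). lookup() returns the parsed items
# without resolving includes; resolve() flattens includes into Requirements:
#
#     resolver = DependencyGroupResolver(
#         {"test": ["pytest", {"include-group": "lint"}], "lint": ["ruff"]}
#     )
#     resolver.lookup("test")   # roughly (Requirement('pytest'), DependencyGroupInclude('lint'))
#     resolver.resolve("test")  # (Requirement('pytest'), Requirement('ruff'))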
def resolve(
dependency_groups: Mapping[str, str | Mapping[str, str]], /, *groups: str
) -> tuple[str, ...]:
"""
Resolve a dependency group to a tuple of requirements, as strings.
:param dependency_groups: the parsed contents of the ``[dependency-groups]`` table
from ``pyproject.toml``
:param groups: the name of the group(s) to resolve
:raises TypeError: if the inputs appear to be the wrong types
:raises ValueError: if the data does not appear to be valid dependency group data
:raises LookupError: if a group name is absent
:raises packaging.requirements.InvalidRequirement: if a specifier is not valid
"""
resolver = DependencyGroupResolver(dependency_groups)
return tuple(str(r) for group in groups for r in resolver.resolve(group))
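# A minimal usage sketch for the module-level resolve() helper above;
# the table contents are illustrative, not from the source:
#
#     groups = {"test": ["pytest>=7", {"include-group": "lint"}], "lint": ["ruff"]}
#     resolve(groups, "test")   # -> ('pytest>=7', 'ruff')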

View File

@ -0,0 +1,59 @@
from __future__ import annotations
import argparse
import sys
from ._implementation import DependencyGroupResolver
from ._toml_compat import tomllib
def main(*, argv: list[str] | None = None) -> None:
if tomllib is None:
print(
"Usage error: dependency-groups CLI requires tomli or Python 3.11+",
file=sys.stderr,
)
raise SystemExit(2)
parser = argparse.ArgumentParser(
description=(
"Lint Dependency Groups for validity. "
"This will eagerly load and check all of your Dependency Groups."
)
)
parser.add_argument(
"-f",
"--pyproject-file",
default="pyproject.toml",
help="The pyproject.toml file. Defaults to trying in the current directory.",
)
args = parser.parse_args(argv if argv is not None else sys.argv[1:])
with open(args.pyproject_file, "rb") as fp:
pyproject = tomllib.load(fp)
dependency_groups_raw = pyproject.get("dependency-groups", {})
errors: list[str] = []
try:
resolver = DependencyGroupResolver(dependency_groups_raw)
except (ValueError, TypeError) as e:
errors.append(f"{type(e).__name__}: {e}")
else:
for groupname in resolver.dependency_groups:
try:
resolver.resolve(groupname)
except (LookupError, ValueError, TypeError) as e:
errors.append(f"{type(e).__name__}: {e}")
if errors:
print("errors encountered while examining dependency groups:")
for msg in errors:
print(f" {msg}")
sys.exit(1)
else:
print("ok")
sys.exit(0)
if __name__ == "__main__":
main()
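# Hypothetical invocation sketch for the lint CLI above: check every group
# in ./pyproject.toml, then exit 0 and print "ok", or exit 1 with errors:
#
#     main(argv=["--pyproject-file", "pyproject.toml"])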

View File

@ -0,0 +1,62 @@
from __future__ import annotations
import argparse
import subprocess
import sys
from ._implementation import DependencyGroupResolver
from ._toml_compat import tomllib
def _invoke_pip(deps: list[str]) -> None:
subprocess.check_call([sys.executable, "-m", "pip", "install", *deps])
def main(*, argv: list[str] | None = None) -> None:
if tomllib is None:
print(
"Usage error: dependency-groups CLI requires tomli or Python 3.11+",
file=sys.stderr,
)
raise SystemExit(2)
parser = argparse.ArgumentParser(description="Install Dependency Groups.")
parser.add_argument(
"DEPENDENCY_GROUP", nargs="+", help="The dependency groups to install."
)
parser.add_argument(
"-f",
"--pyproject-file",
default="pyproject.toml",
help="The pyproject.toml file. Defaults to trying in the current directory.",
)
args = parser.parse_args(argv if argv is not None else sys.argv[1:])
with open(args.pyproject_file, "rb") as fp:
pyproject = tomllib.load(fp)
dependency_groups_raw = pyproject.get("dependency-groups", {})
errors: list[str] = []
resolved: list[str] = []
try:
resolver = DependencyGroupResolver(dependency_groups_raw)
except (ValueError, TypeError) as e:
errors.append(f"{type(e).__name__}: {e}")
else:
for groupname in args.DEPENDENCY_GROUP:
try:
resolved.extend(str(r) for r in resolver.resolve(groupname))
except (LookupError, ValueError, TypeError) as e:
errors.append(f"{type(e).__name__}: {e}")
if errors:
print("errors encountered while examining dependency groups:")
for msg in errors:
print(f" {msg}")
sys.exit(1)
_invoke_pip(resolved)
if __name__ == "__main__":
main()
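# Hypothetical invocation sketch for the install CLI above: resolve the
# named groups ("test" and "docs" are illustrative) from ./pyproject.toml
# and pass the flattened requirements to pip install:
#
#     main(argv=["test", "docs", "-f", "pyproject.toml"])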

View File

@ -0,0 +1,9 @@
try:
import tomllib
except ImportError:
try:
from pip._vendor import tomli as tomllib # type: ignore[no-redef, unused-ignore]
except ModuleNotFoundError: # pragma: no cover
tomllib = None # type: ignore[assignment, unused-ignore]
__all__ = ("tomllib",)

View File

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2024 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import logging
__version__ = '0.4.0'
class DistlibException(Exception):
pass
try:
from logging import NullHandler
except ImportError: # pragma: no cover
class NullHandler(logging.Handler):
def handle(self, record):
pass
def emit(self, record):
pass
def createLock(self):
self.lock = None
logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())

File diff suppressed because it is too large

View File

@ -0,0 +1,358 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2017 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals
import bisect
import io
import logging
import os
import pkgutil
import sys
import types
import zipimport
from . import DistlibException
from .util import cached_property, get_cache_base, Cache
logger = logging.getLogger(__name__)
cache = None # created when needed
class ResourceCache(Cache):
def __init__(self, base=None):
if base is None:
# Use native string to avoid issues on 2.x: see Python #20140.
base = os.path.join(get_cache_base(), str('resource-cache'))
super(ResourceCache, self).__init__(base)
def is_stale(self, resource, path):
"""
Is the cache stale for the given resource?
:param resource: The :class:`Resource` being cached.
:param path: The path of the resource in the cache.
:return: True if the cache is stale.
"""
# Cache invalidation is a hard problem :-)
return True
def get(self, resource):
"""
Get a resource into the cache.
:param resource: A :class:`Resource` instance.
:return: The pathname of the resource in the cache.
"""
prefix, path = resource.finder.get_cache_info(resource)
if prefix is None:
result = path
else:
result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
dirname = os.path.dirname(result)
if not os.path.isdir(dirname):
os.makedirs(dirname)
if not os.path.exists(result):
stale = True
else:
stale = self.is_stale(resource, path)
if stale:
# write the bytes of the resource to the cache location
with open(result, 'wb') as f:
f.write(resource.bytes)
return result
class ResourceBase(object):
def __init__(self, finder, name):
self.finder = finder
self.name = name
class Resource(ResourceBase):
"""
A class representing an in-package resource, such as a data file. This is
not normally instantiated by user code, but rather by a
:class:`ResourceFinder` which manages the resource.
"""
is_container = False # Backwards compatibility
def as_stream(self):
"""
Get the resource as a stream.
This is not a property to make it obvious that it returns a new stream
each time.
"""
return self.finder.get_stream(self)
@cached_property
def file_path(self):
global cache
if cache is None:
cache = ResourceCache()
return cache.get(self)
@cached_property
def bytes(self):
return self.finder.get_bytes(self)
@cached_property
def size(self):
return self.finder.get_size(self)
class ResourceContainer(ResourceBase):
is_container = True # Backwards compatibility
@cached_property
def resources(self):
return self.finder.get_resources(self)
class ResourceFinder(object):
"""
Resource finder for file system resources.
"""
if sys.platform.startswith('java'):
skipped_extensions = ('.pyc', '.pyo', '.class')
else:
skipped_extensions = ('.pyc', '.pyo')
def __init__(self, module):
self.module = module
self.loader = getattr(module, '__loader__', None)
self.base = os.path.dirname(getattr(module, '__file__', ''))
def _adjust_path(self, path):
return os.path.realpath(path)
def _make_path(self, resource_name):
# Issue #50: need to preserve type of path on Python 2.x
# like os.path._get_sep
if isinstance(resource_name, bytes): # should only happen on 2.x
sep = b'/'
else:
sep = '/'
parts = resource_name.split(sep)
parts.insert(0, self.base)
result = os.path.join(*parts)
return self._adjust_path(result)
def _find(self, path):
return os.path.exists(path)
def get_cache_info(self, resource):
return None, resource.path
def find(self, resource_name):
path = self._make_path(resource_name)
if not self._find(path):
result = None
else:
if self._is_directory(path):
result = ResourceContainer(self, resource_name)
else:
result = Resource(self, resource_name)
result.path = path
return result
def get_stream(self, resource):
return open(resource.path, 'rb')
def get_bytes(self, resource):
with open(resource.path, 'rb') as f:
return f.read()
def get_size(self, resource):
return os.path.getsize(resource.path)
def get_resources(self, resource):
def allowed(f):
return (f != '__pycache__' and not
f.endswith(self.skipped_extensions))
return set([f for f in os.listdir(resource.path) if allowed(f)])
def is_container(self, resource):
return self._is_directory(resource.path)
_is_directory = staticmethod(os.path.isdir)
def iterator(self, resource_name):
resource = self.find(resource_name)
if resource is not None:
todo = [resource]
while todo:
resource = todo.pop(0)
yield resource
if resource.is_container:
rname = resource.name
for name in resource.resources:
if not rname:
new_name = name
else:
new_name = '/'.join([rname, name])
child = self.find(new_name)
if child.is_container:
todo.append(child)
else:
yield child
class ZipResourceFinder(ResourceFinder):
"""
Resource finder for resources in .zip files.
"""
def __init__(self, module):
super(ZipResourceFinder, self).__init__(module)
archive = self.loader.archive
self.prefix_len = 1 + len(archive)
# PyPy doesn't have a _files attr on zipimporter, and you can't set one
if hasattr(self.loader, '_files'):
self._files = self.loader._files
else:
self._files = zipimport._zip_directory_cache[archive]
self.index = sorted(self._files)
def _adjust_path(self, path):
return path
def _find(self, path):
path = path[self.prefix_len:]
if path in self._files:
result = True
else:
if path and path[-1] != os.sep:
path = path + os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
if not result:
logger.debug('_find failed: %r %r', path, self.loader.prefix)
else:
logger.debug('_find worked: %r %r', path, self.loader.prefix)
return result
def get_cache_info(self, resource):
prefix = self.loader.archive
path = resource.path[1 + len(prefix):]
return prefix, path
def get_bytes(self, resource):
return self.loader.get_data(resource.path)
def get_stream(self, resource):
return io.BytesIO(self.get_bytes(resource))
def get_size(self, resource):
path = resource.path[self.prefix_len:]
return self._files[path][3]
def get_resources(self, resource):
path = resource.path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
plen = len(path)
result = set()
i = bisect.bisect(self.index, path)
while i < len(self.index):
if not self.index[i].startswith(path):
break
s = self.index[i][plen:]
result.add(s.split(os.sep, 1)[0]) # only immediate children
i += 1
return result
def _is_directory(self, path):
path = path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
return result
_finder_registry = {
type(None): ResourceFinder,
zipimport.zipimporter: ZipResourceFinder
}
try:
# In Python 3.6, _frozen_importlib -> _frozen_importlib_external
try:
import _frozen_importlib_external as _fi
except ImportError:
import _frozen_importlib as _fi
_finder_registry[_fi.SourceFileLoader] = ResourceFinder
_finder_registry[_fi.FileFinder] = ResourceFinder
# See issue #146
_finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
del _fi
except (ImportError, AttributeError):
pass
def register_finder(loader, finder_maker):
_finder_registry[type(loader)] = finder_maker
_finder_cache = {}
def finder(package):
"""
Return a resource finder for a package.
:param package: The name of the package.
:return: A :class:`ResourceFinder` instance for the package.
"""
if package in _finder_cache:
result = _finder_cache[package]
else:
if package not in sys.modules:
__import__(package)
module = sys.modules[package]
path = getattr(module, '__path__', None)
if path is None:
raise DistlibException('You cannot get a finder for a module, '
'only for a package')
loader = getattr(module, '__loader__', None)
finder_maker = _finder_registry.get(type(loader))
if finder_maker is None:
raise DistlibException('Unable to locate finder for %r' % package)
result = finder_maker(module)
_finder_cache[package] = result
return result
_dummy_module = types.ModuleType(str('__dummy__'))
def finder_for_path(path):
"""
Return a resource finder for a path, which should represent a container.
:param path: The path.
:return: A :class:`ResourceFinder` instance for the path.
"""
result = None
# calls any path hooks, gets importer into cache
pkgutil.get_importer(path)
loader = sys.path_importer_cache.get(path)
finder = _finder_registry.get(type(loader))
if finder:
module = _dummy_module
module.__file__ = os.path.join(path, '')
module.__loader__ = loader
result = finder(module)
return result
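# Usage sketch for the finder() API above. The package and resource names
# are illustrative; any importable package containing data files works:
#
#     f = finder('pip._vendor.distlib')
#     r = f.find('t64.exe')        # Resource, ResourceContainer, or None
#     if r is not None and not r.is_container:
#         data = r.bytes           # cached after the first read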

View File

@ -0,0 +1,447 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2023 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from io import BytesIO
import logging
import os
import re
import struct
import sys
import time
from zipfile import ZipInfo
from .compat import sysconfig, detect_encoding, ZipFile
from .resources import finder
from .util import (FileOperator, get_export_entry, convert_path, get_executable, get_platform, in_venv)
logger = logging.getLogger(__name__)
_DEFAULT_MANIFEST = '''
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
<assemblyIdentity version="1.0.0.0"
processorArchitecture="X86"
name="%s"
type="win32"/>
<!-- Identify the application security requirements. -->
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
<security>
<requestedPrivileges>
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
</requestedPrivileges>
</security>
</trustInfo>
</assembly>'''.strip()
# check if Python is called on the first line with this expression
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*-
import re
import sys
if __name__ == '__main__':
from %(module)s import %(import_name)s
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(%(func)s())
'''
# Pre-fetch the contents of all executable wrapper stubs.
# This is to address https://github.com/pypa/pip/issues/12666.
# When updating pip, we rename the old pip in place before installing the
# new version. If we try to fetch a wrapper *after* that rename, the finder
# machinery will be confused as the package is no longer available at the
# location where it was imported from. So we load everything into memory in
# advance.
if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'):
# Issue 31: don't hardcode an absolute package name, but
# determine it relative to the current package
DISTLIB_PACKAGE = __name__.rsplit('.', 1)[0]
WRAPPERS = {
r.name: r.bytes
for r in finder(DISTLIB_PACKAGE).iterator("")
if r.name.endswith(".exe")
}
def enquote_executable(executable):
if ' ' in executable:
# make sure we quote only the executable in case of env
# for example /usr/bin/env "/dir with spaces/bin/jython"
# instead of "/usr/bin/env /dir with spaces/bin/jython"
# otherwise the whole command would be quoted
if executable.startswith('/usr/bin/env '):
env, _executable = executable.split(' ', 1)
if ' ' in _executable and not _executable.startswith('"'):
executable = '%s "%s"' % (env, _executable)
else:
if not executable.startswith('"'):
executable = '"%s"' % executable
return executable
# Keep the old name around (for now), as there is at least one project using it!
_enquote_executable = enquote_executable
class ScriptMaker(object):
"""
A class to copy or create scripts from source scripts or callable
specifications.
"""
script_template = SCRIPT_TEMPLATE
executable = None # for shebangs
def __init__(self, source_dir, target_dir, add_launchers=True, dry_run=False, fileop=None):
self.source_dir = source_dir
self.target_dir = target_dir
self.add_launchers = add_launchers
self.force = False
self.clobber = False
# It only makes sense to set mode bits on POSIX.
self.set_mode = (os.name == 'posix') or (os.name == 'java' and os._name == 'posix')
self.variants = set(('', 'X.Y'))
self._fileop = fileop or FileOperator(dry_run)
self._is_nt = os.name == 'nt' or (os.name == 'java' and os._name == 'nt')
self.version_info = sys.version_info
def _get_alternate_executable(self, executable, options):
if options.get('gui', False) and self._is_nt: # pragma: no cover
dn, fn = os.path.split(executable)
fn = fn.replace('python', 'pythonw')
executable = os.path.join(dn, fn)
return executable
if sys.platform.startswith('java'): # pragma: no cover
def _is_shell(self, executable):
"""
Determine if the specified executable is a script
(contains a #! line)
"""
try:
with open(executable) as fp:
return fp.read(2) == '#!'
except (OSError, IOError):
logger.warning('Failed to open %s', executable)
return False
def _fix_jython_executable(self, executable):
if self._is_shell(executable):
# The Jython workaround is not needed on Linux systems.
import java
if java.lang.System.getProperty('os.name') == 'Linux':
return executable
elif executable.lower().endswith('jython.exe'):
# Use wrapper exe for Jython on Windows
return executable
return '/usr/bin/env %s' % executable
def _build_shebang(self, executable, post_interp):
"""
Build a shebang line. In the simple case (on Windows, or a shebang line
which is not too long or contains spaces) use a simple formulation for
the shebang. Otherwise, use /bin/sh as the executable, with a contrived
shebang which allows the script to run either under Python or sh, using
suitable quoting. Thanks to Harald Nordgren for his input.
See also: http://www.in-ulm.de/~mascheck/various/shebang/#length
https://hg.mozilla.org/mozilla-central/file/tip/mach
"""
if os.name != 'posix':
simple_shebang = True
elif getattr(sys, "cross_compiling", False):
# In a cross-compiling environment, the shebang will likely be a
# script; this *must* be invoked with the "safe" version of the
# shebang, or else using os.exec() to run the entry script will
# fail, raising "OSError 8 [Errno 8] Exec format error".
simple_shebang = False
else:
# Add 3 for '#!' prefix and newline suffix.
shebang_length = len(executable) + len(post_interp) + 3
if sys.platform == 'darwin':
max_shebang_length = 512
else:
max_shebang_length = 127
simple_shebang = ((b' ' not in executable) and (shebang_length <= max_shebang_length))
if simple_shebang:
result = b'#!' + executable + post_interp + b'\n'
else:
result = b'#!/bin/sh\n'
result += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n'
result += b"' '''\n"
return result
def _get_shebang(self, encoding, post_interp=b'', options=None):
enquote = True
if self.executable:
executable = self.executable
enquote = False # assume this will be taken care of
elif not sysconfig.is_python_build():
executable = get_executable()
elif in_venv(): # pragma: no cover
executable = os.path.join(sysconfig.get_path('scripts'), 'python%s' % sysconfig.get_config_var('EXE'))
else: # pragma: no cover
if os.name == 'nt':
# for Python builds from source on Windows, no Python executables with
# a version suffix are created, so we use python.exe
executable = os.path.join(sysconfig.get_config_var('BINDIR'),
'python%s' % (sysconfig.get_config_var('EXE')))
else:
executable = os.path.join(
sysconfig.get_config_var('BINDIR'),
'python%s%s' % (sysconfig.get_config_var('VERSION'), sysconfig.get_config_var('EXE')))
if options:
executable = self._get_alternate_executable(executable, options)
if sys.platform.startswith('java'): # pragma: no cover
executable = self._fix_jython_executable(executable)
# Normalise case for Windows - COMMENTED OUT
# executable = os.path.normcase(executable)
# N.B. The normalising operation above has been commented out: See
# issue #124. Although paths in Windows are generally case-insensitive,
# they aren't always. For example, a path containing a ẞ (which is a
# LATIN CAPITAL LETTER SHARP S - U+1E9E) is normcased to ß (which is a
# LATIN SMALL LETTER SHARP S - U+00DF). The two are not considered by
# Windows as equivalent in path names.
# If the user didn't specify an executable, it may be necessary to
# cater for executable paths with spaces (not uncommon on Windows)
if enquote:
executable = enquote_executable(executable)
# Issue #51: don't use fsencode, since we later try to
# check that the shebang is decodable using utf-8.
executable = executable.encode('utf-8')
# in case of IronPython, play safe and enable frames support
if (sys.platform == 'cli' and '-X:Frames' not in post_interp and
'-X:FullFrames' not in post_interp): # pragma: no cover
post_interp += b' -X:Frames'
shebang = self._build_shebang(executable, post_interp)
# The Python parser reads a script as UTF-8 until it finds a
# #coding:xxx cookie. Since the shebang has to be the first line
# of the file, the cookie cannot come before it, so the shebang
# has to be decodable from UTF-8.
try:
shebang.decode('utf-8')
except UnicodeDecodeError: # pragma: no cover
raise ValueError('The shebang (%r) is not decodable from utf-8' % shebang)
# If the script uses a custom encoding (declared via a
# #coding:xxx cookie), the shebang has to be decodable from
# that encoding too.
if encoding != 'utf-8':
try:
shebang.decode(encoding)
except UnicodeDecodeError: # pragma: no cover
raise ValueError('The shebang (%r) is not decodable '
'from the script encoding (%r)' % (shebang, encoding))
return shebang
def _get_script_text(self, entry):
return self.script_template % dict(
module=entry.prefix, import_name=entry.suffix.split('.')[0], func=entry.suffix)
manifest = _DEFAULT_MANIFEST
def get_manifest(self, exename):
base = os.path.basename(exename)
return self.manifest % base
def _write_script(self, names, shebang, script_bytes, filenames, ext):
use_launcher = self.add_launchers and self._is_nt
if not use_launcher:
script_bytes = shebang + script_bytes
else: # pragma: no cover
if ext == 'py':
launcher = self._get_launcher('t')
else:
launcher = self._get_launcher('w')
stream = BytesIO()
with ZipFile(stream, 'w') as zf:
source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
if source_date_epoch:
date_time = time.gmtime(int(source_date_epoch))[:6]
zinfo = ZipInfo(filename='__main__.py', date_time=date_time)
zf.writestr(zinfo, script_bytes)
else:
zf.writestr('__main__.py', script_bytes)
zip_data = stream.getvalue()
script_bytes = launcher + shebang + zip_data
for name in names:
outname = os.path.join(self.target_dir, name)
if use_launcher: # pragma: no cover
n, e = os.path.splitext(outname)
if e.startswith('.py'):
outname = n
outname = '%s.exe' % outname
try:
self._fileop.write_binary_file(outname, script_bytes)
except Exception:
# Failed writing an executable - it might be in use.
logger.warning('Failed to write executable - trying to '
'use .deleteme logic')
dfname = '%s.deleteme' % outname
if os.path.exists(dfname):
os.remove(dfname) # Not allowed to fail here
os.rename(outname, dfname) # nor here
self._fileop.write_binary_file(outname, script_bytes)
logger.debug('Able to replace executable using '
'.deleteme logic')
try:
os.remove(dfname)
except Exception:
pass # still in use - ignore error
else:
if self._is_nt and not outname.endswith('.' + ext): # pragma: no cover
outname = '%s.%s' % (outname, ext)
if os.path.exists(outname) and not self.clobber:
logger.warning('Skipping existing file %s', outname)
continue
self._fileop.write_binary_file(outname, script_bytes)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
variant_separator = '-'
def get_script_filenames(self, name):
result = set()
if '' in self.variants:
result.add(name)
if 'X' in self.variants:
result.add('%s%s' % (name, self.version_info[0]))
if 'X.Y' in self.variants:
result.add('%s%s%s.%s' % (name, self.variant_separator, self.version_info[0], self.version_info[1]))
return result
def _make_script(self, entry, filenames, options=None):
post_interp = b''
if options:
args = options.get('interpreter_args', [])
if args:
args = ' %s' % ' '.join(args)
post_interp = args.encode('utf-8')
shebang = self._get_shebang('utf-8', post_interp, options=options)
script = self._get_script_text(entry).encode('utf-8')
scriptnames = self.get_script_filenames(entry.name)
if options and options.get('gui', False):
ext = 'pyw'
else:
ext = 'py'
self._write_script(scriptnames, shebang, script, filenames, ext)
def _copy_script(self, script, filenames):
adjust = False
script = os.path.join(self.source_dir, convert_path(script))
outname = os.path.join(self.target_dir, os.path.basename(script))
if not self.force and not self._fileop.newer(script, outname):
logger.debug('not copying %s (up-to-date)', script)
return
# Always open the file, but ignore failures in dry-run mode --
# that way, we'll get accurate feedback if we can read the
# script.
try:
f = open(script, 'rb')
except IOError: # pragma: no cover
if not self.dry_run:
raise
f = None
else:
first_line = f.readline()
if not first_line: # pragma: no cover
logger.warning('%s is an empty file (skipping)', script)
return
match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
if match:
adjust = True
post_interp = match.group(1) or b''
if not adjust:
if f:
f.close()
self._fileop.copy_file(script, outname)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
else:
logger.info('copying and adjusting %s -> %s', script, self.target_dir)
if not self._fileop.dry_run:
encoding, lines = detect_encoding(f.readline)
f.seek(0)
shebang = self._get_shebang(encoding, post_interp)
if b'pythonw' in first_line: # pragma: no cover
ext = 'pyw'
else:
ext = 'py'
n = os.path.basename(outname)
self._write_script([n], shebang, f.read(), filenames, ext)
if f:
f.close()
@property
def dry_run(self):
return self._fileop.dry_run
@dry_run.setter
def dry_run(self, value):
self._fileop.dry_run = value
if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'): # pragma: no cover
# Executable launcher support.
# Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/
def _get_launcher(self, kind):
if struct.calcsize('P') == 8: # 64-bit
bits = '64'
else:
bits = '32'
platform_suffix = '-arm' if get_platform() == 'win-arm64' else ''
name = '%s%s%s.exe' % (kind, bits, platform_suffix)
if name not in WRAPPERS:
msg = ('Unable to find resource %s in package %s' %
(name, DISTLIB_PACKAGE))
raise ValueError(msg)
return WRAPPERS[name]
# Public API follows
def make(self, specification, options=None):
"""
Make a script.
:param specification: The specification, which is either a valid export
entry specification (to make a script from a
callable) or a filename (to make a script by
copying from a source location).
:param options: A dictionary of options controlling script generation.
:return: A list of all absolute pathnames written to.
"""
filenames = []
entry = get_export_entry(specification)
if entry is None:
self._copy_script(specification, filenames)
else:
self._make_script(entry, filenames, options=options)
return filenames
def make_multiple(self, specifications, options=None):
"""
Take a list of specifications and make scripts from them.
:param specifications: A list of specifications.
:param options: A dictionary of options, as for :meth:`make`.
:return: A list of all absolute pathnames written to.
"""
filenames = []
for specification in specifications:
filenames.extend(self.make(specification, options))
return filenames
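# Usage sketch for ScriptMaker (the target directory and entry point are
# hypothetical). Writes console-script wrappers into ./bin:
#
#     maker = ScriptMaker(source_dir=None, target_dir='bin')
#     written = maker.make('hello = mypkg.cli:main')
#     written += maker.make_multiple(['tool = mypkg.tool:run'])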

File diff suppressed because it is too large

View File

@ -0,0 +1,54 @@
from .distro import (
NORMALIZED_DISTRO_ID,
NORMALIZED_LSB_ID,
NORMALIZED_OS_ID,
LinuxDistribution,
__version__,
build_number,
codename,
distro_release_attr,
distro_release_info,
id,
info,
like,
linux_distribution,
lsb_release_attr,
lsb_release_info,
major_version,
minor_version,
name,
os_release_attr,
os_release_info,
uname_attr,
uname_info,
version,
version_parts,
)
__all__ = [
"NORMALIZED_DISTRO_ID",
"NORMALIZED_LSB_ID",
"NORMALIZED_OS_ID",
"LinuxDistribution",
"build_number",
"codename",
"distro_release_attr",
"distro_release_info",
"id",
"info",
"like",
"linux_distribution",
"lsb_release_attr",
"lsb_release_info",
"major_version",
"minor_version",
"name",
"os_release_attr",
"os_release_info",
"uname_attr",
"uname_info",
"version",
"version_parts",
]
__version__ = __version__

View File

@ -0,0 +1,4 @@
from .distro import main
if __name__ == "__main__":
main()

File diff suppressed because it is too large

View File

@ -0,0 +1,45 @@
from .core import (
IDNABidiError,
IDNAError,
InvalidCodepoint,
InvalidCodepointContext,
alabel,
check_bidi,
check_hyphen_ok,
check_initial_combiner,
check_label,
check_nfc,
decode,
encode,
ulabel,
uts46_remap,
valid_contextj,
valid_contexto,
valid_label_length,
valid_string_length,
)
from .intranges import intranges_contain
from .package_data import __version__
__all__ = [
"__version__",
"IDNABidiError",
"IDNAError",
"InvalidCodepoint",
"InvalidCodepointContext",
"alabel",
"check_bidi",
"check_hyphen_ok",
"check_initial_combiner",
"check_label",
"check_nfc",
"decode",
"encode",
"intranges_contain",
"ulabel",
"uts46_remap",
"valid_contextj",
"valid_contexto",
"valid_label_length",
"valid_string_length",
]

View File

@ -0,0 +1,122 @@
import codecs
import re
from typing import Any, Optional, Tuple
from .core import IDNAError, alabel, decode, encode, ulabel
_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
class Codec(codecs.Codec):
def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]:
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
if not data:
return b"", 0
return encode(data), len(data)
def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]:
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
if not data:
return "", 0
return decode(data), len(data)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
if not data:
return b"", 0
labels = _unicode_dots_re.split(data)
trailing_dot = b""
if labels:
if not labels[-1]:
trailing_dot = b"."
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = b"."
result = []
size = 0
for label in labels:
result.append(alabel(label))
if size:
size += 1
size += len(label)
# Join with U+002E
result_bytes = b".".join(result) + trailing_dot
size += len(trailing_dot)
return result_bytes, size
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]:
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
if not data:
return ("", 0)
if not isinstance(data, str):
data = str(data, "ascii")
labels = _unicode_dots_re.split(data)
trailing_dot = ""
if labels:
if not labels[-1]:
trailing_dot = "."
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = "."
result = []
size = 0
for label in labels:
result.append(ulabel(label))
if size:
size += 1
size += len(label)
result_str = ".".join(result) + trailing_dot
size += len(trailing_dot)
return (result_str, size)
class StreamWriter(Codec, codecs.StreamWriter):
pass
class StreamReader(Codec, codecs.StreamReader):
pass
def search_function(name: str) -> Optional[codecs.CodecInfo]:
if name != "idna2008":
return None
return codecs.CodecInfo(
name=name,
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)
codecs.register(search_function)
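# Usage sketch: once search_function is registered, the codec is available
# under the "idna2008" name via the standard codecs machinery:
#
#     "bücher.example".encode("idna2008")   # b'xn--bcher-kva.example'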

View File

@ -0,0 +1,15 @@
from typing import Any, Union
from .core import decode, encode
def ToASCII(label: str) -> bytes:
return encode(label)
def ToUnicode(label: Union[bytes, bytearray]) -> str:
return decode(label)
def nameprep(s: Any) -> None:
raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")

View File

@ -0,0 +1,437 @@
import bisect
import re
import unicodedata
from typing import Optional, Union
from . import idnadata
from .intranges import intranges_contain
_virama_combining_class = 9
_alabel_prefix = b"xn--"
_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
class IDNAError(UnicodeError):
"""Base exception for all IDNA-encoding related problems"""
pass
class IDNABidiError(IDNAError):
"""Exception when bidirectional requirements are not satisfied"""
pass
class InvalidCodepoint(IDNAError):
"""Exception when a disallowed or unallocated codepoint is used"""
pass
class InvalidCodepointContext(IDNAError):
"""Exception when the codepoint is not valid in the context it is used"""
pass
def _combining_class(cp: int) -> int:
v = unicodedata.combining(chr(cp))
if v == 0:
if not unicodedata.name(chr(cp)):
raise ValueError("Unknown character in unicodedata")
return v
def _is_script(cp: str, script: str) -> bool:
return intranges_contain(ord(cp), idnadata.scripts[script])
def _punycode(s: str) -> bytes:
return s.encode("punycode")
def _unot(s: int) -> str:
return "U+{:04X}".format(s)
def valid_label_length(label: Union[bytes, str]) -> bool:
if len(label) > 63:
return False
return True
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
if len(label) > (254 if trailing_dot else 253):
return False
return True
def check_bidi(label: str, check_ltr: bool = False) -> bool:
# Bidi rules should only be applied if string contains RTL characters
bidi_label = False
for idx, cp in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if direction == "":
# String likely comes from a newer version of Unicode
raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx))
if direction in ["R", "AL", "AN"]:
bidi_label = True
if not bidi_label and not check_ltr:
return True
# Bidi rule 1
direction = unicodedata.bidirectional(label[0])
if direction in ["R", "AL"]:
rtl = True
elif direction == "L":
rtl = False
else:
raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label)))
valid_ending = False
number_type: Optional[str] = None
for idx, cp in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if rtl:
# Bidi rule 2
if direction not in [
"R",
"AL",
"AN",
"EN",
"ES",
"CS",
"ET",
"ON",
"BN",
"NSM",
]:
raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx))
# Bidi rule 3
if direction in ["R", "AL", "EN", "AN"]:
valid_ending = True
elif direction != "NSM":
valid_ending = False
# Bidi rule 4
if direction in ["AN", "EN"]:
if not number_type:
number_type = direction
else:
if number_type != direction:
raise IDNABidiError("Can not mix numeral types in a right-to-left label")
else:
# Bidi rule 5
if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]:
raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx))
# Bidi rule 6
if direction in ["L", "EN"]:
valid_ending = True
elif direction != "NSM":
valid_ending = False
if not valid_ending:
raise IDNABidiError("Label ends with illegal codepoint directionality")
return True
def check_initial_combiner(label: str) -> bool:
if unicodedata.category(label[0])[0] == "M":
raise IDNAError("Label begins with an illegal combining character")
return True
def check_hyphen_ok(label: str) -> bool:
if label[2:4] == "--":
raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
if label[0] == "-" or label[-1] == "-":
raise IDNAError("Label must not start or end with a hyphen")
return True
def check_nfc(label: str) -> None:
if unicodedata.normalize("NFC", label) != label:
raise IDNAError("Label must be in Normalization Form C")
def valid_contextj(label: str, pos: int) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x200C:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
ok = False
for i in range(pos - 1, -1, -1):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord("T"):
continue
elif joining_type in [ord("L"), ord("D")]:
ok = True
break
else:
break
if not ok:
return False
ok = False
for i in range(pos + 1, len(label)):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord("T"):
continue
elif joining_type in [ord("R"), ord("D")]:
ok = True
break
else:
break
return ok
if cp_value == 0x200D:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
return False
else:
return False
def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x00B7:
if 0 < pos < len(label) - 1:
if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C:
return True
return False
elif cp_value == 0x0375:
if pos < len(label) - 1 and len(label) > 1:
return _is_script(label[pos + 1], "Greek")
return False
elif cp_value == 0x05F3 or cp_value == 0x05F4:
if pos > 0:
return _is_script(label[pos - 1], "Hebrew")
return False
elif cp_value == 0x30FB:
for cp in label:
if cp == "\u30fb":
continue
if _is_script(cp, "Hiragana") or _is_script(cp, "Katakana") or _is_script(cp, "Han"):
return True
return False
elif 0x660 <= cp_value <= 0x669:
for cp in label:
if 0x6F0 <= ord(cp) <= 0x06F9:
return False
return True
elif 0x6F0 <= cp_value <= 0x6F9:
for cp in label:
if 0x660 <= ord(cp) <= 0x0669:
return False
return True
return False
def check_label(label: Union[str, bytes, bytearray]) -> None:
if isinstance(label, (bytes, bytearray)):
label = label.decode("utf-8")
if len(label) == 0:
raise IDNAError("Empty Label")
check_nfc(label)
check_hyphen_ok(label)
check_initial_combiner(label)
for pos, cp in enumerate(label):
cp_value = ord(cp)
if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]):
continue
elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]):
try:
if not valid_contextj(label, pos):
raise InvalidCodepointContext(
"Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
)
except ValueError:
raise IDNAError(
"Unknown codepoint adjacent to joiner {} at position {} in {}".format(
_unot(cp_value), pos + 1, repr(label)
)
)
elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
if not valid_contexto(label, pos):
raise InvalidCodepointContext(
"Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
)
else:
raise InvalidCodepoint(
"Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label))
)
check_bidi(label)
def alabel(label: str) -> bytes:
try:
label_bytes = label.encode("ascii")
ulabel(label_bytes)
if not valid_label_length(label_bytes):
raise IDNAError("Label too long")
return label_bytes
except UnicodeEncodeError:
pass
check_label(label)
label_bytes = _alabel_prefix + _punycode(label)
if not valid_label_length(label_bytes):
raise IDNAError("Label too long")
return label_bytes
def ulabel(label: Union[str, bytes, bytearray]) -> str:
if not isinstance(label, (bytes, bytearray)):
try:
label_bytes = label.encode("ascii")
except UnicodeEncodeError:
check_label(label)
return label
else:
label_bytes = label
label_bytes = label_bytes.lower()
if label_bytes.startswith(_alabel_prefix):
label_bytes = label_bytes[len(_alabel_prefix) :]
if not label_bytes:
raise IDNAError("Malformed A-label, no Punycode eligible content found")
if label_bytes.decode("ascii")[-1] == "-":
raise IDNAError("A-label must not end with a hyphen")
else:
check_label(label_bytes)
return label_bytes.decode("ascii")
try:
label = label_bytes.decode("punycode")
except UnicodeError:
raise IDNAError("Invalid A-label")
check_label(label)
return label
def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
"""Re-map the characters in the string according to UTS46 processing."""
from .uts46data import uts46data
output = ""
for pos, char in enumerate(domain):
code_point = ord(char)
try:
uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
status = uts46row[1]
replacement: Optional[str] = None
if len(uts46row) == 3:
replacement = uts46row[2]
if (
status == "V"
or (status == "D" and not transitional)
or (status == "3" and not std3_rules and replacement is None)
):
output += char
elif replacement is not None and (
status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
):
output += replacement
elif status != "I":
raise IndexError()
except IndexError:
raise InvalidCodepoint(
"Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain))
)
return unicodedata.normalize("NFC", output)
def encode(
s: Union[str, bytes, bytearray],
strict: bool = False,
uts46: bool = False,
std3_rules: bool = False,
transitional: bool = False,
) -> bytes:
if not isinstance(s, str):
try:
s = str(s, "ascii")
except UnicodeDecodeError:
raise IDNAError("should pass a unicode string to the function rather than a byte string.")
if uts46:
s = uts46_remap(s, std3_rules, transitional)
trailing_dot = False
result = []
if strict:
labels = s.split(".")
else:
labels = _unicode_dots_re.split(s)
if not labels or labels == [""]:
raise IDNAError("Empty domain")
if labels[-1] == "":
del labels[-1]
trailing_dot = True
for label in labels:
s = alabel(label)
if s:
result.append(s)
else:
raise IDNAError("Empty label")
if trailing_dot:
result.append(b"")
s = b".".join(result)
if not valid_string_length(s, trailing_dot):
raise IDNAError("Domain too long")
return s
def decode(
s: Union[str, bytes, bytearray],
strict: bool = False,
uts46: bool = False,
std3_rules: bool = False,
) -> str:
try:
if not isinstance(s, str):
s = str(s, "ascii")
except UnicodeDecodeError:
raise IDNAError("Invalid ASCII in A-label")
if uts46:
s = uts46_remap(s, std3_rules, False)
trailing_dot = False
result = []
if not strict:
labels = _unicode_dots_re.split(s)
else:
labels = s.split(".")
if not labels or labels == [""]:
raise IDNAError("Empty domain")
if not labels[-1]:
del labels[-1]
trailing_dot = True
for label in labels:
s = ulabel(label)
if s:
result.append(s)
else:
raise IDNAError("Empty label")
if trailing_dot:
result.append("")
return ".".join(result)

File diff suppressed because it is too large

View File

@ -0,0 +1,57 @@
"""
Given a list of integers, made up of (hopefully) a small number of long runs
of consecutive integers, compute a representation of the form
((start1, end1), (start2, end2) ...). Then answer the question "was x present
in the original list?" in time O(log(# runs)).
"""
import bisect
from typing import List, Tuple
def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
"""Represent a list of integers as a sequence of ranges:
((start_0, end_0), (start_1, end_1), ...), such that the original
integers are exactly those x such that start_i <= x < end_i for some i.
Ranges are encoded as single integers (start << 32 | end), not as tuples.
"""
sorted_list = sorted(list_)
ranges = []
last_write = -1
for i in range(len(sorted_list)):
if i + 1 < len(sorted_list):
if sorted_list[i] == sorted_list[i + 1] - 1:
continue
current_range = sorted_list[last_write + 1 : i + 1]
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
last_write = i
return tuple(ranges)
def _encode_range(start: int, end: int) -> int:
return (start << 32) | end
def _decode_range(r: int) -> Tuple[int, int]:
return (r >> 32), (r & ((1 << 32) - 1))
def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
"""Determine if `int_` falls into one of the ranges in `ranges`."""
tuple_ = _encode_range(int_, 0)
pos = bisect.bisect_left(ranges, tuple_)
# we could be immediately ahead of a range (start, end)
# with start < int_ < end
if pos > 0:
left, right = _decode_range(ranges[pos - 1])
if left <= int_ < right:
return True
# or we could be immediately behind a tuple (int_, end)
if pos < len(ranges):
left, _ = _decode_range(ranges[pos])
if left == int_:
return True
return False
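# Usage sketch: runs of consecutive integers collapse to packed
# (start << 32 | end) values, so membership is a single bisect:
#
#     ranges = intranges_from_list([1, 2, 3, 10, 11])
#     intranges_contain(2, ranges)    # True  (1 <= 2 < 4)
#     intranges_contain(5, ranges)    # False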

View File

@ -0,0 +1 @@
__version__ = "3.10"

File diff suppressed because it is too large

View File

@ -0,0 +1,55 @@
# ruff: noqa: F401
import os
from .exceptions import * # noqa: F403
from .ext import ExtType, Timestamp
version = (1, 1, 1)
__version__ = "1.1.1"
if os.environ.get("MSGPACK_PUREPYTHON"):
from .fallback import Packer, Unpacker, unpackb
else:
try:
from ._cmsgpack import Packer, Unpacker, unpackb
except ImportError:
from .fallback import Packer, Unpacker, unpackb
def pack(o, stream, **kwargs):
"""
Pack object `o` and write it to `stream`
See :class:`Packer` for options.
"""
packer = Packer(**kwargs)
stream.write(packer.pack(o))
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
See :class:`Packer` for options.
"""
return Packer(**kwargs).pack(o)
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
Raises `ExtraData` when `stream` contains extra bytes.
See :class:`Unpacker` for options.
"""
data = stream.read()
return unpackb(data, **kwargs)
# aliases for compatibility with simplejson/marshal/pickle.
load = unpack
loads = unpackb
dump = pack
dumps = packb
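# Usage sketch: packb()/unpackb() round-trip a simple document
# (the payload contents are illustrative):
#
#     payload = packb({"compact": True, "schema": 0})
#     unpackb(payload)    # {'compact': True, 'schema': 0}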

View File

@ -0,0 +1,48 @@
class UnpackException(Exception):
"""Base class for some exceptions raised while unpacking.
NOTE: unpack may raise exceptions other than subclasses of
UnpackException. If you want to catch all errors, catch
Exception instead.
"""
class BufferFull(UnpackException):
pass
class OutOfData(UnpackException):
pass
class FormatError(ValueError, UnpackException):
"""Invalid msgpack format"""
class StackError(ValueError, UnpackException):
"""Too nested"""
# Deprecated. Use ValueError instead
UnpackValueError = ValueError
class ExtraData(UnpackValueError):
"""ExtraData is raised when there is trailing data.
This exception is raised only by one-shot (not streaming)
unpacking.
"""
def __init__(self, unpacked, extra):
self.unpacked = unpacked
self.extra = extra
def __str__(self):
return "unpack(b) received extra data."
# Deprecated. Use Exception instead to catch all exception during packing.
PackException = Exception
PackValueError = ValueError
PackOverflowError = OverflowError
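# Usage sketch: ExtraData carries both the decoded value and the trailing
# bytes. unpackb lives in the package root; the import is shown only for
# illustration:
#
#     from pip._vendor.msgpack import unpackb
#     try:
#         unpackb(b"\x01\x02")            # two packed integers in one buffer
#     except ExtraData as e:
#         e.unpacked, bytes(e.extra)      # (1, b'\x02')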

View File

@ -0,0 +1,170 @@
import datetime
import struct
from collections import namedtuple
class ExtType(namedtuple("ExtType", "code data")):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super().__new__(cls, code, data)
class Timestamp:
"""Timestamp represents the Timestamp extension type in msgpack.
When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`.
When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and
unpack `Timestamp`.
This class is immutable: Do not override seconds and nanoseconds.
"""
__slots__ = ["seconds", "nanoseconds"]
def __init__(self, seconds, nanoseconds=0):
"""Initialize a Timestamp object.
:param int seconds:
Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds).
May be negative.
:param int nanoseconds:
Number of nanoseconds to add to `seconds` to get fractional time.
Maximum is 999_999_999. Default is 0.
Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive nanoseconds.
"""
if not isinstance(seconds, int):
raise TypeError("seconds must be an integer")
if not isinstance(nanoseconds, int):
raise TypeError("nanoseconds must be an integer")
if not (0 <= nanoseconds < 10**9):
raise ValueError("nanoseconds must be a non-negative integer less than 999999999.")
self.seconds = seconds
self.nanoseconds = nanoseconds
def __repr__(self):
"""String representation of Timestamp."""
return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})"
def __eq__(self, other):
"""Check for equality with another Timestamp object"""
if type(other) is self.__class__:
return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
return False
def __ne__(self, other):
"""not-equals method (see :func:`__eq__()`)"""
return not self.__eq__(other)
def __hash__(self):
return hash((self.seconds, self.nanoseconds))
@staticmethod
def from_bytes(b):
"""Unpack bytes into a `Timestamp` object.
Used for pure-Python msgpack unpacking.
:param b: Payload from msgpack ext message with code -1
:type b: bytes
:returns: Timestamp object unpacked from msgpack ext payload
:rtype: Timestamp
"""
if len(b) == 4:
seconds = struct.unpack("!L", b)[0]
nanoseconds = 0
elif len(b) == 8:
data64 = struct.unpack("!Q", b)[0]
seconds = data64 & 0x00000003FFFFFFFF
nanoseconds = data64 >> 34
elif len(b) == 12:
nanoseconds, seconds = struct.unpack("!Iq", b)
else:
raise ValueError(
"Timestamp type can only be created from 32, 64, or 96-bit byte objects"
)
return Timestamp(seconds, nanoseconds)
def to_bytes(self):
"""Pack this Timestamp object into bytes.
Used for pure-Python msgpack packing.
:returns data: Payload for EXT message with code -1 (timestamp type)
:rtype: bytes
"""
if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits
data64 = self.nanoseconds << 34 | self.seconds
if data64 & 0xFFFFFFFF00000000 == 0:
# nanoseconds is zero and seconds < 2**32, so timestamp 32
data = struct.pack("!L", data64)
else:
# timestamp 64
data = struct.pack("!Q", data64)
else:
# timestamp 96
data = struct.pack("!Iq", self.nanoseconds, self.seconds)
return data
@staticmethod
def from_unix(unix_sec):
"""Create a Timestamp from posix timestamp in seconds.
:param unix_sec: Posix timestamp in seconds.
:type unix_sec: int or float
"""
seconds = int(unix_sec // 1)
nanoseconds = int((unix_sec % 1) * 10**9)
return Timestamp(seconds, nanoseconds)
def to_unix(self):
"""Get the timestamp as a floating-point value.
:returns: posix timestamp
:rtype: float
"""
return self.seconds + self.nanoseconds / 1e9
@staticmethod
def from_unix_nano(unix_ns):
"""Create a Timestamp from posix timestamp in nanoseconds.
:param int unix_ns: Posix timestamp in nanoseconds.
:rtype: Timestamp
"""
return Timestamp(*divmod(unix_ns, 10**9))
def to_unix_nano(self):
"""Get the timestamp as a unixtime in nanoseconds.
:returns: posix timestamp in nanoseconds
:rtype: int
"""
return self.seconds * 10**9 + self.nanoseconds
def to_datetime(self):
"""Get the timestamp as a UTC datetime.
:rtype: `datetime.datetime`
"""
utc = datetime.timezone.utc
return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(
seconds=self.seconds, microseconds=self.nanoseconds // 1000
)
@staticmethod
def from_datetime(dt):
"""Create a Timestamp from datetime with tzinfo.
:rtype: Timestamp
"""
return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000)
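# Usage sketch: round-trip a Timestamp through its msgpack wire format:
#
#     ts = Timestamp.from_unix_nano(1_700_000_000_123_456_789)
#     Timestamp.from_bytes(ts.to_bytes()) == ts    # True
#     ts.to_datetime().isoformat()                 # UTC datetime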

View File

@ -0,0 +1,929 @@
"""Fallback pure Python implementation of msgpack"""
import struct
import sys
from datetime import datetime as _DateTime
if hasattr(sys, "pypy_version_info"):
from __pypy__ import newlist_hint
from __pypy__.builders import BytesBuilder
_USING_STRINGBUILDER = True
class BytesIO:
def __init__(self, s=b""):
if s:
self.builder = BytesBuilder(len(s))
self.builder.append(s)
else:
self.builder = BytesBuilder()
def write(self, s):
if isinstance(s, memoryview):
s = s.tobytes()
elif isinstance(s, bytearray):
s = bytes(s)
self.builder.append(s)
def getvalue(self):
return self.builder.build()
else:
from io import BytesIO
_USING_STRINGBUILDER = False
def newlist_hint(size):
return []
from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError
from .ext import ExtType, Timestamp
EX_SKIP = 0
EX_CONSTRUCT = 1
EX_READ_ARRAY_HEADER = 2
EX_READ_MAP_HEADER = 3
TYPE_IMMEDIATE = 0
TYPE_ARRAY = 1
TYPE_MAP = 2
TYPE_RAW = 3
TYPE_BIN = 4
TYPE_EXT = 5
DEFAULT_RECURSE_LIMIT = 511
def _check_type_strict(obj, t, type=type, tuple=tuple):
if type(t) is tuple:
return type(obj) in t
else:
return type(obj) is t
def _get_data_from_buffer(obj):
view = memoryview(obj)
if view.itemsize != 1:
raise ValueError("cannot unpack from multi-byte object")
return view
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
Raises ``ExtraData`` when *packed* contains extra bytes.
Raises ``ValueError`` when *packed* is incomplete.
Raises ``FormatError`` when *packed* is not valid msgpack.
Raises ``StackError`` when *packed* contains too deeply nested data.
Other exceptions can be raised during unpacking.
See :class:`Unpacker` for options.
"""
unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs)
unpacker.feed(packed)
try:
ret = unpacker._unpack()
except OutOfData:
raise ValueError("Unpack failed: incomplete input")
except RecursionError:
raise StackError
if unpacker._got_extradata():
raise ExtraData(ret, unpacker._get_extradata())
return ret
_NO_FORMAT_USED = ""
_MSGPACK_HEADERS = {
0xC4: (1, _NO_FORMAT_USED, TYPE_BIN),
0xC5: (2, ">H", TYPE_BIN),
0xC6: (4, ">I", TYPE_BIN),
0xC7: (2, "Bb", TYPE_EXT),
0xC8: (3, ">Hb", TYPE_EXT),
0xC9: (5, ">Ib", TYPE_EXT),
0xCA: (4, ">f"),
0xCB: (8, ">d"),
0xCC: (1, _NO_FORMAT_USED),
0xCD: (2, ">H"),
0xCE: (4, ">I"),
0xCF: (8, ">Q"),
0xD0: (1, "b"),
0xD1: (2, ">h"),
0xD2: (4, ">i"),
0xD3: (8, ">q"),
0xD4: (1, "b1s", TYPE_EXT),
0xD5: (2, "b2s", TYPE_EXT),
0xD6: (4, "b4s", TYPE_EXT),
0xD7: (8, "b8s", TYPE_EXT),
0xD8: (16, "b16s", TYPE_EXT),
0xD9: (1, _NO_FORMAT_USED, TYPE_RAW),
0xDA: (2, ">H", TYPE_RAW),
0xDB: (4, ">I", TYPE_RAW),
0xDC: (2, ">H", TYPE_ARRAY),
0xDD: (4, ">I", TYPE_ARRAY),
0xDE: (2, ">H", TYPE_MAP),
0xDF: (4, ">I", TYPE_MAP),
}
class Unpacker:
"""Streaming unpacker.
Arguments:
:param file_like:
File-like object having `.read(n)` method.
If specified, unpacker reads serialized data from it and `.feed()` is not usable.
:param int read_size:
Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
:param bool use_list:
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param int timestamp:
Control how timestamp type is unpacked:
0 - Timestamp
1 - float (Seconds from the EPOCH)
2 - int (Nanoseconds from the EPOCH)
3 - datetime.datetime (UTC).
:param bool strict_map_key:
If true (default), only str or bytes are accepted for map (dict) keys.
:param object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
(See also simplejson)
:param object_pairs_hook:
When specified, it should be callable.
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
:param str unicode_errors:
The error handler for decoding unicode. (default: 'strict')
This option should be used only when you have msgpack data which
contains invalid UTF-8 string.
:param int max_buffer_size:
Limits the size of data waiting to be unpacked. 0 means 2**31-1.
The default value is 100*1024*1024 (100 MiB).
Raises a `BufferFull` exception when it is insufficient.
You should set this parameter when unpacking data from an untrusted source.
:param int max_str_len:
Deprecated, use *max_buffer_size* instead.
Limits max length of str. (default: max_buffer_size)
:param int max_bin_len:
Deprecated, use *max_buffer_size* instead.
Limits max length of bin. (default: max_buffer_size)
:param int max_array_len:
Limits max length of array.
(default: max_buffer_size)
:param int max_map_len:
Limits max length of map.
(default: max_buffer_size//2)
:param int max_ext_len:
Deprecated, use *max_buffer_size* instead.
Limits max size of ext type. (default: max_buffer_size)
Example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like)
for o in unpacker:
process(o)
Example of streaming deserialize from socket::
unpacker = Unpacker()
while True:
buf = sock.recv(1024**2)
if not buf:
break
unpacker.feed(buf)
for o in unpacker:
process(o)
Raises ``ExtraData`` when the stream contains extra bytes.
Raises ``OutOfData`` when the stream is incomplete.
Raises ``FormatError`` when the stream is not valid msgpack.
Raises ``StackError`` when the stream contains too deeply nested data.
Other exceptions can be raised during unpacking.
"""
def __init__(
self,
file_like=None,
*,
read_size=0,
use_list=True,
raw=False,
timestamp=0,
strict_map_key=True,
object_hook=None,
object_pairs_hook=None,
list_hook=None,
unicode_errors=None,
max_buffer_size=100 * 1024 * 1024,
ext_hook=ExtType,
max_str_len=-1,
max_bin_len=-1,
max_array_len=-1,
max_map_len=-1,
max_ext_len=-1,
):
if unicode_errors is None:
unicode_errors = "strict"
if file_like is None:
self._feeding = True
else:
if not callable(file_like.read):
raise TypeError("`file_like.read` must be callable")
self.file_like = file_like
self._feeding = False
#: array of bytes fed.
self._buffer = bytearray()
#: Current read position within the buffer.
self._buff_i = 0
# When Unpacker is used as an iterable, the buffer is not fully
# "consumed" between calls to next(), for efficiency's sake.
# Instead, consumption is done lazily. To make sure we raise
# BufferFull at the correct moments, we have to keep track of how
# much data is still pending. Furthermore, when the buffer is
# incomplete (that is, when we raise OutOfData) we need to roll the
# buffer back to a consistent state, which _buf_checkpoint records.
self._buf_checkpoint = 0
if not max_buffer_size:
max_buffer_size = 2**31 - 1
if max_str_len == -1:
max_str_len = max_buffer_size
if max_bin_len == -1:
max_bin_len = max_buffer_size
if max_array_len == -1:
max_array_len = max_buffer_size
if max_map_len == -1:
max_map_len = max_buffer_size // 2
if max_ext_len == -1:
max_ext_len = max_buffer_size
self._max_buffer_size = max_buffer_size
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
self._raw = bool(raw)
self._strict_map_key = bool(strict_map_key)
self._unicode_errors = unicode_errors
self._use_list = use_list
if not (0 <= timestamp <= 3):
raise ValueError("timestamp must be 0..3")
self._timestamp = timestamp
self._list_hook = list_hook
self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook
self._ext_hook = ext_hook
self._max_str_len = max_str_len
self._max_bin_len = max_bin_len
self._max_array_len = max_array_len
self._max_map_len = max_map_len
self._max_ext_len = max_ext_len
self._stream_offset = 0
if list_hook is not None and not callable(list_hook):
raise TypeError("`list_hook` is not callable")
if object_hook is not None and not callable(object_hook):
raise TypeError("`object_hook` is not callable")
if object_pairs_hook is not None and not callable(object_pairs_hook):
raise TypeError("`object_pairs_hook` is not callable")
if object_hook is not None and object_pairs_hook is not None:
raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
if not callable(ext_hook):
raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes):
assert self._feeding
view = _get_data_from_buffer(next_bytes)
if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
raise BufferFull
# Discard the already-consumed part of the buffer (everything before
# the checkpoint) before appending new data.
if self._buf_checkpoint > 0:
del self._buffer[: self._buf_checkpoint]
self._buff_i -= self._buf_checkpoint
self._buf_checkpoint = 0
# Use extend here: INPLACE_ADD (+=) doesn't reliably typecast memoryview in Jython
self._buffer.extend(view)
view.release()
def _consume(self):
"""Gets rid of the used parts of the buffer."""
self._stream_offset += self._buff_i - self._buf_checkpoint
self._buf_checkpoint = self._buff_i
def _got_extradata(self):
return self._buff_i < len(self._buffer)
def _get_extradata(self):
return self._buffer[self._buff_i :]
def read_bytes(self, n):
ret = self._read(n, raise_outofdata=False)
self._consume()
return ret
def _read(self, n, raise_outofdata=True):
# (int) -> bytearray
self._reserve(n, raise_outofdata=raise_outofdata)
i = self._buff_i
ret = self._buffer[i : i + n]
self._buff_i = i + len(ret)
return ret
def _reserve(self, n, raise_outofdata=True):
remain_bytes = len(self._buffer) - self._buff_i - n
# Fast path: buffer has n bytes already
if remain_bytes >= 0:
return
if self._feeding:
self._buff_i = self._buf_checkpoint
raise OutOfData
# Discard everything before the checkpoint before reading from the file.
if self._buf_checkpoint > 0:
del self._buffer[: self._buf_checkpoint]
self._buff_i -= self._buf_checkpoint
self._buf_checkpoint = 0
# Read from file
remain_bytes = -remain_bytes
if remain_bytes + len(self._buffer) > self._max_buffer_size:
raise BufferFull
while remain_bytes > 0:
to_read_bytes = max(self._read_size, remain_bytes)
read_data = self.file_like.read(to_read_bytes)
if not read_data:
break
assert isinstance(read_data, bytes)
self._buffer += read_data
remain_bytes -= len(read_data)
if len(self._buffer) < n + self._buff_i and raise_outofdata:
self._buff_i = 0 # rollback
raise OutOfData
def _read_header(self):
typ = TYPE_IMMEDIATE
n = 0
obj = None
self._reserve(1)
b = self._buffer[self._buff_i]
self._buff_i += 1
if b & 0b10000000 == 0:
obj = b
elif b & 0b11100000 == 0b11100000:
obj = -1 - (b ^ 0xFF)
elif b & 0b11100000 == 0b10100000:
n = b & 0b00011111
typ = TYPE_RAW
if n > self._max_str_len:
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif b & 0b11110000 == 0b10010000:
n = b & 0b00001111
typ = TYPE_ARRAY
if n > self._max_array_len:
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif b & 0b11110000 == 0b10000000:
n = b & 0b00001111
typ = TYPE_MAP
if n > self._max_map_len:
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
elif b == 0xC0:
obj = None
elif b == 0xC2:
obj = False
elif b == 0xC3:
obj = True
elif 0xC4 <= b <= 0xC6:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_bin_len:
raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})")
obj = self._read(n)
elif 0xC7 <= b <= 0xC9:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
L, n = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if L > self._max_ext_len:
raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})")
obj = self._read(L)
elif 0xCA <= b <= 0xD3:
size, fmt = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
obj = self._buffer[self._buff_i]
self._buff_i += size
elif 0xD4 <= b <= 0xD8:
size, fmt, typ = _MSGPACK_HEADERS[b]
if self._max_ext_len < size:
raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})")
self._reserve(size + 1)
n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size + 1
elif 0xD9 <= b <= 0xDB:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_str_len:
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif 0xDC <= b <= 0xDD:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_array_len:
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif 0xDE <= b <= 0xDF:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_map_len:
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
else:
raise FormatError("Unknown header: 0x%x" % b)
return typ, n, obj
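# Illustrative walk-through of the decoder above: for b == 0xdc (array 16)
# _MSGPACK_HEADERS yields (2, ">H", TYPE_ARRAY), so two more bytes are
# reserved and decoded as a big-endian unsigned short into the element
# count n; the elements themselves are produced by subsequent _unpack calls.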
def _unpack(self, execute=EX_CONSTRUCT):
typ, n, obj = self._read_header()
if execute == EX_READ_ARRAY_HEADER:
if typ != TYPE_ARRAY:
raise ValueError("Expected array")
return n
if execute == EX_READ_MAP_HEADER:
if typ != TYPE_MAP:
raise ValueError("Expected map")
return n
# TODO should we eliminate the recursion?
if typ == TYPE_ARRAY:
if execute == EX_SKIP:
for i in range(n):
# TODO check whether we need to call `list_hook`
self._unpack(EX_SKIP)
return
ret = newlist_hint(n)
for i in range(n):
ret.append(self._unpack(EX_CONSTRUCT))
if self._list_hook is not None:
ret = self._list_hook(ret)
# TODO is the interaction between `list_hook` and `use_list` ok?
return ret if self._use_list else tuple(ret)
if typ == TYPE_MAP:
if execute == EX_SKIP:
for i in range(n):
# TODO check whether we need to call hooks
self._unpack(EX_SKIP)
self._unpack(EX_SKIP)
return
if self._object_pairs_hook is not None:
ret = self._object_pairs_hook(
(self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n)
)
else:
ret = {}
for _ in range(n):
key = self._unpack(EX_CONSTRUCT)
if self._strict_map_key and type(key) not in (str, bytes):
raise ValueError("%s is not allowed for map key" % str(type(key)))
if isinstance(key, str):
key = sys.intern(key)
ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
ret = self._object_hook(ret)
return ret
if execute == EX_SKIP:
return
if typ == TYPE_RAW:
if self._raw:
obj = bytes(obj)
else:
obj = obj.decode("utf_8", self._unicode_errors)
return obj
if typ == TYPE_BIN:
return bytes(obj)
if typ == TYPE_EXT:
if n == -1: # timestamp
ts = Timestamp.from_bytes(bytes(obj))
if self._timestamp == 1:
return ts.to_unix()
elif self._timestamp == 2:
return ts.to_unix_nano()
elif self._timestamp == 3:
return ts.to_datetime()
else:
return ts
else:
return self._ext_hook(n, bytes(obj))
assert typ == TYPE_IMMEDIATE
return obj
def __iter__(self):
return self
def __next__(self):
try:
ret = self._unpack(EX_CONSTRUCT)
self._consume()
return ret
except OutOfData:
self._consume()
raise StopIteration
except RecursionError:
raise StackError
next = __next__
def skip(self):
self._unpack(EX_SKIP)
self._consume()
def unpack(self):
try:
ret = self._unpack(EX_CONSTRUCT)
except RecursionError:
raise StackError
self._consume()
return ret
def read_array_header(self):
ret = self._unpack(EX_READ_ARRAY_HEADER)
self._consume()
return ret
def read_map_header(self):
ret = self._unpack(EX_READ_MAP_HEADER)
self._consume()
return ret
def tell(self):
return self._stream_offset
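# A minimal usage sketch (hypothetical input bytes, for illustration only):
# read_array_header() lets a caller walk an array element by element instead
# of materializing it up front.
#
#     unpacker = Unpacker(use_list=False)
#     unpacker.feed(b"\x92\x01\x02")          # fixarray of length 2: [1, 2]
#     n = unpacker.read_array_header()        # -> 2
#     items = tuple(unpacker.unpack() for _ in range(n))  # -> (1, 2)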
class Packer:
"""
MessagePack Packer
Usage::
packer = Packer()
astream.write(packer.pack(a))
astream.write(packer.pack(b))
Packer's constructor has some keyword arguments:
:param default:
When specified, it should be callable.
Convert user type to builtin type that Packer supports.
See also simplejson's document.
:param bool use_single_float:
Use single precision float type for float. (default: False)
:param bool autoreset:
Reset buffer after each pack and return its content as `bytes`. (default: True)
If set to false, use `bytes()` to get the content and `.reset()` to clear the buffer.
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked for exact matches. Classes
derived from serializable types will not be serialized; they will
be treated as unsupported types and forwarded to *default*.
Additionally, tuples will not be serialized as lists.
This is useful when implementing accurate serialization
for Python types.
:param bool datetime:
If set to true, datetime with tzinfo is packed into Timestamp type.
Note that the tzinfo is stripped in the timestamp.
You can get UTC datetime with `timestamp=3` option of the Unpacker.
:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage.
:param int buf_size:
Internal buffer size. This option is used only by the C implementation.
"""
def __init__(
self,
*,
default=None,
use_single_float=False,
autoreset=True,
use_bin_type=True,
strict_types=False,
datetime=False,
unicode_errors=None,
buf_size=None,
):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._buffer = BytesIO()
self._datetime = bool(datetime)
self._unicode_errors = unicode_errors or "strict"
if default is not None and not callable(default):
raise TypeError("default must be callable")
self._default = default
def _pack(
self,
obj,
nest_limit=DEFAULT_RECURSE_LIMIT,
check=isinstance,
check_type_strict=_check_type_strict,
):
default_used = False
if self._strict_types:
check = check_type_strict
list_types = list
else:
list_types = (list, tuple)
while True:
if nest_limit < 0:
raise ValueError("recursion limit exceeded")
if obj is None:
return self._buffer.write(b"\xc0")
if check(obj, bool):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if check(obj, int):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
return self._buffer.write(struct.pack("b", obj))
if 0x80 <= obj <= 0xFF:
return self._buffer.write(struct.pack("BB", 0xCC, obj))
if -0x80 <= obj < 0:
return self._buffer.write(struct.pack(">Bb", 0xD0, obj))
if 0xFF < obj <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xCD, obj))
if -0x8000 <= obj < -0x80:
return self._buffer.write(struct.pack(">Bh", 0xD1, obj))
if 0xFFFF < obj <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xCE, obj))
if -0x80000000 <= obj < -0x8000:
return self._buffer.write(struct.pack(">Bi", 0xD2, obj))
if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF:
return self._buffer.write(struct.pack(">BQ", 0xCF, obj))
if -0x8000000000000000 <= obj < -0x80000000:
return self._buffer.write(struct.pack(">Bq", 0xD3, obj))
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = True
continue
raise OverflowError("Integer value out of range")
if check(obj, (bytes, bytearray)):
n = len(obj)
if n >= 2**32:
raise ValueError("%s is too large" % type(obj).__name__)
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, str):
obj = obj.encode("utf-8", self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise ValueError("String is too large")
self._pack_raw_header(n)
return self._buffer.write(obj)
if check(obj, memoryview):
n = obj.nbytes
if n >= 2**32:
raise ValueError("Memoryview is too large")
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, float):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
return self._buffer.write(struct.pack(">Bd", 0xCB, obj))
if check(obj, (ExtType, Timestamp)):
if check(obj, Timestamp):
code = -1
data = obj.to_bytes()
else:
code = obj.code
data = obj.data
assert isinstance(code, int)
assert isinstance(data, bytes)
L = len(data)
if L == 1:
self._buffer.write(b"\xd4")
elif L == 2:
self._buffer.write(b"\xd5")
elif L == 4:
self._buffer.write(b"\xd6")
elif L == 8:
self._buffer.write(b"\xd7")
elif L == 16:
self._buffer.write(b"\xd8")
elif L <= 0xFF:
self._buffer.write(struct.pack(">BB", 0xC7, L))
elif L <= 0xFFFF:
self._buffer.write(struct.pack(">BH", 0xC8, L))
else:
self._buffer.write(struct.pack(">BI", 0xC9, L))
self._buffer.write(struct.pack("b", code))
self._buffer.write(data)
return
if check(obj, list_types):
n = len(obj)
self._pack_array_header(n)
for i in range(n):
self._pack(obj[i], nest_limit - 1)
return
if check(obj, dict):
return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1)
if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None:
obj = Timestamp.from_datetime(obj)
default_used = True
continue
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = True
continue
if self._datetime and check(obj, _DateTime):
raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None")
raise TypeError(f"Cannot serialize {obj!r}")
def pack(self, obj):
try:
self._pack(obj)
except:
self._buffer = BytesIO() # force reset
raise
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = BytesIO()
return ret
def pack_map_pairs(self, pairs):
self._pack_map_pairs(len(pairs), pairs)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = BytesIO()
return ret
def pack_array_header(self, n):
if n >= 2**32:
raise ValueError("Array is too large")
self._pack_array_header(n)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = BytesIO()
return ret
def pack_map_header(self, n):
if n >= 2**32:
raise ValueError("Dict is too large")
self._pack_map_header(n)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = BytesIO()
return ret
def pack_ext_type(self, typecode, data):
if not isinstance(typecode, int):
raise TypeError("typecode must have int type.")
if not 0 <= typecode <= 127:
raise ValueError("typecode should be 0-127")
if not isinstance(data, bytes):
raise TypeError("data must have bytes type")
L = len(data)
if L > 0xFFFFFFFF:
raise ValueError("Too large data")
if L == 1:
self._buffer.write(b"\xd4")
elif L == 2:
self._buffer.write(b"\xd5")
elif L == 4:
self._buffer.write(b"\xd6")
elif L == 8:
self._buffer.write(b"\xd7")
elif L == 16:
self._buffer.write(b"\xd8")
elif L <= 0xFF:
self._buffer.write(b"\xc7" + struct.pack("B", L))
elif L <= 0xFFFF:
self._buffer.write(b"\xc8" + struct.pack(">H", L))
else:
self._buffer.write(b"\xc9" + struct.pack(">I", L))
self._buffer.write(struct.pack("B", typecode))
self._buffer.write(data)
def _pack_array_header(self, n):
if n <= 0x0F:
return self._buffer.write(struct.pack("B", 0x90 + n))
if n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xDC, n))
if n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xDD, n))
raise ValueError("Array is too large")
def _pack_map_header(self, n):
if n <= 0x0F:
return self._buffer.write(struct.pack("B", 0x80 + n))
if n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xDE, n))
if n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xDF, n))
raise ValueError("Dict is too large")
def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
self._pack_map_header(n)
for k, v in pairs:
self._pack(k, nest_limit - 1)
self._pack(v, nest_limit - 1)
def _pack_raw_header(self, n):
if n <= 0x1F:
self._buffer.write(struct.pack("B", 0xA0 + n))
elif self._use_bin_type and n <= 0xFF:
self._buffer.write(struct.pack(">BB", 0xD9, n))
elif n <= 0xFFFF:
self._buffer.write(struct.pack(">BH", 0xDA, n))
elif n <= 0xFFFFFFFF:
self._buffer.write(struct.pack(">BI", 0xDB, n))
else:
raise ValueError("Raw is too large")
def _pack_bin_header(self, n):
if not self._use_bin_type:
return self._pack_raw_header(n)
elif n <= 0xFF:
return self._buffer.write(struct.pack(">BB", 0xC4, n))
elif n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xC5, n))
elif n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xC6, n))
else:
raise ValueError("Bin is too large")
def bytes(self):
"""Return internal buffer contents as bytes object"""
return self._buffer.getvalue()
def reset(self):
"""Reset internal buffer.
This method is useful only when autoreset=False.
"""
self._buffer = BytesIO()
def getbuffer(self):
"""Return view of internal buffer."""
if _USING_STRINGBUILDER:
return memoryview(self.bytes())
else:
return self._buffer.getbuffer()
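# A minimal round-trip sketch (assumed values, for illustration only):
#
#     packer = Packer()
#     data = packer.pack({"a": 1})   # -> b"\x81\xa1a\x01" (fixmap, fixstr, fixint)
#     unpacker = Unpacker()
#     unpacker.feed(data)
#     assert unpacker.unpack() == {"a": 1}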

View File

@ -0,0 +1,15 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "25.0"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = f"2014 {__author__}"

Some files were not shown because too many files have changed in this diff.