Update .gitignore and build.sh

Wizzard 2024-01-23 13:52:41 -05:00
parent 955b5c1c4d
commit eea82d92da
3646 changed files with 356105 additions and 0 deletions

.gitignore vendored Normal file (1 addition)

@@ -0,0 +1 @@
venv/

build.sh Executable file (3 additions)

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
source venv/bin/activate
pyinstaller --onefile main.py

PyInstaller/__init__.py Normal file

@@ -0,0 +1,64 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
__all__ = ('HOMEPATH', 'PLATFORM', '__version__', 'DEFAULT_DISTPATH', 'DEFAULT_SPECPATH', 'DEFAULT_WORKPATH')
import os
import sys
from PyInstaller import compat
from PyInstaller.utils.git import get_repo_revision
# Note: Keep this variable as a plain string, so that it can be updated automatically when doing a release.
__version__ = '6.3.0'
# Absolute path of this package's directory. Save this early so all submodules can use the absolute path. This is
# required for example if the current directory changes prior to loading the hooks.
PACKAGEPATH = os.path.abspath(os.path.dirname(__file__))
HOMEPATH = os.path.dirname(PACKAGEPATH)
# Update __version__ as necessary.
if os.path.exists(os.path.join(HOMEPATH, 'setup.py')):
# PyInstaller is run directly from source without installation, or __version__ is called from 'setup.py'...
if compat.getenv('PYINSTALLER_DO_RELEASE') == '1':
# Suppress the git revision when doing a release.
pass
elif 'sdist' not in sys.argv:
# and 'setup.py' was not called with the 'sdist' argument. When creating a source tarball, we do not want the git
# revision in the filename.
try:
__version__ += get_repo_revision()
except Exception:
# Write to stderr, because stdout is used for an eval() statement in some subprocesses.
sys.stderr.write('WARN: failed to parse git revision')
else:
# PyInstaller was installed by 'python setup.py install'.
from importlib.metadata import version
__version__ = version('PyInstaller')
# Default values of paths where to put files created by PyInstaller. If changing these, do not forget to update the
# help text for corresponding command-line options, defined in build_main.
# Where to put created .spec file.
DEFAULT_SPECPATH = os.getcwd()
# Where to put the final frozen application.
DEFAULT_DISTPATH = os.path.join(os.getcwd(), 'dist')
# Where to put all the temporary files; .log, .pyz, etc.
DEFAULT_WORKPATH = os.path.join(os.getcwd(), 'build')
PLATFORM = compat.system + '-' + compat.architecture
# Include machine name in path to bootloader for some machines (e.g., 'arm'). Explicitly avoid doing this on macOS,
# where we keep universal2 bootloaders in Darwin-64bit folder regardless of whether we are on x86_64 or arm64.
if compat.machine and not compat.is_darwin:
PLATFORM += '-' + compat.machine
# Similarly, disambiguate musl Linux from glibc Linux.
if compat.is_musl:
PLATFORM += '-musl'
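# A hedged sketch of the same platform-string derivation using only the standard library.
# compat.system, compat.architecture, and compat.machine are PyInstaller-internal, so the
# stdlib calls below are approximate stand-ins, and the musl detection is omitted entirely.
def _sketch_bootloader_platform():
    import platform
    system = platform.system()  # e.g., 'Linux', 'Windows', 'Darwin'
    architecture = '64bit' if sys.maxsize > 2**32 else '32bit'
    result = system + '-' + architecture  # mirrors compat.system + '-' + compat.architecture
    # Mirror the machine-name suffix; macOS ships a single universal2 bootloader, so no suffix.
    if platform.machine() and system != 'Darwin':
        result += '-' + platform.machine()
    return result  # e.g., 'Linux-64bit-x86_64'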

PyInstaller/__main__.py Normal file

@@ -0,0 +1,218 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Main command-line interface to PyInstaller.
"""
from __future__ import annotations
import argparse
import os
import platform
import sys
from collections import defaultdict
from PyInstaller import __version__
from PyInstaller import log as logging
# Note: do not import anything else until compat.check_requirements function is run!
from PyInstaller import compat
try:
from argcomplete import autocomplete
except ImportError:
def autocomplete(parser):
return None
logger = logging.getLogger(__name__)
# Taken from https://stackoverflow.com/a/22157136 to format args more flexibly: any help text which begins with ``R|``
# will have all newlines preserved; the help text will be line wrapped. See
# https://docs.python.org/3/library/argparse.html#formatter-class.
# This is used by the ``--debug`` option.
class _SmartFormatter(argparse.HelpFormatter):
def _split_lines(self, text, width):
if text.startswith('R|'):
# The underlying implementation of ``RawTextHelpFormatter._split_lines`` invokes this; mimic it.
return text[2:].splitlines()
else:
# Invoke the usual formatter.
return super()._split_lines(text, width)
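# A short, hedged illustration of the ``R|`` marker handled above; the '--demo' option is
# hypothetical and used only for this sketch, which is runnable in the context of this module.
def _sketch_smart_formatter():
    demo_parser = argparse.ArgumentParser(formatter_class=_SmartFormatter)
    demo_parser.add_argument(
        '--demo',
        help="R|First line is kept as-is.\nSecond line starts on its own line.",
    )
    demo_parser.print_help()  # the embedded newline survives the help formatting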
def run_makespec(filenames, **opts):
# Split pathex by using the path separator
temppaths = opts['pathex'][:]
pathex = opts['pathex'] = []
for p in temppaths:
pathex.extend(p.split(os.pathsep))
import PyInstaller.building.makespec
spec_file = PyInstaller.building.makespec.main(filenames, **opts)
logger.info('wrote %s' % spec_file)
return spec_file
def run_build(pyi_config, spec_file, **kwargs):
import PyInstaller.building.build_main
PyInstaller.building.build_main.main(pyi_config, spec_file, **kwargs)
def __add_options(parser):
parser.add_argument(
'-v',
'--version',
action='version',
version=__version__,
help='Show program version info and exit.',
)
class _PyiArgumentParser(argparse.ArgumentParser):
def __init__(self, *args, **kwargs):
self._pyi_action_groups = defaultdict(list)
super().__init__(*args, **kwargs)
def _add_options(self, __add_options: callable, name: str = ""):
"""
Mutate self with the given callable, storing any new actions added in a named group
"""
n_actions_before = len(getattr(self, "_actions", []))
__add_options(self) # preserves old behavior
new_actions = getattr(self, "_actions", [])[n_actions_before:]
self._pyi_action_groups[name].extend(new_actions)
def _option_name(self, action):
"""
Get the option name(s) associated with an action
For options that define both short and long names, this function will
return the long names joined by "/"
"""
longnames = [name for name in action.option_strings if name.startswith("--")]
if longnames:
name = "/".join(longnames)
else:
name = action.option_strings[0]
return name
def _forbid_options(self, args: argparse.Namespace, group: str, errmsg: str = ""):
"""Forbid options from a named action group"""
options = defaultdict(str)
for action in self._pyi_action_groups[group]:
dest = action.dest
name = self._option_name(action)
if getattr(args, dest) is not self.get_default(dest):
if dest in options:
options[dest] += "/"
options[dest] += name
# if any options from the forbidden group are not the default values,
# the user must have passed them in, so issue an error report
if options:
sep = "\n "
bad = sep.join(options.values())
if errmsg:
errmsg = "\n" + errmsg
raise SystemExit(f"option(s) not allowed:{sep}{bad}{errmsg}")
def generate_parser() -> _PyiArgumentParser:
"""
Build an argparse parser for PyInstaller's main CLI.
"""
import PyInstaller.building.build_main
import PyInstaller.building.makespec
import PyInstaller.log
parser = _PyiArgumentParser(formatter_class=_SmartFormatter)
parser.prog = "pyinstaller"
parser._add_options(__add_options)
parser._add_options(PyInstaller.building.makespec.__add_options, name="makespec")
parser._add_options(PyInstaller.building.build_main.__add_options, name="build_main")
parser._add_options(PyInstaller.log.__add_options, name="log")
parser.add_argument(
'filenames',
metavar='scriptname',
nargs='+',
help="Names of script files to be processed, or exactly one .spec file. If a .spec file is specified, most "
"options are unnecessary and are ignored.",
)
return parser
def run(pyi_args: list | None = None, pyi_config: dict | None = None):
"""
pyi_args allows running PyInstaller programmatically without a subprocess
pyi_config allows checking configuration once when running multiple tests
"""
compat.check_requirements()
import PyInstaller.log
old_sys_argv = sys.argv
try:
parser = generate_parser()
autocomplete(parser)
if pyi_args is None:
pyi_args = sys.argv[1:]
try:
index = pyi_args.index("--")
except ValueError:
index = len(pyi_args)
args = parser.parse_args(pyi_args[:index])
spec_args = pyi_args[index + 1:]
PyInstaller.log.__process_options(parser, args)
# Print PyInstaller version, Python version, and platform as the first line to stdout. This helps us identify
# PyInstaller, Python, and platform version when users report issues.
logger.info('PyInstaller: %s' % __version__)
logger.info('Python: %s%s', platform.python_version(), " (conda)" if compat.is_conda else "")
logger.info('Platform: %s' % platform.platform())
# Skip creating .spec when .spec file is supplied.
if args.filenames[0].endswith('.spec'):
parser._forbid_options(
args, group="makespec", errmsg="makespec options not valid when a .spec file is given"
)
spec_file = args.filenames[0]
else:
spec_file = run_makespec(**vars(args))
sys.argv = [spec_file, *spec_args]
run_build(pyi_config, spec_file, **vars(args))
except KeyboardInterrupt:
raise SystemExit("Aborted by user request.")
except RecursionError:
from PyInstaller import _recursion_too_deep_message
_recursion_too_deep_message.raise_with_msg()
finally:
sys.argv = old_sys_argv
def _console_script_run():
# Python prepends the main script's parent directory to sys.path. When PyInstaller is run via the usual
# `pyinstaller` CLI entry point, this directory is $pythonprefix/bin, which should not be in sys.path.
if os.path.basename(sys.path[0]) in ("bin", "Scripts"):
sys.path.pop(0)
run()
if __name__ == '__main__':
run()
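# Because run() accepts pyi_args, PyInstaller can also be driven in-process, without spawning
# a subprocess. A minimal, hedged sketch mirroring build.sh at the top of this commit; the
# '--name myapp' argument is purely illustrative.
def _sketch_programmatic_build():
    run(['main.py', '--onefile', '--name', 'myapp'])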

PyInstaller/_recursion_too_deep_message.py Normal file

@@ -0,0 +1,45 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
msg = """
=============================================================
A RecursionError (maximum recursion depth exceeded) occurred.
To work around this, please follow these instructions
=============================================================
1. In your program's .spec file add this line near the top::
import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
2. Build your program by running PyInstaller with the .spec file as
argument::
pyinstaller myprog.spec
3. If this fails, you most probably hit an endless recursion in
PyInstaller. Please try to track this down as far as possible,
create a minimal example so we can reproduce and open an issue at
https://github.com/pyinstaller/pyinstaller/issues following the
instructions in the issue template. Many thanks.
Explanation: Python's stack-limit is a safety-belt against endless recursion,
eating up memory. PyInstaller imports modules recursively. If the structure of
how modules are imported within your program is awkward, this leads to the
nesting being too deep and hitting Python's stack-limit.
With the default recursion limit (1000), the recursion error occurs at about
115 nested imports, with limit 2000 at about 240, with limit 5000 at about
660.
"""
def raise_with_msg():
raise SystemExit(msg)

PyInstaller/_shared_with_waf.py Normal file

@@ -0,0 +1,86 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Code to be shared by PyInstaller and the bootloader/wscript file.
This code must not assume that either PyInstaller or any of its dependencies are installed. I.e., the only imports
allowed in here are standard library ones. Within reason, it is preferable that this file should still run under
Python 2.7, as many compiler docker images still have only Python 2 installed.
"""
import platform
import re
def _pyi_machine(machine, system):
# type: (str, str) -> str
"""
Choose an intentionally simplified architecture identifier to be used in the bootloader's directory name.
Args:
machine:
The output of ``platform.machine()`` or any known architecture alias or shorthand that may be used by a
C compiler.
system:
The output of ``platform.system()`` on the target machine.
Returns:
Either a string tag or, on platforms that don't need an architecture tag, ``None``.
Ideally, we would just use ``platform.machine()`` directly, but that makes cross-compiling the bootloader almost
impossible, because you need to know at compile time exactly what ``platform.machine()`` will be at run time, based
only on the machine name alias or shorthand reported by the C compiler at the build time. Rather, use a loose
differentiation, and trust that anyone mixing armv6l with armv6h knows what they are doing.
"""
# See the corresponding tests in tests/unit/test_compat.py for examples.
if platform.machine() == "sw_64" or platform.machine() == "loongarch64":
# This explicitly inhibits cross-compiling the bootloader for or on SunWay and LoongArch machines.
return platform.machine()
if system == "Windows":
if machine.lower().startswith("arm"):
return "arm"
else:
return "intel"
if system != "Linux":
# No architecture specifier for anything other than Linux.
# - macOS is on two 64 bit architectures, but they are merged into one "universal2" bootloader.
# - BSD supports a wide range of architectures, but according to PyPI's download statistics, every one of our
# BSD users is on x86_64. This may change in the distant future.
return
if machine.startswith(("arm", "aarch")):
# ARM has a huge number of similar and aliased sub-versions, such as armv5, armv6l, armv8h, aarch64.
return "arm"
if machine in ("thumb",):
# Reported by waf/gcc when the Thumb instruction set is enabled on 32-bit ARM. platform.machine() returns "arm"
# regardless of the instruction set.
return "arm"
if machine in ("x86_64", "x64", "x86"):
return "intel"
if re.fullmatch("i[1-6]86", machine):
return "intel"
if machine.startswith(("ppc", "powerpc")):
# PowerPC comes in 64 vs 32 bit and little vs big endian variants.
return "ppc"
if machine in ("mips64", "mips"):
return "mips"
if machine.startswith("riscv"):
return "riscv"
# Machines with no known aliases :)
if machine in ("s390x",):
return machine
# Unknown architectures are allowed by default, but will all be placed under one directory. In theory, trying to
# have multiple unknown architectures in one copy of PyInstaller will not work, but that should be sufficiently
# unlikely to ever happen.
return "unknown"
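# The mapping can be exercised directly; the expected results below follow the rules above
# (on a typical x86_64 or arm64 host, where the sw_64/loongarch64 special case is inert).
# These mirror the unit tests referenced at the top of the function.
def _sketch_machine_mapping():
    assert _pyi_machine('x86_64', 'Linux') == 'intel'
    assert _pyi_machine('aarch64', 'Linux') == 'arm'
    assert _pyi_machine('i686', 'Linux') == 'intel'
    assert _pyi_machine('arm64', 'Windows') == 'arm'
    # Non-Linux POSIX systems get no architecture tag at all:
    assert _pyi_machine('x86_64', 'Darwin') is None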

PyInstaller/archive/__init__.py Normal file

@@ -0,0 +1 @@
__author__ = 'martin'

PyInstaller/archive/pyz_crypto.py Normal file

@@ -0,0 +1,16 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
class PyiBlockCipher:
def __init__(self, key=None):
from PyInstaller.exceptions import RemovedCipherFeatureError
raise RemovedCipherFeatureError("Please remove cipher and block_cipher parameters from your spec file.")

PyInstaller/archive/readers.py Normal file

@@ -0,0 +1,227 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Python-based CArchive (PKG) reader implementation. Used only in the archive_viewer utility.
"""
import os
import struct
from PyInstaller.loader.pyimod01_archive import ZlibArchiveReader, ArchiveReadError
class NotAnArchiveError(TypeError):
pass
# Type codes for CArchive TOC entries
PKG_ITEM_BINARY = 'b' # binary
PKG_ITEM_DEPENDENCY = 'd' # runtime option
PKG_ITEM_PYZ = 'z' # zlib (pyz) - frozen Python code
PKG_ITEM_ZIPFILE = 'Z' # zlib (pyz) - frozen Python code
PKG_ITEM_PYPACKAGE = 'M' # Python package (__init__.py)
PKG_ITEM_PYMODULE = 'm' # Python module
PKG_ITEM_PYSOURCE = 's' # Python script (v3)
PKG_ITEM_DATA = 'x' # data
PKG_ITEM_RUNTIME_OPTION = 'o' # runtime option
PKG_ITEM_SPLASH = 'l' # splash resources
class CArchiveReader:
"""
Reader for PyInstaller's CArchive (PKG) archive.
"""
# Cookie - holds some information for the bootloader. C struct format definition. '!' at the beginning means network
# byte order. C struct looks like:
#
# typedef struct _cookie {
# char magic[8]; /* 'MEI\014\013\012\013\016' */
# uint32_t len; /* len of entire package */
# uint32_t TOC; /* pos (rel to start) of TableOfContents */
# int TOClen; /* length of TableOfContents */
# int pyvers; /* new in v4 */
# char pylibname[64]; /* Filename of Python dynamic library. */
# } COOKIE;
#
_COOKIE_MAGIC_PATTERN = b'MEI\014\013\012\013\016'
_COOKIE_FORMAT = '!8sIIii64s'
_COOKIE_LENGTH = struct.calcsize(_COOKIE_FORMAT)
# TOC entry:
#
# typedef struct _toc {
# int structlen; /* len of this one - including full len of name */
# uint32_t pos; /* pos rel to start of concatenation */
# uint32_t len; /* len of the data (compressed) */
# uint32_t ulen; /* len of data (uncompressed) */
# char cflag; /* is it compressed (really a byte) */
# char typcd; /* type code -'b' binary, 'z' zlib, 'm' module,
# * 's' script (v3),'x' data, 'o' runtime option */
# char name[1]; /* the name to save it as */
# /* starting in v5, we stretch this out to a mult of 16 */
# } TOC;
#
_TOC_ENTRY_FORMAT = '!iIIIBB'
_TOC_ENTRY_LENGTH = struct.calcsize(_TOC_ENTRY_FORMAT)
def __init__(self, filename):
self._filename = filename
self._start_offset = 0
self._toc_offset = 0
self._toc_length = 0
self.toc = {}
self.options = []
# Load TOC
with open(self._filename, "rb") as fp:
# Find cookie MAGIC pattern
cookie_start_offset = self._find_magic_pattern(fp, self._COOKIE_MAGIC_PATTERN)
if cookie_start_offset == -1:
raise ArchiveReadError("Could not find COOKIE magic pattern!")
# Read the whole cookie
fp.seek(cookie_start_offset, os.SEEK_SET)
cookie_data = fp.read(self._COOKIE_LENGTH)
magic, archive_length, toc_offset, toc_length, pyvers, pylib_name = \
struct.unpack(self._COOKIE_FORMAT, cookie_data)
# Compute the start of the archive
self._start_offset = (cookie_start_offset + self._COOKIE_LENGTH) - archive_length
# Verify that Python shared library name is set
if not pylib_name:
raise ArchiveReadError("Python shared library name not set in the archive!")
# Read whole toc
fp.seek(self._start_offset + toc_offset)
toc_data = fp.read(toc_length)
self.toc, self.options = self._parse_toc(toc_data)
@staticmethod
def _find_magic_pattern(fp, magic_pattern):
# Start at the end of file, and scan back-to-start
fp.seek(0, os.SEEK_END)
end_pos = fp.tell()
# Scan from back
SEARCH_CHUNK_SIZE = 8192
magic_offset = -1
while end_pos >= len(magic_pattern):
start_pos = max(end_pos - SEARCH_CHUNK_SIZE, 0)
chunk_size = end_pos - start_pos
# Is the remaining chunk large enough to hold the pattern?
if chunk_size < len(magic_pattern):
break
# Read and scan the chunk
fp.seek(start_pos, os.SEEK_SET)
buf = fp.read(chunk_size)
pos = buf.rfind(magic_pattern)
if pos != -1:
magic_offset = start_pos + pos
break
# Adjust search location for next chunk; ensure proper overlap
end_pos = start_pos + len(magic_pattern) - 1
return magic_offset
@classmethod
def _parse_toc(cls, data):
options = []
toc = {}
cur_pos = 0
while cur_pos < len(data):
# Read and parse the fixed-size TOC entry header
entry_length, entry_offset, data_length, uncompressed_length, compression_flag, typecode = \
struct.unpack(cls._TOC_ENTRY_FORMAT, data[cur_pos:(cur_pos + cls._TOC_ENTRY_LENGTH)])
cur_pos += cls._TOC_ENTRY_LENGTH
# Read variable-length name
name_length = entry_length - cls._TOC_ENTRY_LENGTH
name, *_ = struct.unpack(f'{name_length}s', data[cur_pos:(cur_pos + name_length)])
cur_pos += name_length
# Name string may contain up to 15 bytes of padding
name = name.rstrip(b'\0').decode('utf-8')
typecode = chr(typecode)
# The TOC should not contain duplicates, except for OPTION entries. Therefore, keep those
# in a separate list. For OPTION entries, the rest of the entry's fields are meaningless, anyway.
if typecode == 'o':
options.append(name)
else:
toc[name] = (entry_offset, data_length, uncompressed_length, compression_flag, typecode)
return toc, options
def extract(self, name):
"""
Extract data for the given entry name.
"""
entry = self.toc.get(name)
if entry is None:
raise KeyError(f"No entry named {name} found in the archive!")
entry_offset, data_length, uncompressed_length, compression_flag, typecode = entry
with open(self._filename, "rb") as fp:
fp.seek(self._start_offset + entry_offset, os.SEEK_SET)
data = fp.read(data_length)
if compression_flag:
import zlib
data = zlib.decompress(data)
return data
def open_embedded_archive(self, name):
"""
Open new archive reader for the embedded archive.
"""
entry = self.toc.get(name)
if entry is None:
raise KeyError(f"No entry named {name} found in the archive!")
entry_offset, data_length, uncompressed_length, compression_flag, typecode = entry
if typecode == PKG_ITEM_PYZ:
# Open as embedded archive, without extraction.
return ZlibArchiveReader(self._filename, self._start_offset + entry_offset)
elif typecode == PKG_ITEM_ZIPFILE:
raise NotAnArchiveError("Zipfile archives not supported yet!")
else:
raise NotAnArchiveError(f"Entry {name} is not a supported embedded archive!")
def pkg_archive_contents(filename, recursive=True):
"""
List the contents of the PKG / CArchive. If the `recursive` flag is set (the default), the contents of the embedded
PYZ archive are included as well.
Used by the tests.
"""
contents = []
pkg_archive = CArchiveReader(filename)
for name, toc_entry in pkg_archive.toc.items():
*_, typecode = toc_entry
contents.append(name)
if typecode == PKG_ITEM_PYZ and recursive:
pyz_archive = pkg_archive.open_embedded_archive(name)
for name in pyz_archive.toc.keys():
contents.append(name)
return contents
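# Combined with build.sh at the top of this commit, this reader can inspect the onefile
# executable that the build produces. A hedged sketch; 'dist/main' is PyInstaller's default
# output path for 'pyinstaller --onefile main.py' (use 'dist/main.exe' on Windows).
def _sketch_list_onefile_contents(executable='dist/main'):
    archive = CArchiveReader(executable)
    for entry_name, (offset, length, ulen, flag, typecode) in archive.toc.items():
        print(typecode, entry_name)
    # Or, recursively including the modules inside the embedded PYZ archive:
    return pkg_archive_contents(executable, recursive=True)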

PyInstaller/archive/writers.py Normal file

@@ -0,0 +1,407 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utilities to create data structures for embedding Python modules and additional files into the executable.
"""
import marshal
import os
import shutil
import struct
import sys
import zlib
from PyInstaller.building.utils import get_code_object, strip_paths_in_code
from PyInstaller.compat import BYTECODE_MAGIC, is_win, strict_collect_mode
from PyInstaller.loader.pyimod01_archive import PYZ_ITEM_MODULE, PYZ_ITEM_NSPKG, PYZ_ITEM_PKG
class ZlibArchiveWriter:
"""
Writer for PyInstaller's PYZ (ZlibArchive) archive. The archive is used to store collected byte-compiled Python
modules, as individually-compressed entries.
"""
_PYZ_MAGIC_PATTERN = b'PYZ\0'
_HEADER_LENGTH = 12 + 5
_COMPRESSION_LEVEL = 6 # zlib compression level
def __init__(self, filename, entries, code_dict=None):
"""
filename
Target filename of the archive.
entries
An iterable containing entries in the form of tuples: (name, src_path, typecode), where `name` is the name
under which the resource is stored (e.g., python module name, without suffix), `src_path` is name of the
file from which the resource is read, and `typecode` is the Analysis-level TOC typecode (`PYMODULE`).
code_dict
Optional code dictionary containing code objects for analyzed/collected python modules.
"""
code_dict = code_dict or {}
with open(filename, "wb") as fp:
# Reserve space for the header.
fp.write(b'\0' * self._HEADER_LENGTH)
# Write entries' data and collect TOC entries
toc = []
for entry in entries:
toc_entry = self._write_entry(fp, entry, code_dict)
toc.append(toc_entry)
# Write TOC
toc_offset = fp.tell()
toc_data = marshal.dumps(toc)
fp.write(toc_data)
# Write header:
# - PYZ magic pattern (4 bytes)
# - python bytecode magic pattern (4 bytes)
# - TOC offset (32-bit int, 4 bytes)
# - 4 unused bytes
fp.seek(0, os.SEEK_SET)
fp.write(self._PYZ_MAGIC_PATTERN)
fp.write(BYTECODE_MAGIC)
fp.write(struct.pack('!i', toc_offset))
@classmethod
def _write_entry(cls, fp, entry, code_dict):
name, src_path, typecode = entry
assert typecode == 'PYMODULE'
typecode = PYZ_ITEM_MODULE
if src_path in ('-', None):
# This is a namespace package; modulegraph marks these by using the filename '-'. (But it wants to use
# None, so check for None, too, to be forward-compatible.)
typecode = PYZ_ITEM_NSPKG
else:
src_basename, _ = os.path.splitext(os.path.basename(src_path))
if src_basename == '__init__':
typecode = PYZ_ITEM_PKG
data = marshal.dumps(code_dict[name])
# Compress the data (the former encryption step was removed along with the cipher feature).
obj = zlib.compress(data, cls._COMPRESSION_LEVEL)
# Create TOC entry
toc_entry = (name, (typecode, fp.tell(), len(obj)))
# Write data blob
fp.write(obj)
return toc_entry
class CArchiveWriter:
"""
Writer for PyInstaller's CArchive (PKG) archive.
This archive contains all files that are bundled within an executable; a PYZ (ZlibArchive), DLLs, Python C
extensions, and other data files that are bundled in onefile mode.
The archive can be read from either C (bootloader code at application's run-time) or Python (for debug purposes).
"""
_COOKIE_MAGIC_PATTERN = b'MEI\014\013\012\013\016'
# For cookie and TOC entry structure, see `PyInstaller.archive.readers.CArchiveReader`.
_COOKIE_FORMAT = '!8sIIii64s'
_COOKIE_LENGTH = struct.calcsize(_COOKIE_FORMAT)
_TOC_ENTRY_FORMAT = '!iIIIBB'
_TOC_ENTRY_LENGTH = struct.calcsize(_TOC_ENTRY_FORMAT)
_COMPRESSION_LEVEL = 9 # zlib compression level
def __init__(self, filename, entries, pylib_name):
"""
filename
Target filename of the archive.
entries
An iterable containing entries in the form of tuples: (dest_name, src_name, compress, typecode), where
`dest_name` is the name under which the resource is stored in the archive (and name under which it is
extracted at runtime), `src_name` is the name of the file from which the resource is read, `compress` is a
boolean compression flag, and `typecode` is the Analysis-level TOC typecode.
pylib_name
Name of the python shared library.
"""
self._collected_names = set() # Track collected names for strict package mode.
with open(filename, "wb") as fp:
# Write entries' data and collect TOC entries
toc = []
for entry in entries:
toc_entry = self._write_entry(fp, entry)
toc.append(toc_entry)
# Write TOC
toc_offset = fp.tell()
toc_data = self._serialize_toc(toc)
toc_length = len(toc_data)
fp.write(toc_data)
# Write cookie
archive_length = toc_offset + toc_length + self._COOKIE_LENGTH
pyvers = sys.version_info[0] * 100 + sys.version_info[1]
cookie_data = struct.pack(
self._COOKIE_FORMAT,
self._COOKIE_MAGIC_PATTERN,
archive_length,
toc_offset,
toc_length,
pyvers,
pylib_name.encode('ascii'),
)
fp.write(cookie_data)
def _write_entry(self, fp, entry):
dest_name, src_name, compress, typecode = entry
# Write OPTION entries as-is, without normalizing them. This also exempts them from the duplication check,
# allowing them to be specified multiple times.
if typecode == 'o':
return self._write_blob(fp, b"", dest_name, typecode)
# On Windows, ensure that forward slashes in paths are converted to back slashes '\\', as the Windows
# bootloader works only with back slashes.
dest_name = os.path.normpath(dest_name)
if is_win and os.path.sep == '/':
# When building under MSYS, the above path normalization uses Unix-style separators, so replace them
# manually.
dest_name = dest_name.replace(os.path.sep, '\\')
# Strict pack/collect mode: keep track of the destination names, and raise an error if we try to add a duplicate
# (a file with the same destination name, subject to OS case-normalization rules).
if strict_collect_mode:
normalized_dest = None
if typecode in ('s', 'm', 'M'):
# Exempt python source scripts and modules from the check.
pass
else:
# Everything else; normalize the case
normalized_dest = os.path.normcase(dest_name)
# Check for existing entry, if applicable
if normalized_dest:
if normalized_dest in self._collected_names:
raise ValueError(
f"Attempting to collect a duplicated file into CArchive: {normalized_dest} (type: {typecode})"
)
self._collected_names.add(normalized_dest)
if typecode == 'd':
# Dependency; merge src_name (= reference path prefix) and dest_name (= name) into the single-string format
# that is parsed by the bootloader.
return self._write_blob(fp, b"", f"{src_name}:{dest_name}", typecode)
elif typecode == 's':
# If it is a source code file, compile it to a code object and marshal the object, so it can be unmarshalled
# by the bootloader.
code = get_code_object(dest_name, src_name)
code = strip_paths_in_code(code)
return self._write_blob(fp, marshal.dumps(code), dest_name, typecode, compress=compress)
elif typecode in ('m', 'M'):
# Read the PYC file
with open(src_name, "rb") as in_fp:
data = in_fp.read()
assert data[:4] == BYTECODE_MAGIC
# Skip the PYC header, load the code object.
code = marshal.loads(data[16:])
code = strip_paths_in_code(code)
# These module entries are loaded and executed within the bootloader, which requires only the code
# object, without the PYC header.
return self._write_blob(fp, marshal.dumps(code), dest_name, typecode, compress=compress)
elif typecode == 'n':
# Symbolic link; store target name (as NULL-terminated string)
data = src_name.encode('utf-8') + b'\x00'
return self._write_blob(fp, data, dest_name, typecode, compress=compress)
else:
return self._write_file(fp, src_name, dest_name, typecode, compress=compress)
def _write_blob(self, out_fp, blob: bytes, dest_name, typecode, compress=False):
"""
Write the binary contents (**blob**) of a small file to the archive and return the corresponding CArchive TOC
entry.
"""
data_offset = out_fp.tell()
data_length = len(blob)
if compress:
blob = zlib.compress(blob, level=self._COMPRESSION_LEVEL)
out_fp.write(blob)
return (data_offset, len(blob), data_length, int(compress), typecode, dest_name)
def _write_file(self, out_fp, src_name, dest_name, typecode, compress=False):
"""
Stream copy a large file into the archive and return the corresponding CArchive TOC entry.
"""
data_offset = out_fp.tell()
data_length = os.stat(src_name).st_size
with open(src_name, 'rb') as in_fp:
if compress:
tmp_buffer = bytearray(16 * 1024)
compressor = zlib.compressobj(self._COMPRESSION_LEVEL)
while True:
num_read = in_fp.readinto(tmp_buffer)
if not num_read:
break
out_fp.write(compressor.compress(tmp_buffer[:num_read]))
out_fp.write(compressor.flush())
else:
shutil.copyfileobj(in_fp, out_fp)
return (data_offset, out_fp.tell() - data_offset, data_length, int(compress), typecode, dest_name)
@classmethod
def _serialize_toc(cls, toc):
serialized_toc = []
for toc_entry in toc:
data_offset, compressed_length, data_length, compress, typecode, name = toc_entry
# Encode names as UTF-8. This should be safe, as standard python modules only contain ASCII characters (and
# standard shared libraries should have the same), and thus the C code can still handle this correctly.
name = name.encode('utf-8')
name_length = len(name) + 1 # Add 1 for string-terminating zero byte.
# Ensure TOC entries are aligned on 16-byte boundary, so they can be read by bootloader (C code) on
# platforms with strict data alignment requirements (for example linux on `armhf`/`armv7`, such as 32-bit
# Debian Buster on Raspberry Pi).
entry_length = cls._TOC_ENTRY_LENGTH + name_length
if entry_length % 16 != 0:
padding_length = 16 - (entry_length % 16)
name_length += padding_length
# Serialize
serialized_entry = struct.pack(
cls._TOC_ENTRY_FORMAT + f"{name_length}s", # "Ns" format automatically pads the string with zero bytes.
cls._TOC_ENTRY_LENGTH + name_length,
data_offset,
compressed_length,
data_length,
compress,
ord(typecode),
name,
)
serialized_toc.append(serialized_entry)
return b''.join(serialized_toc)
class SplashWriter:
"""
Writer for the splash screen resources archive.
The resulting archive is added as an entry into the CArchive with the typecode PKG_ITEM_SPLASH.
"""
# This struct describes the splash resources as it will appear in a buffer inside the bootloader. All necessary
# parts are bundled; the *_len and *_offset fields describe the data beyond this header definition.
# Whereas the script and image fields are binary data, the requirements field describes an array of strings. Each
# string is null-terminated in order to easily iterate over this list from within C.
#
# typedef struct _splash_data_header {
# char tcl_libname[16]; /* Name of tcl library, e.g. tcl86t.dll */
# char tk_libname[16]; /* Name of tk library, e.g. tk86t.dll */
# char tk_lib[16]; /* Tk Library generic, e.g. "tk/" */
# char rundir[16]; /* temp folder inside extraction path in
# * which the dependencies are extracted */
#
# int script_len; /* Length of the script */
# int script_offset; /* Offset (rel to start) of the script */
#
# int image_len; /* Length of the image data */
# int image_offset; /* Offset (rel to start) of the image */
#
# int requirements_len;
# int requirements_offset;
#
# } SPLASH_DATA_HEADER;
#
_HEADER_FORMAT = '!16s 16s 16s 16s ii ii ii'
_HEADER_LENGTH = struct.calcsize(_HEADER_FORMAT)
# The created archive is compressed by the CArchive, so no need to compress the data here.
def __init__(self, filename, name_list, tcl_libname, tk_libname, tklib, rundir, image, script):
"""
Writer for splash screen resources that are bundled into the CArchive as a single archive/entry.
:param filename: The filename of the archive to create
:param name_list: List of filenames for the requirements array
:param str tcl_libname: Name of the tcl shared library file
:param str tk_libname: Name of the tk shared library file
:param str tklib: Root of tk library (e.g. tk/)
:param str rundir: Unique path to extract requirements to
:param Union[str, bytes] image: Image-like object
:param str script: The tcl/tk script to execute to create the screen.
"""
# On Windows, ensure that forward slashes in dependency names are converted to back slashes '\\', as the
# Windows bootloader works only with back slashes.
def _normalize_filename(filename):
filename = os.path.normpath(filename)
if is_win and os.path.sep == '/':
# When building under MSYS, the above path normalization uses Unix-style separators, so replace them
# manually.
filename = filename.replace(os.path.sep, '\\')
return filename
name_list = [_normalize_filename(name) for name in name_list]
with open(filename, "wb") as fp:
# Reserve space for the header.
fp.write(b'\0' * self._HEADER_LENGTH)
# Serialize the requirements list. This list (really an array) contains the names of all files the bootloader
# needs to extract before the splash screen can be started. The implementation terminates every name with a
# null byte, which keeps the list compact in memory and makes it iterable from C.
requirements_len = 0
requirements_offset = fp.tell()
for name in name_list:
name = name.encode('utf-8') + b'\0'
fp.write(name)
requirements_len += len(name)
# Write splash script
script_offset = fp.tell()
script_len = len(script)
fp.write(script.encode("utf-8"))
# Write splash image. If image is a bytes buffer, it is written directly into the archive. Otherwise, it
# is assumed to be a path and the file is copied into the archive.
image_offset = fp.tell()
if isinstance(image, bytes):
# Image was converted by PIL/Pillow and is already in buffer
image_len = len(image)
fp.write(image)
else:
# Read image into buffer
with open(image, 'rb') as image_fp:
image_data = image_fp.read()
image_len = len(image_data)
fp.write(image_data)
del image_data
# Write header
header_data = struct.pack(
self._HEADER_FORMAT,
tcl_libname.encode("utf-8"),
tk_libname.encode("utf-8"),
tklib.encode("utf-8"),
rundir.encode("utf-8"),
script_len,
script_offset,
image_len,
image_offset,
requirements_len,
requirements_offset,
)
fp.seek(0, os.SEEK_SET)
fp.write(header_data)
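# The 16-byte TOC alignment performed in CArchiveWriter._serialize_toc can be checked in
# isolation. A hedged sketch using the same struct format defined above; the entry name
# 'mypackage/data.bin' is made up.
def _sketch_toc_alignment():
    toc_entry_format = '!iIIIBB'
    toc_entry_length = struct.calcsize(toc_entry_format)  # 18 bytes
    name = 'mypackage/data.bin'.encode('utf-8')
    name_length = len(name) + 1  # plus the string-terminating zero byte
    entry_length = toc_entry_length + name_length
    if entry_length % 16 != 0:
        name_length += 16 - (entry_length % 16)  # pad the name field to a 16-byte boundary
    entry = struct.pack(
        toc_entry_format + f'{name_length}s',  # '{N}s' zero-pads the name automatically
        toc_entry_length + name_length,
        0, 0, 0,  # dummy data offset, compressed length, uncompressed length
        0,  # compression flag
        ord('x'),  # 'x' = data typecode
        name,
    )
    assert len(entry) % 16 == 0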

PyInstaller/building/__init__.py Normal file

@@ -0,0 +1 @@
#

PyInstaller/building/api.py: file diff suppressed because it is too large

PyInstaller/building/build_main.py: file diff suppressed because it is too large

PyInstaller/building/datastruct.py Normal file

@@ -0,0 +1,457 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
import os
import pathlib
import warnings
from PyInstaller import log as logging
from PyInstaller.building.utils import _check_guts_eq
from PyInstaller.utils import misc
logger = logging.getLogger(__name__)
def unique_name(entry):
"""
Return the filename used to enforce uniqueness for the given TOC entry.
Parameters
----------
entry : tuple
Returns
-------
unique_name: str
"""
name, path, typecode = entry
if typecode in ('BINARY', 'DATA', 'EXTENSION', 'DEPENDENCY'):
name = os.path.normcase(name)
return name
# This class is deprecated and has been replaced by plain lists with explicit normalization (de-duplication) via
# `normalize_toc` and `normalize_pyz_toc` helper functions.
class TOC(list):
"""
TOC (Table of Contents) class is a list of tuples of the form (name, path, typecode).
typecode name path description
--------------------------------------------------------------------------------------
EXTENSION Python internal name. Full path name in build. Extension module.
PYSOURCE Python internal name. Full path name in build. Script.
PYMODULE Python internal name. Full path name in build. Pure Python module (including __init__ modules).
PYZ Runtime name. Full path name in build. A .pyz archive (ZlibArchive data structure).
PKG Runtime name. Full path name in build. A .pkg archive (Carchive data structure).
BINARY Runtime name. Full path name in build. Shared library.
DATA Runtime name. Full path name in build. Arbitrary files.
OPTION The option. Unused. Python runtime option (frozen into executable).
A TOC contains various types of files; it contains no duplicates and preserves order.
PyInstaller uses the TOC data type to collect the necessary files and bundle them into an executable.
"""
def __init__(self, initlist=None):
super().__init__()
# Deprecation warning
warnings.warn(
"TOC class is deprecated. Use a plain list of 3-element tuples instead.",
DeprecationWarning,
stacklevel=2,
)
self.filenames = set()
if initlist:
for entry in initlist:
self.append(entry)
def append(self, entry):
if not isinstance(entry, tuple):
logger.info("TOC found a %s, not a tuple", entry)
raise TypeError("Expected tuple, not %s." % type(entry).__name__)
unique = unique_name(entry)
if unique not in self.filenames:
self.filenames.add(unique)
super().append(entry)
def insert(self, pos, entry):
if not isinstance(entry, tuple):
logger.info("TOC found a %s, not a tuple", entry)
raise TypeError("Expected tuple, not %s." % type(entry).__name__)
unique = unique_name(entry)
if unique not in self.filenames:
self.filenames.add(unique)
super().insert(pos, entry)
def __add__(self, other):
result = TOC(self)
result.extend(other)
return result
def __radd__(self, other):
result = TOC(other)
result.extend(self)
return result
def __iadd__(self, other):
for entry in other:
self.append(entry)
return self
def extend(self, other):
# TODO: see if this can be done more efficiently without the loop, e.g., by not using a list as the base class at all.
for entry in other:
self.append(entry)
def __sub__(self, other):
# Construct new TOC with entries not contained in the other TOC
other = TOC(other)
return TOC([entry for entry in self if unique_name(entry) not in other.filenames])
def __rsub__(self, other):
result = TOC(other)
return result.__sub__(self)
def __setitem__(self, key, value):
if isinstance(key, slice):
if key == slice(None, None, None):
# special case: set the entire list
self.filenames = set()
self.clear()
self.extend(value)
return
else:
raise KeyError("TOC.__setitem__ doesn't handle slices")
else:
old_value = self[key]
old_name = unique_name(old_value)
self.filenames.remove(old_name)
new_name = unique_name(value)
if new_name not in self.filenames:
self.filenames.add(new_name)
super(TOC, self).__setitem__(key, value)
class Target:
invcnum = 0
def __init__(self):
from PyInstaller.config import CONF
# Get a (per class) unique number to avoid conflicts between toc objects
self.invcnum = self.__class__.invcnum
self.__class__.invcnum += 1
self.tocfilename = os.path.join(CONF['workpath'], '%s-%02d.toc' % (self.__class__.__name__, self.invcnum))
self.tocbasename = os.path.basename(self.tocfilename)
self.dependencies = []
def __postinit__(self):
"""
Check if the target needs to be rebuilt and, if so, re-assemble.
`__postinit__` is to be called at the end of `__init__` of every subclass of Target. `__init__` is meant to
set up the parameters, and `__postinit__` checks whether a rebuild is required and, if so, calls `assemble()`.
"""
logger.info("checking %s", self.__class__.__name__)
data = None
last_build = misc.mtime(self.tocfilename)
if last_build == 0:
logger.info("Building %s because %s is non-existent", self.__class__.__name__, self.tocbasename)
else:
try:
data = misc.load_py_data_struct(self.tocfilename)
except Exception:
logger.info("Building because %s is bad", self.tocbasename)
else:
# create a dict for easier access
data = dict(zip((g[0] for g in self._GUTS), data))
# assemble if previous data was not found or is outdated
if not data or self._check_guts(data, last_build):
self.assemble()
self._save_guts()
_GUTS = []
def _check_guts(self, data, last_build):
"""
Returns True if rebuild/assemble is required.
"""
if len(data) != len(self._GUTS):
logger.info("Building because %s is bad", self.tocbasename)
return True
for attr, func in self._GUTS:
if func is None:
# no check for this value
continue
if func(attr, data[attr], getattr(self, attr), last_build):
return True
return False
def _save_guts(self):
"""
Save the input parameters and the work-product of this run to maybe avoid regenerating it later.
"""
data = tuple(getattr(self, g[0]) for g in self._GUTS)
misc.save_py_data_struct(self.tocfilename, data)
class Tree(Target, list):
"""
This class is a way of creating a TOC (Table of Contents) list that describes some or all of the files within a
directory.
"""
def __init__(self, root=None, prefix=None, excludes=None, typecode='DATA'):
"""
root
The root of the tree (on the build system).
prefix
Optional prefix for the names on the target system.
excludes
A list of names to exclude. Two forms are allowed:
name
Files with this basename will be excluded (do not include the path).
*.ext
Any file with the given extension will be excluded.
typecode
The typecode to be used for all files found in this tree. See the TOC class for information about
the typecodes.
"""
Target.__init__(self)
list.__init__(self)
self.root = root
self.prefix = prefix
self.excludes = excludes
self.typecode = typecode
if excludes is None:
self.excludes = []
self.__postinit__()
_GUTS = ( # input parameters
('root', _check_guts_eq),
('prefix', _check_guts_eq),
('excludes', _check_guts_eq),
('typecode', _check_guts_eq),
('data', None), # tested below
# no calculated/analysed values
)
def _check_guts(self, data, last_build):
if Target._check_guts(self, data, last_build):
return True
# Walk the collected directories and check if they have changed - which would mean files have been added or
# removed. There is no need to check the files themselves, since `Tree` is only about the directory contents
# (which is the list of files).
stack = [data['root']]
while stack:
d = stack.pop()
if misc.mtime(d) > last_build:
logger.info("Building %s because directory %s changed", self.tocbasename, d)
return True
for nm in os.listdir(d):
path = os.path.join(d, nm)
if os.path.isdir(path):
stack.append(path)
self[:] = data['data'] # collected files
return False
def _save_guts(self):
# Use the attribute `data` to save the list
self.data = self
super()._save_guts()
del self.data
def assemble(self):
logger.info("Building Tree %s", self.tocbasename)
stack = [(self.root, self.prefix)]
excludes = set()
xexcludes = set()
for name in self.excludes:
if name.startswith('*'):
xexcludes.add(name[1:])
else:
excludes.add(name)
result = []
while stack:
dir, prefix = stack.pop()
for filename in os.listdir(dir):
if filename in excludes:
continue
ext = os.path.splitext(filename)[1]
if ext in xexcludes:
continue
fullfilename = os.path.join(dir, filename)
if prefix:
resfilename = os.path.join(prefix, filename)
else:
resfilename = filename
if os.path.isdir(fullfilename):
stack.append((fullfilename, resfilename))
else:
result.append((resfilename, fullfilename, self.typecode))
self[:] = result
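# In a .spec file, Tree is the usual way to pull a whole directory into the build. A hedged
# sketch; the directory and file names are made up, and CONF['workpath'] must already be
# configured (as it is when PyInstaller processes a spec file).
def _sketch_tree_usage():
    extra_datas = Tree('assets', prefix='assets', excludes=['*.tmp', 'README.md'])
    # Tree is a list subclass holding (dest_name, src_name, 'DATA') tuples, suitable for
    # passing to COLLECT()/EXE() alongside the Analysis results.
    return extra_datas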
def normalize_toc(toc):
# Default priority: 0
_TOC_TYPE_PRIORITIES = {
# DEPENDENCY entries need to replace original entries, so they need the highest priority.
'DEPENDENCY': 3,
# SYMLINK entries have higher priority than other regular entries
'SYMLINK': 2,
# BINARY/EXTENSION entries undergo additional processing, so give them precedence over DATA and other entries.
'BINARY': 1,
'EXTENSION': 1,
}
def _type_case_normalization_fcn(typecode):
# Case-normalize all entries except OPTION.
return typecode not in {
"OPTION",
}
return _normalize_toc(toc, _TOC_TYPE_PRIORITIES, _type_case_normalization_fcn)
def normalize_pyz_toc(toc):
# Default priority: 0
_TOC_TYPE_PRIORITIES = {
# Ensure that modules are never shadowed by PYZ-embedded data files.
'PYMODULE': 1,
}
return _normalize_toc(toc, _TOC_TYPE_PRIORITIES)
def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda typecode: False):
options_toc = []
tmp_toc = dict()
for dest_name, src_name, typecode in toc:
# Exempt OPTION entries from de-duplication processing. Some options might allow being specified multiple times.
if typecode == 'OPTION':
options_toc.append(((dest_name, src_name, typecode)))
continue
# Always sanitize the dest_name with `os.path.normpath` to remove any local loops with parent directory path
# components. `pathlib` does not seem to offer equivalent functionality.
dest_name = os.path.normpath(dest_name)
# Normalize the destination name for uniqueness. Use `pathlib.PurePath` to ensure that keys are both
# case-normalized (on OSes where applicable) and directory-separator normalized (just in case).
if type_case_normalization_fcn(typecode):
entry_key = pathlib.PurePath(dest_name)
else:
entry_key = dest_name
existing_entry = tmp_toc.get(entry_key)
if existing_entry is None:
# Entry does not exist - insert
tmp_toc[entry_key] = (dest_name, src_name, typecode)
else:
# Entry already exists - replace if its typecode has higher priority
_, _, existing_typecode = existing_entry
if toc_type_priorities.get(typecode, 0) > toc_type_priorities.get(existing_typecode, 0):
tmp_toc[entry_key] = (dest_name, src_name, typecode)
# Return the items as a list. The order matches the original order, because python dicts maintain insertion order.
# The exceptions are the OPTION entries, which are now placed at the beginning of the TOC.
return options_toc + list(tmp_toc.values())
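# The priority rules can be demonstrated with a small TOC; the file names are made up.
def _sketch_normalize_toc():
    toc = [
        ('libfoo.so', '/build/libfoo.so', 'DATA'),
        ('libfoo.so', '/usr/lib/libfoo.so', 'BINARY'),  # priority 1 beats DATA's 0
        ('v', '', 'OPTION'),
        ('v', '', 'OPTION'),  # OPTION entries are exempt from de-duplication
    ]
    # OPTION entries come first, then the single surviving libfoo.so entry (the BINARY one).
    assert normalize_toc(toc) == [
        ('v', '', 'OPTION'),
        ('v', '', 'OPTION'),
        ('libfoo.so', '/usr/lib/libfoo.so', 'BINARY'),
    ]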
def toc_process_symbolic_links(toc):
"""
Process TOC entries and replace entries whose files are symbolic links with SYMLINK entries (provided the original
file is also being collected).
"""
# Set of all destination names, for fast look-up.
all_dest_files = set([dest_name for dest_name, src_name, typecode in toc])
# Process the TOC to create SYMLINK entries
new_toc = []
for entry in toc:
dest_name, src_name, typecode = entry
# Skip entries that are already symbolic links
if typecode == 'SYMLINK':
new_toc.append(entry)
continue
# Skip entries without valid source name (e.g., OPTION)
if not src_name:
new_toc.append(entry)
continue
# Source path is not a symbolic link (i.e., it is a regular file or directory)
if not os.path.islink(src_name):
new_toc.append(entry)
continue
# Try preserving the symbolic link, under strict relative-relationship-preservation check
symlink_entry = _try_preserving_symbolic_link(dest_name, src_name, all_dest_files)
if symlink_entry:
new_toc.append(symlink_entry)
else:
new_toc.append(entry)
return new_toc
def _try_preserving_symbolic_link(dest_name, src_name, all_dest_files):
seen_src_files = set()
# Set initial values for the loop
ref_src_file = src_name
ref_dest_file = dest_name
while True:
# Guard against cyclic links...
if ref_src_file in seen_src_files:
break
seen_src_files.add(ref_src_file)
# Stop when referenced source file is not a symbolic link anymore.
if not os.path.islink(ref_src_file):
break
# Read the symbolic link's target, but do not fully resolve it using os.path.realpath(), because there might be
# other symbolic links involved as well (for example, /lib64 -> /usr/lib64 whereas we are processing
# /lib64/liba.so -> /lib64/liba.so.1)
symlink_target = os.readlink(ref_src_file)
if os.path.isabs(symlink_target):
break # We support only relative symbolic links.
ref_dest_file = os.path.join(os.path.dirname(ref_dest_file), symlink_target)
ref_dest_file = os.path.normpath(ref_dest_file) # remove any '..'
ref_src_file = os.path.join(os.path.dirname(ref_src_file), symlink_target)
ref_src_file = os.path.normpath(ref_src_file) # remove any '..'
# Check if referenced destination file is valid (i.e., we are collecting a file under referenced name).
if ref_dest_file in all_dest_files:
# Sanity check: original source name and current referenced source name must, after complete resolution,
# point to the same file.
if os.path.realpath(src_name) == os.path.realpath(ref_src_file):
# Compute relative link for the destination file (might be modified, if we went over non-collected
# intermediate links).
rel_link = os.path.relpath(ref_dest_file, os.path.dirname(dest_name))
return dest_name, rel_link, 'SYMLINK'
# If referenced destination is not valid, do another iteration in case we are dealing with chained links and we
# are not collecting an intermediate link...
return None
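# A small, self-contained illustration of the symbolic-link handling above (POSIX-only;
# the file names are made up).
def _sketch_symlink_toc():
    import tempfile
    tmp = tempfile.mkdtemp()
    real = os.path.join(tmp, 'liba.so.1')
    link = os.path.join(tmp, 'liba.so')
    open(real, 'wb').close()
    os.symlink('liba.so.1', link)  # relative link target, as required above
    toc = [
        ('liba.so.1', real, 'BINARY'),
        ('liba.so', link, 'BINARY'),
    ]
    # The second entry becomes ('liba.so', 'liba.so.1', 'SYMLINK'), because the link target
    # is also collected under the referenced destination name.
    return toc_process_symbolic_links(toc)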

PyInstaller/building/icon.py Normal file

@@ -0,0 +1,90 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2022-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
from typing import Tuple
import os
import hashlib
def normalize_icon_type(icon_path: str, allowed_types: Tuple[str, ...], convert_type: str, workpath: str) -> str:
"""
Returns a valid icon path or raises an Exception on error.
Ensures that the icon exists and, if necessary, attempts to convert it to the correct OS-specific format using Pillow.
Takes:
icon_path - the icon given by the user
allowed_types - a tuple of icon formats that should be allowed through
EX: ("ico", "exe")
convert_type - the type to attempt conversion to if necessary
EX: "icns"
workpath - the temp directory to save any newly generated image files
"""
# explicitly error if file not found
if not os.path.exists(icon_path):
raise FileNotFoundError(f"Icon input file {icon_path} not found")
_, extension = os.path.splitext(icon_path)
extension = extension[1:] # get rid of the "." in ".whatever"
# if the file is already in the right format, pass it back unchanged
if extension in allowed_types:
# Check both the suffix and the header of the file to guard against the user confusing image types.
signatures = hex_signatures[extension]
with open(icon_path, "rb") as f:
header = f.read(max(len(s) for s in signatures))
if any(list(header)[:len(s)] == s for s in signatures):
return icon_path
# The icon type is wrong! Let's try and import PIL
try:
from PIL import Image as PILImage
import PIL
except ImportError:
raise ValueError(
f"Received icon image '{icon_path}' which exists but is not in the correct format. On this platform, "
f"only {allowed_types} images may be used as icons. If Pillow is installed, automatic conversion will "
f"be attempted. Please install Pillow or convert your '{extension}' file to one of {allowed_types} "
f"and try again."
)
# Let's try to use PIL to convert the icon file type
try:
_generated_name = f"generated-{hashlib.sha256(icon_path.encode()).hexdigest()}.{convert_type}"
generated_icon = os.path.join(workpath, _generated_name)
with PILImage.open(icon_path) as im:
# If an image uses a custom palette + transparency, convert it to RGBA for a better alpha mask depth.
if im.mode == "P" and im.info.get("transparency", None) is not None:
# The bit depth of the alpha channel will be higher, and the images will look better when eventually
# scaled to multiple sizes (16,24,32,..) for the ICO format for example.
im = im.convert("RGBA")
im.save(generated_icon)
icon_path = generated_icon
except PIL.UnidentifiedImageError:
raise ValueError(
f"Something went wrong converting icon image '{icon_path}' to '.{convert_type}' with Pillow, "
f"perhaps the image format is unsupported. Try again with a different file or use a file that can "
f"be used without conversion on this platform: {allowed_types}"
)
return icon_path
# Possible initial bytes of icon types PyInstaller needs to be able to recognise.
# Taken from: https://en.wikipedia.org/wiki/List_of_file_signatures
hex_signatures = {
"png": [[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]],
"exe": [[0x4D, 0x5A], [0x5A, 0x4D]],
"ico": [[0x00, 0x00, 0x01, 0x00]],
"icns": [[0x69, 0x63, 0x6e, 0x73]],
}
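# The same signature check used in normalize_icon_type can be exercised on its own; a hedged
# sketch, with a made-up helper name and example path.
def _sketch_header_matches(path, extension):
    signatures = hex_signatures[extension]
    with open(path, "rb") as f:
        header = f.read(max(len(s) for s in signatures))
    return any(list(header)[:len(s)] == s for s in signatures)

# e.g., _sketch_header_matches('app.ico', 'ico') is False for a PNG that was merely renamed
# to .ico, which is exactly the confusion the check above guards against.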

PyInstaller/building/makespec.py Normal file

@@ -0,0 +1,850 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Automatically build spec files containing a description of the project.
"""
import argparse
import os
import re
from PyInstaller import DEFAULT_SPECPATH, HOMEPATH
from PyInstaller import log as logging
from PyInstaller.building.templates import bundleexetmplt, bundletmplt, onedirtmplt, onefiletmplt, splashtmpl
from PyInstaller.compat import expand_path, is_darwin, is_win
logger = logging.getLogger(__name__)
# This list gives valid choices for the ``--debug`` command-line option, except for the ``all`` choice.
DEBUG_ARGUMENT_CHOICES = ['imports', 'bootloader', 'noarchive']
# This is the ``all`` choice.
DEBUG_ALL_CHOICE = ['all']
def escape_win_filepath(path):
# escape all \ with another \ after using normpath to clean up the path
return os.path.normpath(path).replace('\\', '\\\\')
def make_path_spec_relative(filename, spec_dir):
"""
Make the filename relative to the directory containing the .spec file if the filename is relative and not absolute.
Otherwise, keep the filename untouched.
"""
if os.path.isabs(filename):
return filename
else:
filename = os.path.abspath(filename)
# Make it relative.
filename = os.path.relpath(filename, start=spec_dir)
return filename
# Support for trying to avoid hard-coded paths in the .spec files. E.g., all files rooted in the Installer directory tree
# will be written using "HOMEPATH", thus allowing this spec file to be used with any Installer installation. Same thing
# could be done for other paths too.
path_conversions = ((HOMEPATH, "HOMEPATH"),)
class SourceDestAction(argparse.Action):
"""
A command line option which takes multiple source:dest pairs.
"""
def __init__(self, *args, default=None, metavar=None, **kwargs):
super().__init__(*args, default=[], metavar='SOURCE:DEST', **kwargs)
def __call__(self, parser, namespace, value, option_string=None):
try:
# Find the only separator that isn't a Windows drive.
separator, = (m for m in re.finditer(rf"(^\w:[/\\])|[:{os.pathsep}]", value) if not m[1])
except ValueError:
# Split into SRC and DEST failed, wrong syntax
raise argparse.ArgumentError(self, f'Wrong syntax, should be {self.option_strings[0]}=SOURCE:DEST')
src = value[:separator.start()]
dest = value[separator.end():]
if not src or not dest:
# Syntax was correct, but one or both of SRC and DEST was not given
raise argparse.ArgumentError(self, "You have to specify both SOURCE and DEST")
# argparse is not particularly smart with copy by reference typed defaults. If the current list is the default,
# replace it before modifying it to avoid changing the default.
if getattr(namespace, self.dest) is self.default:
setattr(namespace, self.dest, [])
getattr(namespace, self.dest).append((src, dest))
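# Illustrative example (hypothetical values, not part of this module): wired into a parser, the action splits on
# the separator colon while skipping over a leading Windows drive letter.
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--add-data', action=SourceDestAction, dest='datas')
#   ns = parser.parse_args(['--add-data', r'C:\assets\img.png:assets'])
#   # ns.datas == [('C:\\assets\\img.png', 'assets')]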
def make_variable_path(filename, conversions=path_conversions):
if not os.path.isabs(filename):
# os.path.commonpath can not compare relative and absolute paths, and if filename is not absolute, none of the
# paths in conversions will match anyway.
return None, filename
for (from_path, to_name) in conversions:
assert os.path.abspath(from_path) == from_path, ("path '%s' should already be absolute" % from_path)
try:
common_path = os.path.commonpath([filename, from_path])
except ValueError:
# Per https://docs.python.org/3/library/os.path.html#os.path.commonpath, this raises ValueError in several
# cases which prevent computing a common path.
common_path = None
if common_path == from_path:
rest = filename[len(from_path):]
if rest.startswith(('\\', '/')):
rest = rest[1:]
return to_name, rest
return None, filename
def removed_key_option(x):
from PyInstaller.exceptions import RemovedCipherFeatureError
raise RemovedCipherFeatureError("Please remove your --key=xxx argument.")
class _RemovedFlagAction(argparse.Action):
def __init__(self, *args, **kwargs):
kwargs["help"] = argparse.SUPPRESS
kwargs["nargs"] = 0
super().__init__(*args, **kwargs)
class _RemovedNoEmbedManifestAction(_RemovedFlagAction):
def __call__(self, *args, **kwargs):
from PyInstaller.exceptions import RemovedExternalManifestError
raise RemovedExternalManifestError("Please remove your --no-embed-manifest argument.")
class _RemovedWinPrivateAssembliesAction(_RemovedFlagAction):
def __call__(self, *args, **kwargs):
from PyInstaller.exceptions import RemovedWinSideBySideSupportError
raise RemovedWinSideBySideSupportError("Please remove your --win-private-assemblies argument.")
class _RemovedWinNoPreferRedirectsAction(_RemovedFlagAction):
def __call__(self, *args, **kwargs):
from PyInstaller.exceptions import RemovedWinSideBySideSupportError
raise RemovedWinSideBySideSupportError("Please remove your --win-no-prefer-redirects argument.")
# An object used in place of a "path string", which knows how to repr() itself using variable names instead of
# hard-coded paths.
class Path:
def __init__(self, *parts):
self.path = os.path.join(*parts)
self.variable_prefix = self.filename_suffix = None
def __repr__(self):
if self.filename_suffix is None:
self.variable_prefix, self.filename_suffix = make_variable_path(self.path)
if self.variable_prefix is None:
return repr(self.path)
return "os.path.join(" + self.variable_prefix + "," + repr(self.filename_suffix) + ")"
# An object used to construct extra preamble for the spec file, in order to accommodate extra collect_*() calls from the
# command-line
class Preamble:
def __init__(
self, datas, binaries, hiddenimports, collect_data, collect_binaries, collect_submodules, collect_all,
copy_metadata, recursive_copy_metadata
):
# Initialize with literal values - will be switched to preamble variable name later, if necessary
self.binaries = binaries or []
self.hiddenimports = hiddenimports or []
self.datas = datas or []
# Preamble content
self.content = []
# Import statements
if collect_data:
self._add_hookutil_import('collect_data_files')
if collect_binaries:
self._add_hookutil_import('collect_dynamic_libs')
if collect_submodules:
self._add_hookutil_import('collect_submodules')
if collect_all:
self._add_hookutil_import('collect_all')
if copy_metadata or recursive_copy_metadata:
self._add_hookutil_import('copy_metadata')
if self.content:
self.content += [''] # empty line to separate the section
# Variables
if collect_data or copy_metadata or collect_all or recursive_copy_metadata:
self._add_var('datas', self.datas)
self.datas = 'datas' # switch to variable
if collect_binaries or collect_all:
self._add_var('binaries', self.binaries)
self.binaries = 'binaries' # switch to variable
if collect_submodules or collect_all:
self._add_var('hiddenimports', self.hiddenimports)
self.hiddenimports = 'hiddenimports' # switch to variable
# Content - collect_data_files
for entry in collect_data:
self._add_collect_data(entry)
# Content - copy_metadata
for entry in copy_metadata:
self._add_copy_metadata(entry)
# Content - copy_metadata(..., recursive=True)
for entry in recursive_copy_metadata:
self._add_recursive_copy_metadata(entry)
# Content - collect_binaries
for entry in collect_binaries:
self._add_collect_binaries(entry)
# Content - collect_submodules
for entry in collect_submodules:
self._add_collect_submodules(entry)
# Content - collect_all
for entry in collect_all:
self._add_collect_all(entry)
# Merge
if self.content and self.content[-1] != '':
self.content += [''] # empty line
self.content = '\n'.join(self.content)
def _add_hookutil_import(self, name):
self.content += ['from PyInstaller.utils.hooks import {0}'.format(name)]
def _add_var(self, name, initial_value):
self.content += ['{0} = {1}'.format(name, initial_value)]
def _add_collect_data(self, name):
self.content += ['datas += collect_data_files(\'{0}\')'.format(name)]
def _add_copy_metadata(self, name):
self.content += ['datas += copy_metadata(\'{0}\')'.format(name)]
def _add_recursive_copy_metadata(self, name):
self.content += ['datas += copy_metadata(\'{0}\', recursive=True)'.format(name)]
def _add_collect_binaries(self, name):
self.content += ['binaries += collect_dynamic_libs(\'{0}\')'.format(name)]
def _add_collect_submodules(self, name):
self.content += ['hiddenimports += collect_submodules(\'{0}\')'.format(name)]
def _add_collect_all(self, name):
self.content += [
'tmp_ret = collect_all(\'{0}\')'.format(name),
'datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]'
]
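# Illustrative output (hypothetical package name): for collect_all=['mypkg'] and no other collect options, the
# generated preamble content reads:
#
#   from PyInstaller.utils.hooks import collect_all
#
#   datas = []
#   binaries = []
#   hiddenimports = []
#   tmp_ret = collect_all('mypkg')
#   datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]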
def __add_options(parser):
"""
Add the `Makespec` options to an option-parser instance or an option group.
"""
g = parser.add_argument_group('What to generate')
g.add_argument(
"-D",
"--onedir",
dest="onefile",
action="store_false",
default=None,
help="Create a one-folder bundle containing an executable (default)",
)
g.add_argument(
"-F",
"--onefile",
dest="onefile",
action="store_true",
default=None,
help="Create a one-file bundled executable.",
)
g.add_argument(
"--specpath",
metavar="DIR",
help="Folder to store the generated spec file (default: current directory)",
)
g.add_argument(
"-n",
"--name",
help="Name to assign to the bundled app and spec file (default: first script's basename)",
)
g.add_argument(
"--contents-directory",
help="For onedir builds only, specify the name of the directory in which all supporting files (i.e. everything "
"except the executable itself) will be placed in. Use \".\" to re-enable old onedir layout without contents "
"directory.",
)
g = parser.add_argument_group('What to bundle, where to search')
g.add_argument(
'--add-data',
action=SourceDestAction,
dest='datas',
help="Additional data files or directories containing data files to be added to the application. The argument "
'value should be in form of "source:dest_dir", where source is the path to file (or directory) to be '
"collected, dest_dir is the destination directory relative to the top-level application directory, and both "
"paths are separated by a colon (:). To put a file in the top-level application directory, use . as a "
"dest_dir. This option can be used multiple times."
)
g.add_argument(
'--add-binary',
action=SourceDestAction,
dest="binaries",
help='Additional binary files to be added to the executable. See the ``--add-data`` option for the format. '
'This option can be used multiple times.',
)
g.add_argument(
"-p",
"--paths",
dest="pathex",
metavar="DIR",
action="append",
default=[],
help="A path to search for imports (like using PYTHONPATH). Multiple paths are allowed, separated by ``%s``, "
"or use this option multiple times. Equivalent to supplying the ``pathex`` argument in the spec file." %
repr(os.pathsep),
)
g.add_argument(
'--hidden-import',
'--hiddenimport',
action='append',
default=[],
metavar="MODULENAME",
dest='hiddenimports',
help='Name an import not visible in the code of the script(s). This option can be used multiple times.',
)
g.add_argument(
'--collect-submodules',
action="append",
default=[],
metavar="MODULENAME",
dest='collect_submodules',
help='Collect all submodules from the specified package or module. This option can be used multiple times.',
)
g.add_argument(
'--collect-data',
'--collect-datas',
action="append",
default=[],
metavar="MODULENAME",
dest='collect_data',
help='Collect all data from the specified package or module. This option can be used multiple times.',
)
g.add_argument(
'--collect-binaries',
action="append",
default=[],
metavar="MODULENAME",
dest='collect_binaries',
help='Collect all binaries from the specified package or module. This option can be used multiple times.',
)
g.add_argument(
'--collect-all',
action="append",
default=[],
metavar="MODULENAME",
dest='collect_all',
help='Collect all submodules, data files, and binaries from the specified package or module. This option can '
'be used multiple times.',
)
g.add_argument(
'--copy-metadata',
action="append",
default=[],
metavar="PACKAGENAME",
dest='copy_metadata',
help='Copy metadata for the specified package. This option can be used multiple times.',
)
g.add_argument(
'--recursive-copy-metadata',
action="append",
default=[],
metavar="PACKAGENAME",
dest='recursive_copy_metadata',
help='Copy metadata for the specified package and all its dependencies. This option can be used multiple '
'times.',
)
g.add_argument(
"--additional-hooks-dir",
action="append",
dest="hookspath",
default=[],
help="An additional path to search for hooks. This option can be used multiple times.",
)
g.add_argument(
'--runtime-hook',
action='append',
dest='runtime_hooks',
default=[],
help='Path to a custom runtime hook file. A runtime hook is code that is bundled with the executable and is '
'executed before any other code or module to set up special features of the runtime environment. This option '
'can be used multiple times.',
)
g.add_argument(
'--exclude-module',
dest='excludes',
action='append',
default=[],
help='Optional module or package (the Python name, not the path name) that will be ignored (as though it was '
'not found). This option can be used multiple times.',
)
g.add_argument(
'--key',
dest='key',
help=argparse.SUPPRESS,
type=removed_key_option,
)
g.add_argument(
'--splash',
dest='splash',
metavar="IMAGE_FILE",
help="(EXPERIMENTAL) Add an splash screen with the image IMAGE_FILE to the application. The splash screen can "
"display progress updates while unpacking.",
)
g = parser.add_argument_group('How to generate')
g.add_argument(
"-d",
"--debug",
# If this option is not specified, then its default value is an empty list (no debug options selected).
default=[],
# Note that ``nargs`` is omitted. This produces a single item not stored in a list, as opposed to a list
# containing one item, as per `nargs <https://docs.python.org/3/library/argparse.html#nargs>`_.
nargs=None,
# The options specified must come from this list.
choices=DEBUG_ALL_CHOICE + DEBUG_ARGUMENT_CHOICES,
# Append choice, rather than storing them (which would overwrite any previous selections).
action='append',
# Allow newlines in the help text; see the ``_SmartFormatter`` in ``__main__.py``.
help=(
"R|Provide assistance with debugging a frozen\n"
"application. This argument may be provided multiple\n"
"times to select several of the following options.\n"
"\n"
"- all: All three of the following options.\n"
"\n"
"- imports: specify the -v option to the underlying\n"
" Python interpreter, causing it to print a message\n"
" each time a module is initialized, showing the\n"
" place (filename or built-in module) from which it\n"
" is loaded. See\n"
" https://docs.python.org/3/using/cmdline.html#id4.\n"
"\n"
"- bootloader: tell the bootloader to issue progress\n"
" messages while initializing and starting the\n"
" bundled app. Used to diagnose problems with\n"
" missing imports.\n"
"\n"
"- noarchive: instead of storing all frozen Python\n"
" source files as an archive inside the resulting\n"
" executable, store them as files in the resulting\n"
" output directory.\n"
"\n"
),
)
g.add_argument(
'--python-option',
dest='python_options',
metavar='PYTHON_OPTION',
action='append',
default=[],
help='Specify a command-line option to pass to the Python interpreter at runtime. Currently supports '
'"v" (equivalent to "--debug imports"), "u", "W <warning control>", "X <xoption>", and "hash_seed=<value>". '
'For details, see the section "Specifying Python Interpreter Options" in PyInstaller manual.',
)
g.add_argument(
"-s",
"--strip",
action="store_true",
help="Apply a symbol-table strip to the executable and shared libs (not recommended for Windows)",
)
g.add_argument(
"--noupx",
action="store_true",
default=False,
help="Do not use UPX even if it is available (works differently between Windows and *nix)",
)
g.add_argument(
"--upx-exclude",
dest="upx_exclude",
metavar="FILE",
action="append",
help="Prevent a binary from being compressed when using upx. This is typically used if upx corrupts certain "
"binaries during compression. FILE is the filename of the binary without path. This option can be used "
"multiple times.",
)
g = parser.add_argument_group('Windows and Mac OS X specific options')
g.add_argument(
"-c",
"--console",
"--nowindowed",
dest="console",
action="store_true",
default=None,
help="Open a console window for standard i/o (default). On Windows this option has no effect if the first "
"script is a '.pyw' file.",
)
g.add_argument(
"-w",
"--windowed",
"--noconsole",
dest="console",
action="store_false",
default=None,
help="Windows and Mac OS X: do not provide a console window for standard i/o. On Mac OS this also triggers "
"building a Mac OS .app bundle. On Windows this option is automatically set if the first script is a '.pyw' "
"file. This option is ignored on *NIX systems.",
)
g.add_argument(
"--hide-console",
type=str,
choices={'hide-early', 'hide-late', 'minimize-early', 'minimize-late'},
default=None,
help="Windows only: in console-enabled executable, have bootloader automatically hide or minimize the console "
"window if the program owns the console window (i.e., was not launched from an existing console window).",
)
g.add_argument(
"-i",
"--icon",
action='append',
dest="icon_file",
metavar='<FILE.ico or FILE.exe,ID or FILE.icns or Image or "NONE">',
help="FILE.ico: apply the icon to a Windows executable. FILE.exe,ID: extract the icon with ID from an exe. "
"FILE.icns: apply the icon to the .app bundle on Mac OS. If an image file is entered that isn't in the "
"platform format (ico on Windows, icns on Mac), PyInstaller tries to use Pillow to translate the icon into "
"the correct format (if Pillow is installed). Use \"NONE\" to not apply any icon, thereby making the OS show "
"some default (default: apply PyInstaller's icon). This option can be used multiple times.",
)
g.add_argument(
"--disable-windowed-traceback",
dest="disable_windowed_traceback",
action="store_true",
default=False,
help="Disable traceback dump of unhandled exception in windowed (noconsole) mode (Windows and macOS only), "
"and instead display a message that this feature is disabled.",
)
g = parser.add_argument_group('Windows specific options')
g.add_argument(
"--version-file",
dest="version_file",
metavar="FILE",
help="Add a version resource from FILE to the exe.",
)
g.add_argument(
"-m",
"--manifest",
metavar="<FILE or XML>",
help="Add manifest FILE or XML to the exe.",
)
g.add_argument(
"--no-embed-manifest",
action=_RemovedNoEmbedManifestAction,
)
g.add_argument(
"-r",
"--resource",
dest="resources",
metavar="RESOURCE",
action="append",
default=[],
help="Add or update a resource to a Windows executable. The RESOURCE is one to four items, "
"FILE[,TYPE[,NAME[,LANGUAGE]]]. FILE can be a data file or an exe/dll. For data files, at least TYPE and NAME "
"must be specified. LANGUAGE defaults to 0 or may be specified as wildcard * to update all resources of the "
"given TYPE and NAME. For exe/dll files, all resources from FILE will be added/updated to the final executable "
"if TYPE, NAME and LANGUAGE are omitted or specified as wildcard *. This option can be used multiple times.",
)
g.add_argument(
'--uac-admin',
dest='uac_admin',
action="store_true",
default=False,
help="Using this option creates a Manifest that will request elevation upon application start.",
)
g.add_argument(
'--uac-uiaccess',
dest='uac_uiaccess',
action="store_true",
default=False,
help="Using this option allows an elevated application to work with Remote Desktop.",
)
g = parser.add_argument_group('Windows Side-by-side Assembly searching options (advanced)')
g.add_argument(
"--win-private-assemblies",
action=_RemovedWinPrivateAssembliesAction,
)
g.add_argument(
"--win-no-prefer-redirects",
action=_RemovedWinNoPreferRedirectsAction,
)
g = parser.add_argument_group('Mac OS specific options')
g.add_argument(
"--argv-emulation",
dest="argv_emulation",
action="store_true",
default=False,
help="Enable argv emulation for macOS app bundles. If enabled, the initial open document/URL event is "
"processed by the bootloader and the passed file paths or URLs are appended to sys.argv.",
)
g.add_argument(
'--osx-bundle-identifier',
dest='bundle_identifier',
help="Mac OS .app bundle identifier is used as the default unique program name for code signing purposes. "
"The usual form is a hierarchical name in reverse DNS notation. For example: com.mycompany.department.appname "
"(default: first script's basename)",
)
g.add_argument(
'--target-architecture',
'--target-arch',
dest='target_arch',
metavar='ARCH',
default=None,
help="Target architecture (macOS only; valid values: x86_64, arm64, universal2). Enables switching between "
"universal2 and single-arch version of frozen application (provided python installation supports the target "
"architecture). If not target architecture is not specified, the current running architecture is targeted.",
)
g.add_argument(
'--codesign-identity',
dest='codesign_identity',
metavar='IDENTITY',
default=None,
help="Code signing identity (macOS only). Use the provided identity to sign collected binaries and generated "
"executable. If signing identity is not provided, ad-hoc signing is performed instead.",
)
g.add_argument(
'--osx-entitlements-file',
dest='entitlements_file',
metavar='FILENAME',
default=None,
help="Entitlements file to use when code-signing the collected binaries (macOS only).",
)
g = parser.add_argument_group('Rarely used special options')
g.add_argument(
"--runtime-tmpdir",
dest="runtime_tmpdir",
metavar="PATH",
help="Where to extract libraries and support files in `onefile`-mode. If this option is given, the bootloader "
"will ignore any temp-folder location defined by the run-time OS. The ``_MEIxxxxxx``-folder will be created "
"here. Please use this option only if you know what you are doing.",
)
g.add_argument(
"--bootloader-ignore-signals",
action="store_true",
default=False,
help="Tell the bootloader to ignore signals rather than forwarding them to the child process. Useful in "
"situations where for example a supervisor process signals both the bootloader and the child (e.g., via a "
"process group) to avoid signalling the child twice.",
)
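# Illustrative invocation (a hedged note): these options are exposed through PyInstaller's `pyi-makespec` utility
# (and shared by the main `pyinstaller` command), so a typical command line exercising this module might be:
#
#   pyi-makespec --onefile --windowed --icon app.icns --add-data assets:assets myscript.py
#
# whose parsed arguments are fed into main() below.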
def main(
scripts,
name=None,
onefile=False,
console=True,
debug=[],
python_options=[],
strip=False,
noupx=False,
upx_exclude=None,
runtime_tmpdir=None,
contents_directory=None,
pathex=[],
version_file=None,
specpath=None,
bootloader_ignore_signals=False,
disable_windowed_traceback=False,
datas=[],
binaries=[],
icon_file=None,
manifest=None,
resources=[],
bundle_identifier=None,
hiddenimports=[],
hookspath=[],
runtime_hooks=[],
excludes=[],
uac_admin=False,
uac_uiaccess=False,
collect_submodules=[],
collect_binaries=[],
collect_data=[],
collect_all=[],
copy_metadata=[],
splash=None,
recursive_copy_metadata=[],
target_arch=None,
codesign_identity=None,
entitlements_file=None,
argv_emulation=False,
hide_console=None,
**_kwargs
):
# Default values for onefile and console when not explicitly specified on command-line (indicated by None)
if onefile is None:
onefile = False
if console is None:
console = True
# If appname is not specified - use the basename of the main script as name.
if name is None:
name = os.path.splitext(os.path.basename(scripts[0]))[0]
# If specpath is not specified, use the default value: the current working directory.
if specpath is None:
specpath = DEFAULT_SPECPATH
else:
# Expand tilde to user's home directory.
specpath = expand_path(specpath)
# If cwd is the root directory of PyInstaller, generate the .spec file in ./appname/ subdirectory.
if specpath == HOMEPATH:
specpath = os.path.join(HOMEPATH, name)
# Create directory tree if missing.
if not os.path.exists(specpath):
os.makedirs(specpath)
# Handle additional EXE options.
exe_options = ''
if version_file:
exe_options += "\n version='%s'," % escape_win_filepath(version_file)
if uac_admin:
exe_options += "\n uac_admin=True,"
if uac_uiaccess:
exe_options += "\n uac_uiaccess=True,"
if icon_file:
# Icon file for Windows.
# On Windows, the default icon is embedded in the bootloader executable.
if icon_file[0] == 'NONE':
exe_options += "\n icon='NONE',"
else:
exe_options += "\n icon=[%s]," % ','.join("'%s'" % escape_win_filepath(ic) for ic in icon_file)
# Icon file for Mac OS.
# We need to enclose it in apostrophes.
icon_file = "'%s'" % icon_file[0]
else:
# On Mac OS, the default icon has to be copied into the .app bundle.
# The text value 'None' means: use the default icon.
icon_file = 'None'
if contents_directory:
exe_options += "\n contents_directory='%s'," % (contents_directory or "_internal")
if hide_console:
exe_options += "\n hide_console='%s'," % hide_console
if bundle_identifier:
# We need to enclose it in apostrophes.
bundle_identifier = "'%s'" % bundle_identifier
if manifest:
if "<" in manifest:
# Assume XML string
exe_options += "\n manifest='%s'," % manifest.replace("'", "\\'")
else:
# Assume filename
exe_options += "\n manifest='%s'," % escape_win_filepath(manifest)
if resources:
resources = list(map(escape_win_filepath, resources))
exe_options += "\n resources=%s," % repr(resources)
hiddenimports = hiddenimports or []
upx_exclude = upx_exclude or []
# If file extension of the first script is '.pyw', force --windowed option.
if is_win and os.path.splitext(scripts[0])[-1] == '.pyw':
console = False
# If script paths are relative, make them relative to the directory containing .spec file.
scripts = [make_path_spec_relative(x, specpath) for x in scripts]
# With absolute paths replace prefix with variable HOMEPATH.
scripts = list(map(Path, scripts))
# Translate the default of ``debug=None`` to an empty list.
if debug is None:
debug = []
# Translate the ``all`` option.
if DEBUG_ALL_CHOICE[0] in debug:
debug = DEBUG_ARGUMENT_CHOICES
# Create preamble (for collect_*() calls)
preamble = Preamble(
datas, binaries, hiddenimports, collect_data, collect_binaries, collect_submodules, collect_all, copy_metadata,
recursive_copy_metadata
)
if splash:
splash_init = splashtmpl % {'splash_image': splash}
splash_binaries = "\n splash.binaries,"
splash_target = "\n splash,"
else:
splash_init = splash_binaries = splash_target = ""
# Create OPTIONs array
if 'imports' in debug and 'v' not in python_options:
python_options.append('v')
python_options_array = [(opt, None, 'OPTION') for opt in python_options]
d = {
'scripts': scripts,
'pathex': pathex or [],
'binaries': preamble.binaries,
'datas': preamble.datas,
'hiddenimports': preamble.hiddenimports,
'preamble': preamble.content,
'name': name,
'noarchive': 'noarchive' in debug,
'options': python_options_array,
'debug_bootloader': 'bootloader' in debug,
'bootloader_ignore_signals': bootloader_ignore_signals,
'strip': strip,
'upx': not noupx,
'upx_exclude': upx_exclude,
'runtime_tmpdir': runtime_tmpdir,
'exe_options': exe_options,
# Directory with additional custom import hooks.
'hookspath': hookspath,
# List with custom runtime hook files.
'runtime_hooks': runtime_hooks or [],
# List of modules/packages to ignore.
'excludes': excludes or [],
# only Windows and Mac OS distinguish windowed and console apps
'console': console,
'disable_windowed_traceback': disable_windowed_traceback,
# Icon filename. Only Mac OS uses this item.
'icon': icon_file,
# .app bundle identifier. Only OSX uses this item.
'bundle_identifier': bundle_identifier,
# argv emulation (macOS only)
'argv_emulation': argv_emulation,
# Target architecture (macOS only)
'target_arch': target_arch,
# Code signing identity (macOS only)
'codesign_identity': codesign_identity,
# Entitlements file (macOS only)
'entitlements_file': entitlements_file,
# splash screen
'splash_init': splash_init,
'splash_target': splash_target,
'splash_binaries': splash_binaries,
}
# Write down .spec file to filesystem.
specfnm = os.path.join(specpath, name + '.spec')
with open(specfnm, 'w', encoding='utf-8') as specfile:
if onefile:
specfile.write(onefiletmplt % d)
# For Mac OS create .app bundle.
if is_darwin and not console:
specfile.write(bundleexetmplt % d)
else:
specfile.write(onedirtmplt % d)
# For Mac OS create .app bundle.
if is_darwin and not console:
specfile.write(bundletmplt % d)
return specfnm
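# Usage note (illustrative): called programmatically, e.g. main(['myscript.py'], onefile=True), this writes
# 'myscript.spec' into DEFAULT_SPECPATH using the onefile template and returns the full path of the spec file.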

View File

@@ -0,0 +1,720 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
import os
import pathlib
import plistlib
import shutil
import subprocess
from PyInstaller.building.api import COLLECT, EXE
from PyInstaller.building.datastruct import Target, logger, normalize_toc
from PyInstaller.building.utils import _check_path_overlap, _rmtree, process_collected_binary
from PyInstaller.compat import is_darwin, strict_collect_mode
from PyInstaller.building.icon import normalize_icon_type
import PyInstaller.utils.misc as miscutils
if is_darwin:
import PyInstaller.utils.osx as osxutils
# Character sequence used to replace dot (`.`) in names of directories that are created in `Contents/MacOS` or
# `Contents/Frameworks`, where only .framework bundle directories are allowed to have a dot in their name.
DOT_REPLACEMENT = '__dot__'
class BUNDLE(Target):
def __init__(self, *args, **kwargs):
from PyInstaller.config import CONF
# BUNDLE only makes sense under Mac OS; it is a no-op on other platforms
if not is_darwin:
return
# Get a path to a .icns icon for the app bundle.
self.icon = kwargs.get('icon')
if not self.icon:
# --icon not specified; use the default in the pyinstaller folder
self.icon = os.path.join(
os.path.dirname(os.path.dirname(__file__)), 'bootloader', 'images', 'icon-windowed.icns'
)
else:
# User gave an --icon=path. If it is relative, make it relative to the spec file location.
if not os.path.isabs(self.icon):
self.icon = os.path.join(CONF['specpath'], self.icon)
super().__init__()
# .app bundle is created in DISTPATH.
self.name = kwargs.get('name', None)
base_name = os.path.basename(self.name)
self.name = os.path.join(CONF['distpath'], base_name)
self.appname = os.path.splitext(base_name)[0]
self.version = kwargs.get("version", "0.0.0")
self.toc = []
self.strip = False
self.upx = False
self.console = True
self.target_arch = None
self.codesign_identity = None
self.entitlements_file = None
# .app bundle identifier for Code Signing
self.bundle_identifier = kwargs.get('bundle_identifier')
if not self.bundle_identifier:
# Fallback to appname.
self.bundle_identifier = self.appname
self.info_plist = kwargs.get('info_plist', None)
for arg in args:
# Valid arguments: EXE object, COLLECT object, and TOC-like iterables
if isinstance(arg, EXE):
# Add EXE as an entry to the TOC, and merge its dependencies TOC
self.toc.append((os.path.basename(arg.name), arg.name, 'EXECUTABLE'))
self.toc.extend(arg.dependencies)
# Inherit settings
self.strip = arg.strip
self.upx = arg.upx
self.upx_exclude = arg.upx_exclude
self.console = arg.console
self.target_arch = arg.target_arch
self.codesign_identity = arg.codesign_identity
self.entitlements_file = arg.entitlements_file
elif isinstance(arg, COLLECT):
# Merge the TOC
self.toc.extend(arg.toc)
# Inherit settings
self.strip = arg.strip_binaries
self.upx = arg.upx_binaries
self.upx_exclude = arg.upx_exclude
self.console = arg.console
self.target_arch = arg.target_arch
self.codesign_identity = arg.codesign_identity
self.entitlements_file = arg.entitlements_file
elif miscutils.is_iterable(arg):
# TOC-like iterable
self.toc.extend(arg)
else:
raise TypeError(f"Invalid argument type for BUNDLE: {type(arg)!r}")
# Infer the executable name from the first EXECUTABLE entry in the TOC; it might have come from the COLLECT
# (as opposed to the stand-alone EXE).
for dest_name, src_name, typecode in self.toc:
if typecode == "EXECUTABLE":
self.exename = src_name
break
else:
raise ValueError("No EXECUTABLE entry found in the TOC!")
# Normalize TOC
self.toc = normalize_toc(self.toc)
self.__postinit__()
_GUTS = (
# BUNDLE always builds; we just want the TOC to be written out
('toc', None),
)
def _check_guts(self, data, last_build):
# BUNDLE always needs to be executed, in order to clean the output directory.
return True
# Helper for determining whether the given file belongs to a .framework bundle or not. If it does, it returns
# the path to the top-level .framework bundle directory; otherwise, returns None.
@staticmethod
def _is_framework_file(dest_path):
for parent in dest_path.parents:
if parent.name.endswith('.framework'):
return parent
return None
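# Worked example (illustrative, hypothetical path): for
# PurePath('PyQt5/Qt5/lib/QtCore.framework/Versions/5/QtCore'), the first parent whose name ends with
# '.framework' is 'PyQt5/Qt5/lib/QtCore.framework', which is returned; a plain path such as
# PurePath('mypkg/libfoo.dylib') has no such parent, so the helper returns None.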
# Helper that computes relative cross-link path between link's location and target, assuming they are both
# rooted in the `Contents` directory of a macOS .app bundle.
@staticmethod
def _compute_relative_crosslink(crosslink_location, crosslink_target):
# We could take crosslink_location and crosslink_target as they are (relative to parent of the `Contents`
# directory), but that would introduce an unnecessary `../Contents` part. So instead, we take both paths
# relative to the `Contents` directory.
return os.path.join(
*['..' for level in pathlib.PurePath(crosslink_location).relative_to('Contents').parent.parts],
pathlib.PurePath(crosslink_target).relative_to('Contents'),
)
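# Worked example (illustrative): for a link placed at 'Contents/Resources/mypkg/libfoo.dylib' targeting
# 'Contents/Frameworks/mypkg/libfoo.dylib', the link's parent relative to 'Contents' is 'Resources/mypkg'
# (two levels deep), so the computed reference is '../../Frameworks/mypkg/libfoo.dylib'.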
# This method takes the original (input) TOC and processes it into final TOC, based on which the `assemble` method
# performs its file collection. The TOC processing here represents the core of our efforts to generate an .app
# bundle that is compatible with Apple's code-signing requirements.
#
# For in-depth details on the code-signing, see Apple's `Technical Note TN2206: macOS Code Signing In Depth` at
# https://developer.apple.com/library/archive/technotes/tn2206/_index.html
#
# The requirements, framed from PyInstaller's perspective, can be summarized as follows:
#
# 1. The `Contents/MacOS` directory is expected to contain only the program executable and (binary) code (= dylibs
# and nested .framework bundles). Alternatively, the dylibs and .framework bundles can also be placed into the
# `Contents/Frameworks` directory (where the same rules apply as for `Contents/MacOS`, so the remainder of this
# text refers to the two interchangeably, unless explicitly noted otherwise). The code in `Contents/MacOS`
# is expected to be signed, and the `codesign` utility will recursively sign all found code when using `--deep`
# option to sign the .app bundle.
#
# 2. All non-code files should be placed in `Contents/Resources`, so they become sealed (data) resources;
# i.e., their signature data is recorded in `Contents/_CodeSignature/CodeResources`. (As a side note,
# it seems that signature information for data/resources in `Contents/Resources` is kept under the `file` key in
# the `CodeResources` file, while the information for contents in `Contents/MacOS` is kept under the `file2` key).
#
# 3. The directories in `Contents/MacOS` may not contain dots (`.`) in their names, except for the nested
# .framework bundle directories. The directories in `Contents/Resources` have no such restrictions.
#
# 4. There may not be any content in the top level of a bundle. In other words, if a bundle has a `Contents`
# or a `Versions` directory at its top level, there may be no other files or directories alongside them. The
# sole exception is that alongside `Versions`, there may be symlinks to files and directories in
# `Versions/Current`. This rule is important for nested .framework bundles that we collect from python packages.
#
# Next, let us consider the consequences of violating each of the above requirements:
#
# 1. Code signing machinery can directly store a signature only in Mach-O binaries and nested .framework bundles; if
# a data file is placed in `Contents/MacOS`, the signature is stored in the file's extended attributes. If the
# extended attributes are lost, the program's signature will be broken. Many file transfer techniques (e.g., a
# zip file) do not preserve extended attributes, nor are they preserved when uploading to the Mac App Store.
#
# 2. Putting code (a dylib or a .framework bundle) into `Contents/Resources` causes it to be treated as a resource;
# the outer signature (i.e., of the whole .app bundle) does not know that this nested content is actually code.
# Consequently, signing the bundle with `codesign --deep` will NOT sign binaries placed in
# `Contents/Resources`, which may result in missing signatures when the .app bundle is verified for notarization.
# This might be worked around by signing each binary separately, and then signing the whole bundle (without the
# `--deep` option), but that requires the user to keep track of the offending binaries.
#
# 3. If a directory in `Contents/MacOS` contains a dot in its name, code-signing the bundle fails with
# `bundle format unrecognized, invalid, or unsuitable` due to the code signing machinery treating the directory as
# a nested .framework bundle directory.
#
# 4. If a nested .framework bundle is malformed, the signing of the .app bundle might succeed, but subsequent
# verification will fail, for example with `embedded framework contains modified or invalid version` (as observed
# with .framework bundles shipped by contemporary PyQt/PySide PyPI wheels).
#
# The above requirements are unfortunately often at odds with the structure of python packages:
#
# * In general, python packages are mixed-content directories, where binaries and data files may be expected to
# be found next to each other.
#
# For example, `opencv-python` provides a custom loader script that requires the package to be collected in the
# source-only form by PyInstaller (i.e., the python modules and scripts collected as source .py files). At the
# same time, it expects the .py loader script to be able to find the binary extension next to itself.
#
# Another example of mixed-mode directories are Qt QML components' sub-directories, which contain both the
# component's plugin (a binary) and associated meta files (data files).
#
# * In python world, the directories often contain dots in their names.
#
# Dots are often used for private directories containing binaries that are shipped with a package. For example,
# `numpy/.dylibs`, `scipy/.dylibs`, etc.
#
# Qt QML components may also contain a dot in their name; a couple of examples from the `PySide2` package:
# `PySide2/Qt/qml/QtQuick.2`, `PySide2/Qt/qml/QtQuick/Controls.2`, `PySide2/Qt/qml/QtQuick/Particles.2`, etc.
#
# The packages' metadata directories also invariably contain dots in the name due to version (for example,
# `numpy-1.24.3.dist-info`).
#
# In light of all of the above, PyInstaller attempts to strictly place all files in their mandated location
# (`Contents/MacOS` or `Contents/Frameworks` vs `Contents/Resources`). To preserve the illusion of mixed-content
# directories, the content is cross-linked from one directory to the other. Specifically:
#
# * All entries with DATA typecode are assumed to be data files, and are always placed in corresponding directory
# structure rooted in `Contents/Resources`.
#
# * All entries with BINARY or EXTENSION typecode are always placed in corresponding directory structure rooted in
# `Contents/Frameworks`.
#
# * All entries with EXECUTABLE are placed in `Contents/MacOS` directory.
#
# * For the purposes of relocation, nested .framework bundles are treated as a single BINARY entity; i.e., the
# whole .bundle directory is placed in corresponding directory structure rooted in `Contents/Frameworks` (even
# though some of its contents, such as `Info.plist` file, are actually data files).
#
# * Top-level data files and binaries are always cross-linked to the other directory. For example, given a data file
# `data_file.txt` that was collected into `Contents/Resources`, we create a symbolic link called
# `Contents/MacOS/data_file.txt` that points to `../Resources/data_file.txt`.
#
# * The executable itself, while placed in `Contents/MacOS`, is cross-linked into both `Contents/Frameworks` and
# `Contents/Resources`.
#
# * The stand-alone PKG entries (used with onefile builds that side-load the PKG archive) are treated as data files
# and collected into `Contents/Resources`, but cross-linked only into `Contents/MacOS` directory (because they
# must appear to be next to the program executable). This is the only entry type that is cross-linked into the
# `Contents/MacOS` directory and also the only data-like entry type that is not cross-linked into the
# `Contents/Frameworks` directory.
#
# * For files in sub-directories, the cross-linking behavior depends on the type of directory:
#
# * A data-only directory is created in directory structure rooted in `Contents/Resources`, and cross-linked
# into directory structure rooted in `Contents/Frameworks` at directory level (i.e., we link the whole
# directory instead of individual files).
#
# This largely saves us from having to deal with dots in the names of collected metadata directories, which
# are examples of data-only directories.
#
# * A binary-only directory is created in directory structure rooted in `Contents/Frameworks`, and cross-linked
# into `Contents/Resources` at directory level.
#
# * A mixed-content directory is created in both directory structures. Files are placed into the corresponding
# directory structure based on their type, and cross-linked into the other directory structure at file level
# (a worked example is given after this comment block).
#
# * This rule is applied recursively; for example, a data-only sub-directory in a mixed-content directory is
# cross-linked at directory level, while adjacent binary and data files are cross-linked at file level.
#
# * To work around the issue with dots in the names of directories in `Contents/Frameworks` (applicable to
# binary-only or mixed-content directories), such directories are created with modified name (the dot replaced
# with a pre-defined pattern). Next to the modified directory, a symbolic link with original name is created,
# pointing to the directory with modified name. With mixed-content directories, this modification is performed
# only on the `Contents/Frameworks` side; the corresponding directory in `Contents/Resources` can be created
# directly, without name modification and symbolic link.
#
# * If a symbolic link needs to be created in a mixed-content directory due to a SYMLINK entry from the original
# TOC (i.e., a "collected" symlink originating from analysis, as opposed to the cross-linking mechanism described
# above), the link is created in both directory structures, each pointing to the resource in its corresponding
# directory structure (with one such resource being an actual file, and the other being a cross-link to the file).
#
# Final remarks:
#
# NOTE: the relocation mechanism is codified by tests in `tests/functional/test_macos_bundle_structure.py`.
#
# NOTE: by placing binaries and nested .framework entries into `Contents/Frameworks` instead of `Contents/MacOS`,
# we have effectively relocated the `sys._MEIPASS` directory from the `Contents/MacOS` (= the parent directory of
# the program executable) into `Contents/Frameworks`. This requires the PyInstaller's bootloader to detect that it
# is running in the app-bundle mode (e.g., by checking if the program executable's parent directory is `Contents/MacOS`)
# and adjust the path accordingly.
#
# NOTE: the implemented relocation mechanism depends on the input TOC containing properly classified entries
# w.r.t. BINARY vs DATA. So hooks and .spec files triggering collection of binaries as datas (and vice versa) will
# result in incorrect placement of those files in the generated .app bundle. However, this is *not* the proper place
# to address such issues; if necessary, automatic (re)classification should be added to analysis process, to ensure
# that BUNDLE (as well as other build targets) receive correctly classified TOC.
#
# NOTE: similar to the previous note, the relocation mechanism is also not the proper place to enforce compliant
# structure of the nested .framework bundles. Instead, this is handled by the analysis process, using the
# `PyInstaller.utils.osx.collect_files_from_framework_bundles` helper function. So the input TOC that BUNDLE
# receives should already contain entries that reconstruct compliant nested .framework bundles.
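# Worked example of the relocation rules above (illustrative, hypothetical names): given a mixed-content
# directory 'mypkg' that contains 'mypkg/mod.so' (EXTENSION) and 'mypkg/data.txt' (DATA), the processed TOC
# ends up with:
#
#   Contents/Frameworks/mypkg/mod.so                                        (the binary itself)
#   Contents/Resources/mypkg/mod.so    -> ../../Frameworks/mypkg/mod.so     (SYMLINK)
#   Contents/Resources/mypkg/data.txt                                       (the data file itself)
#   Contents/Frameworks/mypkg/data.txt -> ../../Resources/mypkg/data.txt    (SYMLINK)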
def _process_bundle_toc(self, toc):
bundle_toc = []
# Step 1: inspect the directory layout and classify the directories according to their contents.
directory_types = dict()
_MIXED_DIR_TYPE = 'MIXED-DIR'
_DATA_DIR_TYPE = 'DATA-DIR'
_BINARY_DIR_TYPE = 'BINARY-DIR'
_FRAMEWORK_DIR_TYPE = 'FRAMEWORK-DIR'
_TOP_LEVEL_DIR = pathlib.PurePath('.')
for dest_name, src_name, typecode in toc:
dest_path = pathlib.PurePath(dest_name)
framework_dir = self._is_framework_file(dest_path)
if framework_dir:
# Mark the framework directory as FRAMEWORK-DIR.
directory_types[framework_dir] = _FRAMEWORK_DIR_TYPE
# Treat the framework directory as BINARY file when classifying parent directories.
typecode = 'BINARY'
parent_dirs = framework_dir.parents
else:
parent_dirs = dest_path.parents
# Treat BINARY and EXTENSION as BINARY to simplify further processing.
if typecode == 'EXTENSION':
typecode = 'BINARY'
# (Re)classify parent directories
for parent_dir in parent_dirs:
# Skip the top-level `.` dir. This is also the only directory that can contain EXECUTABLE and PKG
# entries, so we do not have to worry about them.
if parent_dir == _TOP_LEVEL_DIR:
continue
directory_type = _BINARY_DIR_TYPE if typecode == 'BINARY' else _DATA_DIR_TYPE # default
directory_type = directory_types.get(parent_dir, directory_type)
if directory_type == _DATA_DIR_TYPE and typecode == 'BINARY':
directory_type = _MIXED_DIR_TYPE
if directory_type == _BINARY_DIR_TYPE and typecode == 'DATA':
directory_type = _MIXED_DIR_TYPE
directory_types[parent_dir] = directory_type
logger.debug("Directory classification: %r", directory_types)
# Step 2: process the obtained directory structure and create symlink entries for directories that need to be
# cross-linked. Such directories are data-only and binary-only directories (and framework directories) that are
# located either in the top-level directory (have no parent) or in a mixed-content directory.
for directory_path, directory_type in directory_types.items():
# Cross-linking at directory level applies only to data-only and binary-only directories (as well as
# framework directories).
if directory_type == _MIXED_DIR_TYPE:
continue
# The parent needs to be either top-level directory or a mixed-content directory. Otherwise, the parent
# (or one of its ancestors) will get cross-linked, and we do not need the link here.
parent_dir = directory_path.parent
requires_crosslink = parent_dir == _TOP_LEVEL_DIR or directory_types.get(parent_dir) == _MIXED_DIR_TYPE
if not requires_crosslink:
continue
logger.debug("Cross-linking directory %r of type %r", directory_path, directory_type)
# Data-only directories are created in `Contents/Resources` and need to be cross-linked into
# `Contents/Frameworks`; vice versa for binary-only and framework directories. The directory creation is
# handled implicitly, when we create the parent directory structure for collected files.
if directory_type == _DATA_DIR_TYPE:
symlink_src = os.path.join('Contents/Resources', directory_path)
symlink_dest = os.path.join('Contents/Frameworks', directory_path)
else:
symlink_src = os.path.join('Contents/Frameworks', directory_path)
symlink_dest = os.path.join('Contents/Resources', directory_path)
symlink_ref = self._compute_relative_crosslink(symlink_dest, symlink_src)
bundle_toc.append((symlink_dest, symlink_ref, 'SYMLINK'))
# Step 3: first part of the work-around for directories that are located in `Contents/Frameworks` but contain a
# dot in their name. As per `codesign` rules, the only directories in `Contents/Frameworks` that are allowed to
# contain a dot in their name are .framework bundle directories. So we replace the dot with a custom character
# sequence (stored in the global `DOT_REPLACEMENT` variable), and create a symbolic link with the original name
# pointing to the modified name. This is the best we can do with code-sign requirements vs. the python community
# shipping their packages' dylibs in `.dylibs` subdirectories, or Qt storing their QML components in directories named
# `QtQuick.2`, `QtQuick/Controls.2`, `QtQuick/Particles.2`, `QtQuick/Templates.2`, etc.
#
# In this step, we only prepare symlink entries that link the original directory name (with dot) to the modified
# one (with dot replaced). The parent paths for collected files are modified in later step(s).
for directory_path, directory_type in directory_types.items():
# .framework bundle directories contain a dot in the name, but are allowed that.
if directory_type == _FRAMEWORK_DIR_TYPE:
continue
# Data-only directories are fully located in `Contents/Resources` and cross-linked to `Contents/Frameworks`
# at directory level, so they are also allowed a dot in their name.
if directory_type == _DATA_DIR_TYPE:
continue
# Apply the work-around, if necessary...
if '.' not in directory_path.name:
continue
logger.debug(
"Creating symlink to work around the dot in the name of directory %r (%s)...", str(directory_path),
directory_type
)
# Create a SYMLINK entry, but only for this level. In case of nested directories with dots in names, the
# symlinks for ancestors will be created by corresponding loop iteration.
bundle_toc.append((
os.path.join('Contents/Frameworks', directory_path),
directory_path.name.replace('.', DOT_REPLACEMENT),
'SYMLINK',
))
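# Example (illustrative): for a binary-only directory 'numpy/.dylibs', this step emits
# ('Contents/Frameworks/numpy/.dylibs', '__dot__dylibs', 'SYMLINK'); step 5 below then rewrites the collected
# files' parent path to 'Contents/Frameworks/numpy/__dot__dylibs', which is what the symlink points at.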
# Step 4: process the entries for collected files, and decide whether they should be placed into
# `Contents/MacOS`, `Contents/Frameworks`, or `Contents/Resources`, and whether they should be cross-linked into
# other directories.
for orig_dest_name, src_name, typecode in toc:
orig_dest_path = pathlib.PurePath(orig_dest_name)
# Special handling for EXECUTABLE and PKG entries
if typecode == 'EXECUTABLE':
# Place into `Contents/MacOS`, ...
file_dest = os.path.join('Contents/MacOS', orig_dest_name)
bundle_toc.append((file_dest, src_name, typecode))
# ... and do nothing else. We explicitly avoid cross-linking the executable to `Contents/Frameworks` and
# `Contents/Resources`, because it should not be necessary (the executable's location should be
# discovered via `sys.executable`) and to prevent issues when the executable name collides with the name of a
# package from which we collect either binaries or data files (or both); see #7314.
continue
elif typecode == 'PKG':
# Place into `Contents/Resources` ...
file_dest = os.path.join('Contents/Resources', orig_dest_name)
bundle_toc.append((file_dest, src_name, typecode))
# ... and cross-link only into `Contents/MacOS`.
# This is used only in `onefile` mode, where there is actually no other content to distribute among the
# `Contents/Resources` and `Contents/Frameworks` directories, so cross-linking into the latter makes
# little sense.
symlink_dest = os.path.join('Contents/MacOS', orig_dest_name)
symlink_ref = self._compute_relative_crosslink(symlink_dest, file_dest)
bundle_toc.append((symlink_dest, symlink_ref, 'SYMLINK'))
continue
# Standard data vs binary processing...
# Determine file location based on its type.
if self._is_framework_file(orig_dest_path):
# File from a framework bundle; put into `Contents/Frameworks`, but never cross-link the file itself.
# The whole .framework bundle directory will be linked as necessary by the directory cross-linking
# mechanism.
file_base_dir = 'Contents/Frameworks'
crosslink_base_dir = None
elif typecode == 'DATA':
# Data file; relocate to `Contents/Resources` and cross-link it back into `Contents/Frameworks`.
file_base_dir = 'Contents/Resources'
crosslink_base_dir = 'Contents/Frameworks'
else:
# Binary; put into `Contents/Frameworks` and cross-link it into `Contents/Resources`.
file_base_dir = 'Contents/Frameworks'
crosslink_base_dir = 'Contents/Resources'
# Determine if we need to cross-link the file. We need to do this for top-level files (the ones without
# parent directories), and for files whose parent directories are mixed-content directories.
requires_crosslink = False
if crosslink_base_dir is not None:
parent_dir = orig_dest_path.parent
requires_crosslink = parent_dir == _TOP_LEVEL_DIR or directory_types.get(parent_dir) == _MIXED_DIR_TYPE
# Special handling for SYMLINK entries in original TOC; if we need to cross-link a symlink entry, we create
# it in both locations, and have each point to the (relative) resource in the same directory (so one of the
# targets will likely be a file, and the other will be a symlink due to cross-linking).
if typecode == 'SYMLINK' and requires_crosslink:
bundle_toc.append((os.path.join(file_base_dir, orig_dest_name), src_name, typecode))
bundle_toc.append((os.path.join(crosslink_base_dir, orig_dest_name), src_name, typecode))
continue
# The file itself.
file_dest = os.path.join(file_base_dir, orig_dest_name)
bundle_toc.append((file_dest, src_name, typecode))
# Symlink for cross-linking
if requires_crosslink:
symlink_dest = os.path.join(crosslink_base_dir, orig_dest_name)
symlink_ref = self._compute_relative_crosslink(symlink_dest, file_dest)
bundle_toc.append((symlink_dest, symlink_ref, 'SYMLINK'))
# Step 5: sanitize all destination paths in the new TOC, to ensure that paths that are rooted in
# `Contents/Frameworks` do not contain directories with dots in their names. Doing this as a post-processing
# step keeps code simple and clean and ensures that this step is applied to files, symlinks that originate from
# cross-linking files, and symlinks that originate from cross-linking directories. This in turn ensures that
# all directory hierarchies created during the actual file collection have sanitized names, and that collection
# outcome does not depend on the order of entries in the TOC.
sanitized_toc = []
for dest_name, src_name, typecode in bundle_toc:
dest_path = pathlib.PurePath(dest_name)
# Paths rooted in Contents/Resources do not require sanitizing.
if dest_path.parts[0] == 'Contents' and dest_path.parts[1] == 'Resources':
sanitized_toc.append((dest_name, src_name, typecode))
continue
# Special handling for files from .framework bundle directories; sanitize only parent path of the .framework
# directory.
framework_path = self._is_framework_file(dest_path)
if framework_path:
parent_path = framework_path.parent
remaining_path = dest_path.relative_to(parent_path)
else:
parent_path = dest_path.parent
remaining_path = dest_path.name
sanitized_dest_path = pathlib.PurePath(
*parent_path.parts[:2], # Contents/Frameworks
*[part.replace('.', DOT_REPLACEMENT) for part in parent_path.parts[2:]],
remaining_path,
)
sanitized_dest_name = str(sanitized_dest_path)
if sanitized_dest_path != dest_path:
logger.debug("Sanitizing dest path: %r -> %r", dest_name, sanitized_dest_name)
sanitized_toc.append((sanitized_dest_name, src_name, typecode))
bundle_toc = sanitized_toc
# Normalize and sort the TOC for easier inspection
bundle_toc = sorted(normalize_toc(bundle_toc))
return bundle_toc
def assemble(self):
from PyInstaller.config import CONF
if _check_path_overlap(self.name) and os.path.isdir(self.name):
_rmtree(self.name)
logger.info("Building BUNDLE %s", self.tocbasename)
# Create a minimal Mac bundle structure.
os.makedirs(os.path.join(self.name, "Contents", "MacOS"))
os.makedirs(os.path.join(self.name, "Contents", "Resources"))
os.makedirs(os.path.join(self.name, "Contents", "Frameworks"))
# Make sure the icon exists, and attempt to convert it to the proper format, if applicable
self.icon = normalize_icon_type(self.icon, ("icns",), "icns", CONF["workpath"])
# Ensure icon path is absolute
self.icon = os.path.abspath(self.icon)
# Copy icns icon to Resources directory.
shutil.copyfile(self.icon, os.path.join(self.name, 'Contents', 'Resources', os.path.basename(self.icon)))
# Key/values for a minimal Info.plist file
info_plist_dict = {
"CFBundleDisplayName": self.appname,
"CFBundleName": self.appname,
# Required by 'codesign' utility.
# The value for CFBundleIdentifier is used as the default unique name of your program for Code Signing
# purposes. It even identifies the APP for access to restricted OS X areas like Keychain.
#
# The identifier used for signing must be globally unique. The usual form for this identifier is a
# hierarchical name in reverse DNS notation, starting with the toplevel domain, followed by the company
# name, followed by the department within the company, and ending with the product name. Usually in the
# form: com.mycompany.department.appname
# CLI option --osx-bundle-identifier sets this value.
"CFBundleIdentifier": self.bundle_identifier,
"CFBundleExecutable": os.path.basename(self.exename),
"CFBundleIconFile": os.path.basename(self.icon),
"CFBundleInfoDictionaryVersion": "6.0",
"CFBundlePackageType": "APPL",
"CFBundleShortVersionString": self.version,
}
# Set some default values. But they still can be overwritten by the user.
if self.console:
# Setting EXE console=True implies LSBackgroundOnly=True.
info_plist_dict['LSBackgroundOnly'] = True
else:
# Let's use high resolution by default.
info_plist_dict['NSHighResolutionCapable'] = True
# Merge info_plist settings from spec file
if isinstance(self.info_plist, dict) and self.info_plist:
info_plist_dict.update(self.info_plist)
plist_filename = os.path.join(self.name, "Contents", "Info.plist")
with open(plist_filename, "wb") as plist_fh:
plistlib.dump(info_plist_dict, plist_fh)
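# Usage note (illustrative): a spec file can extend or override these defaults by passing a dict to BUNDLE,
# e.g. BUNDLE(exe, name='MyApp.app', info_plist={'NSHighResolutionCapable': False}); the user's dict is merged
# last, so its keys take precedence.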
# Pre-process the TOC into its final BUNDLE-compatible form.
bundle_toc = self._process_bundle_toc(self.toc)
# Perform the actual collection.
CONTENTS_FRAMEWORKS_PATH = pathlib.PurePath('Contents/Frameworks')
for dest_name, src_name, typecode in bundle_toc:
# Create parent directory structure, if necessary
dest_path = os.path.join(self.name, dest_name) # Absolute destination path
dest_dir = os.path.dirname(dest_path)
try:
os.makedirs(dest_dir, exist_ok=True)
except FileExistsError:
raise SystemExit(
f"Pyinstaller needs to create a directory at {dest_dir!r}, "
"but there already exists a file at that path!"
)
# Copy extensions and binaries from cache. This ensures that these files undergo additional binary
# processing - have paths to linked libraries rewritten (relative to `@rpath`) and have rpath set to the
# top-level directory (relative to `@loader_path`, i.e., the file's location). The "top-level" directory
# in this case corresponds to `Contents/Frameworks` (where `sys._MEIPASS` also points), so we need to pass
# the cache retrieval function the *original* destination path (i.e., the path without the preceding
# `Contents/Frameworks`).
if typecode in ('EXTENSION', 'BINARY'):
orig_dest_name = str(pathlib.PurePath(dest_name).relative_to(CONTENTS_FRAMEWORKS_PATH))
src_name = process_collected_binary(
src_name,
orig_dest_name,
use_strip=self.strip,
use_upx=self.upx,
upx_exclude=self.upx_exclude,
target_arch=self.target_arch,
codesign_identity=self.codesign_identity,
entitlements_file=self.entitlements_file,
strict_arch_validation=(typecode == 'EXTENSION'),
)
if typecode == 'SYMLINK':
os.symlink(src_name, dest_path) # Create link at dest_path, pointing at (relative) src_name
else:
# BUNDLE does not support MERGE-based multipackage
assert typecode != 'DEPENDENCY', "MERGE DEPENDENCY entries are not supported in BUNDLE!"
# At this point, `src_name` should be a valid file.
if not os.path.isfile(src_name):
raise ValueError(f"Resource {src_name!r} is not a valid file!")
# If strict collection mode is enabled, the destination should not exist yet.
if strict_collect_mode and os.path.exists(dest_path):
raise ValueError(
f"Attempting to collect a duplicated file into BUNDLE: {dest_name} (type: {typecode})"
)
# Use `shutil.copyfile` to copy file with default permissions. We do not attempt to preserve original
# permissions nor metadata, as they might be too restrictive and cause issues either during subsequent
# re-build attempts or when trying to move the application bundle. For binaries (and data files with
# executable bit set), we manually set the executable bits after copying the file.
shutil.copyfile(src_name, dest_path)
if (
typecode in ('EXTENSION', 'BINARY', 'EXECUTABLE')
or (typecode == 'DATA' and os.access(src_name, os.X_OK))
):
os.chmod(dest_path, 0o755)
# Sign the bundle
logger.info('Signing the BUNDLE...')
try:
osxutils.sign_binary(self.name, self.codesign_identity, self.entitlements_file, deep=True)
except Exception as e:
# Display a warning or re-raise the error, depending on the environment-variable setting.
if os.environ.get("PYINSTALLER_STRICT_BUNDLE_CODESIGN_ERROR", "0") == "0":
logger.warning("Error while signing the bundle: %s", e)
logger.warning("You will need to sign the bundle manually!")
else:
raise RuntimeError("Failed to codesign the bundle!") from e
logger.info("Building BUNDLE %s completed successfully.", self.tocbasename)
# Optionally verify bundle's signature. This is primarily intended for our CI.
if os.environ.get("PYINSTALLER_VERIFY_BUNDLE_SIGNATURE", "0") != "0":
logger.info("Verifying signature for BUNDLE %s...", self.name)
self.verify_bundle_signature(self.name)
logger.info("BUNDLE verification complete!")
@staticmethod
def verify_bundle_signature(bundle_dir):
# First, verify the bundle signature using codesign.
cmd_args = ['codesign', '--verify', '--all-architectures', '--deep', '--strict', bundle_dir]
p = subprocess.run(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8')
if p.returncode:
raise SystemError(
f"codesign command ({cmd_args}) failed with error code {p.returncode}!\noutput: {p.stdout}"
)
# Ensure that code-signing information is *NOT* embedded in the files' extended attributes.
#
# This happens when files other than binaries are present in `Contents/MacOS` or `Contents/Frameworks`
# directory; as the signature cannot be embedded within the file itself (contrary to binaries with
# `LC_CODE_SIGNATURE` section in their header), it ends up stored in the file's extended attributes. However,
# if such a bundle is transferred using a method that does not support extended attributes (for example, a zip
# file), the signatures on these files are lost, and the signature of the bundle as a whole becomes invalid.
# This is the primary reason why we need to relocate non-binaries into `Contents/Resources` - the signatures
# for files in that directory end up stored in `Contents/_CodeSignature/CodeResources` file.
#
# This check therefore aims to ensure that all files have been properly relocated to their corresponding
# locations w.r.t. the code-signing requirements.
try:
import xattr
except ModuleNotFoundError:
logger.info("xattr package not available; skipping verification of extended attributes!")
return
CODESIGN_ATTRS = (
"com.apple.cs.CodeDirectory",
"com.apple.cs.CodeRequirements",
"com.apple.cs.CodeRequirements-1",
"com.apple.cs.CodeSignature",
)
for entry in pathlib.Path(bundle_dir).rglob("*"):
if not entry.is_file():
continue
file_attrs = xattr.listxattr(entry)
if any(codesign_attr in file_attrs for codesign_attr in CODESIGN_ATTRS):
raise ValueError(f"Code-sign attributes found in extended attributes of {str(entry)!r}!")

View File

@ -0,0 +1,468 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
# -----------------------------------------------------------------------------
import io
import os
import re
import struct
import pathlib
from PyInstaller import log as logging
from PyInstaller.archive.writers import SplashWriter
from PyInstaller.building import splash_templates
from PyInstaller.building.datastruct import Target
from PyInstaller.building.utils import _check_guts_eq, _check_guts_toc, misc
from PyInstaller.compat import is_darwin
from PyInstaller.depend import bindepend
from PyInstaller.utils.hooks import tcl_tk as tcltk_utils
try:
from PIL import Image as PILImage
except ImportError:
PILImage = None
logger = logging.getLogger(__name__)
# These requirement files are checked against the current splash screen script. If you wish to modify the splash screen
# and run into tcl errors/bad behavior, this is a good place to start and add components your implementation of the
# splash screen might use.
# NOTE: these paths use the *destination* layout for Tcl/Tk scripts, which uses unversioned tcl and tk directories
# (see `PyInstaller.utils.hooks.tcl_tk.collect_tcl_tk_files`).
splash_requirements = [
# prepended tcl/tk binaries
os.path.join(tcltk_utils.TK_ROOTNAME, "license.terms"),
os.path.join(tcltk_utils.TK_ROOTNAME, "text.tcl"),
os.path.join(tcltk_utils.TK_ROOTNAME, "tk.tcl"),
# Used for customizable font
os.path.join(tcltk_utils.TK_ROOTNAME, "ttk", "ttk.tcl"),
os.path.join(tcltk_utils.TK_ROOTNAME, "ttk", "fonts.tcl"),
os.path.join(tcltk_utils.TK_ROOTNAME, "ttk", "cursors.tcl"),
os.path.join(tcltk_utils.TK_ROOTNAME, "ttk", "utils.tcl"),
]
class Splash(Target):
"""
Bundles the required resources for the splash screen into a file, which will be included in the CArchive.
A Splash has two outputs, one is itself and one is stored in splash.binaries. Both need to be passed to other
build targets in order to enable the splash screen.
"""
def __init__(self, image_file, binaries, datas, **kwargs):
"""
:param str image_file:
A path-like object to the image to be used. Only the PNG file format is supported.
.. note:: If a different file format is supplied and PIL (Pillow) is installed, the file will be converted
automatically.
.. note:: *Windows*: The color ``'magenta'`` / ``'#ff00ff'`` must not be used in the image or text, as it is
used by the splash screen to indicate transparent areas. Use a similar color (e.g., ``'#ff00fe'``) instead.
.. note:: If PIL (Pillow) is installed and the image is bigger than max_img_size, the image will be resized
to fit into the specified area.
:param list binaries:
The TOC list of binaries the Analysis build target found. This TOC includes all extension modules and their
binary dependencies. This is required to determine whether the user's program uses `tkinter`.
:param list datas:
The TOC list of data the Analysis build target found. This TOC includes all data-file dependencies of the
modules. This is required to check if all splash screen requirements can be bundled.
:keyword text_pos:
An optional two-integer tuple that represents the origin of the text on the splash screen image. The
origin of the text is its lower left corner. A unit in the respective coordinate system is a pixel of the
image, its origin lies in the top left corner of the image. This parameter also acts like a switch for
the text feature. If omitted, no text will be displayed on the splash screen. This text will be used to
show textual progress in onefile mode.
:type text_pos: Tuple[int, int]
:keyword text_size:
The desired size of the font. If the size argument is a positive number, it is interpreted as a size in
points. If size is a negative number, its absolute value is interpreted as a size in pixels. Default: ``12``
:type text_size: int
:keyword text_font:
An optional name of a font for the text. This font must be installed on the user system, otherwise the
system default font is used. If this parameter is omitted, the default font is also used.
:keyword text_color:
An optional color for the text. HTML color codes (``'#40e0d0'``) and color names (``'turquoise'``) are
supported. Default: ``'black'``
(Windows: the color ``'magenta'`` / ``'#ff00ff'`` is used to indicate transparency, and should not be used)
:type text_color: str
:keyword text_default:
The default text which will be displayed before the extraction starts. Default: ``"Initializing"``
:type text_default: str
:keyword full_tk:
By default Splash bundles only the necessary files for the splash screen (some tk components). This
option enables adding the full tk and making it a requirement, meaning all tk files will be unpacked before
the splash screen can be started. This is useful during development of the splash screen script.
Default: ``False``
:type full_tk: bool
:keyword minify_script:
The splash screen is created by executing a Tcl/Tk script. This option enables minifying the script,
i.e., removing all non-essential parts from it. Default: ``True``
:keyword rundir:
The folder name in which tcl/tk will be extracted at runtime. There should be no matching folder in your
application to avoid conflicts. Default: ``'__splash'``
:type rundir: str
:keyword name:
An optional alternative filename for the .res file. If not specified, a name is generated.
:type name: str
:keyword script_name:
An optional alternative filename for the Tcl script, that will be generated. If not specified, a name is
generated.
:type script_name: str
:keyword max_img_size:
Maximum size of the splash screen image as a tuple. If the supplied image exceeds this limit, it will be
resized to fit the maximum width (to keep the original aspect ratio). This option can be disabled by
setting it to None. Default: ``(760, 480)``
:type max_img_size: Tuple[int, int]
:keyword always_on_top:
Force the splashscreen to be always on top of other windows. If disabled, other windows (e.g., from other
applications) can cover the splash screen if the user brings them to the front. Disabling this might be useful for
frozen applications with long startup times. Default: ``True``
:type always_on_top: bool
"""
from ..config import CONF
Target.__init__(self)
# Splash screen is not supported on macOS. It operates in a secondary thread and macOS disallows UI operations
# in any thread other than main.
if is_darwin:
raise SystemExit("Splash screen is not supported on macOS.")
# Make image path relative to .spec file
if not os.path.isabs(image_file):
image_file = os.path.join(CONF['specpath'], image_file)
image_file = os.path.normpath(image_file)
if not os.path.exists(image_file):
raise ValueError("Image file '%s' not found" % image_file)
# Copy all arguments
self.image_file = image_file
self.full_tk = kwargs.get("full_tk", False)
self.name = kwargs.get("name", None)
self.script_name = kwargs.get("script_name", None)
self.minify_script = kwargs.get("minify_script", True)
self.rundir = kwargs.get("rundir", None)
self.max_img_size = kwargs.get("max_img_size", (760, 480))
# text options
self.text_pos = kwargs.get("text_pos", None)
self.text_size = kwargs.get("text_size", 12)
self.text_font = kwargs.get("text_font", "TkDefaultFont")
self.text_color = kwargs.get("text_color", "black")
self.text_default = kwargs.get("text_default", "Initializing")
# always-on-top behavior
self.always_on_top = kwargs.get("always_on_top", True)
# Save the generated file separately so that it is not necessary to generate the data again and again
root = os.path.splitext(self.tocfilename)[0]
if self.name is None:
self.name = root + '.res'
if self.script_name is None:
self.script_name = root + '_script.tcl'
if self.rundir is None:
self.rundir = self._find_rundir(binaries + datas)
# Internal variables
try:
# Do not import _tkinter at the toplevel, because on some systems _tkinter will fail to load, since it is
# not installed. This would cause a runtime error in PyInstaller, since this module is imported from
# build_main.py. Instead, we just want to inform the user that the splash screen feature is not supported
# on their platform.
import _tkinter
self._tkinter_module = _tkinter
self._tkinter_file = self._tkinter_module.__file__
except ModuleNotFoundError:
raise SystemExit(
"Your platform does not support the splash screen feature, since tkinter is not installed. Please "
"install tkinter and try again."
)
# Calculated / analysed values
self.uses_tkinter = self._uses_tkinter(self._tkinter_file, binaries)
logger.debug("Program uses tkinter: %r", self.uses_tkinter)
self.script = self.generate_script()
self.tcl_lib, self.tk_lib = tcltk_utils.find_tcl_tk_shared_libs(self._tkinter_file)
if is_darwin:
# Outdated Tcl/Tk 8.5 system framework is not supported. Depending on macOS version, the library path will
# come up empty (hidden system libraries on Big Sur), or will be
# [/System]/Library/Frameworks/Tcl.framework/Tcl
if self.tcl_lib[1] is None or 'Library/Frameworks/Tcl.framework' in self.tcl_lib[1]:
raise SystemExit("The splash screen feature does not support macOS system framework version of Tcl/Tk.")
# Check if tcl/tk was found
assert all(self.tcl_lib)
assert all(self.tk_lib)
logger.debug("Use Tcl Library from %s and Tk From %s", self.tcl_lib, self.tk_lib)
self.splash_requirements = set([self.tcl_lib[0], self.tk_lib[0]] + splash_requirements)
logger.info("Collect tcl/tk binaries for the splash screen")
tcltk_tree = tcltk_utils.collect_tcl_tk_files(self._tkinter_file)
if self.full_tk:
# The user wants a full copy of tk, so make all tk files a requirement.
self.splash_requirements.update(entry[0] for entry in tcltk_tree)
# Scan for binary dependencies of the Tcl/Tk shared libraries, and add them to `binaries` TOC list (which
# should really be called `dependencies` as it is not limited to binaries. But it is too late now, and
# existing spec files depend on this naming). We specify these binary dependencies (which include the
# Tcl and Tk shared libraries themselves) even if the user's program uses tkinter and they would be collected
# anyway; let the collection mechanism deal with potential duplicates.
tcltk_libs = [(dest_name, src_name, 'BINARY') for dest_name, src_name in (self.tcl_lib, self.tk_lib)]
self.binaries = bindepend.binary_dependency_analysis(tcltk_libs)
# Put all shared library dependencies in `splash_requirements`, so they are made available in onefile mode.
self.splash_requirements.update(entry[0] for entry in self.binaries)
# If the user's program does not use tkinter, add resources from Tcl/Tk tree to the dependencies list.
# Do so only for the resources that are part of splash requirements.
if not self.uses_tkinter:
self.binaries.extend(entry for entry in tcltk_tree if entry[0] in self.splash_requirements)
# Check if all requirements were found.
collected_files = set(entry[0] for entry in (binaries + datas + self.binaries))
def _filter_requirement(filename):
if filename not in collected_files:
# Item is not bundled, so warn the user about it. This actually may happen on some tkinter installations
# that are missing the license.terms file.
logger.warning(
"The local Tcl/Tk installation is missing the file %s. The behavior of the splash screen is "
"therefore undefined and may be unsupported.", filename
)
return False
return True
# Remove all files which were not found.
self.splash_requirements = set(filter(_filter_requirement, self.splash_requirements))
# Test if the tcl/tk version is supported by the bootloader.
self.test_tk_version()
logger.debug("Splash Requirements: %s", self.splash_requirements)
self.__postinit__()
_GUTS = (
# input parameters
('image_file', _check_guts_eq),
('name', _check_guts_eq),
('script_name', _check_guts_eq),
('text_pos', _check_guts_eq),
('text_size', _check_guts_eq),
('text_font', _check_guts_eq),
('text_color', _check_guts_eq),
('text_default', _check_guts_eq),
('always_on_top', _check_guts_eq),
('full_tk', _check_guts_eq),
('minify_script', _check_guts_eq),
('rundir', _check_guts_eq),
('max_img_size', _check_guts_eq),
# calculated/analysed values
('uses_tkinter', _check_guts_eq),
('script', _check_guts_eq),
('tcl_lib', _check_guts_eq),
('tk_lib', _check_guts_eq),
('splash_requirements', _check_guts_eq),
('binaries', _check_guts_toc),
# internal value
# Check if the tkinter installation changed. This is theoretically possible if someone uses two different python
# installations of the same version.
('_tkinter_file', _check_guts_eq),
)
def _check_guts(self, data, last_build):
if Target._check_guts(self, data, last_build):
return True
# Check if the image has been modified.
if misc.mtime(self.image_file) > last_build:
logger.info("Building %s because file %s changed", self.tocbasename, self.image_file)
return True
return False
def assemble(self):
logger.info("Building Splash %s", self.name)
# Function to resize a given image to fit into the area defined by max_img_size.
def _resize_image(_image, _orig_size):
if PILImage:
_w, _h = _orig_size
_ratio_w = self.max_img_size[0] / _w
if _ratio_w < 1:
# Image width exceeds limit
_h = int(_h * _ratio_w)
_w = self.max_img_size[0]
_ratio_h = self.max_img_size[1] / _h
if _ratio_h < 1:
# Image height exceeds limit
_w = int(_w * _ratio_h)
_h = self.max_img_size[1]
# If a path is given, the file is opened; a PIL image is used as-is
if isinstance(_image, PILImage.Image):
_img = _image
else:
_img = PILImage.open(_image)
_img_resized = _img.resize((_w, _h))
# Save image into a stream
_image_stream = io.BytesIO()
_img_resized.save(_image_stream, format='PNG')
_img.close()
_img_resized.close()
_image_data = _image_stream.getvalue()
logger.info("Resized image %s from dimensions %s to (%d, %d)", self.image_file, str(_orig_size), _w, _h)
return _image_data
else:
raise ValueError(
    "The splash image dimensions (w: %d, h: %d) exceed max_img_size (w: %d, h: %d), but the image "
    "cannot be resized due to missing PIL.Image! Either install the Pillow package, adjust the "
    "max_img_size, or use an image of compatible dimensions." %
    (_orig_size[0], _orig_size[1], self.max_img_size[0], self.max_img_size[1])
)
# Open image file
image_file = open(self.image_file, 'rb')
# Check header of the file to identify it
if image_file.read(8) == b'\x89PNG\r\n\x1a\n':
# self.image_file is a PNG file
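# (The width and height are the first two big-endian 32-bit fields of the IHDR chunk, which starts at
# byte offset 16 of every PNG file.)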
image_file.seek(16)
img_size = (struct.unpack("!I", image_file.read(4))[0], struct.unpack("!I", image_file.read(4))[0])
# Compare per dimension; plain tuple comparison is lexicographic and would miss a too-tall image.
if img_size[0] > self.max_img_size[0] or img_size[1] > self.max_img_size[1]:
# The image exceeds the maximum image size, so resize it
image = _resize_image(self.image_file, img_size)
else:
image = os.path.abspath(self.image_file)
elif PILImage:
# Pillow is installed, meaning the image can be converted automatically
img = PILImage.open(self.image_file, mode='r')
if img.size[0] > self.max_img_size[0] or img.size[1] > self.max_img_size[1]:
image = _resize_image(img, img.size)
else:
image_data = io.BytesIO()
img.save(image_data, format='PNG')
img.close()
image = image_data.getvalue()
logger.info("Converted image %s to PNG format", self.image_file)
else:
raise ValueError(
    "The image %s needs to be converted to a PNG file, but PIL.Image is not available! Either install "
    "the Pillow package, or use a PNG image for your splash screen." % self.image_file
)
image_file.close()
SplashWriter(
self.name,
self.splash_requirements,
self.tcl_lib[0], # tcl86t.dll
self.tk_lib[0], # tk86t.dll
tcltk_utils.TK_ROOTNAME,
self.rundir,
image,
self.script
)
def test_tk_version(self):
tcl_version = float(self._tkinter_module.TCL_VERSION)
tk_version = float(self._tkinter_module.TK_VERSION)
# Test if tcl/tk version is supported
if tcl_version < 8.6 or tk_version < 8.6:
logger.warning(
"The installed Tcl/Tk (%s/%s) version might not work with the splash screen feature of the bootloader. "
"The bootloader is tested against Tcl/Tk 8.6", self._tkinter_module.TCL_VERSION,
self._tkinter_module.TK_VERSION
)
# This should be impossible, since Tcl and Tk are always released together with the same version number, but just in case
if tcl_version != tk_version:
logger.warning(
"The installed version of Tcl (%s) and Tk (%s) do not match. PyInstaller is tested against matching "
"versions", self._tkinter_module.TCL_VERSION, self._tkinter_module.TK_VERSION
)
# Ensure that Tcl is built with multi-threading support.
if not tcltk_utils.tcl_threaded:
# This is a feature breaking problem, so exit.
raise SystemExit(
"The installed tcl version is not threaded. PyInstaller only supports the splash screen "
"using threaded tcl."
)
def generate_script(self):
"""
Generate the script for the splash screen.
If minify_script is True, all unnecessary parts will be removed.
"""
d = {}
if self.text_pos is not None:
logger.debug("Add text support to splash screen")
d.update({
'pad_x': self.text_pos[0],
'pad_y': self.text_pos[1],
'color': self.text_color,
'font': self.text_font,
'font_size': self.text_size,
'default_text': self.text_default,
})
script = splash_templates.build_script(text_options=d, always_on_top=self.always_on_top)
if self.minify_script:
# Remove any documentation, empty lines and unnecessary spaces
script = '\n'.join(
line for line in map(lambda line: line.strip(), script.splitlines())
if not line.startswith('#') # documentation
and line # empty lines
)
# Remove unnecessary spaces
script = re.sub(' +', ' ', script)
# Write script to disk, so that it is transparent to the user what script is executed.
with open(self.script_name, "w", encoding="utf-8") as script_file:
script_file.write(script)
return script
@staticmethod
def _uses_tkinter(tkinter_file, binaries):
# Test for the _tkinter extension instead of the tkinter module, because the user might use a different wrapping
# library for Tk. Use `pathlib.PurePath` in comparisons to account for case normalization and separator normalization.
tkinter_file = pathlib.PurePath(tkinter_file)
for dest_name, src_name, typecode in binaries:
if pathlib.PurePath(src_name) == tkinter_file:
return True
return False
@staticmethod
def _find_rundir(structure):
# First, try a name the user would recognize if they came across the directory.
rundir = '__splash%s'
candidate = rundir % ""
counter = 0
# Run this loop as long as a folder named like rundir exists. In most cases '__splash' will be sufficient and
# the loop body won't be entered.
while any(e[0].startswith(candidate + os.sep) for e in structure):
# just append to rundir a counter
candidate = rundir % str(counter)
counter += 1
# The SPLASH_DATA_HEADER structure limits the name to at most 16 bytes, so raise an error if we exceed that
# limit. This should never happen, since there are 10^8 different possibilities, but just in case.
assert len(candidate) <= 16
return candidate

View File

@ -0,0 +1,229 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
# -----------------------------------------------------------------------------
"""
Templates for the splash screen tcl script.
"""
from PyInstaller.compat import is_cygwin, is_darwin, is_win
ipc_script = r"""
proc _ipc_server {channel clientaddr clientport} {
# This function is called if a new client connects to
# the server. This creates a channel, which calls
# _ipc_caller if data was sent through the connection
set client_name [format <%s:%d> $clientaddr $clientport]
chan configure $channel \
-buffering none \
-encoding utf-8 \
-eofchar \x04 \
-translation cr
chan event $channel readable [list _ipc_caller $channel $client_name]
}
proc _ipc_caller {channel client_name} {
# This function is called if a command was sent through
# the tcp connection. The current implementation supports
# two commands: update_text and exit, although exit
# is implemented to be called if the connection gets
# closed (from Python) or the character 0x04 is received
chan gets $channel cmd
if {[chan eof $channel]} {
# This is entered if either the connection was closed
# or the char 0x04 was sent
chan close $channel
exit
} elseif {![chan blocked $channel]} {
# RPC methods
# update_text command
if {[string match "update_text*" $cmd]} {
global status_text
set first [expr {[string first "(" $cmd] + 1}]
set last [expr {[string last ")" $cmd] - 1}]
set status_text [string range $cmd $first $last]
}
# Implement other procedures here
}
}
# By setting the port to 0, the OS will assign a free port
set server_socket [socket -server _ipc_server -myaddr localhost 0]
set server_port [fconfigure $server_socket -sockname]
# This environment variable is shared between the python and the tcl
# interpreter and publishes the port the tcp server socket is available
set env(_PYIBoot_SPLASH) [lindex $server_port 2]
"""
image_script = r"""
# The variable $_image_data, which holds the data for the splash
# image is created by the bootloader.
image create photo splash_image
splash_image put $_image_data
# delete the variable, because the image now holds the data
unset _image_data
proc canvas_text_update {canvas tag _var - -} {
# This function is rigged to be called when the variable
# status_text gets changed. This updates the text on
# the canvas
upvar $_var var
$canvas itemconfigure $tag -text $var
}
"""
splash_canvas_setup = r"""
package require Tk
set image_width [image width splash_image]
set image_height [image height splash_image]
set display_width [winfo screenwidth .]
set display_height [winfo screenheight .]
set x_position [expr {int(0.5*($display_width - $image_width))}]
set y_position [expr {int(0.5*($display_height - $image_height))}]
# Toplevel frame in which all widgets should be positioned
frame .root
# Configure the canvas on which the splash
# screen will be drawn
canvas .root.canvas \
-width $image_width \
-height $image_height \
-borderwidth 0 \
-highlightthickness 0
# Draw the image into the canvas, filling it.
.root.canvas create image \
[expr {$image_width / 2}] \
[expr {$image_height / 2}] \
-image splash_image
"""
splash_canvas_text = r"""
# Create a text on the canvas, which tracks the local
# variable status_text. status_text is changed via C to
# update the progress on the splash screen.
# We cannot use the default label, because it has a
# default background, which cannot be turned transparent
.root.canvas create text \
%(pad_x)d \
%(pad_y)d \
-fill %(color)s \
-justify center \
-font myFont \
-tag vartext \
-anchor sw
trace variable status_text w \
[list canvas_text_update .root.canvas vartext]
set status_text "%(default_text)s"
"""
splash_canvas_default_font = r"""
font create myFont {*}[font actual TkDefaultFont]
font configure myFont -size %(font_size)d
"""
splash_canvas_custom_font = r"""
font create myFont -family %(font)s -size %(font_size)d
"""
if is_win or is_cygwin:
transparent_setup = r"""
# If the image is transparent, the background will be filled
# with magenta. The magenta background is later replaced with transparency.
# The limitation of this implementation is that only
# sharp transparent image corners are possible
wm attributes . -transparentcolor magenta
.root.canvas configure -background magenta
"""
elif is_darwin:
# This is untested, but should work following: https://stackoverflow.com/a/44296157/5869139
transparent_setup = r"""
wm attributes . -transparent 1
. configure -background systemTransparent
.root.canvas configure -background systemTransparent
"""
else:
# For Linux there is no common way to create a transparent window
transparent_setup = r""
pack_widgets = r"""
# Position all widgets in the window
pack .root
grid .root.canvas -column 0 -row 0 -columnspan 1 -rowspan 2
"""
# Enable always-on-top behavior, by setting overrideredirect and the topmost attribute.
position_window_on_top = r"""
# Set position and mode of the window - always-on-top behavior
wm overrideredirect . 1
wm geometry . +${x_position}+${y_position}
wm attributes . -topmost 1
"""
# Disable always-on-top behavior
if is_win or is_cygwin or is_darwin:
# On Windows and macOS, we disable the always-on-top behavior while still setting overrideredirect
# (to disable window decorations), but set the topmost attribute to 0.
position_window = r"""
# Set position and mode of the window
wm overrideredirect . 1
wm geometry . +${x_position}+${y_position}
wm attributes . -topmost 0
"""
else:
# On Linux, we must not use overrideredirect; instead, we set the X11-specific type attribute to splash,
# which lets the window manager properly handle the splash screen (without window decorations,
# but allowing other windows to be brought to front).
position_window = r"""
# Set position and mode of the window
wm geometry . +${x_position}+${y_position}
wm attributes . -type splash
"""
raise_window = r"""
raise .
"""
def build_script(text_options=None, always_on_top=False):
"""
This function builds the tcl script for the splash screen.
"""
# Order is important!
script = [
ipc_script,
image_script,
splash_canvas_setup,
]
if text_options:
# If the default font is used we need a different syntax
if text_options['font'] == "TkDefaultFont":
script.append(splash_canvas_default_font % text_options)
else:
script.append(splash_canvas_custom_font % text_options)
script.append(splash_canvas_text % text_options)
script.append(transparent_setup)
script.append(pack_widgets)
script.append(position_window_on_top if always_on_top else position_window)
script.append(raise_window)
return '\n'.join(script)
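# As a usage illustration (hypothetical values, mirroring the call made by the Splash build target in splash.py):
#
#     script = build_script(
#         text_options={
#             'pad_x': 10, 'pad_y': 30,
#             'color': 'black', 'font': 'TkDefaultFont',
#             'font_size': 12, 'default_text': 'Initializing',
#         },
#         always_on_top=True,
#     )
#
# With 'font' set to "TkDefaultFont", the default-font variant of the font setup snippet is used; any other
# font name selects the custom-font variant.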

View File

@ -0,0 +1,124 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Templates to generate .spec files.
"""
onefiletmplt = """# -*- mode: python ; coding: utf-8 -*-
%(preamble)s
a = Analysis(
%(scripts)s,
pathex=%(pathex)s,
binaries=%(binaries)s,
datas=%(datas)s,
hiddenimports=%(hiddenimports)s,
hookspath=%(hookspath)r,
hooksconfig={},
runtime_hooks=%(runtime_hooks)r,
excludes=%(excludes)s,
noarchive=%(noarchive)s,
)
pyz = PYZ(a.pure)
%(splash_init)s
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.datas,%(splash_target)s%(splash_binaries)s
%(options)s,
name='%(name)s',
debug=%(debug_bootloader)s,
bootloader_ignore_signals=%(bootloader_ignore_signals)s,
strip=%(strip)s,
upx=%(upx)s,
upx_exclude=%(upx_exclude)s,
runtime_tmpdir=%(runtime_tmpdir)r,
console=%(console)s,
disable_windowed_traceback=%(disable_windowed_traceback)s,
argv_emulation=%(argv_emulation)r,
target_arch=%(target_arch)r,
codesign_identity=%(codesign_identity)r,
entitlements_file=%(entitlements_file)r,%(exe_options)s
)
"""
onedirtmplt = """# -*- mode: python ; coding: utf-8 -*-
%(preamble)s
a = Analysis(
%(scripts)s,
pathex=%(pathex)s,
binaries=%(binaries)s,
datas=%(datas)s,
hiddenimports=%(hiddenimports)s,
hookspath=%(hookspath)r,
hooksconfig={},
runtime_hooks=%(runtime_hooks)r,
excludes=%(excludes)s,
noarchive=%(noarchive)s,
)
pyz = PYZ(a.pure)
%(splash_init)s
exe = EXE(
pyz,
a.scripts,%(splash_target)s
%(options)s,
exclude_binaries=True,
name='%(name)s',
debug=%(debug_bootloader)s,
bootloader_ignore_signals=%(bootloader_ignore_signals)s,
strip=%(strip)s,
upx=%(upx)s,
console=%(console)s,
disable_windowed_traceback=%(disable_windowed_traceback)s,
argv_emulation=%(argv_emulation)r,
target_arch=%(target_arch)r,
codesign_identity=%(codesign_identity)r,
entitlements_file=%(entitlements_file)r,%(exe_options)s
)
coll = COLLECT(
exe,
a.binaries,
a.datas,%(splash_binaries)s
strip=%(strip)s,
upx=%(upx)s,
upx_exclude=%(upx_exclude)s,
name='%(name)s',
)
"""
bundleexetmplt = """app = BUNDLE(
exe,
name='%(name)s.app',
icon=%(icon)s,
bundle_identifier=%(bundle_identifier)s,
)
"""
bundletmplt = """app = BUNDLE(
coll,
name='%(name)s.app',
icon=%(icon)s,
bundle_identifier=%(bundle_identifier)s,
)
"""
splashtmpl = """splash = Splash(
%(splash_image)r,
binaries=a.binaries,
datas=a.datas,
text_pos=None,
text_size=12,
minify_script=True,
always_on_top=True,
)
"""

View File

@ -0,0 +1,756 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
import fnmatch
import glob
import hashlib
import marshal
import os
import pathlib
import platform
import py_compile
import shutil
import struct
import subprocess
import sys
import zipfile
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.compat import (EXTENSION_SUFFIXES, is_darwin, is_win)
from PyInstaller.config import CONF
from PyInstaller.exceptions import InvalidSrcDestTupleError
from PyInstaller.utils import misc
if is_win:
from PyInstaller.utils.win32 import versioninfo
if is_darwin:
import PyInstaller.utils.osx as osxutils
logger = logging.getLogger(__name__)
# -- Helpers for checking guts.
#
# NOTE: by _GUTS it is meant intermediate files and data structures that PyInstaller creates for bundling files and
# creating final executable.
def _check_guts_eq(attr_name, old_value, new_value, last_build):
"""
Rebuild is required if values differ.
"""
if old_value != new_value:
logger.info("Building because %s changed", attr_name)
return True
return False
def _check_guts_toc_mtime(attr_name, old_toc, new_toc, last_build):
"""
Rebuild is required if mtimes of files listed in old TOC are newer than last_build.
Use this for calculated/analysed values read from cache.
"""
for dest_name, src_name, typecode in old_toc:
if misc.mtime(src_name) > last_build:
logger.info("Building because %s changed", src_name)
return True
return False
def _check_guts_toc(attr_name, old_toc, new_toc, last_build):
"""
Rebuild is required if either TOC content changed or mtimes of files listed in old TOC are newer than last_build.
Use this for input parameters.
"""
return _check_guts_eq(attr_name, old_toc, new_toc, last_build) or \
_check_guts_toc_mtime(attr_name, old_toc, new_toc, last_build)
def add_suffix_to_extension(dest_name, src_name, typecode):
"""
Take a TOC entry (dest_name, src_name, typecode) and adjust the dest_name for EXTENSION to include the full library
suffix.
"""
# No-op for non-extension
if typecode != 'EXTENSION':
return dest_name, src_name, typecode
# If dest_name completely fits into end of the src_name, it has already been processed.
if src_name.endswith(dest_name):
return dest_name, src_name, typecode
# Change the dotted name into a relative path. This places C extensions in the Python-standard location.
dest_name = dest_name.replace('.', os.sep)
# In some rare cases extension might already contain a suffix. Skip it in this case.
if os.path.splitext(dest_name)[1] not in EXTENSION_SUFFIXES:
# Determine the base name of the file.
base_name = os.path.basename(dest_name)
assert '.' not in base_name
# Use this file's existing extension. For extensions such as ``libzmq.cp36-win_amd64.pyd``, we cannot use
# ``os.path.splitext``, which would give only the ```.pyd`` part of the extension.
dest_name = dest_name + os.path.basename(src_name)[len(base_name):]
return dest_name, src_name, typecode
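# A worked example of the transformation above (hypothetical paths): the TOC entry
#     ('mypkg.myext', '/site-packages/mypkg/myext.cp311-win_amd64.pyd', 'EXTENSION')
# has its dotted name turned into a relative path and inherits the full library suffix of its source file,
# yielding
#     ('mypkg/myext.cp311-win_amd64.pyd', '/site-packages/mypkg/myext.cp311-win_amd64.pyd', 'EXTENSION')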
def process_collected_binary(
src_name,
dest_name,
use_strip=False,
use_upx=False,
upx_exclude=None,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
strict_arch_validation=False
):
"""
Process the collected binary using strip or UPX (or both), and apply any platform-specific processing. On macOS,
this rewrites the library paths in the headers and (re-)signs the binary. An on-disk cache is used to avoid processing
the same binary with the same options over and over.
In addition to given arguments, this function also uses CONF['cachedir'] and CONF['upx_dir'].
"""
from PyInstaller.config import CONF
# We need to use cache in the following scenarios:
# * extra binary processing due to use of `strip` or `upx`
# * building on macOS, where we need to rewrite library paths in binaries' headers and (re-)sign the binaries.
if not use_strip and not use_upx and not is_darwin:
return src_name
# Skip processing if this is Windows .manifest file. We used to process these as part of support for collecting
# WinSxS assemblies, but that was removed in PyInstaller 6.0. So in case we happen to get a .manifest file here,
# return it as-is.
if is_win and src_name.lower().endswith(".manifest"):
return src_name
# Match against provided UPX exclude patterns.
upx_exclude = upx_exclude or []
if use_upx:
src_path = pathlib.PurePath(src_name)
for upx_exclude_entry in upx_exclude:
# pathlib.PurePath.match() matches from right to left, and supports * wildcard, but does not support the
# "**" syntax for directory recursion. Case sensitivity follows the OS default.
if src_path.match(upx_exclude_entry):
logger.info("Disabling UPX for %s due to match in exclude pattern: %s", src_name, upx_exclude_entry)
use_upx = False
break
# Prepare cache directory path. Cache is tied to python major/minor version, but also to various processing options.
pyver = f'py{sys.version_info[0]}{sys.version_info[1]}'
arch = platform.architecture()[0]
cache_dir = os.path.join(
CONF['cachedir'],
f'bincache{use_strip:d}{use_upx:d}{pyver}{arch}',
)
if target_arch:
cache_dir = os.path.join(cache_dir, target_arch)
if is_darwin:
# Separate by codesign identity
if codesign_identity:
# Compute hex digest of codesign identity string to prevent issues with invalid characters.
csi_hash = hashlib.sha256(codesign_identity.encode('utf-8'))
cache_dir = os.path.join(cache_dir, csi_hash.hexdigest())
else:
cache_dir = os.path.join(cache_dir, 'adhoc') # ad-hoc signing
# Separate by entitlements
if entitlements_file:
# Compute hex digest of entitlements file contents
with open(entitlements_file, 'rb') as fp:
ef_hash = hashlib.sha256(fp.read())
cache_dir = os.path.join(cache_dir, ef_hash.hexdigest())
else:
cache_dir = os.path.join(cache_dir, 'no-entitlements')
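# For example (hypothetical values): on macOS with strip enabled, a signing identity, and an entitlements
# file, the resulting cache directory is roughly
#     <cachedir>/bincache10py31264bit/<target_arch>/<sha256 of identity>/<sha256 of entitlements>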
os.makedirs(cache_dir, exist_ok=True)
# Load cache index, if available
cache_index_file = os.path.join(cache_dir, "index.dat")
try:
cache_index = misc.load_py_data_struct(cache_index_file)
except FileNotFoundError:
cache_index = {}
except Exception:
# Tell the user they may want to fix their cache... However, do not delete it for them; if it keeps getting
# corrupted, we will never find out.
logger.warning("PyInstaller bincache may be corrupted; use pyinstaller --clean to fix it.")
raise
# Look up the file in cache; use case-normalized destination name as identifier.
cached_id = os.path.normcase(dest_name)
cached_name = os.path.join(cache_dir, dest_name)
src_digest = _compute_file_digest(src_name)
if cached_id in cache_index:
# If digest matches to the cached digest, return the cached file...
if src_digest == cache_index[cached_id]:
return cached_name
# ... otherwise remove it.
os.remove(cached_name)
cmd = None
if use_upx:
# If we are to apply both strip and UPX, apply strip first.
if use_strip:
src_name = process_collected_binary(
src_name,
dest_name,
use_strip=True,
use_upx=False,
target_arch=target_arch,
codesign_identity=codesign_identity,
entitlements_file=entitlements_file,
strict_arch_validation=strict_arch_validation,
)
# We need to avoid using UPX with Windows DLLs that have Control Flow Guard enabled, as it breaks them.
if is_win and versioninfo.pefile_check_control_flow_guard(src_name):
logger.info('Disabling UPX for %s due to CFG!', src_name)
elif misc.is_file_qt_plugin(src_name):
logger.info('Disabling UPX for %s due to it being a Qt plugin!', src_name)
else:
upx_exe = 'upx'
upx_dir = CONF['upx_dir']
if upx_dir:
upx_exe = os.path.join(upx_dir, upx_exe)
upx_options = [
# Do not compress icons, so that they can still be accessed externally.
'--compress-icons=0',
# Use LZMA compression.
'--lzma',
# Quiet mode.
'-q',
]
if is_win:
# Binaries built with Visual Studio 7.1 require --strip-loadconf or they will not compress.
upx_options.append('--strip-loadconf')
cmd = [upx_exe, *upx_options, cached_name]
elif use_strip:
strip_options = []
if is_darwin:
# The default strip behavior breaks some shared libraries under macOS.
strip_options = ["-S"] # -S = strip only debug symbols.
cmd = ["strip", *strip_options, cached_name]
# Ensure parent path exists
os.makedirs(os.path.dirname(cached_name), exist_ok=True)
# Use `shutil.copyfile` to copy the file with default permissions bits, then manually set executable
# bits. This way, we avoid copying permission bits and metadata from the original file, which might be too
# restrictive for further processing (read-only permissions, immutable flag on FreeBSD, and so on).
shutil.copyfile(src_name, cached_name)
os.chmod(cached_name, 0o755)
if cmd:
logger.info("Executing: %s", " ".join(cmd))
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# On macOS, we need to modify the given binary's paths to the dependent libraries, in order to ensure they are
# relocatable and always refer to location within the frozen application. Specifically, we make all dependent
# library paths relative to @rpath, and set @rpath to point to the top-level application directory, relative to
# the binary's location (i.e., @loader_path).
#
# While modifying the headers invalidates existing signatures, we avoid removing them in order to speed things up
# (and to avoid potential bugs in the codesign utility, like the one reported on Mac OS 10.13 in #6167).
# The forced re-signing at the end should take care of the invalidated signatures.
if is_darwin:
try:
osxutils.binary_to_target_arch(cached_name, target_arch, display_name=src_name)
#osxutils.remove_signature_from_binary(cached_name) # Disabled as per comment above.
target_rpath = str(
pathlib.PurePath('@loader_path', *['..' for level in pathlib.PurePath(dest_name).parent.parts])
)
osxutils.set_dylib_dependency_paths(cached_name, target_rpath)
osxutils.sign_binary(cached_name, codesign_identity, entitlements_file)
except osxutils.InvalidBinaryError:
# Raised by osxutils.binary_to_target_arch when the given file is not a valid macOS binary (for example,
# a linux .so file; see issue #6327). The error prevents any further processing, so just ignore it.
pass
except osxutils.IncompatibleBinaryArchError:
# Raised by osxutils.binary_to_target_arch when the given file does not contain (all) required arch slices.
# Depending on the strict validation mode, re-raise or swallow the error.
#
# Strict validation should be enabled only for binaries where the architecture *must* match the target one,
# i.e., the extension modules. Everything else is pretty much a gray area, for example:
# * a universal2 extension may have its x86_64 and arm64 slices linked against distinct single-arch/thin
# shared libraries
# * a collected executable that is launched by python code via a subprocess can be x86_64-only, even though
# the actual python code is running on M1 in native arm64 mode.
if strict_arch_validation:
raise
logger.debug("File %s failed optional architecture validation - collecting as-is!", src_name)
except Exception as e:
raise SystemError(f"Failed to process binary {cached_name!r}!") from e
# Update cache index
cache_index[cached_id] = src_digest
misc.save_py_data_struct(cache_index_file, cache_index)
return cached_name
def _compute_file_digest(filename):
hasher = hashlib.md5()
with open(filename, "rb") as fp:
for chunk in iter(lambda: fp.read(16 * 1024), b""):
hasher.update(chunk)
return bytearray(hasher.digest())
def _check_path_overlap(path):
"""
Check that path does not overlap with WORKPATH or SPECPATH (i.e., WORKPATH and SPECPATH may not start with path,
which could be caused by a faulty hand-edited specfile).
Raise SystemExit if there is overlap, return True otherwise
"""
from PyInstaller.config import CONF
specerr = 0
if CONF['workpath'].startswith(path):
logger.error('Specfile error: The output path "%s" contains WORKPATH (%s)', path, CONF['workpath'])
specerr += 1
if CONF['specpath'].startswith(path):
logger.error('Specfile error: The output path "%s" contains SPECPATH (%s)', path, CONF['specpath'])
specerr += 1
if specerr:
raise SystemExit(
'Error: Please edit/recreate the specfile (%s) and set a different output name (e.g. "dist").' %
CONF['spec']
)
return True
def _make_clean_directory(path):
"""
Create a clean directory from the given directory name.
"""
if _check_path_overlap(path):
if os.path.isdir(path) or os.path.isfile(path):
try:
os.remove(path)
except OSError:
_rmtree(path)
os.makedirs(path, exist_ok=True)
def _rmtree(path):
"""
Remove directory and all its contents, but only after user confirmation, or if the -y option is set.
"""
from PyInstaller.config import CONF
if CONF['noconfirm']:
choice = 'y'
elif sys.stdout.isatty():
choice = input(
'WARNING: The output directory "%s" and ALL ITS CONTENTS will be REMOVED! Continue? (y/N)' % path
)
else:
raise SystemExit(
'Error: The output directory "%s" is not empty. Please remove all its contents or use the -y option (remove'
' output directory without confirmation).' % path
)
if choice.strip().lower() == 'y':
if not CONF['noconfirm']:
print("On your own risk, you can use the option `--noconfirm` to get rid of this question.")
logger.info('Removing dir %s', path)
shutil.rmtree(path)
else:
raise SystemExit('User aborted')
# TODO Refactor to prohibit empty target directories. As the docstring below documents, this function currently permits
# the second item of each 2-tuple in "hook.datas" to be the empty string, in which case the target directory defaults to
# the source directory's basename. However, this functionality is very fragile and hence bad. Instead:
#
# * An exception should be raised if such item is empty.
# * All hooks currently passing the empty string for such item (e.g.,
# "hooks/hook-babel.py", "hooks/hook-matplotlib.py") should be refactored
# to instead pass such basename.
def format_binaries_and_datas(binaries_or_datas, workingdir=None):
"""
Convert the passed list of hook-style 2-tuples into a returned set of `TOC`-style 2-tuples.
Elements of the passed list are 2-tuples `(source_dir_or_glob, target_dir)`.
Elements of the returned set are 2-tuples `(target_file, source_file)`.
For backwards compatibility, the order of elements in the former tuples is the reverse of the order of elements in
the latter tuples!
Parameters
----------
binaries_or_datas : list
List of hook-style 2-tuples (e.g., the top-level `binaries` and `datas` attributes defined by hooks) whose:
* The first element is either:
* A glob matching only the absolute or relative paths of source non-Python data files.
* The absolute or relative path of a source directory containing only source non-Python data files.
* The second element is the relative path of the target directory into which these source files will be
recursively copied.
If the optional `workingdir` parameter is passed, source paths may be either absolute or relative; else, source
paths _must_ be absolute.
workingdir : str
Optional absolute path of the directory to which all relative source paths in the `binaries_or_datas`
parameter will be prepended by (and hence converted into absolute paths) _or_ `None` if these paths are to be
preserved as relative. Defaults to `None`.
Returns
----------
set
Set of `TOC`-style 2-tuples whose:
* First element is the absolute or relative path of a target file.
* Second element is the absolute or relative path of the corresponding source file to be copied to this target
file.
"""
toc_datas = set()
for src_root_path_or_glob, trg_root_dir in binaries_or_datas:
# Disallow empty source path. Those are typically result of errors, and result in implicit collection of the
# whole current working directory, which is never a good idea.
if not src_root_path_or_glob:
raise InvalidSrcDestTupleError(
(src_root_path_or_glob, trg_root_dir),
"Empty SRC is not allowed when adding binary and data files, as it would result in collection of the "
"whole current working directory."
)
if not trg_root_dir:
raise InvalidSrcDestTupleError(
(src_root_path_or_glob, trg_root_dir),
"Empty DEST_DIR is not allowed - to collect files into application's top-level directory, use "
f"{os.curdir!r}."
)
# Disallow absolute target paths, as well as target paths that would end up pointing outside of the
# application's top-level directory.
if os.path.isabs(trg_root_dir):
raise InvalidSrcDestTupleError((src_root_path_or_glob, trg_root_dir), "DEST_DIR must be a relative path!")
if os.path.normpath(trg_root_dir).startswith('..'):
raise InvalidSrcDestTupleError(
(src_root_path_or_glob, trg_root_dir),
"DEST_DIR must not point outside of application's top-level directory!",
)
# Convert relative to absolute paths if required.
if workingdir and not os.path.isabs(src_root_path_or_glob):
src_root_path_or_glob = os.path.join(workingdir, src_root_path_or_glob)
# Normalize paths.
src_root_path_or_glob = os.path.normpath(src_root_path_or_glob)
if os.path.isfile(src_root_path_or_glob):
src_root_paths = [src_root_path_or_glob]
else:
# List of the absolute paths of all source paths matching the current glob.
src_root_paths = glob.glob(src_root_path_or_glob)
if not src_root_paths:
raise SystemExit(f'Unable to find {src_root_path_or_glob!r} when adding binary and data files.')
for src_root_path in src_root_paths:
if os.path.isfile(src_root_path):
# Normalizing the result to remove redundant relative paths (e.g., removing "./" from "trg/./file").
toc_datas.add((
os.path.normpath(os.path.join(trg_root_dir, os.path.basename(src_root_path))),
os.path.normpath(src_root_path),
))
elif os.path.isdir(src_root_path):
for src_dir, src_subdir_basenames, src_file_basenames in os.walk(src_root_path):
# Ensure the current source directory is a subdirectory of the passed top-level source directory.
# Since os.walk() does *NOT* follow symlinks by default, this should be the case. (But let's make
# sure.)
assert src_dir.startswith(src_root_path)
# Relative path of the current target directory, obtained by:
#
# * Stripping the top-level source directory from the current source directory (e.g., removing
# "/top" from "/top/dir").
# * Normalizing the result to remove redundant relative paths (e.g., removing "./" from
# "trg/./file").
trg_dir = os.path.normpath(os.path.join(trg_root_dir, os.path.relpath(src_dir, src_root_path)))
for src_file_basename in src_file_basenames:
src_file = os.path.join(src_dir, src_file_basename)
if os.path.isfile(src_file):
# Normalize the result to remove redundant relative paths (e.g., removing "./" from
# "trg/./file").
toc_datas.add((
os.path.normpath(os.path.join(trg_dir, src_file_basename)), os.path.normpath(src_file)
))
return toc_datas
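# A usage sketch (hypothetical paths): a hook entry that collects every .json file under "data/" into the
# application's "data" directory,
#
#     format_binaries_and_datas([('data/*.json', 'data')], workingdir='/project')
#
# yields TOC-style 2-tuples such as ('data/config.json', '/project/data/config.json').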
def get_code_object(modname, filename):
"""
Get the code-object for a module.
This is a simplified, non-performant version which circumvents __pycache__.
"""
if filename in ('-', None):
# This is a NamespacePackage; modulegraph marks them by using the filename '-' (but wants to use None, so
# check for None, too, to be forward-compatible).
logger.debug('Compiling namespace package %s', modname)
txt = '#\n'
code_object = compile(txt, filename, 'exec')
else:
_, ext = os.path.splitext(filename)
ext = ext.lower()
if ext == '.pyc':
# The module is available in binary-only form. Read the contents of .pyc file using helper function, which
# supports reading from either stand-alone or archive-embedded .pyc files.
logger.debug('Reading code object from .pyc file %s', filename)
pyc_data = _read_pyc_data(filename)
code_object = marshal.loads(pyc_data[16:])
else:
# Assume this is a source .py file, but allow an arbitrary extension (other than .pyc, which is taken in
# the above branch). This allows entry-point scripts to have an arbitrary (or no) extension, as tested by
# the `test_arbitrary_ext` in `test_basic.py`.
logger.debug('Compiling python script/module file %s', filename)
with open(filename, 'rb') as f:
source = f.read()
# If entry-point script has no suffix, append .py when compiling the source. In POSIX builds, the executable
# has no suffix either; this causes issues with `traceback` module, as it tries to read the executable file
# when trying to look up the code for the entry-point script (when current working directory contains the
# executable).
_, ext = os.path.splitext(filename)
if not ext:
logger.debug("Appending .py to compiled entry-point name...")
filename += '.py'
try:
code_object = compile(source, filename, 'exec')
except SyntaxError:
logger.warning("Sytnax error while compiling %s", filename)
raise
return code_object
def strip_paths_in_code(co, new_filename=None):
# Paths to remove from filenames embedded in code objects
replace_paths = sys.path + CONF['pathex']
# Make sure paths end with os.sep and the longest paths are first
replace_paths = sorted((os.path.join(f, '') for f in replace_paths), key=len, reverse=True)
if new_filename is None:
original_filename = os.path.normpath(co.co_filename)
for f in replace_paths:
if original_filename.startswith(f):
new_filename = original_filename[len(f):]
break
else:
return co
code_func = type(co)
consts = tuple(
strip_paths_in_code(const_co, new_filename) if isinstance(const_co, code_func) else const_co
for const_co in co.co_consts
)
return co.replace(co_consts=consts, co_filename=new_filename)
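# Example (hypothetical paths): with '/usr/lib/python3.12' present in sys.path, a code object whose
# co_filename is '/usr/lib/python3.12/json/decoder.py' is rewritten - recursively, including any nested code
# objects in co_consts - to the relative 'json/decoder.py'.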
def _should_include_system_binary(binary_tuple, exceptions):
"""
Return True if the given binary_tuple describes a system binary that should be included.
Exclude all system library binaries other than those with "lib-dynload" in the destination or "python" in the
source, except for those matching the patterns in the exceptions list. Intended to be used from the Analysis
exclude_system_libraries method.
"""
dest = binary_tuple[0]
if dest.startswith('lib-dynload'):
return True
src = binary_tuple[1]
if fnmatch.fnmatch(src, '*python*'):
return True
if not src.startswith('/lib') and not src.startswith('/usr/lib'):
return True
for exception in exceptions:
if fnmatch.fnmatch(dest, exception):
return True
return False
def compile_pymodule(name, src_path, workpath, code_cache=None):
"""
Given the TOC entry (name, path, typecode) for a pure-python module, compile the module in the specified working
directory, and return the TOC entry for collecting the byte-compiled module. No-op for typecodes other than
PYMODULE.
"""
# Construct the target .pyc filename in the workpath
split_name = name.split(".")
if "__init__" in src_path:
# __init__ module; use "__init__" as module name, and construct parent path using all components of the
# fully-qualified name
parent_dirs = split_name
mod_basename = "__init__"
else:
# Regular module; use last component of the fully-qualified name as module name, and the rest as the parent
# path.
parent_dirs = split_name[:-1]
mod_basename = split_name[-1]
pyc_path = os.path.join(workpath, *parent_dirs, mod_basename + '.pyc')
# If .pyc file already exists in our workpath, check if we can re-use it. For that:
# - its modification timestamp must be newer than that of the source file
# - it must be compiled for compatible python version
if os.path.exists(pyc_path):
can_reuse = False
if misc.mtime(pyc_path) > misc.mtime(src_path):
with open(pyc_path, 'rb') as fh:
can_reuse = fh.read(4) == compat.BYTECODE_MAGIC
if can_reuse:
return pyc_path
# Ensure the existence of parent directories for the target pyc path
os.makedirs(os.path.dirname(pyc_path), exist_ok=True)
# Check if optional cache contains module entry
code_object = code_cache.get(name, None) if code_cache else None
if code_object is None:
_, ext = os.path.splitext(src_path)
ext = ext.lower()
if ext == '.py':
# Source py file; compile...
py_compile.compile(src_path, pyc_path)
# ... and read the contents
with open(pyc_path, 'rb') as fp:
pyc_data = fp.read()
elif ext == '.pyc':
# The module is available in binary-only form. Read the contents of .pyc file using helper function, which
# supports reading from either stand-alone or archive-embedded .pyc files.
pyc_data = _read_pyc_data(src_path)
else:
raise ValueError(f"Invalid python module file {src_path}; unhandled extension {ext}!")
# Unmarshal code object; this is necessary if we want to strip paths from it
code_object = marshal.loads(pyc_data[16:])
# Strip code paths from the code object
code_object = strip_paths_in_code(code_object)
# Write module file
with open(pyc_path, 'wb') as fh:
fh.write(compat.BYTECODE_MAGIC)
fh.write(struct.pack('<I', 0b01)) # PEP-552: hash-based pyc, check_source=False
fh.write(b'\00' * 8) # Zero the source hash
marshal.dump(code_object, fh)
# Return output path
return pyc_path
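# Layout of the .pyc file written above (per PEP 552; summarized here for
# reference, not part of the original module):
#   bytes  0..3   BYTECODE_MAGIC (python-version-specific magic number)
#   bytes  4..7   flags; 0b01 marks a hash-based pyc with check_source=False
#   bytes  8..15  source hash (zeroed above, since it is never checked)
#   bytes 16..    marshalled code object
# This 16-byte header is why the code above unmarshals from pyc_data[16:].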
def _read_pyc_data(filename):
"""
Helper for reading data from .pyc files. Supports both stand-alone and archive-embedded .pyc files. Used by
`compile_pymodule` and `get_code_object` helper functions.
"""
src_file = pathlib.Path(filename)
if src_file.is_file():
# Stand-alone .pyc file.
pyc_data = src_file.read_bytes()
else:
# Check if .pyc file is stored in a .zip archive, as is the case for stdlib modules in embeddable
# python on Windows.
parent_zip_file = misc.path_to_parent_archive(src_file)
if parent_zip_file is not None and zipfile.is_zipfile(parent_zip_file):
with zipfile.ZipFile(parent_zip_file, 'r') as zip_archive:
# NOTE: zip entry names must be in POSIX format, even on Windows!
zip_entry_name = str(src_file.relative_to(parent_zip_file).as_posix())
pyc_data = zip_archive.read(zip_entry_name)
else:
raise FileNotFoundError(f"Cannot find .pyc file {filename!r}!")
# Verify the python version
if pyc_data[:4] != compat.BYTECODE_MAGIC:
raise ValueError(f"The .pyc module {filename} was compiled for incompatible version of python!")
return pyc_data
def postprocess_binaries_toc_pywin32(binaries):
"""
    Process the given `binaries` TOC list to apply a workaround for the `pywin32` package, fixing the target directory
for collected extensions.
"""
# Ensure that all files collected from `win32` or `pythonwin` into top-level directory are put back into
# their corresponding directories. They end up in top-level directory because `pywin32.pth` adds both
# directories to the `sys.path`, so they end up visible as top-level directories. But these extensions
# might in fact be linked against each other, so we should preserve the directory layout for consistency
# between modulegraph-discovered extensions and linked binaries discovered by link-time dependency analysis.
# Within the same framework, also consider `pywin32_system32`, just in case.
PYWIN32_SUBDIRS = {'win32', 'pythonwin', 'pywin32_system32'}
processed_binaries = []
for dest_name, src_name, typecode in binaries:
dest_path = pathlib.PurePath(dest_name)
src_path = pathlib.PurePath(src_name)
if dest_path.parent == pathlib.PurePath('.') and src_path.parent.name.lower() in PYWIN32_SUBDIRS:
dest_path = pathlib.PurePath(src_path.parent.name) / dest_path
dest_name = str(dest_path)
processed_binaries.append((dest_name, src_name, typecode))
return processed_binaries
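# For example (illustrative): an extension collected from the pywin32 package as
#   ('win32api.pyd', 'C:\\...\\site-packages\\win32\\win32api.pyd', 'EXTENSION')
# is rewritten into
#   ('win32\\win32api.pyd', 'C:\\...\\site-packages\\win32\\win32api.pyd', 'EXTENSION')
# so that the frozen layout mirrors the original win32 / pythonwin / pywin32_system32 directories.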
def postprocess_binaries_toc_pywin32_anaconda(binaries):
"""
    Process the given `binaries` TOC list to apply a workaround for the Anaconda `pywin32` package, fixing the location
of collected `pywintypes3X.dll` and `pythoncom3X.dll`.
"""
# The Anaconda-provided `pywin32` package installs three copies of `pywintypes3X.dll` and `pythoncom3X.dll`,
# located in the following directories (relative to the environment):
# - Library/bin
# - Lib/site-packages/pywin32_system32
# - Lib/site-packages/win32
#
# This turns our dependency scanner and directory layout preservation mechanism into a lottery based on what
# `pywin32` modules are imported and in what order. To keep things simple, we deal with this insanity by
# post-processing the `binaries` list, modifying the destination of offending copies, and let the final TOC
# list normalization deal with potential duplicates.
DLL_CANDIDATES = {
f"pywintypes{sys.version_info[0]}{sys.version_info[1]}.dll",
f"pythoncom{sys.version_info[0]}{sys.version_info[1]}.dll",
}
DUPLICATE_DIRS = {
pathlib.PurePath('.'),
pathlib.PurePath('win32'),
}
processed_binaries = []
for dest_name, src_name, typecode in binaries:
# Check if we need to divert - based on the destination base name and destination parent directory.
dest_path = pathlib.PurePath(dest_name)
if dest_path.name.lower() in DLL_CANDIDATES and dest_path.parent in DUPLICATE_DIRS:
dest_path = pathlib.PurePath("pywin32_system32") / dest_path.name
dest_name = str(dest_path)
processed_binaries.append((dest_name, src_name, typecode))
return processed_binaries

View File

@ -0,0 +1,733 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
# ----------------------------------------------------------------------------
"""
Various classes and functions to provide backwards-compatibility with previous versions of Python.
"""
from __future__ import annotations
import errno
import importlib.machinery
import importlib.util
import os
import platform
import site
import subprocess
import sys
import shutil
import types
from PyInstaller._shared_with_waf import _pyi_machine
from PyInstaller.exceptions import ExecCommandFailed
# setup.py sets this environment variable to avoid errors due to unmet run-time dependencies. The PyInstaller.compat
# module is imported by setup.py to build wheels, and some dependencies that are otherwise required at run-time
# (importlib-metadata on python < 3.10, pywin32-ctypes on Windows) might not be present while building wheels,
# nor are they required during that phase.
_setup_py_mode = os.environ.get('_PYINSTALLER_SETUP_PY', '0') != '0'
# PyInstaller requires importlib.metadata from python >= 3.10 stdlib, or equivalent importlib-metadata >= 4.6.
if _setup_py_mode:
importlib_metadata = None
else:
if sys.version_info >= (3, 10):
import importlib.metadata as importlib_metadata
else:
try:
import importlib_metadata
except ImportError as e:
from PyInstaller.exceptions import ImportlibMetadataError
raise ImportlibMetadataError() from e
import packaging.version # For importlib_metadata version check
# Validate the version
if packaging.version.parse(importlib_metadata.version("importlib-metadata")) < packaging.version.parse("4.6"):
from PyInstaller.exceptions import ImportlibMetadataError
raise ImportlibMetadataError()
# Strict collect mode, which raises error when trying to collect duplicate files into PKG/CArchive or COLLECT.
strict_collect_mode = os.environ.get("PYINSTALLER_STRICT_COLLECT_MODE", "0") != "0"
# Copied from https://docs.python.org/3/library/platform.html#cross-platform.
is_64bits: bool = sys.maxsize > 2**32
# Distinguish specific code for various Python versions. Variables 'is_pyXY' mean that Python X.Y and up is supported.
# Keep even unsupported versions here to keep 3rd-party hooks working.
is_py35 = sys.version_info >= (3, 5)
is_py36 = sys.version_info >= (3, 6)
is_py37 = sys.version_info >= (3, 7)
is_py38 = sys.version_info >= (3, 8)
is_py39 = sys.version_info >= (3, 9)
is_py310 = sys.version_info >= (3, 10)
is_py311 = sys.version_info >= (3, 11)
is_py312 = sys.version_info >= (3, 12)
is_win = sys.platform.startswith('win')
is_win_10 = is_win and (platform.win32_ver()[0] == '10')
is_win_wine = False # Running under Wine; determined later on.
is_cygwin = sys.platform == 'cygwin'
is_darwin = sys.platform == 'darwin' # Mac OS X
# Unix platforms
is_linux = sys.platform.startswith('linux')
is_solar = sys.platform.startswith('sun') # Solaris
is_aix = sys.platform.startswith('aix')
is_freebsd = sys.platform.startswith('freebsd')
is_openbsd = sys.platform.startswith('openbsd')
is_hpux = sys.platform.startswith('hp-ux')
# Some code paths are shared across several unix platforms (e.g., Linux, Solaris, AIX).
# Mac OS is not considered unix here, since PyInstaller contains many Mac-specific code paths.
is_unix = is_linux or is_solar or is_aix or is_freebsd or is_hpux or is_openbsd
# Linux distributions such as Alpine or OpenWRT use musl as their libc implementation and resultantly need specially
# compiled bootloaders. On musl systems, ldd with no arguments prints 'musl' and its version.
is_musl = is_linux and "musl" in subprocess.run(["ldd"], capture_output=True, encoding="utf-8").stderr
# macOS version
_macos_ver = tuple(int(x) for x in platform.mac_ver()[0].split('.')) if is_darwin else None
# macOS 11 (Big Sur): if python is not compiled with Big Sur support, it ends up in compatibility mode by default, which
# is indicated by platform.mac_ver() returning '10.16'. The lack of proper Big Sur support breaks find_library()
# function from ctypes.util module, as starting with Big Sur, shared libraries are not visible on disk anymore. Support
# for the new library search mechanism was added in python 3.9 when compiled with Big Sur support. In such cases,
# platform.mac_ver() reports version as '11.x'. The behavior can be further modified via SYSTEM_VERSION_COMPAT
# environment variable; which allows explicitly enabling or disabling the compatibility mode. However, note that
# disabling the compatibility mode and using python that does not properly support Big Sur still leaves find_library()
# broken (which is a scenario that we ignore at the moment).
# The same logic applies to macOS 12 (Monterey).
is_macos_11_compat = bool(_macos_ver) and _macos_ver[0:2] == (10, 16) # Big Sur or newer in compat mode
is_macos_11_native = bool(_macos_ver) and _macos_ver[0:2] >= (11, 0) # Big Sur or newer in native mode
is_macos_11 = is_macos_11_compat or is_macos_11_native # Big Sur or newer
# The file name of the dynamic python library differs from platform to platform.
_pyver = sys.version_info[:2]
if is_win or is_cygwin:
PYDYLIB_NAMES = {
'python%d%d.dll' % _pyver,
'libpython%d%d.dll' % _pyver,
'libpython%d.%d.dll' % _pyver,
} # For MSYS2 environment
elif is_darwin:
# libpython%d.%dm.dylib for Conda virtual environment installations
PYDYLIB_NAMES = {
'Python',
'.Python',
'Python%d' % _pyver[0],
'libpython%d.%d.dylib' % _pyver,
}
elif is_aix:
# Shared libs on AIX may be archives with shared object members, hence the ".a" suffix. However, starting with
# python 2.7.11 libpython?.?.so and Python3 libpython?.?m.so files are produced.
PYDYLIB_NAMES = {
'libpython%d.%d.a' % _pyver,
'libpython%d.%d.so' % _pyver,
}
elif is_freebsd:
PYDYLIB_NAMES = {
'libpython%d.%d.so.1' % _pyver,
'libpython%d.%d.so.1.0' % _pyver,
}
elif is_openbsd:
PYDYLIB_NAMES = {'libpython%d.%d.so.0.0' % _pyver}
elif is_hpux:
PYDYLIB_NAMES = {'libpython%d.%d.so' % _pyver}
elif is_unix:
# Other *nix platforms.
# Python 2 .so library on Linux is: libpython2.7.so.1.0
# Python 3 .so library on Linux is: libpython3.3.so.1.0
PYDYLIB_NAMES = {'libpython%d.%d.so.1.0' % _pyver, 'libpython%d.%d.so' % _pyver}
else:
raise SystemExit('Your platform is not yet supported. Please define constant PYDYLIB_NAMES for your platform.')
# In a virtual environment created by virtualenv (github.com/pypa/virtualenv) there exists sys.real_prefix with the path
# to the base Python installation from which the virtual environment was created. This is true regardless of the version
# of Python used to execute the virtualenv command.
#
# In a virtual environment created by the venv module available in the Python standard lib, there exists sys.base_prefix
# with the path to the base implementation. This does not exist in a virtual environment created by virtualenv.
#
# The following code creates compat.is_venv and compat.is_virtualenv, which are True when running in a virtual
# environment, and also compat.base_prefix with the path to the base Python installation.
base_prefix: str = os.path.abspath(getattr(sys, 'real_prefix', getattr(sys, 'base_prefix', sys.prefix)))
# Ensure `base_prefix` does not contain any relative parts.
is_venv = is_virtualenv = base_prefix != os.path.abspath(sys.prefix)
# Conda environments sometimes have different paths or apply patches to packages that can affect how a hook or package
# should access resources. Method for determining conda taken from https://stackoverflow.com/questions/47610844#47610844
is_conda = os.path.isdir(os.path.join(base_prefix, 'conda-meta'))
# Similar to ``is_conda``, but ``False`` if another ``venv``-like manager is used on top of conda. In that case, no
# packages encountered will be conda packages, meaning that the default non-conda behaviour is generally desired
# from PyInstaller.
is_pure_conda = os.path.isdir(os.path.join(sys.prefix, 'conda-meta'))
# Full path to python interpreter.
python_executable = getattr(sys, '_base_executable', sys.executable)
# Is this Python from Microsoft App Store (Windows only)? Python from Microsoft App Store has executable pointing at
# empty shims.
is_ms_app_store = is_win and os.path.getsize(python_executable) == 0
if is_ms_app_store:
# Locate the actual executable inside base_prefix.
python_executable = os.path.join(base_prefix, os.path.basename(python_executable))
if not os.path.exists(python_executable):
raise SystemExit(
'PyInstaller cannot locate real python executable belonging to Python from Microsoft App Store!'
)
# Bytecode magic value
BYTECODE_MAGIC = importlib.util.MAGIC_NUMBER
# List of suffixes for Python C extension modules.
EXTENSION_SUFFIXES = importlib.machinery.EXTENSION_SUFFIXES
ALL_SUFFIXES = importlib.machinery.all_suffixes()
# On Windows we require pywin32-ctypes.
# -> all pyinstaller modules should use win32api from PyInstaller.compat to
# ensure that it can work on MSYS2 (which requires pywin32-ctypes)
if is_win:
if _setup_py_mode:
pywintypes = None
win32api = None
else:
try:
from win32ctypes.pywin32 import pywintypes # noqa: F401, E402
from win32ctypes.pywin32 import win32api # noqa: F401, E402
except ImportError as e:
raise SystemExit(
'PyInstaller cannot check for assembly dependencies.\n'
'Please install pywin32-ctypes.\n\n'
'pip install pywin32-ctypes\n'
) from e
except Exception as e:
if sys.flags.optimize == 2:
raise SystemExit(
"pycparser, a Windows only indirect dependency of PyInstaller, is incompatible with "
"Python's \"discard docstrings\" (-OO) flag mode. For more information see:\n"
" https://github.com/pyinstaller/pyinstaller/issues/6345"
) from e
raise
# macOS's platform.architecture() can be buggy, so we do this manually here. Based off the python documentation:
# https://docs.python.org/3/library/platform.html#platform.architecture
if is_darwin:
architecture = '64bit' if sys.maxsize > 2**32 else '32bit'
else:
architecture = platform.architecture()[0]
# Cygwin needs special handling, because platform.system() contains identifiers such as MSYS_NT-10.0-19042 and
# CYGWIN_NT-10.0-19042 that do not fit PyInstaller's OS naming scheme. Explicitly set `system` to 'Cygwin'.
system = 'Cygwin' if is_cygwin else platform.system()
# Machine suffix for bootloader.
machine = _pyi_machine(platform.machine(), platform.system())
# Wine detection and support
def is_wine_dll(filename: str | os.PathLike):
"""
Check if the given PE file is a Wine DLL (PE-converted built-in, or fake/placeholder one).
Returns True if the given file is a Wine DLL, False if not (or if file cannot be analyzed or does not exist).
"""
_WINE_SIGNATURES = (
b'Wine builtin DLL', # PE-converted Wine DLL
b'Wine placeholder DLL', # Fake/placeholder Wine DLL
)
_MAX_LEN = max([len(sig) for sig in _WINE_SIGNATURES])
    # Wine places its DLL signature in the padding area between the IMAGE_DOS_HEADER and IMAGE_NT_HEADERS. So we need
# to compare the bytes that come right after IMAGE_DOS_HEADER, i.e., after initial 64 bytes. We can read the file
# directly and avoid using the pefile library to avoid performance penalty associated with full header parsing.
try:
with open(filename, 'rb') as fp:
fp.seek(64)
signature = fp.read(_MAX_LEN)
return signature.startswith(_WINE_SIGNATURES)
except Exception:
pass
return False
if is_win:
try:
import ctypes.util # noqa: E402
is_win_wine = is_wine_dll(ctypes.util.find_library('kernel32'))
except Exception:
pass
# Setting and getting environment variables does not handle unicode strings correctly on Windows.
# Acting on os.environ instead of using getenv()/setenv()/unsetenv(), as suggested in
# <http://docs.python.org/library/os.html#os.environ>: "Calling putenv() directly does not change os.environ, so it is
# better to modify os.environ." (Same for unsetenv.)
def getenv(name: str, default: str | None = None):
"""
    Returns a unicode string containing the value of environment variable 'name'.
"""
return os.environ.get(name, default)
def setenv(name: str, value: str):
"""
    Accepts a unicode string 'value' and sets it as the value of environment variable 'name'.
"""
os.environ[name] = value
def unsetenv(name: str):
"""
Delete the environment variable 'name'.
"""
# Some platforms (e.g., AIX) do not support `os.unsetenv()` and thus `del os.environ[name]` has no effect on the
# real environment. For this case, we set the value to the empty string.
os.environ[name] = ""
del os.environ[name]
# Exec commands in subprocesses.
def exec_command(
*cmdargs: str, encoding: str | None = None, raise_enoent: bool | None = None, **kwargs: int | bool | list | None
):
"""
Run the command specified by the passed positional arguments, optionally configured by the passed keyword arguments.
.. DANGER::
**Ignore this function's return value** -- unless this command's standard output contains _only_ pathnames, in
which case this function returns the correct filesystem-encoded string expected by PyInstaller. In all other
cases, this function's return value is _not_ safely usable. Consider calling the general-purpose
`exec_command_stdout()` function instead.
For backward compatibility, this function's return value non-portably depends on the current Python version and
passed keyword arguments:
* Under Python 2.7, this value is an **encoded `str` string** rather than a decoded `unicode` string. This value
_cannot_ be safely used for any purpose (e.g., string manipulation or parsing), except to be passed directly to
another non-Python command.
* Under Python 3.x, this value is a **decoded `str` string**. However, even this value is _not_ necessarily
safely usable:
* If the `encoding` parameter is passed, this value is guaranteed to be safely usable.
* Else, this value _cannot_ be safely used for any purpose (e.g., string manipulation or parsing), except to be
passed directly to another non-Python command. Why? Because this value has been decoded with the encoding
specified by `sys.getfilesystemencoding()`, the encoding used by `os.fsencode()` and `os.fsdecode()` to
convert from platform-agnostic to platform-specific pathnames. This is _not_ necessarily the encoding with
which this command's standard output was encoded. Cue edge-case decoding exceptions.
Parameters
----------
cmdargs :
Variadic list whose:
1. Mandatory first element is the absolute path, relative path, or basename in the current `${PATH}` of the
command to run.
2. Optional remaining elements are arguments to pass to this command.
encoding : str, optional
Optional keyword argument specifying the encoding with which to decode this command's standard output under
Python 3. As this function's return value should be ignored, this argument should _never_ be passed.
raise_enoent : boolean, optional
        Optional keyword argument to re-raise the exception if executing the command fails because the command is not
        found. This is useful for checking whether a command exists.
All remaining keyword arguments are passed as is to the `subprocess.Popen()` constructor.
Returns
----------
str
Ignore this value. See discussion above.
"""
proc = subprocess.Popen(cmdargs, stdout=subprocess.PIPE, **kwargs)
try:
out = proc.communicate(timeout=60)[0]
except OSError as e:
if raise_enoent and e.errno == errno.ENOENT:
raise
print('--' * 20, file=sys.stderr)
print("Error running '%s':" % " ".join(cmdargs), file=sys.stderr)
print(e, file=sys.stderr)
print('--' * 20, file=sys.stderr)
raise ExecCommandFailed("Error: Executing command failed!") from e
except subprocess.TimeoutExpired:
proc.kill()
raise
# stdout/stderr are returned as a byte array NOT as string, so we need to convert that to proper encoding.
try:
if encoding:
out = out.decode(encoding)
else:
# If no encoding is given, assume we are reading filenames from stdout only because it is the common case.
out = os.fsdecode(out)
except UnicodeDecodeError as e:
# The sub-process used a different encoding; provide more information to ease debugging.
print('--' * 20, file=sys.stderr)
print(str(e), file=sys.stderr)
print('These are the bytes around the offending byte:', file=sys.stderr)
print('--' * 20, file=sys.stderr)
raise
return out
def exec_command_rc(*cmdargs: str, **kwargs: float | bool | list | None):
"""
Return the exit code of the command specified by the passed positional arguments, optionally configured by the
passed keyword arguments.
Parameters
----------
cmdargs : list
Variadic list whose:
1. Mandatory first element is the absolute path, relative path, or basename in the current `${PATH}` of the
command to run.
2. Optional remaining elements are arguments to pass to this command.
All keyword arguments are passed as is to the `subprocess.call()` function.
Returns
----------
int
This command's exit code as an unsigned byte in the range `[0, 255]`, where 0 signifies success and all other
values signal a failure.
"""
# 'encoding' keyword is not supported for 'subprocess.call'; remove it from kwargs.
if 'encoding' in kwargs:
kwargs.pop('encoding')
return subprocess.call(cmdargs, **kwargs)
def exec_command_all(*cmdargs: str, encoding: str | None = None, **kwargs: int | bool | list | None):
"""
Run the command specified by the passed positional arguments, optionally configured by the passed keyword arguments.
.. DANGER::
**Ignore this function's return value.** If this command's standard output consists solely of pathnames, consider
calling `exec_command()`; otherwise, consider calling `exec_command_stdout()`.
Parameters
----------
cmdargs : str
Variadic list whose:
1. Mandatory first element is the absolute path, relative path, or basename in the current `${PATH}` of the
command to run.
2. Optional remaining elements are arguments to pass to this command.
encoding : str, optional
Optional keyword argument specifying the encoding with which to decode this command's standard output. As this
function's return value should be ignored, this argument should _never_ be passed.
All remaining keyword arguments are passed as is to the `subprocess.Popen()` constructor.
Returns
----------
(int, str, str)
Ignore this 3-element tuple `(exit_code, stdout, stderr)`. See the `exec_command()` function for discussion.
"""
proc = subprocess.Popen(
cmdargs,
bufsize=-1, # Default OS buffer size.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
**kwargs
)
# Waits for subprocess to complete.
try:
out, err = proc.communicate(timeout=60)
except subprocess.TimeoutExpired:
proc.kill()
raise
# stdout/stderr are returned as a byte array NOT as string. Thus we need to convert that to proper encoding.
try:
if encoding:
out = out.decode(encoding)
err = err.decode(encoding)
else:
# If no encoding is given, assume we're reading filenames from stdout only because it's the common case.
out = os.fsdecode(out)
err = os.fsdecode(err)
except UnicodeDecodeError as e:
# The sub-process used a different encoding, provide more information to ease debugging.
print('--' * 20, file=sys.stderr)
print(str(e), file=sys.stderr)
print('These are the bytes around the offending byte:', file=sys.stderr)
print('--' * 20, file=sys.stderr)
raise
return proc.returncode, out, err
def __wrap_python(args, kwargs):
cmdargs = [sys.executable]
    # Mac OS X supports universal binaries (a single binary for multiple architectures). We need to ensure that
    # subprocesses run under the same architecture as the python executable itself, which requires launching them
    # via the 'arch' command.
if is_darwin:
if architecture == '64bit':
if platform.machine() == 'arm64':
py_prefix = ['arch', '-arm64'] # Apple M1
else:
py_prefix = ['arch', '-x86_64'] # Intel
elif architecture == '32bit':
py_prefix = ['arch', '-i386']
else:
py_prefix = []
        # Since Mac OS 10.11, the environment variable DYLD_LIBRARY_PATH is no longer inherited by child processes, so we
# proactively propagate the current value using the `-e` option of the `arch` command.
if 'DYLD_LIBRARY_PATH' in os.environ:
path = os.environ['DYLD_LIBRARY_PATH']
py_prefix += ['-e', 'DYLD_LIBRARY_PATH=%s' % path]
cmdargs = py_prefix + cmdargs
if not __debug__:
cmdargs.append('-O')
cmdargs.extend(args)
    env = kwargs.get('env')
    if env is None:
        env = dict(**os.environ)
    # Ensure the python 3 subprocess writes 'str' as utf-8...
    env['PYTHONIOENCODING'] = 'UTF-8'
    # ... and ensure we read its output as utf-8.
    kwargs['encoding'] = 'UTF-8'
    # Pass the (possibly newly created) environment dict on to the subprocess; without this, the PYTHONIOENCODING
    # setting above would be lost whenever the caller did not supply 'env'.
    kwargs['env'] = env
    return cmdargs, kwargs
def exec_python(*args: str, **kwargs: str | None):
"""
Wrap running python script in a subprocess.
Return stdout of the invoked command.
"""
cmdargs, kwargs = __wrap_python(args, kwargs)
return exec_command(*cmdargs, **kwargs)
def exec_python_rc(*args: str, **kwargs: str | None):
"""
Wrap running python script in a subprocess.
Return exit code of the invoked command.
"""
cmdargs, kwargs = __wrap_python(args, kwargs)
return exec_command_rc(*cmdargs, **kwargs)
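# Illustrative usage sketch (not part of the original module): run a short
# snippet in a child interpreter and capture its stdout / exit code.
def _demo_exec_python():
    out = exec_python('-c', 'print("hello")')  # stdout of the child interpreter
    rc = exec_python_rc('-c', 'import sys; sys.exit(3)')  # exit code (here, 3)
    return out.strip(), rc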
# Path handling.
def expand_path(path: str | os.PathLike):
"""
Replace initial tilde '~' in path with user's home directory, and also expand environment variables
(i.e., ${VARNAME} on Unix, %VARNAME% on Windows).
"""
return os.path.expandvars(os.path.expanduser(path))
# Site-packages functions - use native function if available.
def getsitepackages(prefixes: list | None = None):
"""
Returns a list containing all global site-packages directories.
For each directory present in ``prefixes`` (or the global ``PREFIXES``), this function finds its `site-packages`
subdirectory depending on the system environment, and returns a list of full paths.
"""
# This implementation was copied from the ``site`` module, python 3.7.3.
sitepackages = []
seen = set()
if prefixes is None:
prefixes = [sys.prefix, sys.exec_prefix]
for prefix in prefixes:
if not prefix or prefix in seen:
continue
seen.add(prefix)
if os.sep == '/':
sitepackages.append(os.path.join(prefix, "lib", "python%d.%d" % sys.version_info[:2], "site-packages"))
else:
sitepackages.append(prefix)
sitepackages.append(os.path.join(prefix, "lib", "site-packages"))
return sitepackages
# Backported for virtualenv. Module 'site' in virtualenv might not have this attribute.
getsitepackages = getattr(site, 'getsitepackages', getsitepackages)
# Wrapper to load a module from a Python source file. This function loads import hooks when processing them.
def importlib_load_source(name: str, pathname: str):
# Import module from a file.
mod_loader = importlib.machinery.SourceFileLoader(name, pathname)
mod = types.ModuleType(mod_loader.name)
mod.__file__ = mod_loader.get_filename() # Some hooks require __file__ attribute in their namespace
mod_loader.exec_module(mod)
return mod
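# Illustrative usage sketch (not part of the original module): load a throw-away
# source file as a module object via importlib_load_source().
def _demo_importlib_load_source():
    import tempfile
    with tempfile.NamedTemporaryFile('w', suffix='.py', delete=False) as f:
        f.write("ANSWER = 42\n")
    mod = importlib_load_source('pyi_demo_mod', f.name)
    assert mod.ANSWER == 42 and mod.__file__ == f.name
    os.unlink(f.name)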
# Patterns of module names that should be bundled into the base_library.zip to be available during bootstrap.
# These modules include direct or indirect dependencies of encodings.* modules. The encodings modules must be
# recursively included to set the I/O encoding during python startup. Similarly, this list should include
# modules used by PyInstaller's bootstrap scripts and modules (loader/pyi*.py)
PY3_BASE_MODULES = {
'_collections_abc',
'_weakrefset',
'abc',
'codecs',
'collections',
'copyreg',
'encodings',
'enum',
'functools',
'genericpath', # dependency of os.path
'io',
'heapq',
'keyword',
'linecache',
'locale',
'ntpath', # dependency of os.path
'operator',
'os',
'posixpath', # dependency of os.path
're',
'reprlib',
'sre_compile',
'sre_constants',
'sre_parse',
'stat', # dependency of os.path
'traceback', # for startup errors
'types',
'weakref',
'warnings',
}
if not is_py310:
PY3_BASE_MODULES.add('_bootlocale')
# Object types of Pure Python modules in modulegraph dependency graph.
# Pure Python modules have code object (attribute co_code).
PURE_PYTHON_MODULE_TYPES = {
'SourceModule',
'CompiledModule',
'Package',
'NamespacePackage',
# Deprecated.
# TODO Could these module types be removed?
'FlatPackage',
'ArchiveModule',
}
# Object types of special Python modules (built-in, run-time, namespace package) in modulegraph dependency graph that do
# not have code object.
SPECIAL_MODULE_TYPES = {
'AliasNode',
'BuiltinModule',
'RuntimeModule',
'RuntimePackage',
# PyInstaller handles scripts differently and not as standard Python modules.
'Script',
}
# Object types of Binary Python modules (extensions, etc) in modulegraph dependency graph.
BINARY_MODULE_TYPES = {
'Extension',
'ExtensionPackage',
}
# Object types of valid Python modules in modulegraph dependency graph.
VALID_MODULE_TYPES = PURE_PYTHON_MODULE_TYPES | SPECIAL_MODULE_TYPES | BINARY_MODULE_TYPES
# Object types of bad/missing/invalid Python modules in modulegraph dependency graph.
# TODO: should 'Invalid' module types also be in the 'MISSING' set?
BAD_MODULE_TYPES = {
'BadModule',
'ExcludedModule',
'InvalidSourceModule',
'InvalidCompiledModule',
'MissingModule',
# Runtime modules and packages are technically valid rather than bad, but exist only in-memory rather than on-disk
# (typically due to pre_safe_import_module() hooks), and hence cannot be physically frozen. For simplicity, these
# nodes are categorized as bad rather than valid.
'RuntimeModule',
'RuntimePackage',
}
ALL_MODULE_TYPES = VALID_MODULE_TYPES | BAD_MODULE_TYPES
# TODO: review this mapping to TOC, remove useless entries.
# Dictionary to map ModuleGraph node types to TOC typecodes.
MODULE_TYPES_TO_TOC_DICT = {
# Pure modules.
'AliasNode': 'PYMODULE',
'Script': 'PYSOURCE',
'SourceModule': 'PYMODULE',
'CompiledModule': 'PYMODULE',
'Package': 'PYMODULE',
'FlatPackage': 'PYMODULE',
'ArchiveModule': 'PYMODULE',
# Binary modules.
'Extension': 'EXTENSION',
'ExtensionPackage': 'EXTENSION',
# Special valid modules.
'BuiltinModule': 'BUILTIN',
'NamespacePackage': 'PYMODULE',
# Bad modules.
'BadModule': 'bad',
'ExcludedModule': 'excluded',
'InvalidSourceModule': 'invalid',
'InvalidCompiledModule': 'invalid',
'MissingModule': 'missing',
'RuntimeModule': 'runtime',
'RuntimePackage': 'runtime',
# Other.
'does not occur': 'BINARY',
}
def check_requirements():
"""
Verify that all requirements to run PyInstaller are met.
Fail hard if any requirement is not met.
"""
# Fail hard if Python does not have minimum required version
if sys.version_info < (3, 8):
raise EnvironmentError('PyInstaller requires Python 3.8 or newer.')
# There are some old packages which used to be backports of libraries which are now part of the standard library.
    # These backports are now unmaintained and contain only an older subset of features, leading to obscure errors
    # like "enum has no attribute IntFlag" if installed.
from importlib.metadata import distribution, PackageNotFoundError
for name in ["enum34", "typing", "pathlib"]:
try:
dist = distribution(name)
except PackageNotFoundError:
continue
remove = "conda remove" if is_conda else f'"{sys.executable}" -m pip uninstall {name}'
raise SystemExit(
f"The '{name}' package is an obsolete backport of a standard library package and is incompatible with "
f"PyInstaller. Please remove this package (located in {dist.locate_file('')}) using\n {remove}\n"
"then try again."
)
# Bail out if binutils is not installed.
if is_linux and shutil.which("objdump") is None:
raise SystemExit(
"On Linux, objdump is required. It is typically provided by the 'binutils' package "
"installable via your Linux distribution's package manager."
)

View File

@ -0,0 +1,56 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
This module holds run-time PyInstaller configuration.
Variable CONF is a dict() with all configuration options that are necessary for the build phase, which is carried out
by passing the .spec file to the exec() function. The CONF variable is the only way to pass arguments into exec()
without resorting to 'global' variables.
NOTE: 'Global' variables do not play well with the test suite, because it does not provide isolated environments for
tests; some tests might fail in that case.
NOTE: The 'CONF' dict() is cleared after the build phase so that it does not interfere with any subsequent test.
To pass arguments to the build phase, just do:
from PyInstaller.config import CONF
CONF['my_var_name'] = my_value
And to use this variable in the build phase:
from PyInstaller.config import CONF
foo = CONF['my_var_name']
This is the list of known variables. (Please update it if necessary.)
cachedir
hiddenimports
noconfirm
pathex
ui_admin
ui_access
upx_available
upx_dir
workpath
tests_modgraph - cached PyiModuleGraph object to speed up tests
code_cache - dictionary associating `Analysis.pure` list instances with code cache dictionaries. Used by PYZ writer.
"""
# NOTE: Do not import other PyInstaller modules here. Just define constants here.
CONF = {
# Unit tests require this key to exist.
'pathex': [],
}

View File

@ -0,0 +1,107 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Configure PyInstaller for the current Python installation.
"""
import os
import subprocess
from PyInstaller import compat
from PyInstaller import log as logging
logger = logging.getLogger(__name__)
def _check_upx_availability(upx_dir):
logger.debug('Testing UPX availability ...')
upx_exe = "upx"
if upx_dir:
upx_exe = os.path.normpath(os.path.join(upx_dir, upx_exe))
# Check if we can call `upx -V`.
try:
output = subprocess.check_output(
[upx_exe, '-V'],
stdin=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
encoding='utf-8',
)
except Exception:
logger.debug('UPX is not available.')
return False
# Read the first line to display version string
try:
version_string = output.splitlines()[0]
except IndexError:
version_string = 'version string unavailable'
logger.debug('UPX is available: %s', version_string)
return True
def _get_pyinstaller_cache_dir():
old_cache_dir = None
if compat.getenv('PYINSTALLER_CONFIG_DIR'):
cache_dir = compat.getenv('PYINSTALLER_CONFIG_DIR')
elif compat.is_win:
cache_dir = compat.getenv('LOCALAPPDATA')
if not cache_dir:
cache_dir = os.path.expanduser('~\\Application Data')
elif compat.is_darwin:
cache_dir = os.path.expanduser('~/Library/Application Support')
else:
# According to XDG specification: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
old_cache_dir = compat.getenv('XDG_DATA_HOME')
if not old_cache_dir:
old_cache_dir = os.path.expanduser('~/.local/share')
cache_dir = compat.getenv('XDG_CACHE_HOME')
if not cache_dir:
cache_dir = os.path.expanduser('~/.cache')
cache_dir = os.path.join(cache_dir, 'pyinstaller')
# Move old cache-dir, if any, to new location.
if old_cache_dir and not os.path.exists(cache_dir):
old_cache_dir = os.path.join(old_cache_dir, 'pyinstaller')
if os.path.exists(old_cache_dir):
parent_dir = os.path.dirname(cache_dir)
if not os.path.exists(parent_dir):
os.makedirs(parent_dir)
os.rename(old_cache_dir, cache_dir)
return cache_dir
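# For example (illustrative): with PYINSTALLER_CONFIG_DIR unset, the cache directory resolves to
# %LOCALAPPDATA%\pyinstaller on Windows, ~/Library/Application Support/pyinstaller on macOS, and
# ${XDG_CACHE_HOME:-~/.cache}/pyinstaller on other platforms.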
def get_config(upx_dir=None):
config = {}
config['cachedir'] = _get_pyinstaller_cache_dir()
config['upx_dir'] = upx_dir
# Disable UPX on non-Windows. Using UPX (3.96) on modern Linux shared libraries (for example, the python3.x.so
# shared library) seems to result in segmentation fault when they are dlopen'd. This happens in recent versions
# of Fedora and Ubuntu linux, as well as in Alpine containers. On macOS, UPX (3.96) fails with
# UnknownExecutableFormatException on most .dylibs (and interferes with code signature on other occasions). And
# even when it would succeed, compressed libraries cannot be (re)signed due to failed strict validation.
upx_available = _check_upx_availability(upx_dir)
if upx_available:
if compat.is_win or compat.is_cygwin:
logger.info("UPX is available and will be used if enabled on build targets.")
elif os.environ.get("PYINSTALLER_FORCE_UPX", "0") != "0":
logger.warning(
"UPX is available and force-enabled on platform with known compatibility problems - use at own risk!"
)
else:
upx_available = False
logger.info("UPX is available but is disabled on non-Windows due to known compatibility problems.")
config['upx_available'] = upx_available
return config

View File

@ -0,0 +1 @@
#

View File

@ -0,0 +1,982 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Define a modified ModuleGraph that can return its contents as a TOC and in other ways act like the old ImpTracker.
TODO: This class, along with TOC and Tree, should be in a separate module.
For reference, the ModuleGraph node types and their contents:
  nodetype          identifier         filename
  Script            full path to .py   full path to .py
  SourceModule      basename           full path to .py
  BuiltinModule     basename           None
  CompiledModule    basename           full path to .pyc
  Extension         basename           full path to .so
  MissingModule     basename           None
  Package           basename           full path to __init__.py
                    packagepath is ['path to package']
                    globalnames is set of global names __init__.py defines
  ExtensionPackage  basename           full path to __init__.{so,dll}
                    packagepath is ['path to package']
The main extension here over ModuleGraph is a method to extract nodes from the flattened graph and return them as a
TOC, or add them to a TOC. Other added methods look up nodes by identifier and return facts about them, replacing what
the old ImpTracker list could do.
"""
import ast
import os
import re
import sys
import traceback
from collections import defaultdict
from copy import deepcopy
from PyInstaller import HOMEPATH, PACKAGEPATH
from PyInstaller import log as logging
from PyInstaller.building.utils import add_suffix_to_extension
from PyInstaller.compat import (
BAD_MODULE_TYPES, BINARY_MODULE_TYPES, MODULE_TYPES_TO_TOC_DICT, PURE_PYTHON_MODULE_TYPES, PY3_BASE_MODULES,
VALID_MODULE_TYPES, importlib_load_source, is_win
)
from PyInstaller.depend import bytecode
from PyInstaller.depend.imphook import AdditionalFilesCache, ModuleHookCache
from PyInstaller.depend.imphookapi import (PreFindModulePathAPI, PreSafeImportModuleAPI)
from PyInstaller.lib.modulegraph.find_modules import get_implies
from PyInstaller.lib.modulegraph.modulegraph import ModuleGraph, DEFAULT_IMPORT_LEVEL, ABSOLUTE_IMPORT_LEVEL, Package
from PyInstaller.log import DEBUG, INFO, TRACE
from PyInstaller.utils.hooks import collect_submodules, is_package
logger = logging.getLogger(__name__)
class PyiModuleGraph(ModuleGraph):
"""
Directed graph whose nodes represent modules and edges represent dependencies between these modules.
This high-level subclass wraps the lower-level `ModuleGraph` class with support for graph and runtime hooks.
While each instance of `ModuleGraph` represents a set of disconnected trees, each instance of this class *only*
represents a single connected tree whose root node is the Python script originally passed by the user on the
command line. For that reason, while there may (and typically do) exist more than one `ModuleGraph` instance,
there typically exists only a singleton instance of this class.
Attributes
----------
_hooks : ModuleHookCache
Dictionary mapping the fully-qualified names of all modules with normal (post-graph) hooks to the absolute paths
        of such hooks. See the `_find_module_path()` method for details.
_hooks_pre_find_module_path : ModuleHookCache
Dictionary mapping the fully-qualified names of all modules with pre-find module path hooks to the absolute
        paths of such hooks. See the `_find_module_path()` method for details.
_hooks_pre_safe_import_module : ModuleHookCache
Dictionary mapping the fully-qualified names of all modules with pre-safe import module hooks to the absolute
paths of such hooks. See the `_safe_import_module()` method for details.
_user_hook_dirs : list
List of the absolute paths of all directories containing user-defined hooks for the current application.
_excludes : list
List of module names to be excluded when searching for dependencies.
_additional_files_cache : AdditionalFilesCache
Cache of all external dependencies (e.g., binaries, datas) listed in hook scripts for imported modules.
_module_collection_mode : dict
A dictionary of module/package collection mode settings set by hook scripts for their modules.
_base_modules: list
Dependencies for `base_library.zip` (which remain the same for every executable).
"""
# Note: these levels are completely arbitrary and may be adjusted if needed.
LOG_LEVEL_MAPPING = {0: INFO, 1: DEBUG, 2: TRACE, 3: TRACE, 4: TRACE}
def __init__(self, pyi_homepath, user_hook_dirs=(), excludes=(), **kwargs):
super().__init__(excludes=excludes, **kwargs)
# Homepath to the place where is PyInstaller located.
self._homepath = pyi_homepath
# modulegraph Node for the main python script that is analyzed by PyInstaller.
self._top_script_node = None
# Absolute paths of all user-defined hook directories.
self._excludes = excludes
self._reset(user_hook_dirs)
self._analyze_base_modules()
def _reset(self, user_hook_dirs):
"""
        Reset for another set of scripts. This is primarily required for running the test-suite.
"""
self._top_script_node = None
self._additional_files_cache = AdditionalFilesCache()
self._module_collection_mode = dict()
# Command line, Entry Point, and then builtin hook dirs.
self._user_hook_dirs = [*user_hook_dirs, os.path.join(PACKAGEPATH, 'hooks')]
        # Hook-specific lookup tables. These need to be reset when reusing a cached PyiModuleGraph, to avoid hooks
        # referring to files or data from another test-case.
logger.info('Caching module graph hooks...')
self._hooks = self._cache_hooks("")
self._hooks_pre_safe_import_module = self._cache_hooks('pre_safe_import_module')
self._hooks_pre_find_module_path = self._cache_hooks('pre_find_module_path')
# Search for run-time hooks in all hook directories.
self._available_rthooks = defaultdict(list)
for uhd in self._user_hook_dirs:
uhd_path = os.path.abspath(os.path.join(uhd, 'rthooks.dat'))
try:
with open(uhd_path, 'r', encoding='utf-8') as f:
rthooks = ast.literal_eval(f.read())
except FileNotFoundError:
# Ignore if this hook path doesn't have run-time hooks.
continue
except Exception as e:
logger.error('Unable to read run-time hooks from %r: %s' % (uhd_path, e))
continue
self._merge_rthooks(rthooks, uhd, uhd_path)
# Convert back to a standard dict.
self._available_rthooks = dict(self._available_rthooks)
def _merge_rthooks(self, rthooks, uhd, uhd_path):
"""
The expected data structure for a run-time hook file is a Python dictionary of type ``Dict[str, List[str]]``,
where the dictionary keys are module names and the sequence strings are Python file names.
Check then merge this data structure, updating the file names to be absolute.
"""
# Check that the root element is a dict.
assert isinstance(rthooks, dict), 'The root element in %s must be a dict.' % uhd_path
for module_name, python_file_name_list in rthooks.items():
# Ensure the key is a string.
assert isinstance(module_name, str), \
'%s must be a dict whose keys are strings; %s is not a string.' % (uhd_path, module_name)
# Ensure the value is a list.
assert isinstance(python_file_name_list, list), \
'The value of %s key %s must be a list.' % (uhd_path, module_name)
if module_name in self._available_rthooks:
logger.warning(
'Runtime hooks for %s have already been defined. Skipping the runtime hooks for %s that are '
'defined in %s.', module_name, module_name, os.path.join(uhd, 'rthooks')
)
# Skip this module
continue
# Merge this with existing run-time hooks.
for python_file_name in python_file_name_list:
# Ensure each item in the list is a string.
assert isinstance(python_file_name, str), \
'%s key %s, item %r must be a string.' % (uhd_path, module_name, python_file_name)
# Transform it into an absolute path.
abs_path = os.path.join(uhd, 'rthooks', python_file_name)
# Make sure this file exists.
assert os.path.exists(abs_path), \
'In %s, key %s, the file %r expected to be located at %r does not exist.' % \
(uhd_path, module_name, python_file_name, abs_path)
# Merge it.
self._available_rthooks[module_name].append(abs_path)
@staticmethod
def _findCaller(*args, **kwargs):
# Used to add an additional stack-frame above logger.findCaller. findCaller expects the caller to be three
# stack-frames above itself.
return logger.findCaller(*args, **kwargs)
def msg(self, level, s, *args):
"""
Print a debug message with the given level.
1. Map the msg log level to a logger log level.
2. Generate the message format (the same format as ModuleGraph)
3. Find the caller, which findCaller expects three stack-frames above itself:
[3] caller -> [2] msg (here) -> [1] _findCaller -> [0] logger.findCaller
4. Create a logRecord with the caller's information.
5. Handle the logRecord.
"""
try:
level = self.LOG_LEVEL_MAPPING[level]
except KeyError:
return
if not logger.isEnabledFor(level):
return
msg = "%s %s" % (s, ' '.join(map(repr, args)))
try:
fn, lno, func, sinfo = self._findCaller()
except ValueError: # pragma: no cover
fn, lno, func, sinfo = "(unknown file)", 0, "(unknown function)", None
record = logger.makeRecord(logger.name, level, fn, lno, msg, [], None, func, None, sinfo)
logger.handle(record)
# Set logging methods so that the stack is correctly detected.
msgin = msg
msgout = msg
def _cache_hooks(self, hook_type):
"""
Get a cache of all hooks of the passed type.
The cache will include all official hooks defined by the PyInstaller codebase _and_ all unofficial hooks
defined for the current application.
Parameters
----------
hook_type : str
Type of hooks to be cached, equivalent to the basename of the subpackage of the `PyInstaller.hooks`
package containing such hooks (e.g., `post_create_package` for post-create package hooks).
"""
# Cache of this type of hooks.
hook_dirs = []
for user_hook_dir in self._user_hook_dirs:
# Absolute path of the user-defined subdirectory of this hook type. If this directory exists, add it to the
# list to be cached.
user_hook_type_dir = os.path.join(user_hook_dir, hook_type)
if os.path.isdir(user_hook_type_dir):
hook_dirs.append(user_hook_type_dir)
return ModuleHookCache(self, hook_dirs)
def _analyze_base_modules(self):
"""
        Analyze dependencies of the modules in base_library.zip.
"""
logger.info('Analyzing base_library.zip ...')
required_mods = []
# Collect submodules from required modules in base_library.zip.
for m in PY3_BASE_MODULES:
if is_package(m):
required_mods += collect_submodules(m)
else:
required_mods.append(m)
# Initialize ModuleGraph.
self._base_modules = [mod for req in required_mods for mod in self.import_hook(req)]
def add_script(self, pathname, caller=None):
"""
        Wrap the parent's 'add_script' method: create the graph from the first script in the analysis, and save its
        node to use as the "caller" node for all others. This gives a connected graph rather than a collection of
        unrelated trees.
"""
if self._top_script_node is None:
# Remember the node for the first script.
try:
self._top_script_node = super().add_script(pathname)
except SyntaxError:
print("\nSyntax error in", pathname, file=sys.stderr)
formatted_lines = traceback.format_exc().splitlines(True)
print(*formatted_lines[-4:], file=sys.stderr)
sys.exit(1)
# Create references from the top script to the base_modules in graph.
for node in self._base_modules:
self.add_edge(self._top_script_node, node)
# Return top-level script node.
return self._top_script_node
else:
if not caller:
                # Default to the top-level script node, i.e., treat any additional script as if it were called from
                # the top-level script.
caller = self._top_script_node
return super().add_script(pathname, caller=caller)
def process_post_graph_hooks(self, analysis):
"""
For each imported module, run this module's post-graph hooks if any.
Parameters
----------
analysis: build_main.Analysis
The Analysis that calls the hooks
"""
# For each iteration of the infinite "while" loop below:
#
# 1. All hook() functions defined in cached hooks for imported modules are called. This may result in new
# modules being imported (e.g., as hidden imports) that were ignored earlier in the current iteration: if
# this is the case, all hook() functions defined in cached hooks for these modules will be called by the next
# iteration.
# 2. All cached hooks whose hook() functions were called are removed from this cache. If this cache is empty, no
# hook() functions will be called by the next iteration and this loop will be terminated.
# 3. If no hook() functions were called, this loop is terminated.
logger.info('Processing module hooks...')
while True:
# Set of the names of all imported modules whose post-graph hooks are run by this iteration, preventing the
            # next iteration from re-running these hooks. If still empty at the end of this iteration, no post-graph
# hooks were run; thus, this loop will be terminated.
hooked_module_names = set()
# For each remaining hookable module and corresponding hooks...
for module_name, module_hooks in self._hooks.items():
# Graph node for this module if imported or "None" otherwise.
module_node = self.find_node(module_name, create_nspkg=False)
# If this module has not been imported, temporarily ignore it. This module is retained in the cache, as
# a subsequently run post-graph hook could import this module as a hidden import.
if module_node is None:
continue
# If this module is unimportable, permanently ignore it.
if type(module_node).__name__ not in VALID_MODULE_TYPES:
hooked_module_names.add(module_name)
continue
# For each hook script for this module...
for module_hook in module_hooks:
# Run this script's post-graph hook.
module_hook.post_graph(analysis)
# Cache all external dependencies listed by this script after running this hook, which could add
# dependencies.
self._additional_files_cache.add(module_name, module_hook.binaries, module_hook.datas)
# Update package collection mode settings.
self._module_collection_mode.update(module_hook.module_collection_mode)
# Prevent this module's hooks from being run again.
hooked_module_names.add(module_name)
# Prevent all post-graph hooks run above from being run again by the next iteration.
self._hooks.remove_modules(*hooked_module_names)
# If no post-graph hooks were run, terminate iteration.
if not hooked_module_names:
break
def _find_all_excluded_imports(self, module_name):
"""
Collect excludedimports from the hooks of the specified module and all its parents.
"""
excluded_imports = set()
while module_name:
# Gather excluded imports from hook(s) belonging to the module
for module_hook in self._hooks.get(module_name, []):
excluded_imports.update(module_hook.excludedimports)
# Change module name to the module's parent name
module_name = module_name.rpartition('.')[0]
return excluded_imports
def _safe_import_hook(
self, target_module_partname, source_module, target_attr_names, level=DEFAULT_IMPORT_LEVEL, edge_attr=None
):
if source_module is not None:
# Gather all excluded imports for the referring modules, as well as its parents.
# For example, we want the excluded imports specified by hook for PIL to be also applied when the referring
# module is its submodule, PIL.Image.
excluded_imports = self._find_all_excluded_imports(source_module.identifier)
# Apply extra processing only if we have any excluded-imports rules
if excluded_imports:
# Resolve the base module name. Level can be ABSOLUTE_IMPORT_LEVEL (= 0) for absolute imports, or an
# integer indicating the relative level. We do not use equality comparison just in case we ever happen
# to get ABSOLUTE_OR_RELATIVE_IMPORT_LEVEL (-1), which is a remnant of python2 days.
if level > ABSOLUTE_IMPORT_LEVEL:
if isinstance(source_module, Package):
# Package
base_module_name = source_module.identifier
else:
# Module in a package; base name must be the parent package name!
base_module_name = '.'.join(source_module.identifier.split('.')[:-1])
if target_module_partname:
base_module_name += '.' + target_module_partname
# Adjust the base module name based on level
if level > 1:
base_module_name = '.'.join(base_module_name.split('.')[:-(level - 1)])
else:
base_module_name = target_module_partname
def _exclude_module(module_name, excluded_imports):
"""
Helper for checking whether given module should be excluded.
Returns the name of exclusion rule if module should be excluded, None otherwise.
"""
module_name_parts = module_name.split('.')
for excluded_import in excluded_imports:
excluded_import_parts = excluded_import.split('.')
match = module_name_parts[:len(excluded_import_parts)] == excluded_import_parts
if match:
return excluded_import
return None
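                # For example (illustrative): _exclude_module('PIL.Image', {'PIL'}) returns 'PIL'
                # (dotted-name components are matched as prefixes), whereas
                # _exclude_module('PILmagic', {'PIL'}) returns None (no partial-component match).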
# First, check if base module name is to be excluded.
# This covers both basic `import a` and `import a.b.c`, as well as `from d import e, f` where base
# module `d` is excluded.
excluded_import_rule = _exclude_module(base_module_name, excluded_imports)
if excluded_import_rule:
logger.debug(
"Suppressing import of %r from module %r due to excluded import %r specified in a hook for %r "
"(or its parent package(s)).", base_module_name, source_module.identifier, excluded_import_rule,
source_module.identifier
)
return []
# If we have target attribute names, check each of them, and remove excluded ones from the
# `target_attr_names` list.
if target_attr_names:
filtered_target_attr_names = []
for target_attr_name in target_attr_names:
submodule_name = base_module_name + '.' + target_attr_name
excluded_import_rule = _exclude_module(submodule_name, excluded_imports)
if excluded_import_rule:
logger.debug(
"Suppressing import of %r from module %r due to excluded import %r specified in a hook "
"for %r (or its parent package(s)).", submodule_name, source_module.identifier,
excluded_import_rule, source_module.identifier
)
else:
filtered_target_attr_names.append(target_attr_name)
# Swap with filtered target attribute names list; if no elements remain after the filtering, pass
# None...
target_attr_names = filtered_target_attr_names or None
return super()._safe_import_hook(target_module_partname, source_module, target_attr_names, level, edge_attr)
def _safe_import_module(self, module_basename, module_name, parent_package):
"""
Create a new graph node for the module with the passed name under the parent package signified by the passed
graph node.
This method wraps the superclass method with support for pre-import module hooks. If such a hook exists for
this module (e.g., a script `PyInstaller.hooks.hook-{module_name}` containing a function
`pre_safe_import_module()`), that hook will be run _before_ the superclass method is called.
        Pre-safe-import hooks are performed just *prior* to importing the module. When the hook is run, the module's
        parent package has already been imported and its `__path__` is set up, but the module itself is just about to
        be imported.
See the superclass method for description of parameters and return value.
"""
# If this module has pre-safe import module hooks, run these first.
if module_name in self._hooks_pre_safe_import_module:
# For the absolute path of each such hook...
for hook in self._hooks_pre_safe_import_module[module_name]:
# Dynamically import this hook as a fabricated module.
logger.info('Processing pre-safe import module hook %s from %r.', module_name, hook.hook_filename)
hook_module_name = 'PyInstaller_hooks_pre_safe_import_module_' + module_name.replace('.', '_')
hook_module = importlib_load_source(hook_module_name, hook.hook_filename)
# Object communicating changes made by this hook back to us.
hook_api = PreSafeImportModuleAPI(
module_graph=self,
module_basename=module_basename,
module_name=module_name,
parent_package=parent_package,
)
# Run this hook, passed this object.
if not hasattr(hook_module, 'pre_safe_import_module'):
raise NameError('pre_safe_import_module() function not defined by hook %r.' % hook_module)
hook_module.pre_safe_import_module(hook_api)
# Respect method call changes requested by this hook.
module_basename = hook_api.module_basename
module_name = hook_api.module_name
# Prevent subsequent calls from rerunning these hooks.
del self._hooks_pre_safe_import_module[module_name]
# Call the superclass method.
return super()._safe_import_module(module_basename, module_name, parent_package)
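# Illustrative sketch of a pre-safe-import-module hook file (hypothetical module name; not part of the original
# code). Such a script would define:
#
#     def pre_safe_import_module(api):
#         # Declare a module that exists only at run-time, so that modulegraph
#         # does not report it as missing.
#         api.add_runtime_module(api.module_name)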
def _find_module_path(self, fullname, module_name, search_dirs):
"""
Get a 3-tuple detailing the physical location of the module with the passed name if that module exists _or_
raise `ImportError` otherwise.
This method wraps the superclass method with support for pre-find module path hooks. If such a hook exists
for this module (e.g., a script `PyInstaller.hooks.hook-{module_name}` containing a function
`pre_find_module_path()`), that hook will be run _before_ the superclass method is called.
See superclass method for parameter and return value descriptions.
"""
# If this module has pre-find module path hooks, run these first.
if fullname in self._hooks_pre_find_module_path:
# For the absolute path of each such hook...
for hook in self._hooks_pre_find_module_path[fullname]:
# Dynamically import this hook as a fabricated module.
logger.info('Processing pre-find module path hook %s from %r.', fullname, hook.hook_filename)
hook_fullname = 'PyInstaller_hooks_pre_find_module_path_' + fullname.replace('.', '_')
hook_module = importlib_load_source(hook_fullname, hook.hook_filename)
# Object communicating changes made by this hook back to us.
hook_api = PreFindModulePathAPI(
module_graph=self,
module_name=fullname,
search_dirs=search_dirs,
)
# Run this hook, passed this object.
if not hasattr(hook_module, 'pre_find_module_path'):
raise NameError('pre_find_module_path() function not defined by hook %r.' % hook_module)
hook_module.pre_find_module_path(hook_api)
# Respect method call changes requested by this hook.
search_dirs = hook_api.search_dirs
# Prevent subsequent calls from rerunning these hooks.
del self._hooks_pre_find_module_path[fullname]
# Call the superclass method.
return super()._find_module_path(fullname, module_name, search_dirs)
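# Illustrative sketch of a pre-find-module-path hook file (hypothetical path; not part of the original code).
# Such a script would define:
#
#     def pre_find_module_path(api):
#         # Constrain the module search to a specific directory.
#         api.search_dirs = ['/path/to/real/location']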
def get_code_objects(self):
"""
Get code objects from ModuleGraph for pure Python modules. This allows us to avoid writing .pyc/.pyo files to disk
at a later stage.
:return: Dict mapping module names to their code objects.
"""
code_dict = {}
mod_types = PURE_PYTHON_MODULE_TYPES
for node in self.iter_graph(start=self._top_script_node):
# TODO This is terrible. To allow subclassing, types should never be directly compared. Use isinstance()
# instead, which is safer, simpler, and accepts sets. Most other calls to type() in the codebase should also
# be refactored to call isinstance() instead.
# get node type e.g. Script
mg_type = type(node).__name__
if mg_type in mod_types:
if node.code:
code_dict[node.identifier] = node.code
return code_dict
def _make_toc(self, typecode=None):
"""
Return the name, path and type of selected nodes as a TOC. The selection is determined by the given list
of PyInstaller TOC typecodes. If that list is empty, we return the complete flattened graph as a TOC with the
ModuleGraph node types in place of typecodes -- meant for debugging only. Normally, we return ModuleGraph
nodes whose types map to the requested PyInstaller typecode(s) as indicated in the MODULE_TYPES_TO_TOC_DICT.
We use the ModuleGraph (really, ObjectGraph) flatten() method to scan all the nodes. This is patterned after
ModuleGraph.report().
"""
# Construct regular expression for matching modules that should be excluded because they are bundled in
# base_library.zip.
#
# This expression matches the base module name, optionally followed by a period and then any number of
# characters. This matches the module name and the fully qualified names of any of its submodules.
regex_str = '(' + '|'.join(PY3_BASE_MODULES) + r')(\.|$)'
module_filter = re.compile(regex_str)
toc = list()
for node in self.iter_graph(start=self._top_script_node):
# Skip modules that are in base_library.zip.
if module_filter.match(node.identifier):
continue
entry = self._node_to_toc(node, typecode)
# Append the entry. We do not check for duplicates here; the TOC normalization is left to the caller.
# However, as entries are obtained from modulegraph, there should not be any duplicates at this stage.
if entry is not None:
toc.append(entry)
return toc
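# For reference, each entry produced above is a 3-tuple in PyInstaller's TOC format, e.g. (hypothetical values):
#
#     ('mypkg.utils', '/path/to/site-packages/mypkg/utils.py', 'PYMODULE')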
def make_pure_toc(self):
"""
Return all pure Python modules formatted as TOC.
"""
# PyInstaller should handle special module types without code object.
return self._make_toc(PURE_PYTHON_MODULE_TYPES)
def make_binaries_toc(self):
"""
Return all binary Python modules formatted as TOC.
"""
return self._make_toc(BINARY_MODULE_TYPES)
def make_missing_toc(self):
"""
Return all MISSING Python modules formatted as TOC.
"""
return self._make_toc(BAD_MODULE_TYPES)
@staticmethod
def _node_to_toc(node, typecode=None):
# TODO This is terrible. Everything in Python has a type. It is nonsensical to even speak of "nodes [that] are
# not typed." How would that even occur? After all, even "None" has a type! (It is "NoneType", for the curious.)
# Remove this, please.
# Get node type, e.g., Script
mg_type = type(node).__name__
assert mg_type is not None
if typecode and mg_type not in typecode:
# Type is not a to be selected one, skip this one
return None
# Extract the identifier and a path if any.
if mg_type == 'Script':
# for Script nodes only, identifier is a whole path
(name, ext) = os.path.splitext(node.filename)
name = os.path.basename(name)
elif mg_type == 'ExtensionPackage':
# Package with __init__ module being an extension module. This needs to end up as e.g. 'mypkg/__init__.so'.
# Convert the package's name ('mypkg') into the module name ('mypkg.__init__') *here* to keep special cases
# away elsewhere (where the module name is converted to a filename).
name = node.identifier + ".__init__"
else:
name = node.identifier
path = node.filename if node.filename is not None else ''
# Ensure name is really 'str'. Module graph might return object type 'modulegraph.Alias' which inherits from
# 'str'. But 'marshal.dumps()' function is able to marshal only 'str'. Otherwise on Windows PyInstaller might
# fail with message like:
# ValueError: unmarshallable object
name = str(name)
# Translate to the corresponding TOC typecode.
toc_type = MODULE_TYPES_TO_TOC_DICT[mg_type]
return name, path, toc_type
def nodes_to_toc(self, nodes):
"""
Given a list of nodes, create a TOC representing those nodes. This is mainly used to initialize a TOC of
scripts with the ones that are runtime hooks. The process is almost the same as _make_toc(), but the caller
guarantees the nodes are valid, so minimal checking.
"""
return [self._node_to_toc(node) for node in nodes]
# Return true if the named item is in the graph as a BuiltinModule node. The passed name is a basename.
def is_a_builtin(self, name):
node = self.find_node(name)
if node is None:
return False
return type(node).__name__ == 'BuiltinModule'
def get_importers(self, name):
"""
List all modules importing the module with the passed name.
Returns a list of (identifier, DependencyInfo) tuples. If the named module has not yet been imported, this
method returns an empty list.
Parameters
----------
name : str
Fully-qualified name of the module to be examined.
Returns
----------
list
List of (fully-qualified name, DependencyInfo) tuples of all modules importing the module with the passed
fully-qualified name.
"""
def get_importer_edge_data(importer):
edge = self.graph.edge_by_node(importer, name)
# edge might be None in case an AliasModule was added.
if edge is not None:
return self.graph.edge_data(edge)
node = self.find_node(name)
if node is None:
return []
_, importers = self.get_edges(node)
importers = (importer.identifier for importer in importers if importer is not None)
return [(importer, get_importer_edge_data(importer)) for importer in importers]
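# Illustrative usage sketch (hypothetical module name, not part of the original code):
#
#     graph.get_importers('ssl')
#
# might return entries such as ('urllib.request', <edge data>), while a module absent from the graph yields [].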
# TODO: create a class from this function.
def analyze_runtime_hooks(self, custom_runhooks):
"""
Analyze custom run-time hooks and run-time hooks implied by found modules.
:return : list of Graph nodes.
"""
rthooks_nodes = []
logger.info('Analyzing run-time hooks ...')
# Process custom runtime hooks (from --runtime-hook options). The runtime hooks are order dependent. First hooks
# in the list are executed first. Put their graph nodes at the head of the priority_scripts list, ahead of the
# PyInstaller-defined rthooks, so that they are executed first.
if custom_runhooks:
for hook_file in custom_runhooks:
logger.info("Including custom run-time hook %r", hook_file)
hook_file = os.path.abspath(hook_file)
# Not using "try" here because the path is supposed to exist, if it does not, the raised error will
# explain.
rthooks_nodes.append(self.add_script(hook_file))
# Find runtime hooks that are implied by packages already imported. Get a temporary TOC listing all the scripts
# and packages graphed so far, assuming that runtime hooks apply only to modules and packages.
temp_toc = self._make_toc(VALID_MODULE_TYPES)
for (mod_name, path, typecode) in temp_toc:
# Look if there is any run-time hook for given module.
if mod_name in self._available_rthooks:
# There could be several run-time hooks for a module.
for abs_path in self._available_rthooks[mod_name]:
logger.info("Including run-time hook %r", abs_path)
rthooks_nodes.append(self.add_script(abs_path))
return rthooks_nodes
def add_hiddenimports(self, module_list):
"""
Add hidden imports that are either supplied as CLI option --hidden-import=MODULENAME or as dependencies from
some PyInstaller features when enabled (e.g., crypto feature).
"""
assert self._top_script_node is not None
# Analyze the script's hidden imports (named on the command line).
for modnm in module_list:
node = self.find_node(modnm)
if node is not None:
logger.debug('Hidden import %r already found', modnm)
else:
logger.info("Analyzing hidden import %r", modnm)
# ModuleGraph throws ImportError if import not found.
try:
nodes = self.import_hook(modnm)
assert len(nodes) == 1
node = nodes[0]
except ImportError:
logger.error("Hidden import %r not found", modnm)
continue
# Create references from the top script to the hidden import, even if found otherwise. Do not waste time
# checking whether it is actually added by this (test-) script.
self.add_edge(self._top_script_node, node)
def get_code_using(self, module: str) -> dict:
"""
Find modules that import a given **module**.
"""
co_dict = {}
pure_python_module_types = PURE_PYTHON_MODULE_TYPES | {
'Script',
}
node = self.find_node(module)
if node:
referrers = self.incoming(node)
for r in referrers:
# Under python 3.7 and earlier, if `module` is added to hidden imports, one of referrers ends up being
# None, causing #3825. Work around it.
if r is None:
continue
# Ensure that modulegraph objects have 'code' attribute.
if type(r).__name__ not in pure_python_module_types:
continue
identifier = r.identifier
if identifier == module or identifier.startswith(module + '.'):
# Skip self-references or references from `module`'s own submodules.
continue
# The code object may be None if the referrer ends up shadowed by an eponymous directory that is treated
# as a namespace package. See #6873 for an example.
if r.code is None:
continue
co_dict[r.identifier] = r.code
return co_dict
def metadata_required(self) -> set:
"""
Collect metadata for all packages that appear to need it.
"""
# List every function that we can think of which is known to require metadata.
out = set()
out |= self._metadata_from(
"pkg_resources",
["get_distribution"], # Requires metadata for one distribution.
["require"], # Requires metadata for all dependencies.
)
# importlib.metadata is often `import ... as` aliased to importlib_metadata for compatibility with < py38.
# Assume both are valid.
for importlib_metadata in ["importlib.metadata", "importlib_metadata"]:
out |= self._metadata_from(
importlib_metadata,
["metadata", "distribution", "version", "files", "requires"],
[],
)
return out
def _metadata_from(self, package, methods=(), recursive_methods=()) -> set:
"""
Collect metadata whose requirements are implied by given function names.
Args:
package:
The module name that must be imported in a source file to trigger the search.
methods:
Function names from **package** which take a distribution name as an argument and imply that metadata
is required for that distribution.
recursive_methods:
Like **methods** but also implies that a distribution's dependencies' metadata must be collected too.
Returns:
Required metadata in hook data ``(source, dest)`` format as returned by
:func:`PyInstaller.utils.hooks.copy_metadata()`.
Scan all source code to be included for usage of particular *key* functions which imply that that code will
require metadata for some distribution (which may not be its own) at runtime. In the case of a match,
collect the required metadata.
"""
from PyInstaller.utils.hooks import copy_metadata
from PyInstaller.compat import importlib_metadata
# Generate sets of possible function names to search for.
need_metadata = set()
need_recursive_metadata = set()
for method in methods:
need_metadata.update(bytecode.any_alias(package + "." + method))
for method in recursive_methods:
need_recursive_metadata.update(bytecode.any_alias(package + "." + method))
out = set()
for name, code in self.get_code_using(package).items():
for calls in bytecode.recursive_function_calls(code).values():
for function_name, args in calls:
# Only consider function calls taking one argument.
if len(args) != 1:
continue
package = args[0]
try:
if function_name in need_metadata:
out.update(copy_metadata(package))
elif function_name in need_recursive_metadata:
out.update(copy_metadata(package, recursive=True))
except importlib_metadata.PackageNotFoundError:
# Currently, we opt to silently skip over missing metadata.
continue
return out
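# Illustrative example of the pattern detected above (hypothetical user code, not part of the original module):
#
#     import pkg_resources
#     dist_version = pkg_resources.get_distribution("sqlalchemy").version
#
# Scanning the compiled bytecode of such a module finds a call to an alias of pkg_resources.get_distribution
# with the single constant argument "sqlalchemy", so copy_metadata("sqlalchemy") entries are added to the
# returned set.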
def get_collected_packages(self) -> list:
"""
Return the list of collected python packages.
"""
# `node.identifier` might be an instance of `modulegraph.Alias`, hence explicit conversion to `str`.
return [
str(node.identifier) for node in self.iter_graph(start=self._top_script_node)
if type(node).__name__ == 'Package'
]
def make_hook_binaries_toc(self) -> list:
"""
Return the TOC list of binaries collected by hooks.
"""
toc = []
for node in self.iter_graph(start=self._top_script_node):
module_name = str(node.identifier)
for dest_name, src_name in self._additional_files_cache.binaries(module_name):
toc.append((dest_name, src_name, 'BINARY'))
return toc
def make_hook_datas_toc(self) -> list:
"""
Return the TOC list of data files collected by hooks.
"""
toc = []
for node in self.iter_graph(start=self._top_script_node):
module_name = str(node.identifier)
for dest_name, src_name in self._additional_files_cache.datas(module_name):
toc.append((dest_name, src_name, 'DATA'))
return toc
_cached_module_graph_ = None
def initialize_modgraph(excludes=(), user_hook_dirs=()):
"""
Create the cached module graph.
This function might appear weird, but it is necessary for speeding up test runtime, because it allows caching of
the basic ModuleGraph object that is created for 'base_library.zip'.
Parameters
----------
excludes : list
List of the fully-qualified names of all modules to be "excluded" and hence _not_ frozen into the executable.
user_hook_dirs : list
List of the absolute paths of all directories containing user-defined hooks for the current application or
`None` if no such directories were specified.
Returns
----------
PyiModuleGraph
Module graph with core dependencies.
"""
# Normalize parameters to ensure tuples and make comparison work.
user_hook_dirs = user_hook_dirs or ()
excludes = excludes or ()
# Ensure that __main__ is always excluded from the modulegraph, to prevent accidentally pulling PyInstaller itself
# into the modulegraph. This seems to happen on Windows, because modulegraph is able to resolve `__main__` as
# `.../PyInstaller.exe/__main__.py` and analyze it. The `__main__` has a different meaning during analysis compared
# to the program run-time, when it refers to the program's entry-point (which would always be part of the
# modulegraph anyway, by virtue of being the starting point of the analysis).
if "__main__" not in excludes:
excludes += ("__main__",)
# If there is a graph cached with the same excludes, reuse it. See ``PyiModulegraph._reset()`` for what is
# reset. This cache is used primarily to speed up the test-suite. Fixture `pyi_modgraph` calls this function with
# empty excludes, creating a graph suitable for the huge majority of tests.
global _cached_module_graph_
if _cached_module_graph_ and _cached_module_graph_._excludes == excludes:
logger.info('Reusing cached module dependency graph...')
graph = deepcopy(_cached_module_graph_)
graph._reset(user_hook_dirs)
return graph
logger.info('Initializing module dependency graph...')
# Construct the initial module graph by analyzing all import statements.
graph = PyiModuleGraph(
HOMEPATH,
excludes=excludes,
# get_implies() are hidden imports known by modulegraph.
implies=get_implies(),
user_hook_dirs=user_hook_dirs,
)
if not _cached_module_graph_:
# Only cache the first graph, see above for explanation.
logger.info('Caching module dependency graph...')
# cache a deep copy of the graph
_cached_module_graph_ = deepcopy(graph)
# Clear data which does not need to be copied from the cached graph since it will be reset by
# ``PyiModulegraph._reset()`` anyway.
_cached_module_graph_._hooks = None
_cached_module_graph_._hooks_pre_safe_import_module = None
_cached_module_graph_._hooks_pre_find_module_path = None
return graph
def get_bootstrap_modules():
"""
Get TOC with the bootstrapping modules and their dependencies.
:return: TOC with modules
"""
# Import the 'struct' module to get real paths to module files.
mod_struct = __import__('struct')
# Basic modules necessary for the bootstrap process.
loader_mods = list()
loaderpath = os.path.join(HOMEPATH, 'PyInstaller', 'loader')
# On some platforms (Windows, Debian/Ubuntu), the '_struct' and 'zlib' modules are built-in modules (linked
# statically) and thus do not have the __file__ attribute. The 'struct' module is required for reading Python
# bytecode from the executable; 'zlib' is required to decompress this bytecode.
for mod_name in ['_struct', 'zlib']:
mod = __import__(mod_name) # C extension.
if hasattr(mod, '__file__'):
mod_file = os.path.abspath(mod.__file__)
if os.path.basename(os.path.dirname(mod_file)) == 'lib-dynload':
# Divert extensions originating from python's lib-dynload directory, to match behavior of #5604.
mod_name = os.path.join('lib-dynload', mod_name)
loader_mods.append(add_suffix_to_extension(mod_name, mod_file, 'EXTENSION'))
loader_mods.append(('struct', os.path.abspath(mod_struct.__file__), 'PYMODULE'))
# Loader/bootstrap modules.
# NOTE: These modules should be kept simple without any complicated dependencies.
loader_mods += [
('pyimod01_archive', os.path.join(loaderpath, 'pyimod01_archive.py'), 'PYMODULE'),
('pyimod02_importers', os.path.join(loaderpath, 'pyimod02_importers.py'), 'PYMODULE'),
('pyimod03_ctypes', os.path.join(loaderpath, 'pyimod03_ctypes.py'), 'PYMODULE'),
]
if is_win:
loader_mods.append(('pyimod04_pywin32', os.path.join(loaderpath, 'pyimod04_pywin32.py'), 'PYMODULE'))
# The bootstrap script
loader_mods.append(('pyiboot01_bootstrap', os.path.join(loaderpath, 'pyiboot01_bootstrap.py'), 'PYSOURCE'))
return loader_mods

View File

@ -0,0 +1,909 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Find external dependencies of binary libraries.
"""
import ctypes.util
import os
import pathlib
import re
import sys
import sysconfig
import subprocess
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import dylib, utils
from PyInstaller.utils.win32 import winutils
if compat.is_darwin:
import PyInstaller.utils.osx as osxutils
logger = logging.getLogger(__name__)
_exe_machine_type = None
if compat.is_win:
_exe_machine_type = winutils.get_pe_file_machine_type(compat.python_executable)
#- High-level binary dependency analysis
def _get_paths_for_parent_directory_preservation():
"""
Return list of paths that serve as prefixes for parent-directory preservation of collected binaries and/or
shared libraries. If a binary is collected from a location that starts with a path from this list, the relative
directory structure is preserved within the frozen application bundle; otherwise, the binary is collected to the
frozen application's top-level directory.
"""
# Use only site-packages paths. We have no control over contents of `sys.path`, so using all paths from that may
# lead to unintended behavior in corner cases. For example, if `sys.path` contained the drive root (see #7028),
# all paths that do not match some other sub-path rooted in that drive will end up recognized as relative to the
# drive root. In such case, any DLL collected from `c:\Windows\system32` will be collected into `Windows\system32`
# sub-directory; ucrt DLLs collected from MSVC or Windows SDK installed in `c:\Program Files\...` will end up
# collected into `Program Files\...` subdirectory; etc.
#
# On the other hand, the DLL parent directory preservation is primarily aimed at packages installed via PyPI
# wheels, which are typically installed into site-packages. Therefore, limiting the directory preservation for
# shared libraries collected from site-packages should do the trick, and should be reasonably safe.
import site
orig_paths = site.getsitepackages()
orig_paths.append(site.getusersitepackages())
# Explicitly excluded paths. `site.getsitepackages` seems to include `sys.prefix`, which we need to exclude, to
# avoid issues with DLLs in its sub-directories. We need both the resolved and unresolved variants to handle cases
# where `base_prefix` itself is a symbolic link (e.g., `scoop`-installed python on Windows, see #8023).
excluded_paths = {
pathlib.Path(sys.base_prefix),
pathlib.Path(sys.base_prefix).resolve(),
pathlib.Path(sys.prefix),
pathlib.Path(sys.prefix).resolve(),
}
# For each path in orig_paths, append a resolved variant. This helps with linux venv, where we need to consider
# both `venv/lib/python3.11/site-packages` and `venv/lib64/python3.11/site-packages`, with `lib64` being a symlink
# to `lib`.
orig_paths += [pathlib.Path(path).resolve() for path in orig_paths]
paths = set()
for path in orig_paths:
if not path:
continue
path = pathlib.Path(path)
# Filter out non-directories (e.g., /path/to/python3x.zip) or non-existent paths
if not path.is_dir():
continue
# Filter out explicitly excluded paths
if path in excluded_paths:
continue
paths.add(path)
# Sort by length (in terms of path components) to ensure match against the longest common prefix (for example, match
# /path/to/venv/lib/site-packages instead of /path/to/venv when both paths are in site paths).
paths = sorted(paths, key=lambda x: len(x.parents), reverse=True)
return paths
def _select_destination_directory(src_filename, parent_dir_preservation_paths):
# Check parent directory preservation paths
for parent_dir_preservation_path in parent_dir_preservation_paths:
if parent_dir_preservation_path in src_filename.parents:
# Collect into corresponding sub-directory.
return src_filename.relative_to(parent_dir_preservation_path)
# Collect into top-level directory.
return src_filename.name
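# Illustrative sketch (hypothetical paths, not part of the original code): with
#
#     src_filename = pathlib.Path('/venv/lib/site-packages/numpy.libs/libopenblas.so')
#
# and '/venv/lib/site-packages' among the preservation paths, the returned destination is
# 'numpy.libs/libopenblas.so'; a library from an unrelated location (e.g., 'C:/Windows/system32/foo.dll')
# falls through to just 'foo.dll'.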
def binary_dependency_analysis(binaries, search_paths=None):
"""
Perform binary dependency analysis on the given TOC list of collected binaries, by recursively scanning each binary
for linked dependencies (shared library imports). Returns a new TOC list that contains both the original entries
and their binary dependencies.
Additional search paths for dependencies' full path resolution may be supplied via the optional `search_paths`
argument.
"""
# Get all path prefixes for binaries' parent-directory preservation. For binaries collected from packages in (for
# example) site-packages directory, we should try to preserve the parent directory structure.
parent_dir_preservation_paths = _get_paths_for_parent_directory_preservation()
# Keep track of processed binaries and processed dependencies.
processed_binaries = set()
processed_dependencies = set()
# Keep track of unresolved dependencies, in order to defer the missing-library warnings until after everything has
# been processed. This allows us to suppress warnings for dependencies that end up being collected anyway; for
# details, see the end of this function.
missing_dependencies = []
# Populate output TOC with input binaries - this also serves as TODO list, as we iterate over it while appending
# new entries at the end.
output_toc = binaries[:]
for dest_name, src_name, typecode in output_toc:
# Do not process symbolic links (already present in input TOC list, or added during analysis below).
if typecode == 'SYMLINK':
continue
# Keep track of processed binaries, to avoid unnecessarily repeating analysis of the same file. Use pathlib.Path
# to avoid having to worry about case normalization.
src_path = pathlib.Path(src_name)
if src_path in processed_binaries:
continue
processed_binaries.add(src_path)
logger.debug("Analyzing binary %r", src_name)
# Analyze imports (linked dependencies)
for dep_name, dep_src_path in get_imports(src_name, search_paths):
logger.debug("Processing dependency, name: %r, resolved path: %r", dep_name, dep_src_path)
# Skip unresolved dependencies. Defer the missing-library warnings until after binary dependency analysis
# is complete.
if not dep_src_path:
missing_dependencies.append((dep_name, src_name))
continue
# Compare resolved dependency against global inclusion/exclusion rules.
if not dylib.include_library(dep_src_path):
logger.debug("Skipping dependency %r due to global exclusion rules.", dep_src_path)
continue
dep_src_path = pathlib.Path(dep_src_path) # Turn into pathlib.Path for subsequent processing
# Avoid processing this dependency if we have already processed it.
if dep_src_path in processed_dependencies:
logger.debug("Skipping dependency %r due to prior processing.", str(dep_src_path))
continue
processed_dependencies.add(dep_src_path)
# Try to preserve parent directory structure, if applicable.
# NOTE: do not resolve the source path, because on macOS and linux, it may be a versioned .so (e.g.,
# libsomething.so.1, pointing at libsomething.so.1.2.3), and we need to collect it under original name!
dep_dest_path = _select_destination_directory(dep_src_path, parent_dir_preservation_paths)
dep_dest_path = pathlib.PurePath(dep_dest_path) # Might be a str() if it is just a basename...
# If we are collecting library into top-level directory on macOS, check whether it comes from a
# .framework bundle. If it does, re-create the .framework bundle in the top-level directory
# instead.
if compat.is_darwin and dep_dest_path.parent == pathlib.PurePath('.'):
if osxutils.is_framework_bundle_lib(dep_src_path):
# dep_src_path is parent_path/Name.framework/Versions/Current/Name
framework_parent_path = dep_src_path.parent.parent.parent.parent
dep_dest_path = pathlib.PurePath(dep_src_path.relative_to(framework_parent_path))
logger.debug("Collecting dependency %r as %r.", str(dep_src_path), str(dep_dest_path))
output_toc.append((str(dep_dest_path), str(dep_src_path), 'BINARY'))
# On non-Windows, if we are not collecting the binary into application's top-level directory ('.'),
# add a symbolic link from top-level directory to the actual location. This is to accommodate
# LD_LIBRARY_PATH being set to the top-level application directory on linux (although library search
# should be mostly done via rpaths, so this might be redundant) and to accommodate library path
# rewriting on macOS, which assumes that the library was collected into top-level directory.
if not compat.is_win and dep_dest_path.parent != pathlib.PurePath('.'):
logger.debug("Adding symbolic link from %r to top-level application directory.", str(dep_dest_path))
output_toc.append((str(dep_dest_path.name), str(dep_dest_path), 'SYMLINK'))
# Display warnings about missing dependencies
seen_binaries = set([
os.path.normcase(os.path.basename(src_name)) for dest_name, src_name, typecode in output_toc
if typecode != 'SYMLINK'
])
for dependency_name, referring_binary in missing_dependencies:
# Ignore libraries that we would not collect in the first place.
if not dylib.include_library(dependency_name):
continue
# Apply global warning suppression rules.
if not dylib.warn_missing_lib(dependency_name):
continue
# If the binary with a matching basename happens to be among the discovered binaries, suppress the message as
# well. This might happen either because the library was collected by some other mechanism (for example, via
# hook, or supplied by the user), or because it was discovered during the analysis of another binary (which,
# for example, had properly set run-paths on Linux/macOS or was located next to that other analyzed binary on
# Windows).
if os.path.normcase(os.path.basename(dependency_name)) in seen_binaries:
continue
logger.warning("Library not found: could not resolve %r, dependency of %r.", dependency_name, referring_binary)
return output_toc
#- Low-level import analysis
def get_imports(filename, search_paths=None):
"""
Analyze the given binary file (shared library or executable), and obtain the list of shared libraries it imports
(i.e., link-time dependencies).
Returns a set of (name, fullpath) tuples. The name component is the referenced name and, on macOS, may not be just
a basename. If the library's full path cannot be resolved, the fullpath element is None.
An additional list of search paths may be specified via `search_paths`, to be used as a fall-back when the
platform-specific resolution mechanism fails to resolve a library's full path.
"""
if compat.is_win:
if filename.lower().endswith(".manifest"):
return []
return _get_imports_pefile(filename, search_paths)
elif compat.is_darwin:
return _get_imports_macholib(filename, search_paths)
else:
return _get_imports_ldd(filename, search_paths)
def _get_imports_pefile(filename, search_paths):
"""
Windows-specific helper for `get_imports`, which uses the `pefile` library to walk through PE header.
"""
import pefile
output = set()
# By default, pefile library parses all PE information. We are only interested in the list of dependent dlls.
# Performance is improved by reading only needed information. https://code.google.com/p/pefile/wiki/UsageExamples
pe = pefile.PE(filename, fast_load=True)
pe.parse_data_directories(
directories=[
pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
],
forwarded_exports_only=True,
import_dllnames_only=True,
)
# If a library has no binary dependencies, pe.DIRECTORY_ENTRY_IMPORT does not exist.
for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
dll_str = entry.dll.decode('utf-8')
output.add(dll_str)
# We must also read the exports table to find forwarded symbols:
# http://blogs.msdn.com/b/oldnewthing/archive/2006/07/19/671238.aspx
exported_symbols = getattr(pe, 'DIRECTORY_ENTRY_EXPORT', None)
if exported_symbols:
for symbol in exported_symbols.symbols:
if symbol.forwarder is not None:
# symbol.forwarder is a bytes object. Convert it to a string.
forwarder = symbol.forwarder.decode('utf-8')
# symbol.forwarder is for example 'KERNEL32.EnterCriticalSection'
dll = forwarder.split('.')[0]
output.add(dll + ".dll")
pe.close()
# Attempt to resolve full paths to referenced DLLs. Always add the input binary's parent directory to the search
# paths.
search_paths = [os.path.dirname(filename)] + (search_paths or [])
output = {(lib, resolve_library_path(lib, search_paths)) for lib in output}
return output
def _get_imports_ldd(filename, search_paths):
"""
Helper for `get_imports`, which uses `ldd` to analyze shared libraries. Used on Linux and other POSIX-like platforms
(with the exception of macOS).
"""
output = set()
# Output of ldd varies between platforms...
if compat.is_aix:
# Match libs of the form
# 'archivelib.a(objectmember.so/.o)'
# or
# 'sharedlib.so'
# Will not match the fake lib '/unix'
LDD_PATTERN = re.compile(r"^\s*(((?P<libarchive>(.*\.a))(?P<objectmember>\(.*\)))|((?P<libshared>(.*\.so))))$")
elif compat.is_hpux:
# Match libs of the form
# 'sharedlib.so => full-path-to-lib
# e.g.
# 'libpython2.7.so => /usr/local/lib/hpux32/libpython2.7.so'
LDD_PATTERN = re.compile(r"^\s+(.*)\s+=>\s+(.*)$")
elif compat.is_solar:
# Match libs of the form
# 'sharedlib.so => full-path-to-lib
# e.g.
# 'libpython2.7.so.1.0 => /usr/local/lib/libpython2.7.so.1.0'
# Will not match the platform specific libs starting with '/platform'
LDD_PATTERN = re.compile(r"^\s+(.*)\s+=>\s+(.*)$")
else:
LDD_PATTERN = re.compile(r"\s*(.*?)\s+=>\s+(.*?)\s+\(.*\)")
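# For reference, a typical glibc ldd output line matched by this default pattern looks like
# (hypothetical library):
#
#     libssl.so.3 => /usr/lib/x86_64-linux-gnu/libssl.so.3 (0x00007f...)
#
# where group 1 captures the referenced name and group 2 the resolved path.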
p = subprocess.run(
['ldd', filename],
stdin=subprocess.DEVNULL,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
encoding='utf-8',
)
for line in p.stderr.splitlines():
if not line:
continue
# Python extensions (including stdlib ones) are not linked against python.so but rely on Python's symbols having
# already been loaded into symbol space at runtime. musl's ldd issues a series of harmless warnings to stderr
# telling us that those symbols are unfindable. These should be suppressed.
elif line.startswith("Error relocating ") and line.endswith(" symbol not found"):
continue
# Propagate any other warnings it might have.
print(line, file=sys.stderr)
for line in p.stdout.splitlines():
name = None # Referenced name
lib = None # Resolved library path
m = LDD_PATTERN.search(line)
if m:
if compat.is_aix:
libarchive = m.group('libarchive')
if libarchive:
# We matched an archive lib with a request for a particular embedded shared object.
# 'archivelib.a(objectmember.so/.o)'
lib = libarchive
name = os.path.basename(lib) + m.group('objectmember')
else:
# We matched a stand-alone shared library.
# 'sharedlib.so'
lib = m.group('libshared')
name = os.path.basename(lib)
elif compat.is_hpux:
name, lib = m.group(1), m.group(2)
else:
name, lib = m.group(1), m.group(2)
if name[:10] in ('linux-gate', 'linux-vdso'):
# linux-gate is a fake library which does not exist and should be ignored. See also:
# http://www.trilithium.com/johan/2005/08/linux-gate/
continue
if compat.is_cygwin:
# exclude Windows system library
if lib.lower().startswith('/cygdrive/c/windows/system'):
continue
# Reset library path if it does not exist
if not os.path.exists(lib):
lib = None
elif line.endswith("not found"):
# On glibc-based linux distributions, missing libraries are marked with name.so => not found
tokens = line.split('=>')
if len(tokens) != 2:
continue
name = tokens[0].strip()
lib = None
else:
# TODO: should we warn about unprocessed lines?
continue
# Fall back to searching the supplied search paths, if any.
if not lib:
lib = _resolve_library_path_in_search_paths(
os.path.basename(name), # Search for basename of the referenced name.
search_paths,
)
# Normalize the resolved path, to remove any extraneous "../" elements.
if lib:
lib = os.path.normpath(lib)
# Return referenced name as-is instead of computing a basename, to provide additional context when library
# cannot be resolved.
output.add((name, lib))
return output
def _get_imports_macholib(filename, search_paths):
"""
macOS-specific helper for `get_imports`, which uses `macholib` to analyze library load commands in Mach-O headers.
"""
from macholib.dyld import dyld_find
from macholib.mach_o import LC_RPATH
from macholib.MachO import MachO
output = set()
referenced_libs = set() # Libraries referenced in Mach-O headers.
# Parent directory of the input binary and parent directory of python executable, used to substitute @loader_path
# and @executable_path. The MacOS dylib loader (dyld) fully resolves the symbolic links when using @loader_path
# and @executable_path references, so we need to do the same using `os.path.realpath`.
bin_path = os.path.dirname(os.path.realpath(filename))
python_bin_path = os.path.dirname(os.path.realpath(sys.executable))
# Walk through Mach-O headers, and collect all referenced libraries.
m = MachO(filename)
for header in m.headers:
for idx, name, lib in header.walkRelocatables():
referenced_libs.add(lib)
# Find LC_RPATH commands to collect rpaths. macholib does not handle @rpath, so we need to handle run paths
# ourselves.
run_paths = set()
for header in m.headers:
for command in header.commands:
# A command is a tuple like:
# (<macholib.mach_o.load_command object at 0x>,
# <macholib.mach_o.rpath_command object at 0x>,
# '../lib\x00\x00')
cmd_type = command[0].cmd
if cmd_type == LC_RPATH:
rpath = command[2].decode('utf-8')
# Remove trailing '\x00' characters. E.g., '../lib\x00\x00'
rpath = rpath.rstrip('\x00')
# If run path starts with @, ensure it starts with either @loader_path or @executable_path. We cannot
# process anything else.
if rpath.startswith("@") and not rpath.startswith(("@executable_path", "@loader_path")):
logger.warning("Unsupported rpath format %r found in binary %r - ignoring...", rpath, filename)
continue
run_paths.add(rpath)
# For distributions like Anaconda, all of the dylibs are stored in the lib directory of the Python distribution, not
# alongside the .so files in each module's subdirectory. Usually, libraries using @rpath to reference their
# dependencies also set up their run-paths via LC_RPATH commands. However, they are not strictly required to do so,
# because run-paths are inherited from the process within which the libraries are loaded. Therefore, if the python
# executable uses an LC_RPATH command to set up run-path that resolves the shared lib directory (for example,
# `@loader_path/../lib` in case of the Anaconda python), all libraries loaded within the python process are able
# to resolve the shared libraries within the environment's shared lib directory without using LC_RPATH commands
# themselves.
#
# Our analysis does not account for inherited run-paths, and we attempt to work around this limitation by
# registering the following fall-back run-path.
run_paths.add(os.path.join(compat.base_prefix, 'lib'))
def _resolve_using_loader_path(lib, bin_path, python_bin_path):
# macholib does not support @loader_path, so replace it with @executable_path. Strictly speaking, @loader_path
# should be anchored to parent directory of analyzed binary (`bin_path`), while @executable_path should be
# anchored to the parent directory of the process' executable. Typically, this would be python executable
# (`python_bin_path`), unless we are analyzing a collected 3rd party executable. In that case, `bin_path`
# is correct option. So we first try resolving using `bin_path`, and then fall back to `python_bin_path`.
# This does not account for transitive run paths of higher-order dependencies, but there is only so much we
# can do here...
if lib.startswith('@loader_path'):
lib = lib.replace('@loader_path', '@executable_path')
try:
# Try resolving with binary's path first...
return dyld_find(lib, executable_path=bin_path)
except ValueError:
# ... and fall-back to resolving with python executable's path
try:
return dyld_find(lib, executable_path=python_bin_path)
except ValueError:
return None
def _resolve_using_path(lib):
try:
return dyld_find(lib)
except ValueError:
return None
# Try to resolve full path of the referenced libraries.
for referenced_lib in referenced_libs:
resolved_lib = None
# If path starts with @rpath, we have to handle it ourselves.
if referenced_lib.startswith('@rpath'):
lib = os.path.join(*referenced_lib.split(os.sep)[1:]) # Remove the @rpath/ prefix
# Try all run paths.
for run_path in run_paths:
# Join the path.
lib_path = os.path.join(run_path, lib)
if lib_path.startswith(("@executable_path", "@loader_path")):
# Run path starts with @executable_path or @loader_path.
lib_path = _resolve_using_loader_path(lib_path, bin_path, python_bin_path)
else:
# If run path was relative, anchor it to binary's location.
if not os.path.isabs(lib_path):
lib_path = os.path.join(bin_path, lib_path)
lib_path = _resolve_using_path(lib_path)
if lib_path and os.path.exists(lib_path):
resolved_lib = lib_path
break
else:
if referenced_lib.startswith(("@executable_path", "@loader_path")):
resolved_lib = _resolve_using_loader_path(referenced_lib, bin_path, python_bin_path)
else:
resolved_lib = _resolve_using_path(referenced_lib)
# Fall back to searching the supplied search paths, if any.
if not resolved_lib:
resolved_lib = _resolve_library_path_in_search_paths(
os.path.basename(referenced_lib), # Search for basename of the referenced name.
search_paths,
)
# Normalize the resolved path, to remove any extraneous "../" elements.
if resolved_lib:
resolved_lib = os.path.normpath(resolved_lib)
# Return referenced library name as-is instead of computing a basename. Full referenced name carries additional
# information that might be useful for the caller to determine how to deal with unresolved library (e.g., ignore
# unresolved libraries that are supposed to be located in system-wide directories).
output.add((referenced_lib, resolved_lib))
return output
#- Library full path resolution
def resolve_library_path(name, search_paths=None):
"""
Given a library name, attempt to resolve the full path to that library. The search is done via a platform-specific
mechanism, falling back to the optionally-provided list of search paths. Returns None if the library cannot be
resolved. If the given library name is already an absolute path, it is returned without any processing.
"""
# No-op if path is already absolute.
if os.path.isabs(name):
return name
if compat.is_unix:
# Use platform-specific helper.
fullpath = _resolve_library_path_unix(name)
if fullpath:
return fullpath
# Fall back to searching the supplied search paths, if any
return _resolve_library_path_in_search_paths(name, search_paths)
elif compat.is_win:
# Try the caller-supplied search paths, if any.
fullpath = _resolve_library_path_in_search_paths(name, search_paths)
if fullpath:
return fullpath
# Fall back to default Windows search paths, using the PATH environment variable (which should also include
# the system paths, such as c:\windows and c:\windows\system32)
win_search_paths = [path for path in compat.getenv('PATH', '').split(os.pathsep) if path]
return _resolve_library_path_in_search_paths(name, win_search_paths)
else:
return ctypes.util.find_library(name)
return None
# Compatibility aliases for hooks from contributed hooks repository. All of these now point to the high-level
# `resolve_library_path`.
findLibrary = resolve_library_path
findSystemLibrary = resolve_library_path
def _resolve_library_path_in_search_paths(name, search_paths=None):
"""
Low-level helper for resolving given library name to full path in given list of search paths.
"""
for search_path in search_paths or []:
fullpath = os.path.join(search_path, name)
if not os.path.isfile(fullpath):
continue
# On Windows, ensure that architecture matches that of running python interpreter.
if compat.is_win:
try:
dll_machine_type = winutils.get_pe_file_machine_type(fullpath)
except Exception:
# A search path might contain a DLL that we cannot analyze; for example, a stub file. Skip over.
continue
if dll_machine_type != _exe_machine_type:
continue
return os.path.normpath(fullpath)
return None
def _resolve_library_path_unix(name):
"""
UNIX-specific helper for resolving library path.
Emulates the algorithm used by dlopen. `name` must include the prefix, e.g., ``libpython2.4.so``.
"""
assert compat.is_unix, "Current implementation for Unix only (Linux, Solaris, AIX, FreeBSD)"
# Look in the LD_LIBRARY_PATH according to platform.
if compat.is_aix:
lp = compat.getenv('LIBPATH', '')
elif compat.is_darwin:
lp = compat.getenv('DYLD_LIBRARY_PATH', '')
else:
lp = compat.getenv('LD_LIBRARY_PATH', '')
lib = _which_library(name, filter(None, lp.split(os.pathsep)))
# Look in /etc/ld.so.cache
# Solaris does not have /sbin/ldconfig. Just check if this file exists.
if lib is None:
utils.load_ldconfig_cache()
lib = utils.LDCONFIG_CACHE.get(name)
if lib:
assert os.path.isfile(lib)
# Look in the known safe paths.
if lib is None:
# Architecture independent locations.
paths = ['/lib', '/usr/lib']
# Architecture dependent locations.
if compat.architecture == '32bit':
paths.extend(['/lib32', '/usr/lib32'])
else:
paths.extend(['/lib64', '/usr/lib64'])
# Machine dependent locations.
if compat.machine == 'intel':
if compat.architecture == '32bit':
paths.extend(['/usr/lib/i386-linux-gnu'])
else:
paths.extend(['/usr/lib/x86_64-linux-gnu'])
# On Debian/Ubuntu /usr/bin/python is linked statically with libpython. Newer Debian/Ubuntu with multiarch
# support puts the libpythonX.Y.so in paths like /usr/lib/i386-linux-gnu/. Try to query the arch-specific
# sub-directory, if available.
arch_subdir = sysconfig.get_config_var('multiarchsubdir')
if arch_subdir:
arch_subdir = os.path.basename(arch_subdir)
paths.append(os.path.join('/usr/lib', arch_subdir))
else:
logger.debug('Multiarch directory not detected.')
# Termux (an Ubuntu-like subsystem for Android) has an additional libraries directory.
if os.path.isdir('/data/data/com.termux/files/usr/lib'):
paths.append('/data/data/com.termux/files/usr/lib')
if compat.is_aix:
paths.append('/opt/freeware/lib')
elif compat.is_hpux:
if compat.architecture == '32bit':
paths.append('/usr/local/lib/hpux32')
else:
paths.append('/usr/local/lib/hpux64')
elif compat.is_freebsd or compat.is_openbsd:
paths.append('/usr/local/lib')
lib = _which_library(name, paths)
# Give up :(
if lib is None:
return None
# Resolve the file name into the soname
if compat.is_freebsd or compat.is_aix or compat.is_openbsd:
# On FreeBSD objdump does not show SONAME, and on AIX objdump does not exist, so we just return the lib we
# have found.
return lib
else:
dir = os.path.dirname(lib)
return os.path.join(dir, _get_so_name(lib))
def _which_library(name, dirs):
"""
Search for a shared library in a list of directories.
Args:
name:
The library name including the `lib` prefix but excluding any `.so` suffix.
dirs:
An iterable of folders to search in.
Returns:
The path to the library if found or None otherwise.
"""
matcher = _library_matcher(name)
for path in filter(os.path.exists, dirs):
for _path in os.listdir(path):
if matcher(_path):
return os.path.join(path, _path)
def _library_matcher(name):
"""
Create a callable that matches libraries if **name** is a valid library prefix for input library full names.
"""
return re.compile(name + r"[0-9]*\.").match
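# Illustrative sketch (hypothetical library name, not part of the original code): the matcher produced above
# behaves like
#
#     matcher = _library_matcher('libpython')
#     matcher('libpython3.11.so.1.0')   # -> match (optional version digits, then a dot)
#     matcher('libpython.so')           # -> match
#     matcher('libpythonfoo.so')        # -> None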
def _get_so_name(filename):
"""
Return the soname of a library.
The soname is useful when there are multiple symlinks to one library.
"""
# TODO verify that objdump works on other unixes and not Linux only.
cmd = ["objdump", "-p", filename]
pattern = r'\s+SONAME\s+([^\s]+)'
if compat.is_solar:
cmd = ["elfdump", "-d", filename]
pattern = r'\s+SONAME\s+[^\s]+\s+([^\s]+)'
m = re.search(pattern, compat.exec_command(*cmd))
return m.group(1)
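# For reference, the relevant line in `objdump -p` output looks like (hypothetical library):
#
#     SONAME               libpython3.11.so.1.0
#
# which the pattern above captures as the soname.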
#- Python shared library search
def get_python_library_path():
"""
Find dynamic Python library that will be bundled with frozen executable.
NOTE: This is a fallback option when the Python executable is likely statically linked with the Python library and
we need to search more for it. For example, this is the case on Debian/Ubuntu.
Return full path to Python dynamic library or None when not found.
We need to know the name of the Python dynamic library for the bootloader. The bootloader has to know which
library to load, rather than trying to guess.
Some linux distributions (e.g., debian-based) statically link the Python executable against libpython,
so bindepend does not include it in its output. In this situation, let's try to find it.
Custom Mac OS builds could possibly also have non-framework style libraries, so this method checks for that
variant as well.
"""
def _find_lib_in_libdirs(*libdirs):
for libdir in libdirs:
for name in compat.PYDYLIB_NAMES:
full_path = os.path.join(libdir, name)
if not os.path.exists(full_path):
continue
# Resolve potential symbolic links to achieve consistent results with linker-based search; e.g., on
# POSIX systems, the linker resolves unversioned library names (python3.X.so) to versioned ones
# (libpython3.X.so.1.0) due to the former being symbolic links to the latter. See #6831.
full_path = os.path.realpath(full_path)
if not os.path.exists(full_path):
continue
return full_path
return None
# If this is Microsoft App Store Python, check compat.base_prefix first. While compat.python_executable resolves
# to the actual python.exe file, the latter contains a relative library reference that we fail to properly resolve.
if compat.is_ms_app_store:
python_libname = _find_lib_in_libdirs(compat.base_prefix)
if python_libname:
return python_libname
# Try to get the Python library name from the Python executable. This assumes that the Python library is not
# statically linked.
imported_libraries = get_imports(compat.python_executable) # (name, fullpath) tuples
for _, lib_path in imported_libraries:
if lib_path is None:
continue # Skip unresolved imports
for name in compat.PYDYLIB_NAMES:
if os.path.normcase(os.path.basename(lib_path)) == name:
# Python library found. Return absolute path to it.
return lib_path
# Python library NOT found. Resume searching using alternative methods.
# Workaround for python venv having VERSION.dll rather than pythonXY.dll
if compat.is_win and any([os.path.normcase(lib_name) == 'version.dll' for lib_name, _ in imported_libraries]):
pydll = 'python%d%d.dll' % sys.version_info[:2]
return resolve_library_path(pydll, [os.path.dirname(compat.python_executable)])
# Applies only to non Windows platforms and conda.
if compat.is_conda:
# Conda needs to be the first here since it overrules the operating system specific paths.
python_libname = _find_lib_in_libdirs(os.path.join(compat.base_prefix, 'lib'))
if python_libname:
return python_libname
elif compat.is_unix:
for name in compat.PYDYLIB_NAMES:
python_libname = findLibrary(name)
if python_libname:
return python_libname
if compat.is_darwin or compat.is_linux:
# On MacPython, Analysis.assemble is able to find the libpython with no additional help, asking for
# sys.executable dependencies. However, this fails on system python, because the shared library is not listed as
# a dependency of the binary (most probably it is opened at runtime using some dlopen trickery). This happens on
# Mac OS when Python is compiled as Framework.
# Linux using pyenv is similarly linked, so that sys.executable's dependencies do not yield libpython.so.
# Python compiled as a Framework contains the same values in sys.prefix and exec_prefix. That is why we can use
# just sys.prefix. In virtualenv, PyInstaller is not able to find the Python library, so this case needs special
# care.
python_libname = _find_lib_in_libdirs(
compat.base_prefix,
os.path.join(compat.base_prefix, 'lib'),
)
if python_libname:
return python_libname
# Python library NOT found. Return None and let the caller deal with this.
return None
#- Binary vs data (re)classification
def classify_binary_vs_data(filename):
"""
Classify the given file as either BINARY or a DATA, using appropriate platform-specific method. Returns 'BINARY'
or 'DATA' string depending on the determined file type, or None if classification cannot be performed (non-existing
file, missing tool, and other errors during classification).
"""
# We cannot classify non-existent files.
if not os.path.isfile(filename):
return None
# Use platform-specific implementation.
return _classify_binary_vs_data(filename)
if compat.is_linux:
def _classify_binary_vs_data(filename):
# First check for ELF signature, in order to avoid calling `objdump` on every data file, which can be costly.
try:
with open(filename, 'rb') as fp:
sig = fp.read(4)
except Exception:
return None
if sig != b"\x7FELF":
return "DATA"
# Verify the binary by checking if `objdump` recognizes the file. The preceding ELF signature check should
# ensure that this is an ELF file, while this check should ensure that it is a valid ELF file. In the future,
# we could try checking that the architecture matches the running platform.
cmd_args = ['objdump', '-a', filename]
try:
p = subprocess.run(
cmd_args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.DEVNULL,
encoding='utf8',
)
except Exception:
return None # Failed to run `objdump` or `objdump` unavailable.
return 'BINARY' if p.returncode == 0 else 'DATA'
elif compat.is_win:
def _classify_binary_vs_data(filename):
# See if the file can be opened using `pefile`.
import pefile
try:
pe = pefile.PE(filename, fast_load=True) # noqa: F841
return 'BINARY'
except Exception:
# TODO: catch only `pefile.PEFormatError`?
pass
return 'DATA'
elif compat.is_darwin:
def _classify_binary_vs_data(filename):
# See if the file can be opened using `macholib`.
import macholib.MachO
try:
macho = macholib.MachO.MachO(filename) # noqa: F841
return 'BINARY'
except Exception:
# TODO: catch only `ValueError`?
pass
return 'DATA'
else:
def _classify_binary_vs_data(filename):
# Classification not implemented for the platform.
return None

View File

@ -0,0 +1,327 @@
# -*- coding: utf-8 -*-
"""
Tools for searching bytecode for key statements that indicate the need for additional resources, such as data files
and package metadata.
By *bytecode* I mean the ``code`` object given by ``compile()``, accessible from the ``__code__`` attribute of any
non-builtin function or, in PyInstallerLand, the ``PyiModuleGraph.node("some.module").code`` attribute. The best
guide for bytecode format I have found is the disassembler reference: https://docs.python.org/3/library/dis.html
This parser implementation aims to combine the flexibility and speed of regex with the clarity of the output of
``dis.dis(code)``. It has not achieved the second, but c'est la vie...
The biggest clarity killer here is the ``EXTENDED_ARG`` opcode which can appear almost anywhere and therefore needs
to be tiptoed around at every step. If this code needs to expand significantly, I would recommend an upgrade to a
regex-based grammar parsing library such as Reparse. This way, little steps like unpacking ``EXTENDED_ARGS`` can be
defined once and then simply referenced, forming a nice hierarchy, rather than copied everywhere it's needed.
"""
import dis
import re
from types import CodeType
from typing import Pattern
from PyInstaller import compat
# opcode name -> opcode map
# Python 3.11 introduced specialized opcodes that are not covered by opcode.opmap (and equivalent dis.opmap), but dis
# has a private map of all opcodes called _all_opmap. So use the latter, if available.
opmap = getattr(dis, '_all_opmap', dis.opmap)
def _instruction_to_regex(x: str):
"""
Get a regex-escaped opcode byte from its human readable name.
"""
return re.escape(bytes([opmap[x]]))
def bytecode_regex(pattern: bytes, flags=re.VERBOSE | re.DOTALL):
"""
A regex-powered Python bytecode matcher.
``bytecode_regex`` provides a very thin wrapper around :func:`re.compile`.
* Any opcode names wrapped in backticks are substituted for their corresponding opcode bytes.
* Patterns are compiled in VERBOSE mode by default so that whitespace and comments may be used.
This aims to mirror the output of :func:`dis.dis`, which is far more readable than looking at raw byte strings.
"""
assert isinstance(pattern, bytes)
# Replace anything wrapped in backticks with regex-escaped opcodes.
pattern = re.sub(
rb"`(\w+)`",
lambda m: _instruction_to_regex(m[1].decode()),
pattern,
)
return re.compile(pattern, flags=flags)
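# Illustrative usage sketch (hypothetical pattern, not part of the original module): match a LOAD_CONST opcode,
# possibly preceded by EXTENDED_ARG prefixes, capturing its one-byte argument:
#
#     _load_const = bytecode_regex(rb"""
#         (?:`EXTENDED_ARG`.)*   # optional argument-extension prefixes
#         `LOAD_CONST`(.)        # the opcode and its argument byte
#     """)
#
# The backtick-wrapped opcode names are replaced with their regex-escaped opcode bytes before the pattern is
# compiled.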
def finditer(pattern: Pattern, string: bytes):
"""
Call ``pattern.finditer(string)``, but remove any matches beginning on an odd byte (i.e., matches where
match.start() is not a multiple of 2).
This should be used to avoid false positive matches where a bytecode pair's argument is mistaken for an opcode.
"""
assert isinstance(string, bytes)
string = _cleanup_bytecode_string(string)
matches = pattern.finditer(string)
while True:
for match in matches:
if match.start() % 2 == 0:
# All is good. This match starts on an OPCODE.
yield match
else:
# This match has started on an odd byte, meaning that it is a false positive and should be skipped.
# There is a very slim chance that a genuine match overlaps this one and, because re.finditer() does not
# allow overlapping matches, it would be lost. To avoid that, restart the regex scan, starting at the
# next even byte.
matches = pattern.finditer(string, match.start() + 1)
break
else:
break
# Opcodes involved in function calls with constant arguments. The differences between python versions are handled by
# variables below, which are then used to construct the _call_function_bytecode regex.
# NOTE1: the _OPCODES_* entries are typically used in (non-capturing) groups that match the opcode plus an arbitrary
# argument. But because the entries themselves may contain more than one opcode (with an OR operator between them), they
# themselves need to be enclosed in another (non-capturing) group. E.g., "(?:(?:_OPCODES_FUNCTION_GLOBAL).)".
# NOTE2: _OPCODES_EXTENDED_ARG2 is an exception, as it is used as a list of opcodes to exclude, i.e.,
# "[^_OPCODES_EXTENDED_ARG2]". Therefore, multiple opcodes are not separated by the OR operator.
if not compat.is_py311:
# Python 3.7 introduced two new function-related opcodes, LOAD_METHOD and CALL_METHOD
_OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
_OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
_OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
_OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
_OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
_OPCODES_FUNCTION_CALL = rb"`CALL_FUNCTION`|`CALL_METHOD`|`CALL_FUNCTION_EX`"
def _cleanup_bytecode_string(bytecode):
return bytecode # Nothing to do here
elif not compat.is_py312:
# Python 3.11 removed CALL_FUNCTION and CALL_METHOD, and replaced them with PRECALL + CALL instruction sequence.
# As both PRECALL and CALL have the same parameter (the argument count), we need to match only up to the PRECALL.
# The CALL_FUNCTION_EX is still present.
# From Python 3.11b1 on, there is an EXTENDED_ARG_QUICK specialization opcode present.
_OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`|`EXTENDED_ARG_QUICK`"
_OPCODES_EXTENDED_ARG2 = rb"`EXTENDED_ARG``EXTENDED_ARG_QUICK`" # Special case; see note above the if/else block!
_OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
_OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
_OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
_OPCODES_FUNCTION_CALL = rb"`PRECALL`|`CALL_FUNCTION_EX`"
# Starting with Python 3.11, the bytecode is peppered with CACHE instructions (which the dis module conveniently
# hides unless show_caches=True is used). Dealing with these CACHE instructions in regex rules would render them
# unreadable, so instead we pre-process the bytecode and filter the offending opcodes out.
_cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")
def _cleanup_bytecode_string(bytecode):
return _cache_instruction_filter.sub(rb"\2", bytecode)
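# For example (illustrative), the instruction stream
#     LOAD_ATTR n, CACHE 0, CACHE 0, CALL k
# is reduced to
#     LOAD_ATTR n, CALL k
# before the regex rules below are applied.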
else:
# Python 3.12 merged EXTENDED_ARG_QUICK back into EXTENDED_ARG, and LOAD_METHOD into LOAD_ATTR.
# PRECALL is no longer a valid opcode.
_OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
_OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
_OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
_OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`"
_OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
_OPCODES_FUNCTION_CALL = rb"`CALL`|`CALL_FUNCTION_EX`"
_cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")
def _cleanup_bytecode_string(bytecode):
return _cache_instruction_filter.sub(rb"\2", bytecode)
# language=PythonVerboseRegExp
_call_function_bytecode = bytecode_regex(
rb"""
# Matches `global_function('some', 'constant', 'arguments')`.
# Load the global function. In code with >256 names, this may require extended name references.
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:(?:""" + _OPCODES_FUNCTION_GLOBAL + rb""").)
)
# For foo.bar.whizz(), the above is the 'foo', below is the 'bar.whizz' (one opcode per name component, each
# possibly preceded by name reference extension).
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_LOAD + rb""").
)*
)
# Load however many arguments it takes. These (for now) must all be constants.
# Again, code with >256 constants may need extended enumeration.
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_ARGS + rb""").
)*
)
# Call the function. If the opcode is CALL_FUNCTION_EX, the parameter holds flags. For other opcodes, the parameter
# is the argument count (which may be > 256).
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_CALL + rb""").
)
"""
)
# language=PythonVerboseRegExp
_extended_arg_bytecode = bytecode_regex(
rb"""(
# Arbitrary number of EXTENDED_ARG pairs.
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
# Followed by some other instruction (usually a LOAD).
[^""" + _OPCODES_EXTENDED_ARG2 + rb"""].
)"""
)
def extended_arguments(extended_args: bytes):
"""
Unpack the (extended) integer used to reference names or constants.
The input should be a bytecode snippet of the following form::
EXTENDED_ARG ? # Repeated 0-4 times.
LOAD_xxx ? # Any of LOAD_NAME/LOAD_CONST/LOAD_METHOD/...
The ``?`` bytes, combined together, give the number we want.
"""
return int.from_bytes(extended_args[1::2], "big")
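# Worked example (illustrative): two EXTENDED_ARG prefixes with argument bytes 0x01 and 0x02, followed by a load
# with argument byte 0x30, decode to 0x010230:
#
#     >>> raw = bytes([opmap["EXTENDED_ARG"], 0x01,
#     ...              opmap["EXTENDED_ARG"], 0x02,
#     ...              opmap["LOAD_CONST"], 0x30])
#     >>> extended_arguments(raw)
#     66096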
def load(raw: bytes, code: CodeType) -> str:
"""
Parse an (extended) LOAD_xxx instruction.
"""
# Get the enumeration.
index = extended_arguments(raw)
# Work out what that enumeration was for (constant/local var/global var).
# If the last instruction byte is a LOAD_FAST:
if raw[-2] == opmap["LOAD_FAST"]:
# Then this is a local variable.
return code.co_varnames[index]
# Or if it is a LOAD_CONST:
if raw[-2] == opmap["LOAD_CONST"]:
# Then this is a literal.
return code.co_consts[index]
# Otherwise, it is a global name.
if compat.is_py311 and raw[-2] == opmap["LOAD_GLOBAL"]:
# In python 3.11, namei>>1 is pushed on stack...
return code.co_names[index >> 1]
if compat.is_py312 and raw[-2] == opmap["LOAD_ATTR"]:
# In python 3.12, namei>>1 is pushed on stack...
return code.co_names[index >> 1]
return code.co_names[index]
def loads(raw: bytes, code: CodeType) -> list:
"""
Parse multiple consecutive LOAD_xxx instructions; i.e., load() applied in a loop.
May be used to unpack a function's parameters or nested attributes ``(foo.bar.pop.whack)``.
"""
return [load(i, code) for i in _extended_arg_bytecode.findall(raw)]
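# E.g., given the attribute-load portion of the bytecode for ``foo.bar.baz()``, loads() returns ['bar', 'baz'],
# which function_calls() below joins onto the root name 'foo'.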
def function_calls(code: CodeType) -> list:
"""
Scan a code object for all function calls on constant arguments.
"""
match: re.Match
out = []
for match in finditer(_call_function_bytecode, code.co_code):
function_root, methods, args, function_call = match.groups()
# For foo():
# `function_root` contains 'foo' and `methods` is empty.
# For foo.bar.whizz():
# `function_root` contains 'foo' and `methods` contains the rest.
function_root = load(function_root, code)
methods = loads(methods, code)
function = ".".join([function_root] + methods)
args = loads(args, code)
if function_call[0] == opmap['CALL_FUNCTION_EX']:
flags = extended_arguments(function_call)
if flags != 0:
# Keyword arguments present. Unhandled at the moment.
continue
# In calls with const arguments, args contains a single
# tuple with all values.
if len(args) != 1 or not isinstance(args[0], tuple):
continue
args = list(args[0])
else:
arg_count = extended_arguments(function_call)
if arg_count != len(args):
# This happens if there are variable or keyword arguments. Bail out in either case.
continue
out.append((function, args))
return out
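# Illustrative usage (a sketch; the exact outcome may vary across CPython versions, depending on how the compiler
# arranges the call):
#
#     code = compile("ctypes.CDLL('libm.so.6')", "<example>", "exec")
#     function_calls(code)   # expected: [('ctypes.CDLL', ['libm.so.6'])]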
def search_recursively(search: callable, code: CodeType, _memo=None) -> dict:
"""
Apply a search function to a code object, recursing into child code objects (function definitions).
"""
if _memo is None:
_memo = {}
if code not in _memo:
_memo[code] = search(code)
for const in code.co_consts:
if isinstance(const, CodeType):
search_recursively(search, const, _memo)
return _memo
def recursive_function_calls(code: CodeType) -> dict:
"""
Scan a code object for function calls on constant arguments, recursing into function definitions and bodies of
comprehension loops.
"""
return search_recursively(function_calls, code)
def any_alias(full_name: str):
"""List possible aliases of a fully qualified Python name.
>>> list(any_alias("foo.bar.wizz"))
['foo.bar.wizz', 'bar.wizz', 'wizz']
This crudely allows us to capture uses of wizz() under any of
::
import foo
foo.bar.wizz()
::
from foo import bar
bar.wizz()
::
from foo.bar import wizz
wizz()
However, it will fail for any other form of aliasing and will quite likely find false matches.
"""
parts = full_name.split('.')
while parts:
yield ".".join(parts)
parts = parts[1:]


@ -0,0 +1,379 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Manipulation of dynamic libraries.
"""
import os.path
from PyInstaller.utils.win32 import winutils
__all__ = ['exclude_list', 'include_list', 'include_library']
import os
import re
import PyInstaller.log as logging
from PyInstaller import compat
logger = logging.getLogger(__name__)
# Ignoring some system libraries speeds up the packaging process.
_excludes = {
# Ignore annoying warnings with Windows system DLLs.
#
# 'W: library kernel32.dll required via ctypes not found'
# 'W: library coredll.dll required via ctypes not found'
#
# These DLLs have to be ignored for all operating systems, because they might be resolved when scanning code for
# ctypes dependencies.
r'advapi32\.dll',
r'ws2_32\.dll',
r'gdi32\.dll',
r'oleaut32\.dll',
r'shell32\.dll',
r'ole32\.dll',
r'coredll\.dll',
r'crypt32\.dll',
r'kernel32',
r'kernel32\.dll',
r'msvcrt\.dll',
r'rpcrt4\.dll',
r'user32\.dll',
# Some modules try to import the Python library, e.g., pyreadline.console.console.
r'python\%s\%s',
}
# Regex includes - overrides excludes. Include list is used only to override specific libraries from exclude list.
_includes = set()
_win_includes = {
# We need to allow collection of Visual Studio C++ (VC) runtime DLLs from system directories in order to avoid
# missing DLL errors when the frozen application is run on a system that does not have the corresponding VC
# runtime installed. The VC runtime DLLs may be dependencies of python shared library itself or of extension
# modules provided by 3rd party packages.
# Visual Studio 2010 (VC10) runtime
# http://msdn.microsoft.com/en-us/library/8kche8ah(v=vs.100).aspx
r'atl100\.dll',
r'msvcr100\.dll',
r'msvcp100\.dll',
r'mfc100\.dll',
r'mfc100u\.dll',
r'mfcmifc80\.dll',
r'mfcm100\.dll',
r'mfcm100u\.dll',
# Visual Studio 2012 (VC11) runtime
# https://docs.microsoft.com/en-us/visualstudio/releases/2013/2012-redistribution-vs
#
# VC110.ATL
r'atl110\.dll',
# VC110.CRT
r'msvcp110\.dll',
r'msvcr110\.dll',
r'vccorlib110\.dll',
# VC110.CXXAMP
r'vcamp110\.dll',
# VC110.MFC
r'mfc110\.dll',
r'mfc110u\.dll',
r'mfcm110\.dll',
r'mfcm110u\.dll',
# VC110.MFCLOC
r'mfc110chs\.dll',
r'mfc110cht\.dll',
r'mfc110enu\.dll',
r'mfc110esn\.dll',
r'mfc110deu\.dll',
r'mfc110fra\.dll',
r'mfc110ita\.dll',
r'mfc110jpn\.dll',
r'mfc110kor\.dll',
r'mfc110rus\.dll',
# VC110.OpenMP
r'vcomp110\.dll',
# DIA SDK
r'msdia110\.dll',
# Visual Studio 2013 (VC12) runtime
# https://docs.microsoft.com/en-us/visualstudio/releases/2013/2013-redistribution-vs
#
# VC120.CRT
r'msvcp120\.dll',
r'msvcr120\.dll',
r'vccorlib120\.dll',
# VC120.CXXAMP
r'vcamp120\.dll',
# VC120.MFC
r'mfc120\.dll',
r'mfc120u\.dll',
r'mfcm120\.dll',
r'mfcm120u\.dll',
# VC120.MFCLOC
r'mfc120chs\.dll',
r'mfc120cht\.dll',
r'mfc120deu\.dll',
r'mfc120enu\.dll',
r'mfc120esn\.dll',
r'mfc120fra\.dll',
r'mfc120ita\.dll',
r'mfc120jpn\.dll',
r'mfc120kor\.dll',
r'mfc120rus\.dll',
# VC120.OPENMP
r'vcomp120\.dll',
# DIA SDK
r'msdia120\.dll',
# Cpp REST Windows SDK
r'casablanca120.winrt\.dll',
# Mobile Services Cpp Client
r'zumosdk120.winrt\.dll',
# Cpp REST SDK
r'casablanca120\.dll',
# Universal C Runtime Library (since Visual Studio 2015)
#
# NOTE: these should be put under a switch, as they need not be bundled if the deployment target is Windows 10
# or later, since "UCRT is now a system component in Windows 10 and later, managed by Windows Update".
# (https://docs.microsoft.com/en-us/cpp/windows/determining-which-dlls-to-redistribute?view=msvc-170)
# And, as discovered in #6326, Windows prefers the system-installed version over the bundled one anyway
# (see https://docs.microsoft.com/en-us/cpp/windows/universal-crt-deployment?view=msvc-170#local-deployment).
r'api-ms-win-core.*',
r'api-ms-win-crt.*',
r'ucrtbase\.dll',
# Visual Studio 2015/2017/2019/2022 (VC14) runtime
# https://docs.microsoft.com/en-us/visualstudio/releases/2022/redistribution
#
# VC141.CRT/VC142.CRT/VC143.CRT
r'concrt140\.dll',
r'msvcp140\.dll',
r'msvcp140_1\.dll',
r'msvcp140_2\.dll',
r'msvcp140_atomic_wait\.dll',
r'msvcp140_codecvt_ids\.dll',
r'vccorlib140\.dll',
r'vcruntime140\.dll',
r'vcruntime140_1\.dll',
# VC141.CXXAMP/VC142.CXXAMP/VC143.CXXAMP
r'vcamp140\.dll',
# VC141.OpenMP/VC142.OpenMP/VC143.OpenMP
r'vcomp140\.dll',
# DIA SDK
r'msdia140\.dll',
# Allow pythonNN.dll, pythoncomNN.dll, pywintypesNN.dll
r'py(?:thon(?:com(?:loader)?)?|wintypes)\d+\.dll',
}
_win_excludes = {
# On Windows, only .dll files can be loaded.
r'.*\.so',
r'.*\.dylib',
# MS assembly excludes
r'Microsoft\.Windows\.Common-Controls',
}
_unix_excludes = {
r'libc\.so(\..*)?',
r'libdl\.so(\..*)?',
r'libm\.so(\..*)?',
r'libpthread\.so(\..*)?',
r'librt\.so(\..*)?',
r'libthread_db\.so(\..*)?',
# glibc regex excludes.
r'ld-linux\.so(\..*)?',
r'libBrokenLocale\.so(\..*)?',
r'libanl\.so(\..*)?',
r'libcidn\.so(\..*)?',
r'libcrypt\.so(\..*)?',
r'libnsl\.so(\..*)?',
r'libnss_compat.*\.so(\..*)?',
r'libnss_dns.*\.so(\..*)?',
r'libnss_files.*\.so(\..*)?',
r'libnss_hesiod.*\.so(\..*)?',
r'libnss_nis.*\.so(\..*)?',
r'libnss_nisplus.*\.so(\..*)?',
r'libresolv\.so(\..*)?',
r'libutil\.so(\..*)?',
# graphical interface libraries come with graphical stack (see libglvnd)
r'libE?(Open)?GLX?(ESv1_CM|ESv2)?(dispatch)?\.so(\..*)?',
r'libdrm\.so(\..*)?',
# a subset of libraries included as part of the Nvidia Linux Graphics Driver as of 520.56.06:
# https://download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/installedcomponents.html
r'nvidia_drv\.so',
r'libglxserver_nvidia\.so(\..*)?',
r'libnvidia-egl-(gbm|wayland)\.so(\..*)?',
r'libnvidia-(cfg|compiler|e?glcore|glsi|glvkspirv|rtcore|allocator|tls|ml)\.so(\..*)?',
r'lib(EGL|GLX)_nvidia\.so(\..*)?',
# libxcb-dri changes ABI frequently (e.g.: between Ubuntu LTS releases) and is usually installed as dependency of
# the graphics stack anyway. No need to bundle it.
r'libxcb\.so(\..*)?',
r'libxcb-dri.*\.so(\..*)?',
}
_aix_excludes = {
r'libbz2\.a',
r'libc\.a',
r'libC\.a',
r'libcrypt\.a',
r'libdl\.a',
r'libintl\.a',
r'libpthreads\.a',
r'librt\.a',
r'librtl\.a',
r'libz\.a',
}
if compat.is_win:
_includes |= _win_includes
_excludes |= _win_excludes
elif compat.is_aix:
# The exclude list for AIX differs from other *nix platforms.
_excludes |= _aix_excludes
elif compat.is_unix:
# Common excludes for *nix platforms -- except AIX.
_excludes |= _unix_excludes
class ExcludeList:
def __init__(self):
self.regex = re.compile('|'.join(_excludes), re.I)
def search(self, libname):
# A regex compiled from an empty pattern matches everything; only attempt matching when the exclude list is non-empty.
if _excludes:
return self.regex.match(os.path.basename(libname))
else:
return False
class IncludeList:
def __init__(self):
self.regex = re.compile('|'.join(_includes), re.I)
def search(self, libname):
# A regex compiled from an empty pattern matches everything; only attempt matching when the include list is non-empty.
if _includes:
return self.regex.match(os.path.basename(libname))
else:
return False
exclude_list = ExcludeList()
include_list = IncludeList()
if compat.is_darwin:
# On Mac use macholib to decide if a binary is a system one.
from macholib import util
class MacExcludeList:
def __init__(self, global_exclude_list):
# Wraps the global 'exclude_list' before it is overridden by this class.
self._exclude_list = global_exclude_list
def search(self, libname):
# First try global exclude list. If it matches, return its result; otherwise continue with other check.
result = self._exclude_list.search(libname)
if result:
return result
else:
return util.in_system_path(libname)
exclude_list = MacExcludeList(exclude_list)
elif compat.is_win:
class WinExcludeList:
def __init__(self, global_exclude_list):
self._exclude_list = global_exclude_list
# use normpath because msys2 uses / instead of \
self._windows_dir = os.path.normpath(winutils.get_windows_dir().lower())
def search(self, libname):
libname = libname.lower()
result = self._exclude_list.search(libname)
if result:
return result
else:
# Exclude everything from the Windows directory by default.
# .. sometimes realpath changes the case of libname, lower it
# .. use normpath because msys2 uses / instead of \
fn = os.path.normpath(os.path.realpath(libname).lower())
return fn.startswith(self._windows_dir)
exclude_list = WinExcludeList(exclude_list)
_seen_wine_dlls = set() # Used for warning tracking in include_library()
def include_library(libname):
"""
Check if the dynamic library should be included with application or not.
"""
if exclude_list:
if exclude_list.search(libname) and not include_list.search(libname):
# Library is excluded and is not overridden by include list. It should be excluded.
return False
# If we are running under Wine and the library is a Wine built-in DLL, ensure that it is always excluded. Typically,
# excluding a DLL leads to an incomplete bundle and run-time errors when the said DLL is not installed on the target
# system. However, having Wine built-in DLLs collected is even more detrimental, as they usually provide Wine's
# implementation of low-level functionality, and therefore cannot be used on actual Windows (i.e., system libraries
# from the C:\Windows\system32 directory that might end up collected due to ``_win_includes`` list; a prominent
# example are VC runtime DLLs, for which Wine provides their own implementation, unless user explicitly installs
# Microsoft's VC redistributable package in their Wine environment). Therefore, excluding the Wine built-in DLLs
# actually improves the chances of the bundle running on Windows, or at least makes the issue easier to debug by
# turning it into the "standard" missing DLL problem. Exclusion should not affect the bundle's ability to run under
# Wine itself, as the excluded DLLs are available there.
if compat.is_win_wine and compat.is_wine_dll(libname):
if libname not in _seen_wine_dlls:
logger.warning("Excluding Wine built-in DLL: %s", libname) # displayed only if DLL would have been included
_seen_wine_dlls.add(libname) # display only once for each DLL
return False
return True
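# Illustrative behaviour (a sketch, assuming a Windows build): include_library("kernel32.dll") returns False because
# the name matches the exclude list with no include-list override, while "msvcp140.dll" resolved from the Windows
# directory is kept, because the VC14 runtime pattern in _win_includes overrides the path-based exclusion.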
# Patterns for suppressing warnings about missing dynamically linked libraries
_warning_suppressions = []
# On some systems (e.g., openwrt), libc.so might point to ldd. Suppress warnings about it.
if compat.is_linux:
_warning_suppressions.append(r'ldd')
# Suppress false warnings on win 10 and UCRT (see issue #1566).
if compat.is_win_10:
_warning_suppressions.append(r'api-ms-win-.*\.dll')
class MissingLibWarningSuppressionList:
def __init__(self):
self.regex = re.compile('|'.join(_warning_suppressions), re.I)
def search(self, libname):
# A regex compiled from an empty pattern matches everything; only attempt matching when the suppression list is non-empty.
if _warning_suppressions:
return self.regex.match(os.path.basename(libname))
else:
return False
missing_lib_warning_suppression_list = MissingLibWarningSuppressionList()
def warn_missing_lib(libname):
"""
Check if a missing-library warning should be displayed for the given library name (or full path).
"""
return not missing_lib_warning_suppression_list.search(libname)
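# Illustrative behaviour (sketch): on Windows 10, warn_missing_lib("api-ms-win-crt-runtime-l1-1-0.dll") returns
# False (the warning is suppressed by the api-ms-win-* pattern above), while an ordinary unresolved DLL name still
# returns True and gets reported.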


@ -0,0 +1,542 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Code related to processing of import hooks.
"""
import glob
import os.path
import sys
import weakref
from PyInstaller import log as logging
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.compat import expand_path, importlib_load_source
from PyInstaller.depend.imphookapi import PostGraphAPI
from PyInstaller.exceptions import ImportErrorWhenRunningHook
logger = logging.getLogger(__name__)
# Safety check: hook module names need to be unique. Duplicate names might occur if the cached PyiModuleGraph has an
# issue.
HOOKS_MODULE_NAMES = set()
class ModuleHookCache(dict):
"""
Cache of lazily loadable hook script objects.
This cache is implemented as a `dict` subclass mapping from the fully-qualified names of all modules with at
least one hook script to lists of `ModuleHook` instances encapsulating these scripts. As a `dict` subclass,
all cached module names and hook scripts are accessible via standard dictionary operations.
Attributes
----------
module_graph : ModuleGraph
Current module graph.
_hook_module_name_prefix : str
String prefixing the names of all in-memory modules lazily loaded from cached hook scripts. See also the
`hook_module_name_prefix` parameter passed to the `ModuleHook.__init__()` method.
"""
_cache_id_next = 0
"""
0-based identifier unique to the next `ModuleHookCache` to be instantiated.
This identifier is incremented on each instantiation of a new `ModuleHookCache` to isolate in-memory modules of
lazily loaded hook scripts in that cache to the same cache-specific namespace, preventing edge-case collisions
with existing in-memory modules in other caches.
"""
def __init__(self, module_graph, hook_dirs):
"""
Cache all hook scripts in the passed directories.
**Order of caching is significant** with respect to hooks for the same module, as the values of this
dictionary are lists. Hooks for the same module will be run in the order in which they are cached. Previously
cached hooks are always preserved rather than overridden.
By default, official hooks are cached _before_ user-defined hooks. For modules with both official and
user-defined hooks, this implies that the former take priority over and hence will be loaded _before_ the
latter.
Parameters
----------
module_graph : ModuleGraph
Current module graph.
hook_dirs : list
List of the absolute or relative paths of all directories containing **hook scripts** (i.e.,
Python scripts with filenames matching `hook-{module_name}.py`, where `{module_name}` is the module
hooked by that script) to be cached.
"""
super().__init__()
# To avoid circular references and hence increased memory consumption, a weak rather than strong reference is
# stored to the passed graph. Since this graph is guaranteed to live longer than this cache,
# this is guaranteed to be safe.
self.module_graph = weakref.proxy(module_graph)
# String unique to this cache prefixing the names of all in-memory modules lazily loaded from cached hook
# scripts, privatized for safety.
self._hook_module_name_prefix = '__PyInstaller_hooks_{}_'.format(ModuleHookCache._cache_id_next)
ModuleHookCache._cache_id_next += 1
# Cache all hook scripts in the passed directories.
self._cache_hook_dirs(hook_dirs)
def _cache_hook_dirs(self, hook_dirs):
"""
Cache all hook scripts in the passed directories.
Parameters
----------
hook_dirs : list
List of the absolute or relative paths of all directories containing hook scripts to be cached.
"""
for hook_dir in hook_dirs:
# Canonicalize this directory's path and validate its existence.
hook_dir = os.path.abspath(expand_path(hook_dir))
if not os.path.isdir(hook_dir):
raise FileNotFoundError('Hook directory "{}" not found.'.format(hook_dir))
# For each hook script in this directory...
hook_filenames = glob.glob(os.path.join(hook_dir, 'hook-*.py'))
for hook_filename in hook_filenames:
# Fully-qualified name of this hook's corresponding module, constructed by removing the "hook-" prefix
# and ".py" suffix.
module_name = os.path.basename(hook_filename)[5:-3]
# Lazily loadable hook object.
module_hook = ModuleHook(
module_graph=self.module_graph,
module_name=module_name,
hook_filename=hook_filename,
hook_module_name_prefix=self._hook_module_name_prefix,
)
# Add this hook to this module's list of hooks.
module_hooks = self.setdefault(module_name, [])
module_hooks.append(module_hook)
def remove_modules(self, *module_names):
"""
Remove the passed modules and all hook scripts cached for these modules from this cache.
Parameters
----------
module_names : list
List of all fully-qualified module names to be removed.
"""
for module_name in module_names:
# Unload this module's hook script modules from memory. Since these are top-level pure-Python modules cached
# only in the "sys.modules" dictionary, popping these modules from this dictionary suffices to garbage
# collect these modules.
module_hooks = self.get(module_name, [])
for module_hook in module_hooks:
sys.modules.pop(module_hook.hook_module_name, None)
# Remove this module and its hook script objects from this cache.
self.pop(module_name, None)
def _module_collection_mode_sanitizer(value):
if isinstance(value, dict):
# Hook set a dictionary; use it as-is
return value
elif isinstance(value, str):
# Hook set a mode string; convert to a dictionary and assign the string to `None` (= the hooked module).
return {None: value}
raise ValueError(f"Invalid module collection mode setting value: {value!r}")
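# For example, a hook that sets `module_collection_mode = "py"` is normalized to {None: "py"}; the None key is
# later replaced with the hooked module's name in ModuleHook._load_hook_module().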
# Dictionary mapping the names of magic attributes required by the "ModuleHook" class to 2-tuples "(default_type,
# sanitizer_func)", where:
#
# * "default_type" is the type to which that attribute will be initialized when that hook is lazily loaded.
# * "sanitizer_func" is the callable sanitizing the original value of that attribute defined by that hook into a
# safer value consumable by "ModuleHook" callers if any or "None" if the original value requires no sanitization.
#
# To avoid subtleties in the ModuleHook.__getattr__() method, this dictionary is declared as a module rather than a
# class attribute. If declared as a class attribute and then undefined (...for whatever reason), attempting to access
# this attribute from that method would produce infinite recursion.
_MAGIC_MODULE_HOOK_ATTRS = {
# Collections in which order is insignificant. This includes:
#
# * "datas", sanitized from hook-style 2-tuple lists defined by hooks into TOC-style 2-tuple sets consumable by
# "ModuleHook" callers.
# * "binaries", sanitized in the same way.
'datas': (set, format_binaries_and_datas),
'binaries': (set, format_binaries_and_datas),
'excludedimports': (set, None),
# Collections in which order is significant. This includes:
#
# * "hiddenimports", as order of importation is significant. On module importation, hook scripts are loaded and hook
# functions declared by these scripts are called. As these scripts and functions can have side effects dependent
# on module importation order, module importation itself can have side effects dependent on this order!
'hiddenimports': (list, None),
# Flags
'warn_on_missing_hiddenimports': (lambda: True, bool),
# Package/module collection mode dictionary.
'module_collection_mode': (dict, _module_collection_mode_sanitizer),
}
class ModuleHook:
"""
Cached object encapsulating a lazy loadable hook script.
This object exposes public attributes (e.g., `datas`) of the underlying hook script as attributes of the same
name of this object. On the first access of any such attribute, this hook script is lazily loaded into an
in-memory private module reused on subsequent accesses. These dynamic attributes are referred to as "magic." All
other static attributes of this object (e.g., `hook_module_name`) are referred to as "non-magic."
Attributes (Magic)
----------
datas : set
Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external non-executable files required by
the module being hooked, converted from the `datas` list of hook-style 2-tuples `(source_dir_or_glob,
target_dir)` defined by this hook script.
binaries : set
Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external executable files required by the
module being hooked, converted from the `binaries` list of hook-style 2-tuples `(source_dir_or_glob,
target_dir)` defined by this hook script.
excludedimports : set
Set of the fully-qualified names of all modules imported by the module being hooked to be ignored rather than
imported from that module, converted from the `excludedimports` list defined by this hook script. These
modules will only be "locally" rather than "globally" ignored. These modules will remain importable from all
modules other than the module being hooked.
hiddenimports : list
List of the fully-qualified names of all modules imported by the module being hooked that are _not_
automatically detectable by PyInstaller (usually due to being dynamically imported in that module),
taken from the `hiddenimports` list defined by this hook script.
warn_on_missing_hiddenimports : bool
Boolean flag indicating whether missing hidden imports from the hook should generate warnings or not. This
behavior is enabled by default, but individual hooks can opt out of it.
module_collection_mode : dict
A dictionary of package/module names and their corresponding collection mode strings ('pyz', 'pyc', 'py',
'pyz+py', 'py+pyz').
Attributes (Non-magic)
----------
module_graph : ModuleGraph
Current module graph.
module_name : str
Name of the module hooked by this hook script.
hook_filename : str
Absolute or relative path of this hook script.
hook_module_name : str
Name of the in-memory module of this hook script's interpreted contents.
_hook_module : module
In-memory module of this hook script's interpreted contents, lazily loaded on the first call to the
`_load_hook_module()` method _or_ `None` if this method has yet to be accessed.
"""
#-- Magic --
def __init__(self, module_graph, module_name, hook_filename, hook_module_name_prefix):
"""
Initialize this metadata.
Parameters
----------
module_graph : ModuleGraph
Current module graph.
module_name : str
Name of the module hooked by this hook script.
hook_filename : str
Absolute or relative path of this hook script.
hook_module_name_prefix : str
String prefixing the name of the in-memory module for this hook script. To avoid namespace clashes with
similar modules created by other `ModuleHook` objects in other `ModuleHookCache` containers, this string
_must_ be unique to the `ModuleHookCache` container containing this `ModuleHook` object. If this string
is non-unique, an existing in-memory module will be erroneously reused when lazily loading this hook
script, thus erroneously resanitizing previously sanitized hook script attributes (e.g., `datas`) with
the `format_binaries_and_datas()` helper.
"""
# Note that the passed module graph is already a weak reference, avoiding circular reference issues. See
# ModuleHookCache.__init__(). TODO: Add a failure message
assert isinstance(module_graph, weakref.ProxyTypes)
self.module_graph = module_graph
self.module_name = module_name
self.hook_filename = hook_filename
# Name of the in-memory module fabricated to refer to this hook script.
self.hook_module_name = hook_module_name_prefix + self.module_name.replace('.', '_')
# Safety check, see above
global HOOKS_MODULE_NAMES
if self.hook_module_name in HOOKS_MODULE_NAMES:
# When self._shallow is true, this class never loads the hook and sets the attributes to empty values
self._shallow = True
else:
self._shallow = False
HOOKS_MODULE_NAMES.add(self.hook_module_name)
# Attributes subsequently defined by the _load_hook_module() method.
self._loaded = False
self._has_hook_function = False
self._hook_module = None
def __getattr__(self, attr_name):
"""
Get the magic attribute with the passed name (e.g., `datas`) from this lazily loaded hook script if any _or_
raise `AttributeError` otherwise.
This special method is called only for attributes _not_ already defined by this object. This includes
undefined attributes and the first attempt to access magic attributes.
This special method is _not_ called for subsequent attempts to access magic attributes. The first attempt to
access magic attributes defines corresponding instance variables accessible via the `self.__dict__` instance
dictionary (e.g., as `self.datas`) without calling this method. This approach also allows magic attributes to
be deleted from this object _without_ defining the `__delattr__()` special method.
See Also
----------
Class docstring for supported magic attributes.
"""
# If this is a magic attribute, initialize this attribute by lazy loading this hook script and then return
# this attribute.
if attr_name in _MAGIC_MODULE_HOOK_ATTRS and not self._loaded:
self._load_hook_module()
return getattr(self, attr_name)
# Else, this is an undefined attribute. Raise an exception.
else:
raise AttributeError(attr_name)
def __setattr__(self, attr_name, attr_value):
"""
Set the attribute with the passed name to the passed value.
If this is a magic attribute, this hook script will be lazily loaded before setting this attribute. Unlike
`__getattr__()`, this special method is called to set _any_ attribute -- including magic, non-magic,
and undefined attributes.
See Also
----------
Class docstring for supported magic attributes.
"""
# If this is a magic attribute, initialize this attribute by lazy loading this hook script before overwriting
# this attribute.
if attr_name in _MAGIC_MODULE_HOOK_ATTRS:
self._load_hook_module()
# Set this attribute to the passed value. To avoid recursion, the superclass method rather than setattr() is
# called.
return super().__setattr__(attr_name, attr_value)
#-- Loading --
def _load_hook_module(self, keep_module_ref=False):
"""
Lazily load this hook script into an in-memory private module.
This method (and, indeed, this class) preserves all attributes and functions defined by this hook script as
is, ensuring sane behaviour in hook functions _not_ expecting unplanned external modification. Instead,
this method copies public attributes defined by this hook script (e.g., `binaries`) into private attributes
of this object, which the special `__getattr__()` and `__setattr__()` methods safely expose to external
callers. For public attributes _not_ defined by this hook script, the corresponding private attributes will
be assigned sane defaults. For some public attributes defined by this hook script, the corresponding private
attributes will be transformed into objects more readily and safely consumed elsewhere by external callers.
See Also
----------
Class docstring for supported attributes.
"""
# If this hook script module has already been loaded, or we are _shallow, noop.
if (self._loaded and (self._hook_module is not None or not keep_module_ref)) or self._shallow:
if self._shallow:
self._loaded = True
self._hook_module = True # Not None
# Inform the user
logger.debug(
'Skipping module hook %r from %r because a hook for %s has already been loaded.',
*os.path.split(self.hook_filename)[::-1], self.module_name
)
# Set the default attributes to empty instances of the type.
for attr_name, (attr_type, _) in _MAGIC_MODULE_HOOK_ATTRS.items():
super().__setattr__(attr_name, attr_type())
return
# Load and execute the hook script. Even if mechanisms from the import machinery are used, this does not import
# the hook as the module.
head, tail = os.path.split(self.hook_filename)
logger.info('Loading module hook %r from %r...', tail, head)
try:
self._hook_module = importlib_load_source(self.hook_module_name, self.hook_filename)
except ImportError:
logger.debug("Hook failed with:", exc_info=True)
raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename)
# Mark as loaded
self._loaded = True
# Check if module has hook() function.
self._has_hook_function = hasattr(self._hook_module, 'hook')
# Copy hook script attributes into magic attributes exposed as instance variables of the current "ModuleHook"
# instance.
for attr_name, (default_type, sanitizer_func) in _MAGIC_MODULE_HOOK_ATTRS.items():
# Unsanitized value of this attribute.
attr_value = getattr(self._hook_module, attr_name, None)
# If this attribute is undefined, expose a sane default instead.
if attr_value is None:
attr_value = default_type()
# Else if this attribute requires sanitization, do so.
elif sanitizer_func is not None:
attr_value = sanitizer_func(attr_value)
# Else, expose the unsanitized value of this attribute.
# Expose this attribute as an instance variable of the same name.
setattr(self, attr_name, attr_value)
# If module_collection_mode has an entry with None key, reassign it to the hooked module's name.
setattr(
self, 'module_collection_mode', {
key if key is not None else self.module_name: value
for key, value in getattr(self, 'module_collection_mode').items()
}
)
# Release the module if we do not need the reference. This is the case when the hook is loaded during the analysis
# rather than as part of the post-graph operations.
if not keep_module_ref:
self._hook_module = None
#-- Hooks --
def post_graph(self, analysis):
"""
Call the **post-graph hook** (i.e., `hook()` function) defined by this hook script, if any.
Parameters
----------
analysis: build_main.Analysis
Analysis that calls the hook
This method is intended to be called _after_ the module graph for this application is constructed.
"""
# Lazily load this hook script into an in-memory module.
# The script might have been loaded before during modulegraph analysis; in that case, it needs to be reloaded
# only if it provides a hook() function.
if not self._loaded or self._has_hook_function:
# Keep module reference when loading the hook, so we can call its hook function!
self._load_hook_module(keep_module_ref=True)
# Call this hook script's hook() function, which modifies attributes accessed by subsequent methods and
# hence must be called first.
self._process_hook_func(analysis)
# Order is insignificant here.
self._process_hidden_imports()
def _process_hook_func(self, analysis):
"""
Call this hook's `hook()` function if defined.
Parameters
----------
analysis: build_main.Analysis
Analysis that calls the hook
"""
# If this hook script defines no hook() function, noop.
if not hasattr(self._hook_module, 'hook'):
return
# Call this hook() function.
hook_api = PostGraphAPI(module_name=self.module_name, module_graph=self.module_graph, analysis=analysis)
try:
self._hook_module.hook(hook_api)
except ImportError:
logger.debug("Hook failed with:", exc_info=True)
raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename)
# Update all magic attributes modified by the prior call.
self.datas.update(set(hook_api._added_datas))
self.binaries.update(set(hook_api._added_binaries))
self.hiddenimports.extend(hook_api._added_imports)
self.module_collection_mode.update(hook_api._module_collection_mode)
# FIXME: `hook_api._deleted_imports` should be appended to `self.excludedimports` and used to suppress module
# import during the modulegraph construction rather than handled here. However, for that to work, the `hook()`
# function needs to be run during modulegraph construction instead of in post-processing (and this in turn
# requires additional code refactoring in order to be able to pass `analysis` to `PostGraphAPI` object at
# that point). So once the modulegraph rewrite is complete, remove the code block below.
for deleted_module_name in hook_api._deleted_imports:
# Remove the graph link between the hooked module and item. This removes the 'item' node from the graph if
# no other links go to it (no other modules import it)
self.module_graph.removeReference(hook_api.node, deleted_module_name)
def _process_hidden_imports(self):
"""
Add all imports listed in this hook script's `hiddenimports` attribute to the module graph as if directly
imported by this hooked module.
These imports are typically _not_ implicitly detectable by PyInstaller and hence must be explicitly defined
by hook scripts.
"""
# For each hidden import required by the module being hooked...
for import_module_name in self.hiddenimports:
try:
# Graph node for this module. Do not implicitly create namespace packages for non-existent packages.
caller = self.module_graph.find_node(self.module_name, create_nspkg=False)
# Manually import this hidden import from this module.
self.module_graph.import_hook(import_module_name, caller)
# If this hidden import is unimportable, print a non-fatal warning. Hidden imports often become
# desynchronized from upstream packages and hence are only "soft" recommendations.
except ImportError:
if self.warn_on_missing_hiddenimports:
logger.warning('Hidden import "%s" not found!', import_module_name)
class AdditionalFilesCache:
"""
Cache for storing what binaries and datas were pushed by what modules when import hooks were processed.
"""
def __init__(self):
self._binaries = {}
self._datas = {}
def add(self, modname, binaries, datas):
self._binaries.setdefault(modname, [])
self._binaries[modname].extend(binaries or [])
self._datas.setdefault(modname, [])
self._datas[modname].extend(datas or [])
def __contains__(self, name):
return name in self._binaries or name in self._datas
def binaries(self, modname):
"""
Return list of binaries for given module name.
"""
return self._binaries.get(modname, [])
def datas(self, modname):
"""
Return list of datas for given module name.
"""
return self._datas.get(modname, [])
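# Illustrative usage (a sketch; the module name and entries are hypothetical):
#
#     cache = AdditionalFilesCache()
#     cache.add("somepackage", binaries=[("dest/libfoo.so", "/usr/lib/libfoo.so")], datas=[])
#     "somepackage" in cache           # True
#     cache.binaries("somepackage")    # [("dest/libfoo.so", "/usr/lib/libfoo.so")]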


@ -0,0 +1,475 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Classes facilitating communication between PyInstaller and import hooks.
PyInstaller passes instances of classes defined by this module to corresponding functions defined by external import
hooks, which commonly modify the contents of these instances before returning. PyInstaller then detects and converts
these modifications into appropriate operations on the current `PyiModuleGraph` instance, thus modifying which
modules will be frozen into the executable.
"""
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.lib.modulegraph.modulegraph import (RuntimeModule, RuntimePackage)
class PreSafeImportModuleAPI:
"""
Metadata communicating changes made by the current **pre-safe import module hook** (i.e., hook run immediately
_before_ a call to `ModuleGraph._safe_import_module()` recursively adding the hooked module, package,
or C extension and all transitive imports thereof to the module graph) back to PyInstaller.
Pre-safe import module hooks _must_ define a `pre_safe_import_module()` function accepting an instance of this
class, whose attributes describe the subsequent `ModuleGraph._safe_import_module()` call creating the hooked
module's graph node.
Each pre-safe import module hook is run _only_ on the first attempt to create the hooked module's graph node and
then subsequently ignored. If this hook successfully creates that graph node, the subsequent
`ModuleGraph._safe_import_module()` call will observe this fact and silently return without attempting to
recreate that graph node.
Pre-safe import module hooks are typically used to create graph nodes for **runtime modules** (i.e.,
modules dynamically defined at runtime). Most modules are physically defined in external `.py`-suffixed scripts.
Some modules, however, are dynamically defined at runtime (e.g., `six.moves`, dynamically defined by the
physically defined `six.py` module). However, `ModuleGraph` only parses `import` statements residing in external
scripts. `ModuleGraph` is _not_ a full-fledged, Turing-complete Python interpreter and hence has no means of
parsing `import` statements performed by runtime modules existing only in-memory.
'With great power comes great responsibility.'
Attributes (Immutable)
----------------------------
The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
_will_ result in a raised exception:
module_graph : PyiModuleGraph
Current module graph.
parent_package : Package
Graph node for the package providing this module _or_ `None` if this module is a top-level module.
Attributes (Mutable)
-----------------------------
The following attributes are editable.
module_basename : str
Unqualified name of the module to be imported (e.g., `text`).
module_name : str
Fully-qualified name of this module (e.g., `email.mime.text`).
"""
def __init__(self, module_graph, module_basename, module_name, parent_package):
self._module_graph = module_graph
self.module_basename = module_basename
self.module_name = module_name
self._parent_package = parent_package
# Immutable properties. No corresponding setters are defined.
@property
def module_graph(self):
"""
Current module graph.
"""
return self._module_graph
@property
def parent_package(self):
"""
Parent Package of this node.
"""
return self._parent_package
def add_runtime_module(self, module_name):
"""
Add a graph node representing a non-package Python module with the passed name dynamically defined at runtime.
Most modules are statically defined on-disk as standard Python files. Some modules, however, are dynamically
defined in-memory at runtime (e.g., `gi.repository.Gst`, dynamically defined by the statically defined
`gi.repository.__init__` module).
This method adds a graph node representing such a runtime module. Since this module is _not_ a package,
all attempts to import submodules from this module in `from`-style import statements (e.g., the `queue`
submodule in `from six.moves import queue`) will be silently ignored. To circumvent this, simply call
`add_runtime_package()` instead.
Parameters
----------
module_name : str
Fully-qualified name of this module (e.g., `gi.repository.Gst`).
Examples
----------
This method is typically called by `pre_safe_import_module()` hooks, e.g.:
def pre_safe_import_module(api):
api.add_runtime_module(api.module_name)
"""
self._module_graph.add_module(RuntimeModule(module_name))
def add_runtime_package(self, package_name):
"""
Add a graph node representing a non-namespace Python package with the passed name dynamically defined at
runtime.
Most packages are statically defined on-disk as standard subdirectories containing `__init__.py` files. Some
packages, however, are dynamically defined in-memory at runtime (e.g., `six.moves`, dynamically defined by
the statically defined `six` module).
This method adds a graph node representing such a runtime package. All attributes imported from this package
in `from`-style import statements that are submodules of this package (e.g., the `queue` submodule in `from
six.moves import queue`) will be imported rather than ignored.
Parameters
----------
package_name : str
Fully-qualified name of this package (e.g., `six.moves`).
Examples
----------
This method is typically called by `pre_safe_import_module()` hooks, e.g.:
def pre_safe_import_module(api):
api.add_runtime_package(api.module_name)
"""
self._module_graph.add_module(RuntimePackage(package_name))
def add_alias_module(self, real_module_name, alias_module_name):
"""
Alias the source module to the target module with the passed names.
This method ensures that the next call to findNode() given the target module name will resolve this alias.
This includes importing and adding a graph node for the source module if needed as well as adding a reference
from the target to the source module.
Parameters
----------
real_module_name : str
Fully-qualified name of the **existing module** (i.e., the module being aliased).
alias_module_name : str
Fully-qualified name of the **non-existent module** (i.e., the alias to be created).
"""
self._module_graph.alias_module(real_module_name, alias_module_name)
def append_package_path(self, directory):
"""
Modulegraph does a good job of simulating Python's import mechanism, but it cannot handle the packagepath
(`__path__`) modifications that packages make at runtime.
Therefore, there is a mechanism whereby you can register extra paths in this map for a package, and they will be
honored.
Parameters
----------
directory : str
Absolute or relative path of the directory to be appended to this package's `__path__` attribute.
"""
self._module_graph.append_package_path(self.module_name, directory)
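# Illustrative usage in a pre-safe-import-module hook (a sketch; the directory is hypothetical):
#
#     def pre_safe_import_module(api):
#         api.append_package_path("/opt/plugins/extra_pkg_dir")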
class PreFindModulePathAPI:
"""
Metadata communicating changes made by the current **pre-find module path hook** (i.e., hook run immediately
_before_ a call to `ModuleGraph._find_module_path()` finding the hooked module's absolute path) back to PyInstaller.
Pre-find module path hooks _must_ define a `pre_find_module_path()` function accepting an instance of this class,
whose attributes describe the subsequent `ModuleGraph._find_module_path()` call to be performed.
Pre-find module path hooks are typically used to change the absolute path from which a module will be
subsequently imported and thus frozen into the executable. To do so, hooks may overwrite the default
`search_dirs` list of the absolute paths of all directories to be searched for that module: e.g.,
def pre_find_module_path(api):
api.search_dirs = ['/the/one/true/package/providing/this/module']
Each pre-find module path hook is run _only_ on the first call to `ModuleGraph._find_module_path()` for the
corresponding module.
Attributes
----------
The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
respected by PyInstaller:
search_dirs : list
List of the absolute paths of all directories to be searched for this module (in order). Searching will halt
at the first directory containing this module.
Attributes (Immutable)
----------
The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
_will_ result in a raised exception:
module_name : str
Fully-qualified name of this module.
module_graph : PyiModuleGraph
Current module graph. For efficiency, this attribute is technically mutable. To preserve graph integrity,
this attribute should nonetheless _never_ be modified. While read-only `PyiModuleGraph` methods (e.g.,
`findNode()`) are safely callable from within pre-find module path hooks, methods modifying the graph are
_not_. If graph modifications are required, consider an alternative type of hook (e.g., pre-import module
hooks).
"""
def __init__(
self,
module_graph,
module_name,
search_dirs,
):
# Mutable attributes.
self.search_dirs = search_dirs
# Immutable attributes.
self._module_graph = module_graph
self._module_name = module_name
# Immutable properties. No corresponding setters are defined.
@property
def module_graph(self):
"""
Current module graph.
"""
return self._module_graph
@property
def module_name(self):
"""
Fully-qualified name of this module.
"""
return self._module_name
class PostGraphAPI:
"""
Metadata communicating changes made by the current **post-graph hook** (i.e., hook run for a specific module
transitively imported by the current application _after_ the module graph of all `import` statements performed by
this application has been constructed) back to PyInstaller.
Post-graph hooks may optionally define a `post_graph()` function accepting an instance of this class,
whose attributes describe the current state of the module graph and the hooked module's graph node.
Attributes (Mutable)
----------
The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
respected by PyInstaller:
module_graph : PyiModuleGraph
Current module graph.
module : Node
Graph node for the currently hooked module.
'With great power comes great responsibility.'
Attributes (Immutable)
----------
The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
_will_ result in a raised exception:
__name__ : str
Fully-qualified name of this module (e.g., `six.moves.tkinter`).
__file__ : str
Absolute path of this module. If this module is:
* A standard (rather than namespace) package, this is the absolute path of this package's directory.
* A namespace (rather than standard) package, this is the abstract placeholder `-`. (Don't ask. Don't tell.)
* A non-package module or C extension, this is the absolute path of the corresponding file.
__path__ : list
List of the absolute paths of all directories comprising this package if this module is a package _or_ `None`
otherwise. If this module is a standard (rather than namespace) package, this list contains only the absolute
path of this package's directory.
co : code
Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).
analysis: build_main.Analysis
The Analysis that loads the hook.
Attributes (Private)
----------
The following attributes are technically mutable but private, and hence should _never_ be externally accessed or
modified by hooks. Call the corresponding public methods instead:
_added_datas : list
List of the `(name, path)` 2-tuples or TOC objects of all external data files required by the current hook,
defaulting to the empty list. This is equivalent to the global `datas` hook attribute.
_added_imports : list
List of the fully-qualified names of all modules imported by the current hook, defaulting to the empty list.
This is equivalent to the global `hiddenimports` hook attribute.
_added_binaries : list
List of the `(name, path)` 2-tuples or TOC objects of all external C extensions imported by the current hook,
defaulting to the empty list. This is equivalent to the global `binaries` hook attribute.
_module_collection_mode : dict
Dictionary of package/module names and their corresponding collection mode strings. This is equivalent to the
global `module_collection_mode` hook attribute.
"""
def __init__(self, module_name, module_graph, analysis):
# Mutable attributes.
self.module_graph = module_graph
self.module = module_graph.find_node(module_name)
assert self.module is not None # should not occur
# Immutable attributes.
self.___name__ = module_name
self.___file__ = self.module.filename
self._co = self.module.code
self._analysis = analysis
# To enforce immutability, convert this module's package path if any into an immutable tuple.
self.___path__ = tuple(self.module.packagepath) \
if self.module.packagepath is not None else None
#FIXME: Refactor "_added_datas", "_added_binaries", and "_deleted_imports" into sets. Since order of
#import is important, "_added_imports" must remain a list.
# Private attributes.
self._added_binaries = []
self._added_datas = []
self._added_imports = []
self._deleted_imports = []
self._module_collection_mode = {}
# Immutable properties. No corresponding setters are defined.
@property
def __file__(self):
"""
Absolute path of this module's file.
"""
return self.___file__
@property
def __path__(self):
"""
List of the absolute paths of all directories comprising this package if this module is a package _or_ `None`
otherwise. If this module is a standard (rather than namespace) package, this list contains only the absolute
path of this package's directory.
"""
return self.___path__
@property
def __name__(self):
"""
Fully-qualified name of this module (e.g., `six.moves.tkinter`).
"""
return self.___name__
@property
def co(self):
"""
Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).
"""
return self._co
@property
def analysis(self):
"""
build_main.Analysis that calls the hook.
"""
return self._analysis
# Obsolete immutable properties provided to preserve backward compatibility.
@property
def name(self):
"""
Fully-qualified name of this module (e.g., `six.moves.tkinter`).
**This property has been deprecated by the `__name__` property.**
"""
return self.___name__
@property
def graph(self):
"""
Current module graph.
**This property has been deprecated by the `module_graph` property.**
"""
return self.module_graph
@property
def node(self):
"""
Graph node for the currently hooked module.
**This property has been deprecated by the `module` property.**
"""
return self.module
# TODO: This incorrectly returns the list of the graph nodes of all modules *TRANSITIVELY* (rather than directly)
# imported by this module. Unfortunately, this implies that most uses of this property are currently broken
# (e.g., "hook-PIL.SpiderImagePlugin.py"). We only require this for the aforementioned hook, so contemplate
# alternative approaches.
@property
def imports(self):
"""
List of the graph nodes of all modules directly imported by this module.
"""
return self.module_graph.iter_graph(start=self.module)
def add_imports(self, *module_names):
"""
Add all Python modules whose fully-qualified names are in the passed list as "hidden imports" upon which the
current module depends.
This is equivalent to appending such names to the hook-specific `hiddenimports` attribute.
"""
# Append such names to the current list of all such names.
self._added_imports.extend(module_names)
def del_imports(self, *module_names):
"""
Remove the named fully-qualified modules from the set of imports (either hidden or visible) upon which the
current module depends.
This is equivalent to appending such names to the hook-specific `excludedimports` attribute.
"""
self._deleted_imports.extend(module_names)
def add_binaries(self, binaries):
"""
Add all external dynamic libraries in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the
current module. This is equivalent to adding to the global `binaries` hook attribute.
For convenience, the `binaries` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`.
"""
# Detect TOC 3-tuple list by checking the length of the first entry
if binaries and len(binaries[0]) == 3:
self._added_binaries.extend(entry[:2] for entry in binaries)
else:
# NOTE: `format_binaries_and_datas` changes tuples from input format `(src_name, dest_name)` to output
# format `(dest_name, src_name)`.
self._added_binaries.extend(format_binaries_and_datas(binaries))
def add_datas(self, datas):
"""
Add all external data files in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the
current module. This is equivalent to adding to the global `datas` hook attribute.
For convenience, the `datas` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`.
"""
# Detect TOC 3-tuple list by checking the length of the first entry
if datas and len(datas[0]) == 3:
self._added_datas.extend(entry[:2] for entry in datas)
else:
# NOTE: `format_binaries_and_datas` changes tuples from input format `(src_name, dest_name)` to output
# format `(dest_name, src_name)`.
self._added_datas.extend(format_binaries_and_datas(datas))
def set_module_collection_mode(self, name, mode):
"""
Set the package/module collection mode for the specified module name. If `name` is `None`, the hooked
module/package name is used. `mode` can be one of the valid mode strings (`'pyz'`, `'pyc'`, `'py'`, `'pyz+py'`,
`'py+pyz'`) or `None`, which clears the setting for the module/package - but only within this hook's context!
"""
if name is None:
name = self.__name__
if mode is None:
self._module_collection_mode.pop(name)
else:
self._module_collection_mode[name] = mode
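# A minimal sketch of how a hook could drive this API from its `hook(hook_api)` function. The module and file names
# below are hypothetical, not part of PyInstaller:
#
#   def hook(hook_api):
#       hook_api.add_imports('mypkg._native')  # hidden import
#       hook_api.add_datas([('/abs/path/to/data.json', 'mypkg')])  # (src_name, dest_name) 2-tuple
#       hook_api.set_module_collection_mode(None, 'pyz+py')  # collect hooked module as both bytecode and source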

View File

@ -0,0 +1,396 @@
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utility functions related to analyzing/bundling dependencies.
"""
import ctypes.util
import io
import os
import re
import struct
import zipfile
from types import CodeType
import marshal
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import bytecode
from PyInstaller.depend.dylib import include_library
from PyInstaller.exceptions import ExecCommandFailed
from PyInstaller.lib.modulegraph import modulegraph
logger = logging.getLogger(__name__)
# TODO find out if modules from base_library.zip could be somehow bundled into the .exe file.
def create_py3_base_library(libzip_filename, graph):
"""
Package basic Python modules into .zip file. The .zip file with basic modules is necessary to have on PYTHONPATH
for initializing libpython3 in order to run the frozen executable with Python 3.
"""
# Import strip_paths_in_code locally to avoid cyclic import between building.utils and depend.utils (this module);
# building.utils imports depend.bindepend, which in turn imports depend.utils.
from PyInstaller.building.utils import strip_paths_in_code
# Construct regular expression for matching modules that should be bundled into base_library.zip: the modules listed
# in compat.PY3_BASE_MODULES, either as plain 'module' names or as 'module.ANY_SUBMODULE' names. The match has to be
# exact - start and end of string, not substring.
regex_modules = '|'.join([rf'(^{x}$)' for x in compat.PY3_BASE_MODULES])
regex_submod = '|'.join([rf'(^{x}\..*$)' for x in compat.PY3_BASE_MODULES])
regex_str = regex_modules + '|' + regex_submod
module_filter = re.compile(regex_str)
try:
# Remove .zip from previous run.
if os.path.exists(libzip_filename):
os.remove(libzip_filename)
logger.debug('Adding python files to base_library.zip')
# Class zipfile.PyZipFile is not suitable for PyInstaller's needs.
with zipfile.ZipFile(libzip_filename, mode='w') as zf:
zf.debug = 3
# Sort the graph nodes by identifier to ensure repeatable builds
graph_nodes = list(graph.iter_graph())
graph_nodes.sort(key=lambda item: item.identifier)
for mod in graph_nodes:
if type(mod) in (modulegraph.SourceModule, modulegraph.Package, modulegraph.CompiledModule):
# Bundling just required modules.
if module_filter.match(mod.identifier):
# Name inside the archive. The ZIP format specification requires forward slashes as directory
# separator.
if type(mod) is modulegraph.Package:
new_name = mod.identifier.replace('.', '/') + '/__init__.pyc'
else:
new_name = mod.identifier.replace('.', '/') + '.pyc'
# Write code to a file. This code is similar to py_compile.compile().
with io.BytesIO() as fc:
fc.write(compat.BYTECODE_MAGIC)
fc.write(struct.pack('<I', 0b01)) # PEP-552: hash-based pyc, check_source=False
fc.write(b'\00' * 8) # Match behavior of `building.utils.compile_pymodule`
code = strip_paths_in_code(mod.code) # Strip paths
marshal.dump(code, fc)
# Use a ZipInfo to set timestamp for deterministic build.
info = zipfile.ZipInfo(new_name)
zf.writestr(info, fc.getvalue())
except Exception:
logger.error('base_library.zip could not be created!')
raise
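# For reference, each .pyc entry written above follows the PEP 552 hash-based layout:
#
#   bytes 0-3:  magic number (compat.BYTECODE_MAGIC)
#   bytes 4-7:  flags, 0b01 = hash-based pyc with check_source=False
#   bytes 8-15: source-hash field (zeroed here)
#   bytes 16+:  marshalled code object
#
# A zeroed hash is acceptable because check_source is False, so the interpreter never validates the pyc against a
# source file.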
def scan_code_for_ctypes(co):
binaries = __recursively_scan_code_objects_for_ctypes(co)
# If any of the libraries has been requested with anything other than its basename, drop that entry and warn the
# user - PyInstaller would need to patch the compiled pyc file to make it work correctly!
binaries = set(binaries)
for binary in list(binaries):
# 'binary' might be in some cases None. Some Python modules (e.g., PyObjC.objc._bridgesupport) might contain
# code like this:
# dll = ctypes.CDLL(None)
if not binary:
# None values have to be removed too.
binaries.remove(binary)
elif binary != os.path.basename(binary):
# TODO make these warnings show up somewhere.
try:
filename = co.co_filename
except Exception:
filename = 'UNKNOWN'
logger.warning(
"Ignoring %s imported from %s - only basenames are supported with ctypes imports!", binary, filename
)
binaries.remove(binary)
binaries = _resolveCtypesImports(binaries)
return binaries
def __recursively_scan_code_objects_for_ctypes(code: CodeType):
"""
Detects ctypes dependencies, using reasonable heuristics that should cover most common ctypes usages; returns a
list containing names of binaries detected as dependencies.
"""
from PyInstaller.depend.bytecode import any_alias, search_recursively
binaries = []
ctypes_dll_names = {
*any_alias("ctypes.CDLL"),
*any_alias("ctypes.cdll.LoadLibrary"),
*any_alias("ctypes.WinDLL"),
*any_alias("ctypes.windll.LoadLibrary"),
*any_alias("ctypes.OleDLL"),
*any_alias("ctypes.oledll.LoadLibrary"),
*any_alias("ctypes.PyDLL"),
*any_alias("ctypes.pydll.LoadLibrary"),
}
find_library_names = {
*any_alias("ctypes.util.find_library"),
}
for calls in bytecode.recursive_function_calls(code).values():
for (name, args) in calls:
if not len(args) == 1 or not isinstance(args[0], str):
continue
if name in ctypes_dll_names:
# ctypes.*DLL() or ctypes.*dll.LoadLibrary()
binaries.append(*args)
elif name in find_library_names:
# ctypes.util.find_library() needs to be handled separately, because we need to resolve the library base
# name given as the argument (without prefix and suffix, e.g. 'gs') into corresponding full name (e.g.,
# 'libgs.so.9').
libname = args[0]
if libname:
try: # this try was inserted due to the ctypes bug https://github.com/python/cpython/issues/93094
libname = ctypes.util.find_library(libname)
except FileNotFoundError:
libname = None
logger.warning(
'ctypes.util.find_library raised a FileNotFoundError. '
'Suppressing and assuming no lib with the name "%s" was found.', args[0]
)
if libname:
# On Windows, `find_library` may return a full pathname. See issue #1934.
libname = os.path.basename(libname)
binaries.append(libname)
# The above handles any flavour of function/class call. We still need to capture the (albeit rarely used) case of
# loading libraries with ctypes.cdll's getattr.
for i in search_recursively(_scan_code_for_ctypes_getattr, code).values():
binaries.extend(i)
return binaries
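# Illustrative (hypothetical) call patterns that the heuristics above pick up; only calls with a single literal
# string argument are matched, so library names computed at runtime are invisible to this bytecode scan:
#
#   >>> import ctypes, ctypes.util
#   >>> ctypes.CDLL("libusb-1.0.so")          # recorded as "libusb-1.0.so"
#   >>> ctypes.cdll.LoadLibrary("libfoo.so")  # recorded as "libfoo.so"
#   >>> ctypes.util.find_library("gs")        # resolved to a full name, e.g., "libgs.so.9"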
_ctypes_getattr_regex = bytecode.bytecode_regex(
rb"""
# Matches 'foo.bar' or 'foo.bar.whizz'.
# Load the 'foo'.
(
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_GLOBAL + rb""").
)
# Load the 'bar.whizz' (one opcode per name component, each possibly preceded by name reference extension).
(
(?:
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_LOAD + rb""").
)+
)
"""
)
def _scan_code_for_ctypes_getattr(code: CodeType):
"""
Detect uses of ``ctypes.cdll.library_name``, which implies that ``library_name.dll`` should be collected.
"""
key_names = ("cdll", "oledll", "pydll", "windll")
for match in bytecode.finditer(_ctypes_getattr_regex, code.co_code):
name, attrs = match.groups()
name = bytecode.load(name, code)
attrs = bytecode.loads(attrs, code)
if attrs and attrs[-1] == "LoadLibrary":
continue
# Capture `from ctypes import ole; ole.dll_name`.
if len(attrs) == 1:
if name in key_names:
yield attrs[0] + ".dll"
# Capture `import ctypes; ctypes.ole.dll_name`.
if len(attrs) == 2:
if name == "ctypes" and attrs[0] in key_names:
yield attrs[1] + ".dll"
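# Illustrative (hypothetical) getattr-style usages matched by the regex above:
#
#   >>> import ctypes
#   >>> ctypes.windll.user32  # yields "user32.dll"
#   >>> from ctypes import cdll
#   >>> cdll.kernel32         # yields "kernel32.dll"
#
# A trailing "LoadLibrary" attribute is deliberately skipped, because such calls are already handled by the
# call-based scan above.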
# TODO: reuse this code with modulegraph implementation.
def _resolveCtypesImports(cbinaries):
"""
Completes ctypes BINARY entries for modules with their full path.
Input is a list of c-binary names (as found by `scan_code_for_ctypes`). Output is a list of tuples ready to be
appended to the ``binaries`` of a module.
This function temporarily extends PATH, LD_LIBRARY_PATH or DYLD_LIBRARY_PATH (depending on the platform) by
CONF['pathex'], so shared libs will be searched there, too.
Example:
>>> _resolveCtypesImports(['libgs.so'])
[('libgs.so', '/usr/lib/libgs.so', 'BINARY')]
"""
from ctypes.util import find_library
from PyInstaller.config import CONF
if compat.is_unix:
envvar = "LD_LIBRARY_PATH"
elif compat.is_darwin:
envvar = "DYLD_LIBRARY_PATH"
else:
envvar = "PATH"
def _setPaths():
path = os.pathsep.join(CONF['pathex'])
old = compat.getenv(envvar)
if old is not None:
path = os.pathsep.join((path, old))
compat.setenv(envvar, path)
return old
def _restorePaths(old):
if old is None:
compat.unsetenv(envvar)
else:
compat.setenv(envvar, old)
ret = []
# Try to locate the shared library on the disk. This is done by calling ctypes.util.find_library with
# ImportTracker's local paths temporarily prepended to the library search paths (and restored after the call).
old = _setPaths()
for cbin in cbinaries:
try:
# There is an issue with find_library() where it can run into errors trying to locate the library. See
# #5734.
cpath = find_library(os.path.splitext(cbin)[0])
except FileNotFoundError:
# In these cases, find_library() should return None.
cpath = None
if compat.is_unix:
# CAVEAT: find_library() is not the correct function. ctypes' documentation says that it is meant to resolve
# only the filename (as a *compiler* does), not the full path. Anyway, it works well enough on Windows and
# macOS. On Linux, we need to implement more code to find out the full path.
if cpath is None:
cpath = cbin
# "man ld.so" says that we should first search LD_LIBRARY_PATH and then the ldcache.
for d in compat.getenv(envvar, '').split(os.pathsep):
if os.path.isfile(os.path.join(d, cpath)):
cpath = os.path.join(d, cpath)
break
else:
if LDCONFIG_CACHE is None:
load_ldconfig_cache()
if cpath in LDCONFIG_CACHE:
cpath = LDCONFIG_CACHE[cpath]
assert os.path.isfile(cpath)
else:
cpath = None
if cpath is None:
# Skip warning message if cbin (basename of library) is ignored. This prevents messages like:
# 'W: library kernel32.dll required via ctypes not found'
if not include_library(cbin):
continue
logger.warning("Library %s required via ctypes not found", cbin)
else:
if not include_library(cpath):
continue
ret.append((cbin, cpath, "BINARY"))
_restorePaths(old)
return ret
LDCONFIG_CACHE = None # cache the output of `/sbin/ldconfig -p`
def load_ldconfig_cache():
"""
Create a cache of the `ldconfig` output, so that it is invoked only once. Its output lists thousands of
libraries, and running it for every dylib would be expensive.
"""
global LDCONFIG_CACHE
if LDCONFIG_CACHE is not None:
return
if compat.is_musl:
# Musl deliberately doesn't use ldconfig. The ldconfig executable either doesn't exist or it's a functionless
# executable which, on calling with any arguments, simply tells you that those arguments are invalid.
LDCONFIG_CACHE = {}
return
from distutils.spawn import find_executable
ldconfig = find_executable('ldconfig')
if ldconfig is None:
# If `ldconfig` is not found in $PATH, search for it in some fixed directories. Simply use a second call instead
# of fiddling around with checks for empty env-vars and string-concat.
ldconfig = find_executable('ldconfig', '/usr/sbin:/sbin:/usr/bin:/usr/sbin')
# If we still could not find the 'ldconfig' command...
if ldconfig is None:
LDCONFIG_CACHE = {}
return
if compat.is_freebsd or compat.is_openbsd:
# This has a quite different format than other Unixes:
# [vagrant@freebsd-10 ~]$ ldconfig -r
# /var/run/ld-elf.so.hints:
# search directories: /lib:/usr/lib:/usr/lib/compat:...
# 0:-lgeom.5 => /lib/libgeom.so.5
# 184:-lpython2.7.1 => /usr/local/lib/libpython2.7.so.1
ldconfig_arg = '-r'
splitlines_count = 2
pattern = re.compile(r'^\s+\d+:-l(\S+)(\s.*)? => (\S+)')
else:
# Skip first line of the library list because it is just an informative line and might contain localized
# characters. Example of first line with locale set to cs_CZ.UTF-8:
#$ /sbin/ldconfig -p
#V keši „/etc/ld.so.cache“ nalezeno knihoven: 2799
# libzvbi.so.0 (libc6,x86-64) => /lib64/libzvbi.so.0
# libzvbi-chains.so.0 (libc6,x86-64) => /lib64/libzvbi-chains.so.0
ldconfig_arg = '-p'
splitlines_count = 1
pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')
try:
text = compat.exec_command(ldconfig, ldconfig_arg)
except ExecCommandFailed:
logger.warning("Failed to execute ldconfig. Disabling LD cache.")
LDCONFIG_CACHE = {}
return
text = text.strip().splitlines()[splitlines_count:]
LDCONFIG_CACHE = {}
for line in text:
# :fixme: this assumes library names do not contain whitespace
m = pattern.match(line)
# Sanitize away any abnormal lines of output.
if m is None:
# Warn about it then skip the rest of this iteration.
if re.search("Cache generated by:", line):
# See #5540. This particular line is harmless.
pass
else:
logger.warning("Unrecognised line of output %r from ldconfig", line)
continue
path = m.groups()[-1]
if compat.is_freebsd or compat.is_openbsd:
# Insert `.so` at the end of the lib's basename. soname and filename may have (different) trailing versions.
# We assume the `.so` in the filename to mark the end of the lib's basename.
bname = os.path.basename(path).split('.so', 1)[0]
name = 'lib' + m.group(1)
assert name.startswith(bname)
name = bname + '.so' + name[len(bname):]
else:
name = m.group(1)
# ldconfig may know about several versions of the same lib, e.g., different arch, different libc, etc.
# Use the first entry.
if name not in LDCONFIG_CACHE:
LDCONFIG_CACHE[name] = path
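# A hypothetical example of the resulting cache on a glibc system, assuming `ldconfig -p` printed the line
# "libgs.so.9 (libc6,x86-64) => /lib64/libgs.so.9":
#
#   >>> load_ldconfig_cache()
#   >>> LDCONFIG_CACHE['libgs.so.9']
#   '/lib64/libgs.so.9'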

View File

@ -0,0 +1,82 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
from PyInstaller import compat
class ExecCommandFailed(SystemExit):
pass
class HookError(Exception):
"""
Base class for hook related errors.
"""
pass
class ImportErrorWhenRunningHook(HookError):
def __str__(self):
return (
"Failed to import module {0} required by hook for module {1}. Please check whether module {0} actually "
"exists and whether the hook is compatible with your version of {1}: You might want to read more about "
"hooks in the manual and provide a pull-request to improve PyInstaller.".format(self.args[0], self.args[1])
)
class RemovedCipherFeatureError(SystemExit):
def __init__(self, message):
super().__init__(
f"Bytecode encryption was removed in PyInstaller v6.0. {message}"
" For the rationale and alternatives see https://github.com/pyinstaller/pyinstaller/pull/6999"
)
class RemovedExternalManifestError(SystemExit):
def __init__(self, message):
super().__init__(f"Support for external executable manifest was removed in PyInstaller v6.0. {message}")
class RemovedWinSideBySideSupportError(SystemExit):
def __init__(self, message):
super().__init__(
f"Support for collecting and processing WinSxS assemblies was removed in PyInstaller v6.0. {message}"
)
_MISSING_PYTHON_LIB_MSG = \
"""Python library not found: {0}
This means your Python installation does not come with proper shared library files.
This usually happens due to a missing development package, or unsuitable build parameters of the Python installation.
* On Debian/Ubuntu, you need to install Python development packages:
* apt-get install python3-dev
* apt-get install python-dev
* If you are building Python by yourself, rebuild with `--enable-shared` (or, `--enable-framework` on macOS).
"""
class PythonLibraryNotFoundError(IOError):
def __init__(self):
super().__init__(_MISSING_PYTHON_LIB_MSG.format(", ".join(compat.PYDYLIB_NAMES),))
class InvalidSrcDestTupleError(SystemExit):
def __init__(self, src_dest, message):
super().__init__(f"Invalid (SRC, DEST_DIR) tuple: {src_dest!r}. {message}")
class ImportlibMetadataError(SystemExit):
def __init__(self):
super().__init__(
"PyInstaller requires importlib.metadata from python >= 3.10 stdlib or importlib_metadata from "
"importlib-metadata >= 4.6"
)
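# Hypothetical examples of how build code might raise these errors; the messages shown are illustrative only, not
# the actual strings used by PyInstaller:
#
#   raise InvalidSrcDestTupleError(entry, "DEST_DIR must be a relative path.")
#   raise RemovedCipherFeatureError("Please remove the 'cipher' argument from your spec file.")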

View File

@ -0,0 +1,15 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2023, PyInstaller Development Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: Apache-2.0
# -----------------------------------------------------------------------------
import sys
# A boolean indicating whether the frozen application is a macOS .app bundle.
is_macos_app_bundle = sys.platform == 'darwin' and sys._MEIPASS.endswith("Contents/Frameworks")
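# A minimal usage sketch; the resource-directory layout assumed below (data files under Contents/Resources, next to
# Contents/Frameworks) is an assumption about generated .app bundles, not a documented API:
#
#   >>> import os, sys
#   >>> if is_macos_app_bundle:
#   ...     resource_dir = os.path.join(os.path.dirname(sys._MEIPASS), "Resources")
#   ... else:
#   ...     resource_dir = sys._MEIPASS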

View File

@ -0,0 +1,300 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2023, PyInstaller Development Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: Apache-2.0
# -----------------------------------------------------------------------------
import ctypes
import ctypes.wintypes
# Constants from win32 headers
TOKEN_QUERY = 0x0008
TokenUser = 1 # from TOKEN_INFORMATION_CLASS enum
ERROR_INSUFFICIENT_BUFFER = 122
INVALID_HANDLE = -1
FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
SDDL_REVISION1 = 1
# Structures for ConvertSidToStringSidW
PSID = ctypes.wintypes.LPVOID
class SID_AND_ATTRIBUTES(ctypes.Structure):
_fields_ = [
("Sid", PSID),
("Attributes", ctypes.wintypes.DWORD),
]
class TOKEN_USER(ctypes.Structure):
_fields_ = [
("User", SID_AND_ATTRIBUTES),
]
PTOKEN_USER = ctypes.POINTER(TOKEN_USER)
# SECURITY_ATTRIBUTES structure for CreateDirectoryW
PSECURITY_DESCRIPTOR = ctypes.wintypes.LPVOID
class SECURITY_ATTRIBUTES(ctypes.Structure):
_fields_ = [
("nLength", ctypes.wintypes.DWORD),
("lpSecurityDescriptor", PSECURITY_DESCRIPTOR),
("bInheritHandle", ctypes.wintypes.BOOL),
]
# win32 API functions, bound via ctypes.
# NOTE: we do not use ctypes.windll.<dll_name> to avoid modifying its (global) function prototypes, which might affect
# the user's code.
advapi32 = ctypes.WinDLL("advapi32")
kernel32 = ctypes.WinDLL("kernel32")
advapi32.ConvertSidToStringSidW.restype = ctypes.wintypes.BOOL
advapi32.ConvertSidToStringSidW.argtypes = (
PSID, # [in] PSID Sid
ctypes.POINTER(ctypes.wintypes.LPWSTR), # [out] LPWSTR *StringSid
)
advapi32.ConvertStringSecurityDescriptorToSecurityDescriptorW.restype = ctypes.wintypes.BOOL
advapi32.ConvertStringSecurityDescriptorToSecurityDescriptorW.argtypes = (
ctypes.wintypes.LPCWSTR, # [in] LPCWSTR StringSecurityDescriptor
ctypes.wintypes.DWORD, # [in] DWORD StringSDRevision
ctypes.POINTER(PSECURITY_DESCRIPTOR), # [out] PSECURITY_DESCRIPTOR *SecurityDescriptor
ctypes.wintypes.PULONG, # [out] PULONG SecurityDescriptorSize
)
advapi32.GetTokenInformation.restype = ctypes.wintypes.BOOL
advapi32.GetTokenInformation.argtypes = (
ctypes.wintypes.HANDLE, # [in] HANDLE TokenHandle
ctypes.c_int, # [in] TOKEN_INFORMATION_CLASS TokenInformationClass
ctypes.wintypes.LPVOID, # [out, optional] LPVOID TokenInformation
ctypes.wintypes.DWORD, # [in] DWORD TokenInformationLength
ctypes.wintypes.PDWORD, # [out] PDWORD ReturnLength
)
kernel32.CloseHandle.restype = ctypes.wintypes.BOOL
kernel32.CloseHandle.argtypes = (
ctypes.wintypes.HANDLE, # [in] HANDLE hObject
)
kernel32.CreateDirectoryW.restype = ctypes.wintypes.BOOL
kernel32.CreateDirectoryW.argtypes = (
ctypes.wintypes.LPCWSTR, # [in] LPCWSTR lpPathName
ctypes.POINTER(SECURITY_ATTRIBUTES), # [in, optional] LPSECURITY_ATTRIBUTES lpSecurityAttributes
)
kernel32.FormatMessageW.restype = ctypes.wintypes.DWORD
kernel32.FormatMessageW.argtypes = (
ctypes.wintypes.DWORD, # [in] DWORD dwFlags
ctypes.wintypes.LPCVOID, # [in, optional] LPCVOID lpSource
ctypes.wintypes.DWORD, # [in] DWORD dwMessageId
ctypes.wintypes.DWORD, # [in] DWORD dwLanguageId
ctypes.wintypes.LPWSTR, # [out] LPWSTR lpBuffer
ctypes.wintypes.DWORD, # [in] DWORD nSize
ctypes.wintypes.LPVOID, # [in, optional] va_list *Arguments
)
kernel32.GetCurrentProcess.restype = ctypes.wintypes.HANDLE
# kernel32.GetCurrentProcess has no arguments
kernel32.GetLastError.restype = ctypes.wintypes.DWORD
# kernel32.GetLastError has no arguments
kernel32.LocalFree.restype = ctypes.wintypes.BOOL
kernel32.LocalFree.argtypes = (
ctypes.wintypes.HLOCAL, # [in] _Frees_ptr_opt_ HLOCAL hMem
)
kernel32.OpenProcessToken.restype = ctypes.wintypes.BOOL
kernel32.OpenProcessToken.argtypes = (
ctypes.wintypes.HANDLE, # [in] HANDLE ProcessHandle
ctypes.wintypes.DWORD, # [in] DWORD DesiredAccess
ctypes.wintypes.PHANDLE, # [out] PHANDLE TokenHandle
)
def _win_error_to_message(error_code):
"""
Convert win32 error code to message.
"""
message_wstr = ctypes.wintypes.LPWSTR(None)
ret = kernel32.FormatMessageW(
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
None, # lpSource
error_code, # dwMessageId
0x400, # dwLanguageId = MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT)
ctypes.cast(
ctypes.byref(message_wstr),
ctypes.wintypes.LPWSTR,
), # pointer to LPWSTR due to FORMAT_MESSAGE_ALLOCATE_BUFFER; needs to be cast to LPWSTR
64, # due to FORMAT_MESSAGE_ALLOCATE_BUFFER, this is minimum number of characters to allocate
None,
)
if ret == 0:
return None
message = message_wstr.value
kernel32.LocalFree(message_wstr)
# Strip trailing CR/LF.
if message:
message = message.strip()
return message
def _get_user_sid():
"""
Obtain the SID for the current user.
"""
process_token = ctypes.wintypes.HANDLE(INVALID_HANDLE)
try:
# Get access token for the current process
ret = kernel32.OpenProcessToken(
kernel32.GetCurrentProcess(),
TOKEN_QUERY,
ctypes.pointer(process_token),
)
if ret == 0:
error_code = kernel32.GetLastError()
raise RuntimeError(f"Failed to open process token! Error code: 0x{error_code:X}")
# Query buffer size for user info structure
user_info_size = ctypes.wintypes.DWORD(0)
ret = advapi32.GetTokenInformation(
process_token,
TokenUser,
None,
0,
ctypes.byref(user_info_size),
)
# We expect this call to fail with ERROR_INSUFFICIENT_BUFFER
if ret == 0:
error_code = kernel32.GetLastError()
if error_code != ERROR_INSUFFICIENT_BUFFER:
raise RuntimeError(f"Failed to query token information buffer size! Error code: 0x{error_code:X}")
else:
raise RuntimeError("Unexpected return value from GetTokenInformation!")
# Allocate buffer
user_info = ctypes.create_string_buffer(user_info_size.value)
ret = advapi32.GetTokenInformation(
process_token,
TokenUser,
user_info,
user_info_size,
ctypes.byref(user_info_size),
)
if ret == 0:
error_code = kernel32.GetLastError()
raise RuntimeError(f"Failed to query token information! Error code: 0x{error_code:X}")
# Convert SID to string
# Technically, we need to pass user_info->User.Sid, but as they are at the beginning of the
# buffer, just pass the buffer instead...
sid_wstr = ctypes.wintypes.LPWSTR(None)
ret = advapi32.ConvertSidToStringSidW(
ctypes.cast(user_info, PTOKEN_USER).contents.User.Sid,
ctypes.pointer(sid_wstr),
)
if ret == 0:
error_code = kernel32.GetLastError()
raise RuntimeError(f"Failed to convert SID to string! Error code: 0x{error_code:X}")
sid = sid_wstr.value
kernel32.LocalFree(sid_wstr)
except Exception:
sid = None
finally:
# Close the process token
if process_token.value != INVALID_HANDLE:
kernel32.CloseHandle(process_token)
return sid
# Get and cache current user's SID
_user_sid = _get_user_sid()
def secure_mkdir(dir_name):
"""
Replacement for mkdir that limits the access to created directory to current user.
"""
# Create security descriptor
# Prefer actual user SID over SID S-1-3-4 (current owner), because at the time of writing, Wine does not properly
# support the latter.
sid = _user_sid or "S-1-3-4"
# DACL descriptor (D):
# ace_type;ace_flags;rights;object_guid;inherit_object_guid;account_sid;(resource_attribute)
# - ace_type = SDDL_ACCESS_ALLOWED (A)
# - rights = SDDL_FILE_ALL (FA)
# - account_sid = current user (queried SID)
security_desc_str = f"D:(A;;FA;;;{sid})"
security_desc = ctypes.wintypes.LPVOID(None)
ret = advapi32.ConvertStringSecurityDescriptorToSecurityDescriptorW(
security_desc_str,
SDDL_REVISION1,
ctypes.byref(security_desc),
None,
)
if ret == 0:
error_code = kernel32.GetLastError()
raise RuntimeError(
f"Failed to create security descriptor! Error code: 0x{error_code:X}, "
f"message: {_win_error_to_message(error_code)}"
)
security_attr = SECURITY_ATTRIBUTES()
security_attr.nLength = ctypes.sizeof(SECURITY_ATTRIBUTES)
security_attr.lpSecurityDescriptor = security_desc
security_attr.bInheritHandle = False
# Create directory
ret = kernel32.CreateDirectoryW(
dir_name,
security_attr,
)
if ret == 0:
# Call failed; store error code immediately, to avoid it being overwritten in cleanup below.
error_code = kernel32.GetLastError()
# Free security descriptor
kernel32.LocalFree(security_desc)
# Exit on success
if ret != 0:
return
# Construct OSError from win error code
error_message = _win_error_to_message(error_code)
# Strip trailing dot to match error message from os.mkdir().
if error_message and error_message[-1] == '.':
error_message = error_message[:-1]
raise OSError(
None, # errno
error_message, # strerror
dir_name, # filename
error_code, # winerror
None, # filename2
)
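# Hypothetical usage sketch; on success, the directory is created with a DACL that grants full access to the
# current user only, while failure raises an OSError that mirrors os.mkdir():
#
#   >>> secure_mkdir(r"C:\Users\me\AppData\Local\Temp\_MEI12345")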

View File

@ -0,0 +1,56 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2023, PyInstaller Development Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: Apache-2.0
# -----------------------------------------------------------------------------
import os
import sys
import errno
import tempfile
# Helper for creating temporary directories with access restricted to the user running the process.
# On POSIX systems, this is already achieved by `tempfile.mkdtemp`, which uses 0o700 permissions mask.
# On Windows, however, the POSIX permissions semantics have no effect, and we need to provide our own implementation
# that restricts the access by passing appropriate security attributes to the `CreateDirectory` function.
if os.name == 'nt':
from . import _win32
def secure_mkdtemp(suffix=None, prefix=None, dir=None):
"""
Windows-specific replacement for `tempfile.mkdtemp` that restricts access to the user running the process.
Based on `mkdtemp` implementation from python 3.11 stdlib.
"""
prefix, suffix, dir, output_type = tempfile._sanitize_params(prefix, suffix, dir)
names = tempfile._get_candidate_names()
if output_type is bytes:
names = map(os.fsencode, names)
for seq in range(tempfile.TMP_MAX):
name = next(names)
file = os.path.join(dir, prefix + name + suffix)
sys.audit("tempfile.mkdtemp", file)
try:
_win32.secure_mkdir(file)
except FileExistsError:
continue # try again
except PermissionError:
# This exception is thrown when a directory with the chosen name already exists on Windows.
if (os.name == 'nt' and os.path.isdir(dir) and os.access(dir, os.W_OK)):
continue
else:
raise
return file
raise FileExistsError(errno.EEXIST, "No usable temporary directory name found")
else:
secure_mkdtemp = tempfile.mkdtemp
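# Hypothetical usage sketch, identical on both platforms; on POSIX this is plain tempfile.mkdtemp (0o700 mask),
# while on Windows the directory is created via _win32.secure_mkdir with a restrictive security descriptor:
#
#   >>> tmp_dir = secure_mkdtemp(prefix="_MEI")
#   >>> os.path.isdir(tmp_dir)
#   True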

View File

@ -0,0 +1,223 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: Apache-2.0
# -----------------------------------------------------------------------------
# This module is not a "fake module" in the classical sense, but a real module that can be imported. It acts as an RPC
# interface for the functions of the bootloader.
"""
This module connects to the bootloader to send messages to the splash screen.
It is intended to act as an RPC interface for the functions provided by the bootloader, such as displaying text or
closing. This makes the user's Python program independent of how the communication with the bootloader is
implemented, since a consistent API is provided.
To connect to the bootloader, it connects to a local tcp socket whose port is passed through the environment variable
'_PYIBoot_SPLASH'. The bootloader creates a server socket and accepts every connection request. Since the os-module,
which is needed to request the environment variable, is not available at boot time, the module does not establish the
connection until initialization.
The protocol by which the Python interpreter communicates with the bootloader is implemented in this module.
This module does not support reloads while the splash screen is displayed, i.e. it cannot be reloaded (such as by
importlib.reload), because the splash screen closes automatically when the connection to this instance of the module
is lost.
"""
import atexit
import os
# Import the _socket module instead of the socket module. All functions needed to connect to the IPC system are
# provided by the C module, and the user's program does not necessarily need to include the socket module and all
# modules it requires.
import _socket
__all__ = ["CLOSE_CONNECTION", "FLUSH_CHARACTER", "is_alive", "close", "update_text"]
try:
# The user might have excluded logging from imports.
import logging as _logging
except ImportError:
_logging = None
try:
# The user might have excluded functools from imports.
from functools import update_wrapper
except ImportError:
update_wrapper = None
# Utility
def _log(level, msg, *args, **kwargs):
"""
Conditional wrapper around logging module. If the user excluded logging from the imports or it was not imported,
this function should handle it and avoid using the logger.
"""
if _logging:
logger = _logging.getLogger(__name__)
logger.log(level, msg, *args, **kwargs)
# These constants define single characters which are needed to send commands to the bootloader. Those constants are
# also set in the tcl script.
CLOSE_CONNECTION = b'\x04' # ASCII End-of-Transmission character
FLUSH_CHARACTER = b'\x0D' # ASCII Carriage Return character
# Module internal variables
_initialized = False
# Keep these variables always synchronized
_ipc_socket_closed = True
_ipc_socket = _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM)
def _initialize():
"""
Initialize this module
:return:
"""
global _initialized, _ipc_socket, _ipc_socket_closed
try:
_ipc_socket.connect(("localhost", _ipc_port))
_ipc_socket_closed = False
_initialized = True
_log(20, "A connection to the splash screen was successfully established.") # log-level: info
except OSError as err:
raise ConnectionError("Unable to connect to the tcp server socket on port %d" % _ipc_port) from err
# We expect a splash screen from the bootloader, but if _PYIBoot_SPLASH is not set, the module cannot connect to it.
try:
_ipc_port = int(os.environ['_PYIBoot_SPLASH'])
del os.environ['_PYIBoot_SPLASH']
# Initialize the connection upon importing this module. This will establish a connection to the bootloader's TCP
# server socket.
_initialize()
except (KeyError, ValueError) as _err:
# log-level: warning
_log(
30, "The environment does not allow connecting to the splash screen. Are the splash resources attached to the "
"bootloader or did an error occur?",
exc_info=_err
)
except ConnectionError as _err:
# log-level: error
_log(40, "Cannot connect to the bootloader's ipc server socket", exc_info=_err)
def _check_connection(func):
"""
Utility decorator for checking whether the function should be executed.
The wrapped function may raise a ConnectionError if the module was not initialized correctly.
"""
def wrapper(*args, **kwargs):
"""
Executes the wrapped function if the environment allows it.
That is, if the connection to the bootloader has not been closed and the module is initialized.
:raises RuntimeError: if the module was not initialized correctly.
"""
if _initialized and _ipc_socket_closed:
_log(
20, "The module has been disabled, so the use of the splash screen is no longer supported."
) # log-level: info
return
elif not _initialized:
raise RuntimeError("This module is not initialized; did it fail to load?")
return func(*args, **kwargs)
if update_wrapper:
# For runtime introspection
update_wrapper(wrapper, func)
return wrapper
@_check_connection
def _send_command(cmd, args=None):
"""
Send the command followed by args to the splash screen.
:param str cmd: The command to send. All commands have to be defined as procedures in the tcl splash screen script.
:param list[str] args: All arguments to send to the receiving function
"""
if args is None:
args = []
full_cmd = "%s(%s)" % (cmd, " ".join(args))
try:
_ipc_socket.sendall(full_cmd.encode("utf-8") + FLUSH_CHARACTER)
except OSError as err:
raise ConnectionError("Unable to send '%s' to the bootloader" % full_cmd) from err
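# For example, the (hypothetical) call _send_command("update_text", ["Loading"]) writes the bytes
# b'update_text(Loading)\x0d' to the socket; the trailing FLUSH_CHARACTER marks the end of the command for the
# receiving tcl script.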
def is_alive():
"""
Indicates whether the module can be used.
Returns False if the module is either not initialized or was disabled by closing the splash screen. Otherwise,
the module should be usable.
"""
return _initialized and not _ipc_socket_closed
@_check_connection
def update_text(msg: str):
"""
Updates the text on the splash screen window.
:param str msg: the text to be displayed
:raises ConnectionError: If the OS fails to write to the socket.
:raises RuntimeError: If the module is not initialized.
"""
_send_command("update_text", [msg])
def close():
"""
Close the connection to the ipc tcp server socket.
This will close the splash screen and render this module unusable. After this function is called, no connection
can be opened to the splash screen again and all functions in this module become unusable.
"""
global _ipc_socket_closed
if _initialized and not _ipc_socket_closed:
_ipc_socket.sendall(CLOSE_CONNECTION)
_ipc_socket.close()
_ipc_socket_closed = True
@atexit.register
def _exit():
close()
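# A minimal sketch of how a frozen application could drive this module; the ImportError guard is an assumption for
# code that also runs unfrozen:
#
#   try:
#       import pyi_splash
#       pyi_splash.update_text("Loading application...")
#       # ... perform slow imports / startup work ...
#       pyi_splash.close()
#   except ImportError:
#       pass  # not a PyInstaller build with a splash screen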
# Discarded idea:
# Problem:
# There was a race condition between the tcl (splash screen) and python interpreters. Initially, tcl was started as a
# separate thread next to the bootloader thread, which starts python. Tcl sets the environment variable
# '_PYIBoot_SPLASH' with a port to connect to. If the python interpreter is initialized faster than the tcl
# interpreter (sometimes the case in onedir mode), the environment variable does not yet exist. Since python caches
# the environment variables at startup, updating the environ from tcl does not update the python environ.
#
# Considered Solution:
# Don't rely on python itself to look up the environment variables. We could implement, via ctypes, functions to look
# up the latest environ. See https://stackoverflow.com/a/33642551/5869139 for a possible implementation.
#
# Discarded because:
# This module would need to implement, for every supported OS, a dll hook to link to the environ variable, technically
# reimplementing the C function 'convertenviron' from posixmodule.c_ in python. The implemented solution now waits for
# the tcl interpreter to finish before starting python.
#
# .. _posixmodule.c:
# https://github.com/python/cpython/blob/3.7/Modules/posixmodule.c#L1315-L1393

View File

@ -0,0 +1 @@
#

Some files were not shown because too many files have changed in this diff.