SpaceLife-Updater/venv/lib64/python3.12/site-packages/PyInstaller/depend/bindepend.py

910 lines
39 KiB
Python
Executable File

#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Find external dependencies of binary libraries.
"""
import ctypes.util
import os
import pathlib
import re
import sys
import sysconfig
import subprocess
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import dylib, utils
from PyInstaller.utils.win32 import winutils
if compat.is_darwin:
import PyInstaller.utils.osx as osxutils
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# PE machine type of the running python executable. Stays None on non-Windows platforms; on Windows it is compared
# against the machine type of candidate DLLs in `_resolve_library_path_in_search_paths`.
_exe_machine_type = None
if compat.is_win:
    _exe_machine_type = winutils.get_pe_file_machine_type(compat.python_executable)
#- High-level binary dependency analysis
def _get_paths_for_parent_directory_preservation():
"""
Return list of paths that serve as prefixes for parent-directory preservation of collected binaries and/or
shared libraries. If a binary is collected from a location that starts with a path from this list, the relative
directory structure is preserved within the frozen application bundle; otherwise, the binary is collected to the
frozen application's top-level directory.
"""
# Use only site-packages paths. We have no control over contents of `sys.path`, so using all paths from that may
# lead to unintended behavior in corner cases. For example, if `sys.path` contained the drive root (see #7028),
# all paths that do not match some other sub-path rooted in that drive will end up recognized as relative to the
# drive root. In such case, any DLL collected from `c:\Windows\system32` will be collected into `Windows\system32`
# sub-directory; ucrt DLLs collected from MSVC or Windows SDK installed in `c:\Program Files\...` will end up
# collected into `Program Files\...` subdirectory; etc.
#
# On the other hand, the DLL parent directory preservation is primarily aimed at packages installed via PyPI
# wheels, which are typically installed into site-packages. Therefore, limiting the directory preservation for
# shared libraries collected from site-packages should do the trick, and should be reasonably safe.
import site
orig_paths = site.getsitepackages()
orig_paths.append(site.getusersitepackages())
# Explicitly excluded paths. `site.getsitepackages` seems to include `sys.prefix`, which we need to exclude, to
# avoid issue swith DLLs in its sub-directories. We need both resolved and unresolved variant to handle cases
# where `base_prefix` itself is a symbolic link (e.g., `scoop`-installed python on Windows, see #8023).
excluded_paths = {
pathlib.Path(sys.base_prefix),
pathlib.Path(sys.base_prefix).resolve(),
pathlib.Path(sys.prefix),
pathlib.Path(sys.prefix).resolve(),
}
# For each path in orig_paths, append a resolved variant. This helps with linux venv where we need to consider
# both `venv/lib/python3.11/site-packages` and `venv/lib/python3.11/site-packages` and `lib64` is a symlink
# to `lib`.
orig_paths += [pathlib.Path(path).resolve() for path in orig_paths]
paths = set()
for path in orig_paths:
if not path:
continue
path = pathlib.Path(path)
# Filter out non-directories (e.g., /path/to/python3x.zip) or non-existent paths
if not path.is_dir():
continue
# Filter out explicitly excluded paths
if path in excluded_paths:
continue
paths.add(path)
# Sort by length (in term of path components) to ensure match against the longest common prefix (for example, match
# /path/to/venv/lib/site-packages instead of /path/to/venv when both paths are in site paths).
paths = sorted(paths, key=lambda x: len(x.parents), reverse=True)
return paths
def _select_destination_directory(src_filename, parent_dir_preservation_paths):
# Check parent directory preservation paths
for parent_dir_preservation_path in parent_dir_preservation_paths:
if parent_dir_preservation_path in src_filename.parents:
# Collect into corresponding sub-directory.
return src_filename.relative_to(parent_dir_preservation_path)
# Collect into top-level directory.
return src_filename.name
def binary_dependency_analysis(binaries, search_paths=None):
    """
    Perform binary dependency analysis on the given TOC list of collected binaries, by recursively scanning each binary
    for linked dependencies (shared library imports). Returns new TOC list that contains both original entries and their
    binary dependencies.

    Additional search paths for dependencies' full path resolution may be supplied via optional argument.

    Args:
        binaries: list of `(dest_name, src_name, typecode)` tuples. Entries with typecode 'SYMLINK' are passed
            through without being analyzed.
        search_paths: optional list of directories, forwarded to `get_imports`.

    Returns:
        A new list that starts with a copy of `binaries`, followed by one 'BINARY' entry per newly discovered
        dependency and, on non-Windows platforms, a 'SYMLINK' entry for each dependency that is collected into a
        sub-directory. The input list itself is not modified.
    """

    # Get all path prefixes for binaries' parent-directory preservation. For binaries collected from packages in (for
    # example) site-packages directory, we should try to preserve the parent directory structure.
    parent_dir_preservation_paths = _get_paths_for_parent_directory_preservation()

    # Keep track of processed binaries and processed dependencies.
    processed_binaries = set()
    processed_dependencies = set()

    # Keep track of unresolved dependencies, in order to defer the missing-library warnings until after everything has
    # been processed. This allows us to suppress warnings for dependencies that end up being collected anyway; for
    # details, see the end of this function.
    missing_dependencies = []

    # Populate output TOC with input binaries - this also serves as TODO list, as we iterate over it while appending
    # new entries at the end. (Appending during iteration is intentional: it is what makes the analysis recursive.)
    output_toc = binaries[:]
    for dest_name, src_name, typecode in output_toc:
        # Do not process symbolic links (already present in input TOC list, or added during analysis below).
        if typecode == 'SYMLINK':
            continue

        # Keep track of processed binaries, to avoid unnecessarily repeating analysis of the same file. Use pathlib.Path
        # to avoid having to worry about case normalization.
        src_path = pathlib.Path(src_name)
        if src_path in processed_binaries:
            continue
        processed_binaries.add(src_path)

        logger.debug("Analyzing binary %r", src_name)

        # Analyze imports (linked dependencies)
        for dep_name, dep_src_path in get_imports(src_name, search_paths):
            logger.debug("Processing dependency, name: %r, resolved path: %r", dep_name, dep_src_path)

            # Skip unresolved dependencies. Defer the missing-library warnings until after binary dependency analysis
            # is complete.
            if not dep_src_path:
                missing_dependencies.append((dep_name, src_name))
                continue

            # Compare resolved dependency against global inclusion/exclusion rules.
            if not dylib.include_library(dep_src_path):
                logger.debug("Skipping dependency %r due to global exclusion rules.", dep_src_path)
                continue

            dep_src_path = pathlib.Path(dep_src_path)  # Turn into pathlib.Path for subsequent processing

            # Avoid processing this dependency if we have already processed it.
            if dep_src_path in processed_dependencies:
                logger.debug("Skipping dependency %r due to prior processing.", str(dep_src_path))
                continue
            processed_dependencies.add(dep_src_path)

            # Try to preserve parent directory structure, if applicable.
            # NOTE: do not resolve the source path, because on macOS and linux, it may be a versioned .so (e.g.,
            # libsomething.so.1, pointing at libsomething.so.1.2.3), and we need to collect it under original name!
            dep_dest_path = _select_destination_directory(dep_src_path, parent_dir_preservation_paths)
            dep_dest_path = pathlib.PurePath(dep_dest_path)  # Might be a str() if it is just a basename...

            # If we are collecting library into top-level directory on macOS, check whether it comes from a
            # .framework bundle. If it does, re-create the .framework bundle in the top-level directory
            # instead.
            if compat.is_darwin and dep_dest_path.parent == pathlib.PurePath('.'):
                if osxutils.is_framework_bundle_lib(dep_src_path):
                    # dep_src_path is parent_path/Name.framework/Versions/Current/Name
                    framework_parent_path = dep_src_path.parent.parent.parent.parent
                    dep_dest_path = pathlib.PurePath(dep_src_path.relative_to(framework_parent_path))

            logger.debug("Collecting dependency %r as %r.", str(dep_src_path), str(dep_dest_path))
            output_toc.append((str(dep_dest_path), str(dep_src_path), 'BINARY'))

            # On non-Windows, if we are not collecting the binary into application's top-level directory ('.'),
            # add a symbolic link from top-level directory to the actual location. This is to accommodate
            # LD_LIBRARY_PATH being set to the top-level application directory on linux (although library search
            # should be mostly done via rpaths, so this might be redundant) and to accommodate library path
            # rewriting on macOS, which assumes that the library was collected into top-level directory.
            if not compat.is_win and dep_dest_path.parent != pathlib.PurePath('.'):
                logger.debug("Adding symbolic link from %r to top-level application directory.", str(dep_dest_path))
                output_toc.append((str(dep_dest_path.name), str(dep_dest_path), 'SYMLINK'))

    # Display warnings about missing dependencies. The set below holds case-normalized base names of the source files
    # of all non-symlink entries collected so far.
    seen_binaries = set([
        os.path.normcase(os.path.basename(src_name)) for dest_name, src_name, typecode in output_toc
        if typecode != 'SYMLINK'
    ])
    for dependency_name, referring_binary in missing_dependencies:
        # Ignore libraries that we would not collect in the first place.
        if not dylib.include_library(dependency_name):
            continue
        # Apply global warning suppression rules.
        if not dylib.warn_missing_lib(dependency_name):
            continue
        # If the binary with a matching basename happens to be among the discovered binaries, suppress the message as
        # well. This might happen either because the library was collected by some other mechanism (for example, via
        # hook, or supplied by the user), or because it was discovered during the analysis of another binary (which,
        # for example, had properly set run-paths on Linux/macOS or was located next to that other analyzed binary on
        # Windows).
        if os.path.normcase(os.path.basename(dependency_name)) in seen_binaries:
            continue
        logger.warning("Library not found: could not resolve %r, dependency of %r.", dependency_name, referring_binary)

    return output_toc
#- Low-level import analysis
def get_imports(filename, search_paths=None):
    """
    Analyze the given binary file (shared library or executable), and obtain the list of shared libraries it imports
    (i.e., link-time dependencies).

    Returns set of tuples (name, fullpath). The name component is the referenced name, and on macOS, may not be just
    a base name. If the library's full path cannot be resolved, fullpath element is None. For `.manifest` files on
    Windows, an empty list is returned instead.

    Additional list of search paths may be specified via `search_paths`, to be used as a fall-back when the
    platform-specific resolution mechanism fails to resolve a library fullpath.
    """
    if compat.is_win:
        # Manifest files are not analyzed; they have no imports.
        is_manifest = filename.lower().endswith(".manifest")
        return [] if is_manifest else _get_imports_pefile(filename, search_paths)

    if compat.is_darwin:
        return _get_imports_macholib(filename, search_paths)

    # Linux and other POSIX-like platforms.
    return _get_imports_ldd(filename, search_paths)
def _get_imports_pefile(filename, search_paths):
    """
    Windows-specific helper for `get_imports`, which uses the `pefile` library to walk through PE header.

    Args:
        filename: path of the PE file (DLL or executable) to analyze.
        search_paths: optional iterable of directories in which referenced DLLs are looked up; the parent directory
            of `filename` is always searched first.

    Returns:
        Set of (dll_name, fullpath) tuples, covering both imported DLLs and DLLs targeted by forwarded exports.
        The fullpath element is whatever `resolve_library_path` returns for the name.
    """
    import pefile

    referenced_dlls = set()

    # By default, pefile library parses all PE information. We are only interested in the list of dependent dlls.
    # Performance is improved by reading only needed information. https://code.google.com/p/pefile/wiki/UsageExamples
    pe = pefile.PE(filename, fast_load=True)
    try:
        pe.parse_data_directories(
            directories=[
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            ],
            forwarded_exports_only=True,
            import_dllnames_only=True,
        )

        # If a library has no binary dependencies, pe.DIRECTORY_ENTRY_IMPORT does not exist.
        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
            referenced_dlls.add(entry.dll.decode('utf-8'))

        # We must also read the exports table to find forwarded symbols:
        # http://blogs.msdn.com/b/oldnewthing/archive/2006/07/19/671238.aspx
        exported_symbols = getattr(pe, 'DIRECTORY_ENTRY_EXPORT', None)
        if exported_symbols:
            for symbol in exported_symbols.symbols:
                if symbol.forwarder is None:
                    continue
                # symbol.forwarder is a bytes object, for example b'KERNEL32.EnterCriticalSection'. The part before
                # the first dot is taken as the name of the DLL.
                forwarder = symbol.forwarder.decode('utf-8')
                referenced_dlls.add(forwarder.split('.')[0] + ".dll")
    finally:
        # Always release the PE file, even if parsing or decoding fails part-way through; otherwise the handle is
        # leaked.
        pe.close()

    # Attempt to resolve full paths to referenced DLLs. Always add the input binary's parent directory to the search
    # paths. `list()` lets callers pass any iterable (e.g., a tuple) without breaking the concatenation.
    search_paths = [os.path.dirname(filename)] + list(search_paths or [])
    return {(lib, resolve_library_path(lib, search_paths)) for lib in referenced_dlls}
def _get_imports_ldd(filename, search_paths):
    """
    Helper for `get_imports`, which uses `ldd` to analyze shared libraries. Used on Linux and other POSIX-like platforms
    (with exception of macOS).

    Runs `ldd` on `filename`, forwards its non-trivial stderr lines to our stderr, and parses its stdout. Returns a
    set of (name, fullpath) tuples, where fullpath is None if the library could be resolved neither by `ldd` nor in
    `search_paths`.
    """
    # Output of ldd varies between platforms, so pick the line pattern accordingly.
    if compat.is_aix:
        # Match libs of the form
        #   'archivelib.a(objectmember.so/.o)'
        # or
        #   'sharedlib.so'
        # Will not match the fake lib '/unix'
        ldd_line_re = re.compile(
            r"^\s*(((?P<libarchive>(.*\.a))(?P<objectmember>\(.*\)))|((?P<libshared>(.*\.so))))$"
        )
    elif compat.is_hpux or compat.is_solar:
        # Match libs of the form
        #   'sharedlib.so => full-path-to-lib
        # e.g.
        #   'libpython2.7.so => /usr/local/lib/hpux32/libpython2.7.so' (HP-UX)
        #   'libpython2.7.so.1.0 => /usr/local/lib/libpython2.7.so.1.0' (Solaris)
        # On Solaris, this will not match the platform specific libs starting with '/platform'
        ldd_line_re = re.compile(r"^\s+(.*)\s+=>\s+(.*)$")
    else:
        ldd_line_re = re.compile(r"\s*(.*?)\s+=>\s+(.*?)\s+\(.*\)")

    ldd = subprocess.run(
        ['ldd', filename],
        stdin=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        encoding='utf-8',
    )

    for message in ldd.stderr.splitlines():
        # Python extensions (including stdlib ones) are not linked against python.so but rely on Python's symbols having
        # already been loaded into symbol space at runtime. musl's ldd issues a series of harmless warnings to stderr
        # telling us that those symbols are unfindable. These should be suppressed.
        is_musl_noise = message.startswith("Error relocating ") and message.endswith(" symbol not found")
        # Propagate any other (non-empty) warnings it might have.
        if message and not is_musl_noise:
            print(message, file=sys.stderr)

    imports = set()
    for line in ldd.stdout.splitlines():
        match = ldd_line_re.search(line)
        if match is None:
            # On glibc-based linux distributions, missing libraries are marked with name.so => not found
            # TODO: should we warn about other unprocessed lines?
            if not line.endswith("not found"):
                continue
            parts = line.split('=>')
            if len(parts) != 2:
                continue
            name, lib = parts[0].strip(), None
        else:
            if compat.is_aix:
                archive = match.group('libarchive')
                if archive:
                    # We matched an archive lib with a request for a particular embedded shared object.
                    #   'archivelib.a(objectmember.so/.o)'
                    lib = archive
                    name = os.path.basename(lib) + match.group('objectmember')
                else:
                    # We matched a stand-alone shared library.
                    #   'sharedlib.so'
                    lib = match.group('libshared')
                    name = os.path.basename(lib)
            else:
                name, lib = match.group(1), match.group(2)

            # linux-gate is a fake library which does not exist and should be ignored. See also:
            # http://www.trilithium.com/johan/2005/08/linux-gate/
            if name.startswith(('linux-gate', 'linux-vdso')):
                continue

            # On cygwin, exclude Windows system library
            if compat.is_cygwin and lib.lower().startswith('/cygdrive/c/windows/system'):
                continue

            # Reset library path if it does not exist
            if not os.path.exists(lib):
                lib = None

        # Fall back to searching the supplied search paths, if any, using the basename of the referenced name.
        if not lib:
            lib = _resolve_library_path_in_search_paths(os.path.basename(name), search_paths)

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if lib:
            lib = os.path.normpath(lib)

        # Return referenced name as-is instead of computing a basename, to provide additional context when library
        # cannot be resolved.
        imports.add((name, lib))

    return imports
def _get_imports_macholib(filename, search_paths):
    """
    macOS-specific helper for `get_imports`, which uses `macholib` to analyze library load commands in Mach-O headers.

    Args:
        filename: path of the Mach-O binary to analyze.
        search_paths: optional list of directories used as a fall-back when a referenced library cannot be resolved
            via run paths / `dyld_find`.

    Returns:
        Set of (referenced_name, resolved_path) tuples. The referenced name is returned exactly as found in the
        Mach-O header (it may start with @rpath, @loader_path, or @executable_path); resolved_path is None when the
        library cannot be resolved.
    """
    from macholib.dyld import dyld_find
    from macholib.mach_o import LC_RPATH
    from macholib.MachO import MachO

    output = set()
    referenced_libs = set()  # Libraries referenced in Mach-O headers.

    # Parent directory of the input binary and parent directory of python executable, used to substitute @loader_path
    # and @executable_path. The MacOS dylib loader (dyld) fully resolves the symbolic links when using @loader_path
    # and @executable_path references, so we need to do the same using `os.path.realpath`.
    bin_path = os.path.dirname(os.path.realpath(filename))
    python_bin_path = os.path.dirname(os.path.realpath(sys.executable))

    # Walk through Mach-O headers, and collect all referenced libraries.
    m = MachO(filename)
    for header in m.headers:
        for _, _, lib in header.walkRelocatables():
            referenced_libs.add(lib)

    # Find LC_RPATH commands to collect rpaths. macholib does not handle @rpath, so we need to handle run paths
    # ourselves.
    run_paths = set()
    for header in m.headers:
        for command in header.commands:
            # A command is a tuple like:
            #   (<macholib.mach_o.load_command object at 0x>,
            #    <macholib.mach_o.rpath_command object at 0x>,
            #    '../lib\x00\x00')
            cmd_type = command[0].cmd
            if cmd_type == LC_RPATH:
                rpath = command[2].decode('utf-8')
                # Remove trailing '\x00' characters. E.g., '../lib\x00\x00'
                rpath = rpath.rstrip('\x00')
                # If run path starts with @, ensure it starts with either @loader_path or @executable_path. We cannot
                # process anything else.
                if rpath.startswith("@") and not rpath.startswith(("@executable_path", "@loader_path")):
                    logger.warning("Unsupported rpath format %r found in binary %r - ignoring...", rpath, filename)
                    continue
                run_paths.add(rpath)

    # For distributions like Anaconda, all of the dylibs are stored in the lib directory of the Python distribution, not
    # alongside of the .so's in each module's subdirectory. Usually, libraries using @rpath to reference their
    # dependencies also set up their run-paths via LC_RPATH commands. However, they are not strictly required to do so,
    # because run-paths are inherited from the process within which the libraries are loaded. Therefore, if the python
    # executable uses an LC_RPATH command to set up run-path that resolves the shared lib directory (for example,
    # `@loader_path/../lib` in case of the Anaconda python), all libraries loaded within the python process are able
    # to resolve the shared libraries within the environment's shared lib directory without using LC_RPATH commands
    # themselves.
    #
    # Our analysis does not account for inherited run-paths, and we attempt to work around this limitation by
    # registering the following fall-back run-path.
    run_paths.add(os.path.join(compat.base_prefix, 'lib'))

    def _resolve_using_loader_path(lib, bin_path, python_bin_path):
        # macholib does not support @loader_path, so replace it with @executable_path. Strictly speaking, @loader_path
        # should be anchored to parent directory of analyzed binary (`bin_path`), while @executable_path should be
        # anchored to the parent directory of the process' executable. Typically, this would be python executable
        # (`python_bin_path`), unless we are analyzing a collected 3rd party executable. In that case, `bin_path`
        # is correct option. So we first try resolving using `bin_path`, and then fall back to `python_bin_path`.
        # This does not account for transitive run paths of higher-order dependencies, but there is only so much we
        # can do here...
        if lib.startswith('@loader_path'):
            lib = lib.replace('@loader_path', '@executable_path')

        try:
            # Try resolving with binary's path first...
            return dyld_find(lib, executable_path=bin_path)
        except ValueError:
            # ... and fall-back to resolving with python executable's path
            try:
                return dyld_find(lib, executable_path=python_bin_path)
            except ValueError:
                return None

    def _resolve_using_path(lib):
        try:
            return dyld_find(lib)
        except ValueError:
            return None

    # Try to resolve full path of the referenced libraries.
    for referenced_lib in referenced_libs:
        resolved_lib = None

        # If path starts with @rpath, we have to handle it ourselves.
        if referenced_lib.startswith('@rpath'):
            # Remove the @rpath/ prefix. A reference without any component after the prefix cannot be joined with a
            # run path (and `os.path.join()` without arguments would raise), so it is left unresolved here.
            lib_components = referenced_lib.split(os.sep)[1:]
            if lib_components:
                lib = os.path.join(*lib_components)

                # Try all run paths.
                for run_path in run_paths:
                    # Join the path.
                    lib_path = os.path.join(run_path, lib)

                    if lib_path.startswith(("@executable_path", "@loader_path")):
                        # Run path starts with @executable_path or @loader_path.
                        lib_path = _resolve_using_loader_path(lib_path, bin_path, python_bin_path)
                    else:
                        # If run path was relative, anchor it to binary's location. The joined path must be assigned
                        # back; otherwise the relative path would be looked up as-is.
                        if not os.path.isabs(lib_path):
                            lib_path = os.path.join(bin_path, lib_path)
                        lib_path = _resolve_using_path(lib_path)

                    if lib_path and os.path.exists(lib_path):
                        resolved_lib = lib_path
                        break
        else:
            if referenced_lib.startswith(("@executable_path", "@loader_path")):
                resolved_lib = _resolve_using_loader_path(referenced_lib, bin_path, python_bin_path)
            else:
                resolved_lib = _resolve_using_path(referenced_lib)

        # Fall back to searching the supplied search paths, if any.
        if not resolved_lib:
            resolved_lib = _resolve_library_path_in_search_paths(
                os.path.basename(referenced_lib),  # Search for basename of the referenced name.
                search_paths,
            )

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if resolved_lib:
            resolved_lib = os.path.normpath(resolved_lib)

        # Return referenced library name as-is instead of computing a basename. Full referenced name carries additional
        # information that might be useful for the caller to determine how to deal with unresolved library (e.g., ignore
        # unresolved libraries that are supposed to be located in system-wide directories).
        output.add((referenced_lib, resolved_lib))

    return output
#- Library full path resolution
def resolve_library_path(name, search_paths=None):
    """
    Given a library name, attempt to resolve full path to that library. The search for library is done via
    platform-specific mechanism and falls back to the optionally-provided list of search paths. Returns None if the
    library cannot be resolved. If the given library name is already an absolute path, it is returned without any
    processing.
    """
    # Absolute paths are passed through untouched.
    if os.path.isabs(name):
        return name

    if compat.is_unix:
        # Platform-specific helper first; caller-supplied search paths (if any) serve as the fall-back.
        return _resolve_library_path_unix(name) or _resolve_library_path_in_search_paths(name, search_paths)

    if compat.is_win:
        # Caller-supplied search paths (if any) take precedence on Windows.
        resolved = _resolve_library_path_in_search_paths(name, search_paths)
        if not resolved:
            # Fall back to default Windows search paths, using the PATH environment variable (which should also
            # include the system paths, such as c:\windows and c:\windows\system32)
            env_dirs = [entry for entry in compat.getenv('PATH', '').split(os.pathsep) if entry]
            resolved = _resolve_library_path_in_search_paths(name, env_dirs)
        return resolved

    # Remaining platforms: defer to ctypes.
    return ctypes.util.find_library(name)
# Compatibility aliases for hooks from contributed hooks repository. All of these now point to the high-level
# `resolve_library_path`, so they share its signature `(name, search_paths=None)`.
findLibrary = resolve_library_path
findSystemLibrary = resolve_library_path
def _resolve_library_path_in_search_paths(name, search_paths=None):
    """
    Low-level helper for resolving given library name to full path in given list of search paths.

    Returns the normalized path of the first acceptable `search_path/name` file, or None if there is none (or if no
    search paths were given).
    """
    def _architecture_matches(candidate):
        # Only Windows needs this check: the DLL's machine type must match that of the running python interpreter.
        if not compat.is_win:
            return True
        try:
            machine_type = winutils.get_pe_file_machine_type(candidate)
        except Exception:
            # A search path might contain a DLL that we cannot analyze; for example, a stub file. Skip over.
            return False
        return machine_type == _exe_machine_type

    for directory in search_paths or []:
        candidate = os.path.join(directory, name)
        if os.path.isfile(candidate) and _architecture_matches(candidate):
            return os.path.normpath(candidate)

    return None
def _resolve_library_path_unix(name):
    """
    UNIX-specific helper for resolving library path.

    Emulates the algorithm used by dlopen. `name` must include the prefix, e.g., ``libpython2.4.so``.

    The lookup order is: the platform's library-path environment variable, the ldconfig cache, and finally a list
    of well-known library directories. Returns the full path of the library (using its soname as file name, where
    the soname can be determined), or None if the library cannot be found.
    """
    assert compat.is_unix, "Current implementation for Unix only (Linux, Solaris, AIX, FreeBSD)"

    # Look in the LD_LIBRARY_PATH according to platform.
    if compat.is_aix:
        lp = compat.getenv('LIBPATH', '')
    elif compat.is_darwin:
        lp = compat.getenv('DYLD_LIBRARY_PATH', '')
    else:
        lp = compat.getenv('LD_LIBRARY_PATH', '')
    lib = _which_library(name, filter(None, lp.split(os.pathsep)))

    # Look in /etc/ld.so.cache
    # Solaris does not have /sbin/ldconfig. Just check if this file exists.
    if lib is None:
        utils.load_ldconfig_cache()
        lib = utils.LDCONFIG_CACHE.get(name)
        # The cache may be stale and point at a file that is gone. Treat such an entry as a miss and keep
        # searching, instead of aborting the whole analysis with an assertion error.
        if not lib or not os.path.isfile(lib):
            if lib:
                logger.debug("Ignoring stale ldconfig cache entry %r for library %r.", lib, name)
            lib = None

    # Look in the known safe paths.
    if lib is None:
        # Architecture independent locations.
        paths = ['/lib', '/usr/lib']
        # Architecture dependent locations.
        if compat.architecture == '32bit':
            paths.extend(['/lib32', '/usr/lib32'])
        else:
            paths.extend(['/lib64', '/usr/lib64'])
        # Machine dependent locations.
        if compat.machine == 'intel':
            if compat.architecture == '32bit':
                paths.extend(['/usr/lib/i386-linux-gnu'])
            else:
                paths.extend(['/usr/lib/x86_64-linux-gnu'])

        # On Debian/Ubuntu /usr/bin/python is linked statically with libpython. Newer Debian/Ubuntu with multiarch
        # support puts the libpythonX.Y.so in paths like /usr/lib/i386-linux-gnu/. Try to query the arch-specific
        # sub-directory, if available.
        arch_subdir = sysconfig.get_config_var('multiarchsubdir')
        if arch_subdir:
            arch_subdir = os.path.basename(arch_subdir)
            paths.append(os.path.join('/usr/lib', arch_subdir))
        else:
            logger.debug('Multiarch directory not detected.')

        # Termux (a Ubuntu like subsystem for Android) has an additional libraries directory.
        if os.path.isdir('/data/data/com.termux/files/usr/lib'):
            paths.append('/data/data/com.termux/files/usr/lib')

        if compat.is_aix:
            paths.append('/opt/freeware/lib')
        elif compat.is_hpux:
            if compat.architecture == '32bit':
                paths.append('/usr/local/lib/hpux32')
            else:
                paths.append('/usr/local/lib/hpux64')
        elif compat.is_freebsd or compat.is_openbsd:
            paths.append('/usr/local/lib')
        lib = _which_library(name, paths)

    # Give up :(
    if lib is None:
        return None

    # Resolve the file name into the soname
    if compat.is_freebsd or compat.is_aix or compat.is_openbsd:
        # On FreeBSD objdump does not show SONAME, and on AIX objdump does not exist, so we just return the lib we
        # have found.
        return lib

    lib_dir = os.path.dirname(lib)
    return os.path.join(lib_dir, _get_so_name(lib))
def _which_library(name, dirs):
    """
    Search for a shared library in a list of directories.

    Args:
        name:
            The library name to look for. A directory entry matches if it starts with this name, optionally
            followed by digits, followed by a dot (see `_library_matcher`).
        dirs:
            An iterable of folders to search in. Entries that are not directories, or that cannot be listed, are
            skipped.

    Returns:
        The path to the library if found or None otherwise. Within a directory, entries are examined in sorted
        order, so the result does not depend on the order in which the file system lists them.
    """
    matcher = _library_matcher(name)
    for directory in dirs:
        # Skip anything that is not a directory; `os.listdir` would raise on a regular file.
        if not os.path.isdir(directory):
            continue
        try:
            entries = sorted(os.listdir(directory))
        except OSError:
            # Unreadable directory (e.g., insufficient permissions); move on to the next one.
            continue
        for entry in entries:
            if matcher(entry):
                return os.path.join(directory, entry)
    return None
def _library_matcher(name):
"""
Create a callable that matches libraries if **name** is a valid library prefix for input library full names.
"""
return re.compile(name + r"[0-9]*\.").match
def _get_so_name(filename):
    """
    Return the soname of a library.

    Soname is useful when there are multiple symlinks to one library.

    The soname is extracted from the output of `objdump -p` (`elfdump -d` on Solaris). If that output contains no
    SONAME entry, the base name of `filename` is returned instead, so that callers joining the result with the
    library's directory end up with the original path.
    """
    # TODO verify that objdump works on other unixes and not Linux only.
    cmd = ["objdump", "-p", filename]
    pattern = r'\s+SONAME\s+([^\s]+)'
    if compat.is_solar:
        cmd = ["elfdump", "-d", filename]
        pattern = r'\s+SONAME\s+[^\s]+\s+([^\s]+)'
    m = re.search(pattern, compat.exec_command(*cmd))
    if m is None:
        # No SONAME entry in the tool's output; fall back to the file's own name instead of failing on `None`.
        return os.path.basename(filename)
    return m.group(1)
#- Python shared library search
def get_python_library_path():
    """
    Find dynamic Python library that will be bundled with frozen executable.

    NOTE: This is a fallback option when the Python executable is likely statically linked with the Python library and
    we need to search more for it. For example, this is the case on Debian/Ubuntu.

    Return full path to Python dynamic library or None when not found.

    We need to know name of the Python dynamic library for the bootloader. Bootloader has to know what library to
    load and not try to guess.

    Some linux distributions (e.g. debian-based) statically link the Python executable to the libpython,
    so bindepend does not include it in its output. In this situation let's try to find it.

    Custom Mac OS builds could possibly also have non-framework style libraries, so this method also checks for that
    variant as well.
    """
    def _first_existing_library(*libdirs):
        # Probe every (directory, candidate name) combination, directories first.
        candidates = (os.path.join(libdir, name) for libdir in libdirs for name in compat.PYDYLIB_NAMES)
        for candidate in candidates:
            if not os.path.exists(candidate):
                continue
            # Resolve potential symbolic links to achieve consistent results with linker-based search; e.g., on
            # POSIX systems, linker resolves unversioned library names (python3.X.so) to versioned ones
            # (libpython3.X.so.1.0) due to former being symbolic links to the latter. See #6831.
            resolved = os.path.realpath(candidate)
            if os.path.exists(resolved):
                return resolved
        return None

    # If this is Microsoft App Store Python, check the compat.base_path first. While compat.python_executable resolves
    # to actual python.exe file, the latter contains a relative library reference that we fail to properly resolve.
    if compat.is_ms_app_store:
        found = _first_existing_library(compat.base_prefix)
        if found:
            return found

    # Try to get Python library name from the Python executable. It assumes that Python library is not statically
    # linked.
    imported_libraries = get_imports(compat.python_executable)  # (name, fullpath) tuples
    for _, lib_path in imported_libraries:
        if lib_path is None:
            continue  # Skip unresolved imports
        lib_basename = os.path.normcase(os.path.basename(lib_path))
        if any(lib_basename == name for name in compat.PYDYLIB_NAMES):
            # Python library found. Return absolute path to it.
            return lib_path

    # Python library NOT found. Resume searching using alternative methods.

    # Work around for python venv having VERSION.dll rather than pythonXY.dll
    if compat.is_win:
        imports_version_dll = any(os.path.normcase(lib_name) == 'version.dll' for lib_name, _ in imported_libraries)
        if imports_version_dll:
            pydll = 'python%d%d.dll' % sys.version_info[:2]
            return resolve_library_path(pydll, [os.path.dirname(compat.python_executable)])

    # Applies only to non Windows platforms and conda.
    if compat.is_conda:
        # Conda needs to be the first here since it overrules the operating system specific paths.
        found = _first_existing_library(os.path.join(compat.base_prefix, 'lib'))
        if found:
            return found
    elif compat.is_unix:
        for name in compat.PYDYLIB_NAMES:
            found = findLibrary(name)
            if found:
                return found

    if compat.is_darwin or compat.is_linux:
        # On MacPython, Analysis.assemble is able to find the libpython with no additional help, asking for
        # sys.executable dependencies. However, this fails on system python, because the shared library is not listed as
        # a dependency of the binary (most probably it is opened at runtime using some dlopen trickery). This happens on
        # Mac OS when Python is compiled as Framework.
        # Linux using pyenv is similarly linked so that sys.executable dependencies does not yield libpython.so.

        # Python compiled as Framework contains same values in sys.prefix and exec_prefix. That is why we can use just
        # sys.prefix. In virtualenv, PyInstaller is not able to find Python library. We need special care for this case.
        found = _first_existing_library(
            compat.base_prefix,
            os.path.join(compat.base_prefix, 'lib'),
        )
        if found:
            return found

    # Python library NOT found. Return None and let the caller deal with this.
    return None
#- Binary vs data (re)classification
def classify_binary_vs_data(filename):
    """
    Classify the given file as either BINARY or a DATA, using appropriate platform-specific method. Returns 'BINARY'
    or 'DATA' string depending on the determined file type, or None if classification cannot be performed (non-existing
    file, missing tool, and other errors during classification).
    """
    # Only existing regular files can be classified; everything else yields None. The actual work is delegated to
    # the platform-specific implementation.
    return _classify_binary_vs_data(filename) if os.path.isfile(filename) else None
# Platform-specific implementations of `_classify_binary_vs_data(filename)`. Each variant returns 'BINARY', 'DATA',
# or None (classification could not be performed / is not implemented).
if compat.is_linux:

    def _classify_binary_vs_data(filename):
        """
        Linux implementation: ELF signature check, followed by validation with `objdump`.
        """
        # First check for ELF signature, in order to avoid calling `objdump` on every data file, which can be costly.
        try:
            with open(filename, 'rb') as fp:
                sig = fp.read(4)
        except Exception:
            return None

        if sig != b"\x7FELF":
            return "DATA"

        # Verify the binary by checking if `objdump` recognizes the file. The preceding ELF signature check should
        # ensure that this is an ELF file, while this check should ensure that it is a valid ELF file. In the future,
        # we could try checking that the architecture matches the running platform.
        cmd_args = ['objdump', '-a', filename]
        try:
            p = subprocess.run(
                cmd_args,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.DEVNULL,
                encoding='utf8',
            )
        except Exception:
            return None  # Failed to run `objdump` or `objdump` unavailable.

        return 'BINARY' if p.returncode == 0 else 'DATA'

elif compat.is_win:

    def _classify_binary_vs_data(filename):
        """
        Windows implementation: a file is a binary if `pefile` can open it.
        """
        import pefile

        try:
            pe = pefile.PE(filename, fast_load=True)
        except Exception:
            # TODO: catch only `pefile.PEFormatError`?
            return 'DATA'

        # We only needed to know that the file parses; release it right away so that the handle is not leaked.
        pe.close()
        return 'BINARY'

elif compat.is_darwin:

    def _classify_binary_vs_data(filename):
        """
        macOS implementation: a file is a binary if `macholib` can open it.
        """
        import macholib.MachO

        try:
            macho = macholib.MachO.MachO(filename)  # noqa: F841
            return 'BINARY'
        except Exception:
            # TODO: catch only `ValueError`?
            pass

        return 'DATA'

else:

    def _classify_binary_vs_data(filename):
        """
        Fall-back for platforms without an implementation; always returns None.
        """
        # Classification not implemented for the platform.
        return None