mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
synced 2024-12-17 20:55:27 +00:00
ee8c336ab3
Flesh out the de-duplication logic in a separate script. The copy-firmware.sh is already complex enough and de-duplication doesn't really fit in there. In the process we migrate away from the open-coded `ln --relative`. We also avoid touching symlinks that are not created by rdfind. Otherwise we end up "fixing" the folder-to-folder symlinks (created earlier in the process) and things explode. As a result we also get a few bonuses: - the COPYOPTS shell injection is gone - the variable was never used - people can dedup as a separate step if/when they choose to do so Aside: based on the noise in git log and around distros ... I'm wondering if having the de-duplication as opt-in would have been better. Is it too late to change, or has the ship sailed? Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
170 lines
5.5 KiB
Python
Executable File
170 lines
5.5 KiB
Python
Executable File
#!/usr/bin/python3
|
|
|
|
import os, re, sys
|
|
from io import open
|
|
|
|
|
|
def list_whence():
    """Yield every name that the WHENCE file in the current directory mentions.

    Three kinds of lines contribute names:

    * ``RawFile:``, ``File:`` and ``Source:`` lines -- the text between the
      first and last double quote when the value is quoted, otherwise the
      first run of non-whitespace characters (which may be empty).
    * ``Licence:``/``License:`` lines of the form ``... See X, Y and Z for
      details`` -- each of the listed names.
    * ``Licence:``/``License:`` lines holding a single word -- that word,
      unless it contains "unknown" or "distributable" (case-insensitive).
    """
    quoted_entry = re.compile(r'(?:RawFile|File|Source):\s*"(.*)"')
    bare_entry = re.compile(r"(?:RawFile|File|Source):\s*(\S*)")
    licence_entry = re.compile(
        r"Licen[cs]e: (?:.*\bSee (.*) for details\.?|(\S*))\n"
    )
    not_a_filename = re.compile(r"unknown|distributable", re.IGNORECASE)

    with open("WHENCE", encoding="utf-8") as whence:
        for line in whence:
            # The quoted form takes precedence over the bare one.
            entry = quoted_entry.match(line) or bare_entry.match(line)
            if entry:
                yield entry.group(1)
                continue

            licence = licence_entry.match(line)
            if not licence:
                continue

            referenced, single_word = licence.groups()
            if referenced:
                yield from re.split(r", | and ", referenced)
            elif single_word and not not_a_filename.search(single_word):
                # Just one word - may or may not be a filename
                yield single_word
|
|
|
|
|
|
def list_whence_files():
    """Yield the value of each ``File:``/``RawFile:`` line in WHENCE.

    The value is everything after the tag and any whitespace following it,
    up to the end of the line.  Backslash-escaped spaces are turned into
    plain spaces and all double quotes are removed.
    """
    file_entry = re.compile(r"(?:RawFile|File):\s*(.*)")

    with open("WHENCE", encoding="utf-8") as whence:
        for found in map(file_entry.match, whence):
            if found is None:
                continue
            unescaped = found.group(1).replace(r"\ ", " ")
            yield unescaped.replace('"', "")
|
|
|
|
|
|
def list_links_list():
    """Yield a ``(linkname, target)`` pair for each ``Link:`` line in WHENCE.

    A link line has the form ``Link: <linkname> -> <target>``.  Both sides
    are stripped of surrounding whitespace, backslash-escaped spaces become
    plain spaces and double quotes are removed.  The target is written
    relative to the link, so it is joined onto the link's directory and
    normalised before being yielded.

    Raises:
        ValueError: if a link line does not contain exactly one ``->``.
    """

    def unquote(field):
        return field.strip().replace(r"\ ", " ").replace('"', "")

    link_entry = re.compile(r"Link:\s*(.*)")

    with open("WHENCE", encoding="utf-8") as whence:
        for line in whence:
            found = link_entry.match(line)
            if not found:
                continue

            raw_link, raw_target = found.group(1).split("->")
            linkname = unquote(raw_link)

            # Link target is relative to the link
            link_dir = os.path.dirname(linkname)
            resolved = os.path.normpath(os.path.join(link_dir, unquote(raw_target)))

            yield (linkname, resolved)
|
|
|
|
|
|
def list_git():
    """Yield each line printed by ``git ls-files``, without its newline."""
    with os.popen("git ls-files") as listing:
        yield from (entry.rstrip("\n") for entry in listing)
|
|
|
|
|
|
def _duplicates(names):
    """Return the set of entries that occur more than once in *names*."""
    seen = set()
    repeated = set()
    for name in names:
        if name in seen:
            repeated.add(name)
        seen.add(name)
    return repeated


def main():
    """Cross-check the WHENCE file against the files tracked by git.

    Each inconsistency is written to stderr as one line starting with
    ``E:``.  The checks are, in order:

    * ``File:`` entries that end in ``/`` (i.e. name a directory);
    * ``File:`` or ``Link:`` entries listed more than once;
    * ``File:`` entries that are symlinks on disk;
    * ``Link:`` entries whose link already exists as a symlink on disk;
    * links whose target is itself a link;
    * names mentioned in WHENCE that git does not track;
    * links whose target is neither a tracked file nor a directory
      containing one;
    * tracked files that WHENCE does not mention.

    Returns:
        0 if no problem was found, 1 otherwise.
    """
    ret = 0
    whence_list = list(list_whence())
    whence_files = list(list_whence_files())
    links_list = list(list_links_list())
    # A comprehension (rather than zip(*links_list)[0]) keeps this working
    # when WHENCE contains no Link: entries at all.
    whence_links = [link for link, _target in links_list]
    known_files = {name for name in whence_list if not name.endswith("/")} | {
        ".codespell.cfg",
        ".editorconfig",
        ".gitignore",
        ".gitlab-ci.yml",
        ".pre-commit-config.yaml",
        "Dockerfile",
        "Makefile",
        "README.md",
        "WHENCE",
        "build_packages.py",
        "check_whence.py",
        "configure",
        "contrib/process_linux_firmware.py",
        "contrib/templates/debian.changelog",
        "contrib/templates/debian.control",
        "contrib/templates/debian.copyright",
        "contrib/templates/rpm.spec",
        "copy-firmware.sh",
        "dedup-firmware.sh",
    }
    # A tuple so that it can be handed straight to str.startswith().
    known_prefixes = tuple(name for name in whence_list if name.endswith("/"))
    git_files = set(list_git())

    # The loops below iterate in sorted order so that the report is
    # reproducible from one run to the next.
    for name in sorted({name for name in whence_files if name.endswith("/")}):
        sys.stderr.write("E: %s listed in WHENCE as File, but is directory\n" % name)
        ret = 1

    for name in sorted(_duplicates(whence_files)):
        sys.stderr.write("E: %s listed in WHENCE twice\n" % name)
        ret = 1

    for name in sorted(_duplicates(whence_links)):
        sys.stderr.write("E: %s listed in WHENCE twice\n" % name)
        ret = 1

    for name in sorted({name for name in whence_files if os.path.islink(name)}):
        sys.stderr.write("E: %s listed in WHENCE as File, but is a symlink\n" % name)
        ret = 1

    for name in sorted({link for link in whence_links if os.path.islink(link)}):
        sys.stderr.write("E: %s listed in WHENCE as Link, is in tree\n" % name)
        ret = 1

    # A link must not point at another link.
    invalid_targets = set(whence_links)
    for link, target in sorted(links_list):
        if target in invalid_targets:
            sys.stderr.write(
                "E: target %s of link %s is also a link\n" % (target, link)
            )
            ret = 1

    for name in sorted(known_files - git_files):
        sys.stderr.write("E: %s listed in WHENCE does not exist\n" % name)
        ret = 1

    # A link can point to a file...
    valid_targets = set(git_files)

    # ... or to a directory: add every ancestor directory of a tracked file.
    for path in git_files:
        parent = os.path.dirname(path)
        while parent != "":
            valid_targets.add(parent)
            parent = os.path.dirname(parent)

    for link, target in sorted(links_list):
        if target not in valid_targets:
            sys.stderr.write(
                "E: target %s of link %s in WHENCE does not exist\n" % (target, link)
            )
            ret = 1

    for name in sorted(git_files - known_files):
        # Ignore subdirectory changelogs and GPG detached signatures
        if name.endswith("/ChangeLog") or (
            name.endswith(".asc") and name[:-4] in known_files
        ):
            continue

        # Ignore unknown files in known directories
        if name.startswith(known_prefixes):
            continue

        sys.stderr.write("E: %s not listed in WHENCE\n" % name)
        ret = 1

    return ret
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|