public inbox for isar-users@googlegroups.com
 help / color / mirror / Atom feed
* [PATCH] deb-dl-dir: remove excessive calls to dpkg-deb in debsrc_download
@ 2025-03-05 13:11 'Cedric Hombourger' via isar-users
  2025-03-05 13:57 ` 'Jan Kiszka' via isar-users
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: 'Cedric Hombourger' via isar-users @ 2025-03-05 13:11 UTC (permalink / raw)
  To: isar-users; +Cc: ubely, Cedric Hombourger

Several calls to dpkg-deb are made for each single .deb file found in
downloads to parse individual fields. This approach is terribly slow
when a large number of .deb files are found. Use apt-ftparchive to
produce an index of packages that were found and a simple awk script
to produce a (sorted) list of source package names and their versions.
Also avoid using sed to remove Epoch from the version when we are
trying to determine the name of the .dsc file: we instead use a simple
POSIX parameter expansion to remove everything up to and including the first colon.

Signed-off-by: Cedric Hombourger <cedric.hombourger@siemens.com>
---
 meta/classes/deb-dl-dir.bbclass | 62 +++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/meta/classes/deb-dl-dir.bbclass b/meta/classes/deb-dl-dir.bbclass
index 7ebd057e..53ce4538 100644
--- a/meta/classes/deb-dl-dir.bbclass
+++ b/meta/classes/deb-dl-dir.bbclass
@@ -5,23 +5,6 @@
 
 inherit repository
 
-is_not_part_of_current_build() {
-    local package="$( dpkg-deb --show --showformat '${Package}' "${1}" )"
-    local arch="$( dpkg-deb --show --showformat '${Architecture}' "${1}" )"
-    local version="$( dpkg-deb --show --showformat '${Version}' "${1}" )"
-    # Since we are parsing all the debs in DEBDIR, we can to some extend
-    # try to eliminate some debs that are not part of the current multiconfig
-    # build using the below method.
-    local output="$( grep -xhs ".* status installed ${package}:${arch} ${version}" \
-            "${IMAGE_ROOTFS}"/var/log/dpkg.log \
-            "${SCHROOT_HOST_DIR}"/var/log/dpkg.log \
-            "${SCHROOT_TARGET_DIR}"/var/log/dpkg.log \
-            "${SCHROOT_HOST_DIR}"/tmp/dpkg_common.log \
-            "${SCHROOT_TARGET_DIR}"/tmp/dpkg_common.log | head -1 )"
-
-    [ -z "${output}" ]
-}
-
 debsrc_do_mounts() {
     sudo -s <<EOSUDO
     set -e
@@ -54,16 +37,41 @@ debsrc_download() {
     ( flock 9
     set -e
     printenv | grep -q BB_VERBOSE_LOGS && set -x
-    find "${rootfs}/var/cache/apt/archives/" -maxdepth 1 -type f -iname '*\.deb' | while read package; do
-        is_not_part_of_current_build "${package}" && continue
-        local src="$( dpkg-deb --show --showformat '${source:Package}' "${package}" )"
-        local version="$( dpkg-deb --show --showformat '${source:Version}' "${package}" )"
-        local dscname="$(echo ${src}_${version} | sed -e 's/_[0-9]\+:/_/')"
-        local dscfile=$(find "${DEBSRCDIR}"/"${rootfs_distro}" -name "${dscname}.dsc")
-        [ -n "$dscfile" ] && continue
-
-        sudo -E chroot --userspec=$( id -u ):$( id -g ) ${rootfs} \
-            sh -c ' mkdir -p "/deb-src/${1}/${2}" && cd "/deb-src/${1}/${2}" && apt-get -y --download-only --only-source source "$2"="$3" ' download-src "${rootfs_distro}" "${src}" "${version}"
+
+    # Use apt-ftparchive to scan all .deb files found in the download directory
+    # and produce an index that we can "parse" with awk. This is much faster
+    # than parsing each .deb file individually using dpkg-deb. Lines from the
+    # index we need are:
+    #
+    #    Package: <binary-name>
+    #    Version: <binary-version>
+    #    Source: <source-name> (<source-version>)
+    #
+    # If Source is omitted, then <source-name>=<binary-name> and
+    # if <source-version> is not specified then it is <binary-version>.
+    # The awk script handles these optional fields. It looks for Size: as a
+    # trigger to print the source,version tuple
+
+    apt-ftparchive --md5=no --sha1=no --sha256=no --sha512=no \
+                   -a "${DISTRO_ARCH}" packages \
+                   "${rootfs}/var/cache/apt/archives" \
+    | awk '/^Package:/ { s=$2; }
+           /^Version:/ { v=$2; next }
+           /^Source:/ { s=$2; if ($3 ~ /^\(/) v=substr($3, 2, length($3)-2) }
+           /^Size:/ { print s, v}' \
+    | sort -u \
+    | while read src version; do
+        # Name of the .dsc file does not include Epoch, remove it before checking
+        # if sources were already downloaded. Avoid using sed here to reduce the
+        # number of processes being spawned by this function: we assume that the
+        # version is correctly formatted and simply strip everything up to the
+        # first colon
+        dscname="${src}_${version#*:}.dsc"
+        [ -f "${DEBSRCDIR}"/"${rootfs_distro}"/"${src}"/"${dscname}" ] || {
+            # use apt-get source to download sources in DEBSRCDIR
+            sudo -E chroot --userspec=$( id -u ):$( id -g ) ${rootfs} \
+                sh -c ' mkdir -p "/deb-src/${1}/${2}" && cd "/deb-src/${1}/${2}" && apt-get -y --download-only --only-source source "$2"="$3" ' download-src "${rootfs_distro}" "${src}" "${version}"
+        }
     done
     ) 9>"${DEBSRCDIR}/${rootfs_distro}.lock"
 
-- 
2.39.5

-- 
You received this message because you are subscribed to the Google Groups "isar-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to isar-users+unsubscribe@googlegroups.com.
To view this discussion visit https://groups.google.com/d/msgid/isar-users/20250305131142.2717692-1-cedric.hombourger%40siemens.com.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-03-27 10:34 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-05 13:11 [PATCH] deb-dl-dir: remove excessive calls to dpkg-deb in debsrc_download 'Cedric Hombourger' via isar-users
2025-03-05 13:57 ` 'Jan Kiszka' via isar-users
2025-03-05 15:08   ` 'cedric.hombourger@siemens.com' via isar-users
2025-03-05 17:22 ` 'Niedermayr, BENEDIKT' via isar-users
2025-03-05 17:24   ` 'Niedermayr, BENEDIKT' via isar-users
2025-03-10 11:06 ` Srinuvasan Arjunan
2025-03-22  6:15 ` [PATCH v2 0/1] " 'Cedric Hombourger' via isar-users
2025-03-22  6:15   ` [PATCH v2 1/1] " 'Cedric Hombourger' via isar-users
2025-03-27 10:34   ` [PATCH v2 0/1] " Uladzimir Bely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox