From e047b2d5676f1ce0864c794333094ffe50e1ac94 Mon Sep 17 00:00:00 2001 From: Jakob Lechner Date: Fri, 19 Sep 2025 16:43:25 +0200 Subject: [PATCH] initial commit --- .envrc | 1 + .gitignore | 3 + ...add-support-for-non-rootfs-initramfs.patch | 150 +++++++++++ flake.lock | 27 ++ flake.nix | 31 +++ pxe.nix | 241 ++++++++++++++++++ run.sh | 24 ++ shell.nix | 6 + 8 files changed, 483 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 0001-mnt-add-support-for-non-rootfs-initramfs.patch create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 pxe.nix create mode 100755 run.sh create mode 100644 shell.nix diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..327f8c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.env +.direnv +result diff --git a/0001-mnt-add-support-for-non-rootfs-initramfs.patch b/0001-mnt-add-support-for-non-rootfs-initramfs.patch new file mode 100644 index 0000000..650d586 --- /dev/null +++ b/0001-mnt-add-support-for-non-rootfs-initramfs.patch @@ -0,0 +1,150 @@ +From b01222f1a1c6da4106f725366c54b20b8e8c6808 Mon Sep 17 00:00:00 2001 +From: Ignat Korchagin +Date: Tue, 31 Mar 2020 13:40:17 +0100 +Subject: [PATCH] mnt: add support for non-rootfs initramfs + +The main need for this is to support container runtimes on stateless Linux +system (pivot_root system call from initramfs). + +Normally, the task of initramfs is to mount and switch to a "real" root +filesystem. However, on stateless systems (booting over the network) it is +just convenient to have your "real" filesystem as initramfs from the start. + +This, however, breaks different container runtimes, because they usually +use pivot_root system call after creating their mount namespace. 
But +pivot_root does not work from initramfs, because initramfs runs from +rootfs, which is the root of the mount tree and can't be unmounted. + +One workaround is to do: + + mount --bind / / + +However, that defeats one of the purposes of using pivot_root in the +cloned containers: get rid of host root filesystem, should the code somehow +escape the chroot. + +There is a way to solve this problem from userspace, but it is much more +cumbersome: + * either have to create a multilayered archive for initramfs, where the + outer layer creates a tmpfs filesystem and unpacks the inner layer, + switches root and does not forget to properly clean up the old rootfs + * or we need to use keepinitrd kernel cmdline option, unpack initramfs + to rootfs, run a script to create our target tmpfs root, unpack the + same initramfs there, switch root to it and again properly clean up + the old root, thus unpacking the same archive twice and also wasting + memory, because the kernel stores compressed initramfs image + indefinitely. + +With this change we can ask the kernel (by specifying nonroot_initramfs +kernel cmdline option) to create a "leaf" tmpfs mount for us and switch +root to it before the initramfs handling code, so initramfs gets unpacked +directly into the "leaf" tmpfs with rootfs being empty and no need to +clean up anything. + +This also brings the behaviour in line with the older style initrd, where +the initrd is located on some leaf filesystem in the mount tree and rootfs +remains empty.
+ +Co-developed-by: Graham Christensen +Signed-off-by: Graham Christensen +Tested-by: Graham Christensen +Signed-off-by: Ignat Korchagin +--- + .../admin-guide/kernel-parameters.txt | 7 +++ + fs/namespace.c | 48 +++++++++++++++++++ + 2 files changed, 55 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 4ad60e127e04..0c76cc7a4fc5 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4024,6 +4024,13 @@ + shutdown the other cpus. Instead use the REBOOT_VECTOR + irq. + ++ nonroot_initramfs ++ [KNL] Create an additional tmpfs filesystem under rootfs ++ and unpack initramfs there instead of the rootfs itself. ++ This is useful for stateless systems, which run directly ++ from initramfs, create mount namespaces and use ++ "pivot_root" system call. ++ + nopat [X86,EARLY] Disable PAT (page attribute table extension of + pagetables) support. + +diff --git a/fs/namespace.c b/fs/namespace.c +index e04a9e9e3f14..2f93940910aa 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -18,6 +18,7 @@ + #include <linux/cred.h> + #include <linux/idr.h> + #include <linux/init.h> /* init_rootfs */ ++#include <linux/init_syscalls.h> /* init_chdir, init_chroot, init_mkdir */ + #include <linux/fs_struct.h> /* get_fs_root et.al.
*/ + #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ + #include <linux/file.h> +@@ -4403,6 +4404,49 @@ static void __init init_mount_tree(void) + set_fs_root(current->fs, &root); + } + ++#if IS_ENABLED(CONFIG_TMPFS) ++static int __initdata nonroot_initramfs; ++ ++static int __init nonroot_initramfs_param(char *str) ++{ ++ if (*str) ++ return 0; ++ nonroot_initramfs = 1; ++ return 1; ++} ++__setup("nonroot_initramfs", nonroot_initramfs_param); ++ ++static void __init init_nonroot_initramfs(void) ++{ ++ int err; ++ ++ if (!nonroot_initramfs) ++ return; ++ ++ err = init_mkdir("/root", 0700); ++ if (err < 0) ++ goto out; ++ ++ err = init_mount("tmpfs", "/root", "tmpfs", 0, NULL); ++ if (err) ++ goto out; ++ ++ err = init_chdir("/root"); ++ if (err) ++ goto out; ++ ++ err = init_mount(".", "/", NULL, MS_MOVE, NULL); ++ if (err) ++ goto out; ++ ++ err = init_chroot("."); ++ if (!err) ++ return; ++out: ++ pr_warn("Failed to create a non-root filesystem for initramfs\n"); ++} ++#endif /* IS_ENABLED(CONFIG_TMPFS) */ ++ + void __init mnt_init(void) + { + int err; +@@ -4436,6 +4480,10 @@ void __init mnt_init(void) + shmem_init(); + init_rootfs(); + init_mount_tree(); ++ ++#if IS_ENABLED(CONFIG_TMPFS) ++ init_nonroot_initramfs(); ++#endif + } + + void put_mnt_ns(struct mnt_namespace *ns) +-- +2.43.0 + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..deff97f --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1759735786, + "narHash": "sha256-a0+h02lyP2KwSNrZz4wLJTu9ikujNsTWIC874Bv7IJ0=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "20c4598c84a671783f741e02bf05cbfaf4907cff", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..cb22cc5 --- /dev/null +++ b/flake.nix @@
-0,0 +1,31 @@ +{ + description = "Pixiecore VM boot system (flake version)"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixos-25.05"; + }; + + outputs = { nixpkgs, ... }: + let + system = "x86_64-linux"; + + pkgs = import nixpkgs { + inherit system; + }; + in + { + packages.${system}.pxe = import ./pxe.nix { inherit pkgs nixpkgs; }; + + /* + # If you also want to use `nixos-rebuild`: + nixosConfigurations.vm = nixpkgs.lib.nixosSystem { + inherit system; + modules = [ + ({ pkgs, ... }: { + imports = []; + }) + ]; + }; + */ + }; +} diff --git a/pxe.nix b/pxe.nix new file mode 100644 index 0000000..4485eb3 --- /dev/null +++ b/pxe.nix @@ -0,0 +1,241 @@ +{ pkgs, nixpkgs }: + +let + sys = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ({ pkgs, config, lib, ... }: { + # your complete NixOS module remains unchanged … + config = let + path = pkgs.lib.makeBinPath config.boot.initrd.systemd.initrdBin; + in + { + console.keyMap = "neo"; + + fileSystems."/" = { + fsType = "tmpfs"; + options = [ "mode=0755" ]; + }; + + systemd.repart.partitions = { + "10-esp" = { + Type = "esp"; + SizeMinBytes = "1G"; + SizeMaxBytes = "1G"; + }; + "20-btrfs" = { + Type = "linux-generic"; + }; + }; + + boot.loader.grub.enable = false; + boot.initrd = { + availableKernelModules = [ + "ahci" + "ata_piix" + "nvme" + "pata_marvell" + "sata_nv" + "sata_sis" + "sata_uli" + "sata_via" + "scsi_mod" + "sd_mod" + "sg" + "virtio_blk" + "virtio_pci" + "virtio_scsi" + "virtio_net" + "qxl" + ]; + kernelModules = [ + "loop" + "btrfs" + "zram" + ]; + systemd = { + enable = true; + repart = { + enable = true; + empty = "require"; + #empty = "force"; + device = "/dev/vda"; + discard = true; + }; + root = "fstab"; + + initrdBin = [ + pkgs.btrfs-progs + pkgs.nixStatic + pkgs.systemd + pkgs.util-linux + pkgs.git + ]; + + storePaths = [ + { + source = "${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt"; + target = "/etc/ssl/certs/ca-bundle.crt"; + } + { + source =
"${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt"; + target = "/etc/ssl/certs/ca-certificates.crt"; + } + { + source = "${pkgs.ncurses}/share/terminfo"; + target = "/run/current-system/sw/share/terminfo"; + } + { + source = pkgs.writeText "nix.conf" '' + experimental-features = nix-command flakes + download-buffer-size = 536870912 + max-jobs = 1 + build-users-group = + sandbox = true + ''; + target = "/etc/nix/nix.conf"; + } + pkgs.git + ]; + + network = { + enable = true; + wait-online.enable = true; + networks."10-lan" = { + enable = true; + matchConfig.Name = "enp1s0"; + address = [ "192.168.122.110/24" ]; + gateway = [ "192.168.122.1" ]; + linkConfig.RequiredForOnline = "routable"; + }; + }; + + mounts = [ + { + type = "btrfs"; + what = "/dev/vda2"; + where = "/sysroot/nix"; + options = "nodev,noatime,compress-force=zstd:1,discard=async,subvol=@nix"; + before = [ "initrd-root-fs.target" ]; + } + { + type = "btrfs"; + what = "/dev/vda2"; + where = "/sysroot/root"; + options = "nodev,noatime,compress-force=zstd:1,discard=async,subvol=@root"; + before = [ "initrd-root-fs.target" ]; + } + ]; + + services = { + initrd-parse-etc = { + after = ["initrd-find-nixos-closure.service"]; + }; + + systemd-tmpfiles-setup-sysroot = { + after = ["initrd-find-nixos-closure.service"]; + unitConfig.RequiresMountsFor = lib.mkForce ["/sysroot" "/sysroot/nix" ]; + }; + + zramswap = { + description = "Create zram swap"; + before = ["initrd-find-nixos-closure.service"]; + script = '' + export PATH="$PATH:${path}" + mem_total_kb=$(sed -n -r 's/^MemTotal:\s*([0-9]*) kB$/\1/p' /proc/meminfo) + zramctl /dev/zram0 --algorithm zstd --size "$((mem_total_kb / 10 * 6))KiB" + mkswap -U clear /dev/zram0 + swapon --discard --priority 100 /dev/zram0 + ''; + }; + + emergency.serviceConfig.AmbientCapabilities = "~"; + + initrd-find-nixos-closure = { + description = lib.mkForce "Build NixOS closure"; + after = [ "dbus.service" ]; + bindsTo = ["initrd-root-fs.target"]; + requires = [ "dbus.service" ]; + 
unitConfig.RequiresMountsFor = lib.mkForce ["/sysroot/nix" "/sysroot/root"]; + serviceConfig = { + KillMode = "process"; + RemainAfterExit = true; + StandardOutput = "tty"; + TTYPath = "/dev/tty1"; + }; + script = lib.mkForce '' + export PATH="$PATH:${path}" + set -e + + export HOME=~ + mount --bind /sysroot/root "$HOME" + + # Figure out what closure to boot + closure= + for o in $(< /proc/cmdline); do + case $o in + init=*) + closure="$(echo "$o" | cut -d= -f 2-)" + ;; + esac + done + + # Sanity check + if [ -z "''${closure:-}" ]; then + echo 'No init= parameter on the kernel command line' >&2 + exit 1 + fi + + nix build \ + -o /sysroot/nixos-closure \ + --store /sysroot \ + --refresh \ + "$closure" + + umount "$HOME" + ''; + }; + }; + }; + }; + }; + }) + ]; + }; + + run-pixiecore = let + inherit (sys.config.system) build; + inherit (sys) pkgs; + inherit (pkgs) lib; + kernel = pkgs.linux.overrideAttrs (old: { + patches = old.patches ++ [./0001-mnt-add-support-for-non-rootfs-initramfs.patch]; + }); + kernelImage = "${kernel}/bzImage"; + initrd = "${build.initialRamdisk}/initrd"; + cmdLine = lib.strings.concatStringsSep " " [ + "loglevel=4" + "systemd.setenv=SYSTEMD_SULOGIN_FORCE=1" + "rd.systemd.debug_shell" + "nonroot_initramfs" + "init=git+https://git.jalr.de/jalr/nixos-configuration.git?ref=vm#nixosConfigurations.vm.config.system.build.toplevel" + ]; + in ( + pkgs.writeShellApplication { + name = "pixiecore"; + runtimeInputs = [ pkgs.pixiecore ]; + text = lib.strings.concatStringsSep " " [ + "exec ${pkgs.pixiecore}/bin/pixiecore" + "boot ${kernelImage} ${initrd}" + "--cmdline '${cmdLine}'" + "--debug" + "--dhcp-no-bind" + "--port 64172" + "--status-port 64172" + ''"$@"'' + ]; + } + ).overrideAttrs (old: { + meta.mainProgram = "pixiecore"; + }); +in + run-pixiecore diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..b3d4ae6 --- /dev/null +++ b/run.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +source .env + +script="$(mktemp)" +files="$(mktemp)" 
+result="$(mktemp -d)" + +git ls-files > "$files" + +trap 'rm -f "$script" "$files"; rm -rf "$result"' EXIT + +cat > "$script" << EOF + if nix build .#pxe -o "$result/out"; then + [ "$LIBVIRT_DOMAIN" ] && virsh reset $LIBVIRT_DOMAIN + "$result/out/bin/pixiecore" + else + echo "Build failed" >&2 + exit 1 + fi +EOF +chmod +x "$script" + +watchexec -w . -r --filter-file "$files" -- "$script" diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..39110b0 --- /dev/null +++ b/shell.nix @@ -0,0 +1,6 @@ +{ pkgs ? import <nixpkgs> {} }: + pkgs.mkShell { + nativeBuildInputs = with pkgs; [ + watchexec + ]; +}