initial commit

This commit is contained in:
Jakob Lechner 2025-09-19 16:43:25 +02:00
commit e047b2d567
8 changed files with 483 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use nix

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/.env
.direnv
result

View file

@ -0,0 +1,150 @@
From b01222f1a1c6da4106f725366c54b20b8e8c6808 Mon Sep 17 00:00:00 2001
From: Ignat Korchagin <ignat@cloudflare.com>
Date: Tue, 31 Mar 2020 13:40:17 +0100
Subject: [PATCH] mnt: add support for non-rootfs initramfs
The main need for this is to support container runtimes on stateless Linux
systems (pivot_root system call from initramfs).
Normally, the task of initramfs is to mount and switch to a "real" root
filesystem. However, on stateless systems (booting over the network) it is
just convenient to have your "real" filesystem as initramfs from the start.
This, however, breaks different container runtimes, because they usually
use pivot_root system call after creating their mount namespace. But
pivot_root does not work from initramfs, because initramfs runs from
rootfs, which is the root of the mount tree and can't be unmounted.
One workaround is to do:
mount --bind / /
However, that defeats one of the purposes of using pivot_root in the
cloned containers: getting rid of the host root filesystem, should the code
somehow escape the chroot.
There is a way to solve this problem from userspace, but it is much more
cumbersome:
* either we have to create a multilayered archive for initramfs, where the
outer layer creates a tmpfs filesystem, unpacks the inner layer,
switches root and does not forget to properly clean up the old rootfs
* or we need to use the keepinitrd kernel cmdline option, unpack initramfs
to rootfs, run a script to create our target tmpfs root, unpack the
same initramfs there, switch root to it and again properly clean up
the old root, thus unpacking the same archive twice and also wasting
memory, because the kernel stores the compressed initramfs image
indefinitely.
With this change we can ask the kernel (by specifying nonroot_initramfs
kernel cmdline option) to create a "leaf" tmpfs mount for us and switch
root to it before the initramfs handling code, so initramfs gets unpacked
directly into the "leaf" tmpfs with rootfs being empty and no need to
clean up anything.
This also brings the behaviour in line with the older style initrd, where
the initrd is located on some leaf filesystem in the mount tree and rootfs
remains empty.
Co-developed-by: Graham Christensen <graham@determinate.systems>
Signed-off-by: Graham Christensen <graham@determinate.systems>
Tested-by: Graham Christensen <graham@determinate.systems>
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
---
.../admin-guide/kernel-parameters.txt | 7 +++
fs/namespace.c | 48 +++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4ad60e127e04..0c76cc7a4fc5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4024,6 +4024,13 @@
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
+ nonroot_initramfs
+ [KNL] Create an additional tmpfs filesystem under rootfs
+ and unpack initramfs there instead of the rootfs itself.
+ This is useful for stateless systems, which run directly
+ from initramfs, create mount namespaces and use
+ "pivot_root" system call.
+
nopat [X86,EARLY] Disable PAT (page attribute table extension of
pagetables) support.
diff --git a/fs/namespace.c b/fs/namespace.c
index e04a9e9e3f14..2f93940910aa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -18,6 +18,7 @@
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h> /* init_rootfs */
+#include <linux/init_syscalls.h> /* init_chdir, init_chroot, init_mkdir */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/file.h>
@@ -4403,6 +4404,49 @@ static void __init init_mount_tree(void)
set_fs_root(current->fs, &root);
}
+#if IS_ENABLED(CONFIG_TMPFS)
+static int __initdata nonroot_initramfs; /* set to 1 by nonroot_initramfs_param(), read by init_nonroot_initramfs() */
+
+static int __init nonroot_initramfs_param(char *str) /* handler registered below for "nonroot_initramfs" */
+{
+ if (*str) /* str is not empty, i.e. something follows the option name: return 0 without setting the flag */
+ return 0;
+ nonroot_initramfs = 1; /* bare option seen: request the extra tmpfs */
+ return 1;
+}
+__setup("nonroot_initramfs", nonroot_initramfs_param);
+
+static void __init init_nonroot_initramfs(void) /* mounts a tmpfs on /root, moves it onto "/" and chroots into it; does nothing unless the flag is set */
+{
+ int err;
+
+ if (!nonroot_initramfs) /* option was not given on the command line */
+ return;
+
+ err = init_mkdir("/root", 0700); /* directory used as the tmpfs mount point */
+ if (err < 0)
+ goto out;
+
+ err = init_mount("tmpfs", "/root", "tmpfs", 0, NULL); /* new tmpfs on /root, no flags, no mount data */
+ if (err)
+ goto out;
+
+ err = init_chdir("/root"); /* so that "." in the next two calls names the new tmpfs */
+ if (err)
+ goto out;
+
+ err = init_mount(".", "/", NULL, MS_MOVE, NULL); /* MS_MOVE: relocate the tmpfs mount from /root onto "/" */
+ if (err)
+ goto out;
+
+ err = init_chroot("."); /* make the moved tmpfs the root directory */
+ if (!err)
+ return; /* all steps succeeded */
+out: /* reached on any failed step; only a warning is printed, earlier steps are not undone */
+ pr_warn("Failed to create a non-root filesystem for initramfs\n");
+}
+#endif /* IS_ENABLED(CONFIG_TMPFS) */
+
void __init mnt_init(void)
{
int err;
@@ -4436,6 +4480,10 @@ void __init mnt_init(void)
shmem_init();
init_rootfs();
init_mount_tree();
+
+#if IS_ENABLED(CONFIG_TMPFS)
+ init_nonroot_initramfs();
+#endif
}
void put_mnt_ns(struct mnt_namespace *ns)
--
2.43.0

27
flake.lock generated Normal file
View file

@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1759735786,
"narHash": "sha256-a0+h02lyP2KwSNrZz4wLJTu9ikujNsTWIC874Bv7IJ0=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "20c4598c84a671783f741e02bf05cbfaf4907cff",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-25.05",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

31
flake.nix Normal file
View file

@ -0,0 +1,31 @@
# Flake entry point: exposes the package built by ./pxe.nix as
# `packages.x86_64-linux.pxe`, using nixpkgs from the nixos-25.05 branch.
{
  description = "Pixiecore VM boot system (flake version)";

  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-25.05";

  outputs = { nixpkgs, ... }:
    let
      targetSystem = "x86_64-linux";

      # nixpkgs instantiated for the single supported platform.
      pkgs = import nixpkgs { system = targetSystem; };
    in
    {
      # ./pxe.nix receives both the instantiated package set and the raw
      # nixpkgs input.
      packages.${targetSystem}.pxe = import ./pxe.nix { inherit pkgs nixpkgs; };

      /*
        # If you also want to use `nixos-rebuild`:
        nixosConfigurations.vm = nixpkgs.lib.nixosSystem {
          inherit system;
          modules = [
            ({ pkgs, ... }: {
              imports = [];
            })
          ];
        };
      */
    };
}

241
pxe.nix Normal file
View file

@ -0,0 +1,241 @@
{ pkgs, nixpkgs }:
let
# NixOS system definition. Only its initrd (and the package set it was
# evaluated with) is consumed further down in this file by run-pixiecore.
sys = nixpkgs.lib.nixosSystem {
  system = "x86_64-linux";
  modules = [
    ({ pkgs, config, lib, ... }:
      let
        # bin directories of every initrdBin package, joined for use in PATH;
        # the unit scripts below append this to their PATH.
        initrdBinPath = pkgs.lib.makeBinPath config.boot.initrd.systemd.initrdBin;

        # One CA bundle, installed in the initrd under two file names.
        caBundle = "${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt";

        # Unit names referenced from several places.
        closureUnit = "initrd-find-nixos-closure.service";
        rootFsTarget = "initrd-root-fs.target";

        # Mount unit for one btrfs subvolume of /dev/vda2, ordered before the
        # initrd root filesystem target.
        btrfsSubvolMount = where: subvol: {
          type = "btrfs";
          what = "/dev/vda2";
          inherit where;
          options = "nodev,noatime,compress-force=zstd:1,discard=async,subvol=${subvol}";
          before = [ rootFsTarget ];
        };
      in
      {
        # your complete NixOS module stays unchanged …
        config = {
          console.keyMap = "neo";

          # Root filesystem is a tmpfs.
          fileSystems."/" = {
            fsType = "tmpfs";
            options = [ "mode=0755" ];
          };

          # Partition layout: a fixed 1G ESP followed by a generic Linux
          # partition.
          systemd.repart.partitions = {
            "10-esp" = {
              Type = "esp";
              SizeMinBytes = "1G";
              SizeMaxBytes = "1G";
            };
            "20-btrfs".Type = "linux-generic";
          };

          boot.loader.grub.enable = false;

          boot.initrd.availableKernelModules = [
            "ahci"
            "ata_piix"
            "nvme"
            "pata_marvell"
            "sata_nv"
            "sata_sis"
            "sata_uli"
            "sata_via"
            "scsi_mod"
            "sd_mod"
            "sg"
            "virtio_blk"
            "virtio_pci"
            "virtio_scsi"
            "virtio_net"
            "qxl"
          ];

          boot.initrd.kernelModules = [
            "loop"
            "btrfs"
            "zram"
          ];

          boot.initrd.systemd = {
            enable = true;
            root = "fstab";

            repart = {
              enable = true;
              device = "/dev/vda";
              empty = "require";
              #empty = "force";
              discard = true;
            };

            # Programs made available inside the initrd.
            initrdBin = [
              pkgs.btrfs-progs
              pkgs.nixStatic
              pkgs.systemd
              pkgs.util-linux
              pkgs.git
            ];

            storePaths =
              # The same CA bundle under both file names.
              map (target: { source = caBundle; inherit target; }) [
                "/etc/ssl/certs/ca-bundle.crt"
                "/etc/ssl/certs/ca-certificates.crt"
              ]
              ++ [
                {
                  source = "${pkgs.ncurses}/share/terminfo";
                  target = "/run/current-system/sw/share/terminfo";
                }
                {
                  # nix.conf for the `nix build` run by the closure unit below.
                  source = pkgs.writeText "nix.conf" ''
                    experimental-features = nix-command flakes
                    download-buffer-size = 536870912
                    max-jobs = 1
                    build-users-group =
                    sandbox = true
                  '';
                  target = "/etc/nix/nix.conf";
                }
                pkgs.git
              ];

            # Static address on enp1s0; wait-online is enabled and the link
            # must be routable to count as online.
            network = {
              enable = true;
              wait-online.enable = true;
              networks."10-lan" = {
                enable = true;
                matchConfig.Name = "enp1s0";
                address = [ "192.168.122.110/24" ];
                gateway = [ "192.168.122.1" ];
                linkConfig.RequiredForOnline = "routable";
              };
            };

            mounts = [
              (btrfsSubvolMount "/sysroot/nix" "@nix")
              (btrfsSubvolMount "/sysroot/root" "@root")
            ];

            services = {
              initrd-parse-etc.after = [ closureUnit ];

              systemd-tmpfiles-setup-sysroot = {
                after = [ closureUnit ];
                unitConfig.RequiresMountsFor = lib.mkForce [ "/sysroot" "/sysroot/nix" ];
              };

              # Sizes /dev/zram0 to 6/10 of MemTotal (zstd), formats it as
              # swap and enables it with priority 100.
              # NOTE(review): only an ordering (`before`) is declared here, no
              # wantedBy/requiredBy and no service type; confirm that this
              # unit is actually pulled in and has finished before the closure
              # build starts.
              zramswap = {
                description = "Create zram swap";
                before = [ closureUnit ];
                script = ''
                  export PATH="$PATH:${initrdBinPath}"
                  mem_total_kb=$(sed -n -r 's/^MemTotal:\s*([0-9]*) kB$/\1/p' /proc/meminfo)
                  zramctl /dev/zram0 --algorithm zstd --size "$((mem_total_kb / 10 * 6))KiB"
                  mkswap -U clear /dev/zram0
                  swapon --discard --priority 100 /dev/zram0
                '';
              };

              emergency.serviceConfig.AmbientCapabilities = "~";

              # Description and script are forced over whatever other
              # definitions of this unit exist: the value of init= on the
              # kernel command line is handed to `nix build`, which writes
              # into the store rooted at /sysroot and links the result at
              # /sysroot/nixos-closure. /sysroot/root is bind-mounted over
              # $HOME for the duration of the build.
              initrd-find-nixos-closure = {
                description = lib.mkForce "Build NixOS closure";
                after = [ "dbus.service" ];
                requires = [ "dbus.service" ];
                bindsTo = [ rootFsTarget ];
                unitConfig.RequiresMountsFor = lib.mkForce [ "/sysroot/nix" "/sysroot/root" ];
                serviceConfig = {
                  KillMode = "process";
                  RemainAfterExit = true;
                  # Build output is shown on the first virtual console.
                  StandardOutput = "tty";
                  TTYPath = "/dev/tty1";
                };
                script = lib.mkForce ''
                  export PATH="$PATH:${initrdBinPath}"
                  set -e
                  export HOME=~
                  mount --bind /sysroot/root "$HOME"
                  # Figure out what closure to boot
                  closure=
                  for o in $(< /proc/cmdline); do
                  case $o in
                  init=*)
                  closure="$(echo "$o" | cut -d= -f 2-)"
                  ;;
                  esac
                  done
                  # Sanity check
                  if [ -z "''${closure:-}" ]; then
                  echo 'No init= parameter on the kernel command line' >&2
                  exit 1
                  fi
                  nix build \
                  -o /sysroot/nixos-closure \
                  --store /sysroot \
                  --refresh \
                  "$closure"
                  umount "$HOME"
                '';
              };
            };
          };
        };
      })
  ];
};
# `pixiecore` wrapper script: serves a patched kernel together with the initrd
# of `sys`, passing a fixed kernel command line. Extra arguments given to the
# wrapper are appended to the pixiecore invocation.
run-pixiecore = let
  # Package set the NixOS system was evaluated with (not the outer `pkgs`).
  sysPkgs = sys.pkgs;
  inherit (sysPkgs) lib;
  systemBuild = sys.config.system.build;

  # Kernel from that package set plus the local nonroot_initramfs patch.
  patchedKernel = sysPkgs.linux.overrideAttrs (prev: {
    patches = prev.patches ++ [ ./0001-mnt-add-support-for-non-rootfs-initramfs.patch ];
  });

  bzImage = "${patchedKernel}/bzImage";
  ramdisk = "${systemBuild.initialRamdisk}/initrd";

  # Kernel command line. `nonroot_initramfs` is the option added by the patch
  # above; `init=` carries the flake reference that the initrd's
  # initrd-find-nixos-closure unit reads from /proc/cmdline and builds.
  kernelArgs = lib.concatStringsSep " " [
    "loglevel=4"
    "systemd.setenv=SYSTEMD_SULOGIN_FORCE=1"
    "rd.systemd.debug_shell"
    "nonroot_initramfs"
    "init=git+https://git.jalr.de/jalr/nixos-configuration.git?ref=vm#nixosConfigurations.vm.config.system.build.toplevel"
  ];

  # Words of the single shell command line that makes up the script.
  pixiecoreArgv = [
    "exec ${sysPkgs.pixiecore}/bin/pixiecore"
    "boot ${bzImage} ${ramdisk}"
    "--cmdline '${kernelArgs}'"
    "--debug"
    "--dhcp-no-bind"
    "--port 64172"
    "--status-port 64172"
    "\"$@\""
  ];

  wrapper = sysPkgs.writeShellApplication {
    name = "pixiecore";
    runtimeInputs = [ sysPkgs.pixiecore ];
    text = lib.concatStringsSep " " pixiecoreArgv;
  };
in
  wrapper.overrideAttrs (_: {
    meta.mainProgram = "pixiecore";
  });
in
run-pixiecore

24
run.sh Executable file
View file

@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Rebuild the `.#pxe` flake output and (re)start the resulting pixiecore
# wrapper every time a git-tracked file changes.
#
# Optional configuration is read from ./.env (git-ignored):
#   LIBVIRT_DOMAIN  libvirt domain to `virsh reset` after each successful build

# The file is optional; use an explicit ./ so `source` never searches PATH.
if [ -f ./.env ]; then
  source ./.env
fi

script="$(mktemp)"      # generated helper that watchexec runs
files="$(mktemp)"       # list of tracked files, passed to --filter-file
result="$(mktemp -d)"   # holds the `nix build` out-link

# Install the cleanup before anything else can fail.
trap 'rm -f "$script" "$files"; rm -rf "$result"' EXIT

git ls-files > "$files"

# The here-document is unquoted on purpose: $result and LIBVIRT_DOMAIN are
# expanded NOW and their values are baked into the generated helper.
# The helper gets its own shebang because it is executed directly.
cat > "$script" << EOF
#!/usr/bin/env bash
if nix build .#pxe -o "$result/out"; then
  [ -n "${LIBVIRT_DOMAIN:-}" ] && virsh reset "${LIBVIRT_DOMAIN:-}"
  "$result/out/bin/pixiecore"
else
  echo "Build failed" >&2
  exit 1
fi
EOF
chmod +x "$script"

# -r restarts the helper (and with it pixiecore) on every change.
watchexec -w . -r --filter-file "$files" -- "$script"

6
shell.nix Normal file
View file

@ -0,0 +1,6 @@
# Development shell; `pkgs` defaults to the <nixpkgs> found on NIX_PATH.
{ pkgs ? import <nixpkgs> {} }:

pkgs.mkShell {
  # watchexec is the file watcher invoked by run.sh.
  nativeBuildInputs = [ pkgs.watchexec ];
}