stateful image issue with nvme during parted

325 views
Skip to first unread message

Matt

unread,
Mar 2, 2021, 5:13:20 PM3/2/21
to Warewulf
Hello,

We are trying to deploy an aarch64 cluster with Warewulf using stateful images.  We have been working through the cross-sms on GitHub and have an image built.  We have it to the point where we can at least boot a node stateless, but we are now running into a problem when we try stateful.  We have an NVMe disk in each node but are having a problem during the partitioning and formatting of the drive.

Is there a way to turn on some verbose logging as the drive is being partitioned?

Here is the test node with the print all showing the  partition info

#### node cedar-cn03.cluster ##################################################
      35: NAME       = cedar-cn03,cedar-cn03.cluster
      35: ARCH       = aarch64
      35: BOOTLOADER = nvme0n1
      35: BOOTSTRAPID = 22
      35: DOMAIN     = cluster
      35: FILEIDS    = 14,15
      35: FS         = fstab p1 /boot/efi vfat defaults 0 0,fstab p2 /boot ext4 defaults 0 0,fstab p3 swap swap defaults 0 0,fstab p4 / ext4 defaults 0 0,fstab p5 /data ext4 defaults 0 0,mkfs p1 vfat -n ESP,mkfs p2 ext4 -L boot,mkfs p3 swap,mkfs p4 ext4 -L root,mkfs p5 ext4 -L data,mklabel gpt,mkpart ESP fat32 1MiB 513MiB,mkpart boot ext4 513MiB 1557MiB,mkpart data ext4 69921MiB 100%,mkpart linux-swap 1557MiB 17921MiB,mkpart root ext4 17921MiB 69921MiB,name 1 ESP,name 2 boot,name 3 swap,name 4 root,name 5 data,select /dev/nvme0n1,set 1 boot on


When the node boots it looks like it makes the vfat partition but then fails on the ext4's.
I had to tuck a modprobe in so the nvme drivers get loaded so at least they show up now but now having partition problems.

Hope someone can offer some insight or at least what to modify to see what is actually going on.   Thank you.

Starting the provision handler:

* adhoc-pre                                                                 OK

* filesystems                                                          RUNNING

   * running parted on /dev/nvme0n1                                          OK

   * formatting /dev/nvme0n1p1                                               OK

   * formatting /dev/nvme0n1p2                                            ERROR

                                                                          ERROR

* getvnfs                                                              RUNNING

   * fetching centos8.2 (ID:29)                                              OK

   * extracting                                                           ERROR

   * checksum 81c359a73839814fa2b392c14847763c                          SKIPPED

      * /newroot/sbin/init could not be verified. Retrying.   * fetching centOK8.2 (ID:29)

   * extracting                                                           ERROR

   * checksum 81c359a73839814fa2b392c14847763c                          SKIPPED

      * /newroot/sbin/init could not be verified. Retrying.   * fetching centos8.2 (ID:29)




Matt

unread,
Mar 2, 2021, 6:32:15 PM3/2/21
to Warewulf, Matt
Here is the  partition commands files

[root@cedar-deploy filesystem]# cat fx700disk.cmds
# EFI / GPT Example for NVME device
# Note that partitions on NVME drives are named by appending "pN" to
# the device name, where N is the partition index, and not just the
# partition index.
# Parted specific commands
select /dev/nvme0n1
mklabel gpt
mkpart ESP fat32 1MiB 513MiB
mkpart boot ext4 513MiB 1557MiB
mkpart linux-swap 1557MiB 17921MiB
mkpart root ext4 17921MiB 69921MiB
mkpart data ext4 69921MiB 100%
name 1 ESP
name 2 boot
name 3 swap
name 4 root
name 5 data
set 1 boot on
# mkfs NUMBER FS-TYPE [ARGS...]
mkfs p1 vfat -n ESP
mkfs p2 ext4 -L boot
mkfs p3 swap
mkfs p4 ext4 -L root
mkfs p5 ext4 -L data
# fstab NUMBER fs_file fs_vfstype fs_mntops fs_freq fs_passno
fstab p1 /boot/efi vfat defaults 0 0
fstab p2 /boot ext4 defaults 0 0
fstab p3 swap swap defaults 0 0
fstab p4 / ext4 defaults 0 0
fstab p5 /data ext4 defaults 0 0

I got into recovery console and see the errors 

+ echo select /dev/nvme0n1

+ cut -f 2 -d 

+ SELECT_DISK=/dev/nvme0n1

+ SELECT_RETRY=0

+ [ -z  ]

+ [ ! -b /dev/nvme0n1 ]

+ [ 0 -gt 0 ]

+ [ -z /dev/nvme0n1 ]

+ [ -z ]

+ PARTED_CMDS=mklabel gpt

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100%

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data

+ [ -z /dev/nvme0n1 ]

+ [ -z mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data ]

+ PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data set 1 boot on

+ [ -z /dev/nvme0n1 ]

+ run_parted /dev/nvme0n1 mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data set 1 boot on

+ local SELECT_DISK=/dev/nvme0n1

+ local PARTED_CMDS=mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data set 1 boot on

+ [ -n mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data set 1 boot on ]

+ msg_gray    * running parted on /dev/nvme0n1

+ echo -ne \033[1;30m

+ echo -ne    * running parted on /dev/nvme0n1

+ echo -ne \033[0;39m

+ /usr/sbin/parted --align=optimal --script /dev/nvme0n1 mklabel gpt mkpart ESP fat32 1MiB 513MiB mkpart boot ext4 513MiB 1557MiB mkpart linux-swap 1557MiB 17921MiB mkpart root ext4 17921MiB 69921MiB mkpart data ext4 69921MiB 100% name 1 ESP name 2 boot name 3 swap name 4 root name 5 data set 1 boot on

+ wwsuccess

+ echo -en \033[78G

+ msg_green OK\n

+ echo -ne \033[1;32m

+ echo -ne OK\n

+ echo -ne \033[0;39m

+ PARTED_CMDS=

+ unset IFS

+ run_mkfs /dev/nvme0n1 p1 vfat -n ESP

+ local SELECT_DISK=/dev/nvme0n1

+ local PARTITION=p1

+ local TYPE=vfat

+ shift 3

+ local OPTS=-n ESP

+ local DEV=/dev/nvme0n1p1

+ [ -x /usr/bin/udevadm ]

+ msg_gray    * formatting /dev/nvme0n1p1

+ echo -ne \033[1;30m

+ echo -ne    * formatting /dev/nvme0n1p1

+ echo -ne \033[0;39m

+ /sbin/mkfs.vfat -n ESP /dev/nvme0n1p1

+ wwsuccess

+ echo -en \033[78G

+ msg_green OK\n

+ echo -ne \033[1;32m

+ echo -ne OK\n

+ echo -ne \033[0;39m

+ IFS=,

+ [ -z /dev/nvme0n1 ]

+ run_parted /dev/nvme0n1 

+ local SELECT_DISK=/dev/nvme0n1

+ local PARTED_CMDS=

+ [ -n  ]

+ PARTED_CMDS=

+ unset IFS

+ run_mkfs /dev/nvme0n1 p2 ext4 -L boot

+ local SELECT_DISK=/dev/nvme0n1

+ local PARTITION=p2

+ local TYPE=ext4

+ shift 3

+ local OPTS=-L boot

+ local DEV=/dev/nvme0n1p2

+ [ -x /usr/bin/udevadm ]

+ msg_gray    * formatting /dev/nvme0n1p2

+ echo -ne \033[1;30m

+ echo -ne    * formatting /dev/nvme0n1p2

+ echo -ne \033[0;39m

+ /sbin/mkfs.ext4 -q -F -L boot /dev/nvme0n1p2

+ wwfailure

+ echo -en \033[75G

+ msg_red ERROR\n

+ echo -ne \033[1;31m

+ echo -ne ERROR\n

+ echo -ne \033[0;39m

+ exit 2


I don't know why  

+ /sbin/mkfs.ext4 -q -F -L boot /dev/nvme0n1p2


I see the attempt to put EXT4 on  p2  but that fails.  If I manually do it in recovery console I get 

/var/log/warewulf/provision # /sbin/mkfs.ext4 -q -F -L boot /dev/nvme0n1p2

/dev/nvme0n1p2 contains a ext4 file system labelled 'boot'

created on Tue Mar  2 23:08:09 2021

/var/log/warewulf/provision # 



/var/log/warewulf/provision # fdisk -l 

Found valid GPT with protective MBR; using GPT


Disk /dev/nvme0n1: 1000215216 sectors,  962M

Logical sector size: 512

Disk identifier (GUID): a53d5cf1-8015-445f-bef0-0e83fc27310d

Partition table holds up to 128 entries

First usable sector is 34, last usable sector is 1000215182


Number  Start (sector)    End (sector)  Size Name

     1            2048         1050623  512M ESP

     2         1050624         3188735 1044M boot

     3         3188736        36702207 15.9G swap

     4        36702208       143198207 50.7G root

     5       143198208      1000214527  408G data

/var/log/warewulf/provision #


Jason Stover

unread,
Mar 3, 2021, 1:11:54 PM3/3/21
to ware...@lbl.gov, Matt
> var/log/warewulf/provision # /sbin/mkfs.ext4 -q -F -L boot /dev/nvme0n1p2
> /dev/nvme0n1p2 contains a ext4 file system labelled 'boot'

What is the return code of that mkfs.ext4 command there? [. echo $? ]

I'm guessing that we're looking for a 0 return code, but because it
already contains a filesystem, it may not be a 0 return code. I don't
recall if parted will automatically format the ext4 filesystem in the
mkpart command, but you _could_ try commenting out the `mkfs` calls
and see if that changes anything.

Just re-set the file if you modify it.

-J
> --
> You received this message because you are subscribed to the Google Groups "Warewulf" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to warewulf+u...@lbl.gov.
> To view this discussion on the web visit https://groups.google.com/a/lbl.gov/d/msgid/warewulf/73acead5-9808-49b0-a558-ffe3e7a987fbn%40lbl.gov.

Matt

unread,
Mar 3, 2021, 2:48:49 PM3/3/21
to Warewulf, jason....@gmail.com, Matt
Hello,

I commented out the mkfs parts and received the output below.
I am also wondering: shouldn't the command for making swap be mkswap, not mkfs?  Here is our commands file:

select /dev/nvme0n1
mklabel gpt
mkpart ESP fat32 1MiB 513MiB
mkpart primary ext4 513MiB 1557MiB
mkpart primary linux-swap 1557MiB 17921MiB
mkpart primary ext4 17921MiB 69921MiB
mkpart primary ext4 69921MiB 100%
name 1 ESP
name 2 boot
name 3 swap
name 4 root
name 5 data
set 1 boot on
# mkfs NUMBER FS-TYPE [ARGS...]
#mkfs  p1 vfat -n ESP
#mkfs  p2 ext4 -L boot
#mkswap  p3 swap
#mkfs  p4 ext4 -L root
#mkfs  p5 ext4 -L data
# fstab NUMBER fs_file fs_vfstype fs_mntops fs_freq fs_passno
fstab p1 /boot/efi vfat defaults 0 0
fstab p2 /boot ext4 defaults 0 0
fstab p3 swap swap defaults 0
fstab p4 / ext4 defaults 0 0
fstab p5 /data ext4 defaults 0 0





Now Booting Warewulf...

Setting the hostname (cedar-cn02.cedar.cluster):                             OK
Loading drivers: uhci-hcd ohci-hcd ehci-hcd whci-hcd isp116x-hcd isp1362-hcd xhci-hcd sl811-hcd sd_mod virtio virtio_ring virtio_blk virtio_net virtio_pci nvme nvme_core                                                                    OK
Detecting hardware: igb mlx5_core                                            OK
Bringing up local loopback network:                                          OK
Checking for network device: eth0 (eth0)                                     OK
Configuring eth0 (eth0) statically: (172.21.3.102/255.255.255.0)             OK
Creating network initialization files: (eth0)                                OK
Trying to reach the master node at 172.21.3.251 .                            OK
Probing for HW Address: (2c:d4:44:ce:90:cc)                                  OK
Starting syslogd:                                                            OK
Getting base node configuration:                                             OK
Starting the provision handler:
BusyBox v1.26.2 (2020-07-29 01:43:57 UTC[   79.418336] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100
[   79.418336]
)[ m ul ti -c7al9l .bi4n30912] CPU: 5 PID: 1 Comm: switch_root Not tainted 4.18.0-193.28.1.el8_2.aarch64 #1
ary.

Usage: switch_root [-c /dev/console] NEW_ROOT NEW_INIT [ARGS]

Free initramfs and switch to another root fs:
chroot to NEW_ROOT, delete all in /, move NEW_ROOT to /,
execute NEW_INIT. PID must be 1. NEW_ROOT must be a mountpoint.

-c DEV Reopen stdio to DEV after switch
[   79.467473] Hardware name: FUJITSU FX700/CMUA        , BIOS 1.4.0 Jun 17 2020
[   79.475158] Call trace:
[   79.477807]  dump_backtrace+0x0/0x188
[   79.481664]  show_stack+0x24/0x30
[   79.485174]  dump_stack+0x90/0xb4
[   79.488875]  panic+0x12c/0x2f4
[   79.492119]  do_exit+0xaf4/0xaf8
[   79.495543]  do_group_exit+0x3c/0xd0
[   79.499308]  __arm64_sys_exit_group+0x24/0x28
[   79.504052]  el0_svc_handler+0xb4/0x188
[   79.508077]  el0_svc+0x8/0xc
[   79.511205] SMP: stopping secondary CPUs
[   79.515313] Kernel Offset: disabled
[   79.519171] CPU features: 0x0002,2ae08a38
[   79.523358] Memory Limit: none
[   79.526612] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100
[   79.526612]  ]---

Jason Stover

unread,
Mar 3, 2021, 3:38:59 PM3/3/21
to Matt, Warewulf
> Wondering too shouldn't the command for making swap be mkswap not mkfs.

Maybe? I'm not great with the parted syntax...

From what it shows, the Filesystem portion completed... whether
correct or not is a different matter. Can you add in a POSTSHELL ?
That will drop the bootstrap into a shell before it tries running the
switch_root command. If you look at the mounts and partitions does it
all look correct?

-J

Matt

unread,
Mar 3, 2021, 3:55:04 PM3/3/21
to Warewulf, jason....@gmail.com, Warewulf, Matt
I'll have to dig in to see how to add the post shell.  We did get a node to boot all the way by dropping into the recovery shell and manually removing all of the partitions with rm in parted.  We also uncommented the mkfs commands in the filesystem file and got something that looks better now.

Is there a way to wipe the disk with a parted command or wipefs before the filesystem is laid out again?

Thanks.

Jason Stover

unread,
Mar 3, 2021, 4:59:46 PM3/3/21
to Matt, Warewulf
> Is there a way to wipe the disk with parted command or wipefs before the filesystem is layed out again?

The filesystem script *used* to run something like:

dd if=/dev/zero of=/dev/sda bs=512 count=10

I honestly don't know the internals of how the current system runs
well enough to say off the top of my head whether you can add in a
`rm` command to the filesystems file or not.

> I'll have to dig in to see how to add post shell.

IIRC, the command is: wwsh object modify -s POSTSHELL=1

I can't remember if we ever added in a `--postshell` or `--preshell`
option to the provision or node commands.

-Jason

Ben Allen

unread,
Mar 3, 2021, 6:29:36 PM3/3/21
to ware...@lbl.gov
Looking back at your original dump of the node config, this is messed up:

35: FS = fstab p1 /boot/efi vfat defaults 0 0,fstab p2 /boot ext4 defaults 0 0,fstab p3 swap swap defaults 0 0,fstab p4 / ext4 defaults 0 0,fstab p5 /data ext4 defaults 0 0,mkfs p1 vfat -n ESP,mkfs p2 ext4 -L boot,mkfs p3 swap,mkfs p4 ext4 -L root,mkfs p5 ext4 -L data,mklabel gpt,mkpart ESP fat32 1MiB 513MiB,mkpart boot ext4 513MiB 1557MiB,mkpart data ext4 69921MiB 100%,mkpart linux-swap 1557MiB 17921MiB,mkpart root ext4 17921MiB 69921MiB,name 1 ESP,name 2 boot,name 3 swap,name 4 root,name 5 data,select /dev/nvme0n1,set 1 boot on

There's no select, mklabel after mkfs cmds, etc. Your later emails show debug output of something that looks correct however.

Regarding swap, no it should be "swap" not mkswap. Some of these commands are literal parted commands, and some are interpreted where parted lacks the needed functionality. mkfs is one of those. Basically its running mkfs.<fs> except for swap, see https://github.com/warewulf/warewulf3/blob/development/provision/initramfs/capabilities/setup-filesystems/20-filesystems.

I'd suggest booting back into the bootstrap postshell and trying:

/sbin/mkfs.ext4 -F -L boot /dev/nvme0n1p2

Reporting the output and then if it still doesn't want to overwrite what appears to be an existing filesystem do:

/sbin/mkfs.ext4 -F -F -L boot /dev/nvme0n1p2

Otherwise, I'd suggest booting the node into a diskless mode and running wipefs on this disk. We don't have wipefs in the bootstrap.

Depending on where mkfs.ext4 is being pulled from, I suppose it is possible there might be some new options to force overwriting an existing filesystem. If this is indeed what's going on.

Ben

Matt

unread,
Mar 9, 2021, 1:45:49 PM3/9/21
to Warewulf, bsa...@alcf.anl.gov
We worked around the issue by booting the nodes to stateless image
We ran this modified 20-filesystems script on all of the nodes (shared on the NFS pub mount from the Warewulf master node), which laid out the partitions and filesystems.

THEN we changed the nodes to the stateful image, booted, and continued working.  Maybe John from Fujitsu will chime in with more details.  I think there is a bug in the way the FS parameters are read, something to do with the single quotes (').

#!/bin/sh
#
# Copyright (c) 2001-2003 Gregory M. Kurtzer
#
# Copyright (c) 2003-2017, The Regents of the University of California,
# through Lawrence Berkeley National Laboratory (subject to receipt of any
# required approvals from the U.S. Dept. of Energy).  All rights reserved.
#
# Copyright (c) 2017 Benjamin S. Allen

# Disk layout for /dev/nvme0n1, assembled one command per line.  The value is
# a single comma-separated list: the "select" entry names the disk, the parted
# commands follow, and the "mkfs" entries come last.
WWFS="select /dev/nvme0n1"
WWFS="${WWFS},mklabel gpt"
WWFS="${WWFS},mkpart primary fat32 1MiB 513MiB"
WWFS="${WWFS},mkpart primary ext4 513MiB 1557MiB"
WWFS="${WWFS},mkpart primary linux-swap 1557MiB 17921MiB"
WWFS="${WWFS},mkpart primary ext4 17921MiB 120325MiB"
WWFS="${WWFS},mkpart primary ext4 120325MiB 100%"
WWFS="${WWFS},name 1 ESP"
WWFS="${WWFS},name 2 boot"
WWFS="${WWFS},name 3 swap"
WWFS="${WWFS},name 4 root"
WWFS="${WWFS},name 5 data"
WWFS="${WWFS},set 1 boot on"
WWFS="${WWFS},mkfs p1 vfat -n ESP"
WWFS="${WWFS},mkfs p2 ext4 -L boot"
WWFS="${WWFS},mkfs p3 swap -L swap"
WWFS="${WWFS},mkfs p4 ext4 -L root"
WWFS="${WWFS},mkfs p5 ext4 -L data"

## Example of the general input format:
# WWFS="select /dev/sda,mklabel gpt,mkpart primary ext4 0% 20%,mkpart primary linux-swap 20% 50%,mkpart primary ext4 50% 100%,name 1 boot,name 2 swap,name 3 root,mkfs 1 ext4 '-L boot',mkfs 2 '-L swap' swap,mkfs 3 ext4 '-L root',fstab 3 / ext4 defaults 0 0,fstab 1 /boot ext4 defaults 0 0,fstab 2 swap swap defaults 0 0"

# Abort as soon as any command returns a non-zero status
set -e

# A pipeline reports the status of its last (rightmost) failing command,
# or zero when every command in it succeeds.
set -o pipefail
# Fallback status helpers so the script can run where /etc/functions is not
# available.  /etc/functions is sourced after these definitions, so anything
# it defines under the same names takes precedence.
wwfailure() {
  echo FAILED
}
wwsuccess() {
  echo SUCCESS
}
# This script also calls wwskipped and wwlogger; without a fallback those
# code paths would abort with "command not found".
wwskipped() {
  echo SKIPPED
}
# Diagnostics go to stderr so they do not mix with the status output.
wwlogger() {
  echo "$*" >&2
}
[ -f /etc/functions ] && . /etc/functions

# When no layout has been supplied (WWFS unset or empty), mount a tmpfs
# volume as the root filesystem.
if [ -z "${WWFS:-}" ]; then
  WWFS="select tmpfs,fstab  / tmpfs rw:relatime:mode=555 0 0"
fi

# Run one batch of parted commands against the selected disk.
# Arguments:
#   $1 - selected disk; values that do not start with "/" (e.g. "tmpfs")
#        are not block devices and make this a no-op
#   $2 - parted commands joined by spaces, handed to parted as one argument
# Outputs:
#   a progress line, then the result via wwsuccess / wwfailure
# Exits the whole script with status 2 when parted (or wwsuccess) fails.
run_parted() {
  local SELECT_DISK="${1}"
  local PARTED_CMDS="${2}"

  case ${SELECT_DISK} in
    /*)
      if [ -n "${PARTED_CMDS}" ]; then
        echo "   * running parted on ${SELECT_DISK}"

        # Wipe old signatures only when this batch creates a new disk label.
        # Wiping ahead of a later batch (e.g. "name"/"set" issued after an
        # mkfs) would erase the partition table that was just written.
        # NOTE(review): this targets the whole-disk node only; signatures
        # inside old partitions presumably survive -- confirm with wipefs(8).
        case " ${PARTED_CMDS} " in
          *" mklabel "*|*" mktable "*)
            # Best effort: a missing or failing wipefs must not stop
            # provisioning, since mklabel replaces the table anyway.
            if [ -x /usr/sbin/wipefs ]; then
              /usr/sbin/wipefs -a "${SELECT_DISK}" ||
                echo "   * wipefs failed on ${SELECT_DISK}, continuing" >&2
            fi
            ;;
        esac

        {
          /usr/sbin/parted --align=optimal --script "${SELECT_DISK}" "${PARTED_CMDS}" && wwsuccess
        } || {
          wwfailure
          exit 2
        }
      fi
      ;;
    *)
      return
      ;;
  esac

}

# Create a filesystem (or swap area) on one partition of the selected disk.
# Arguments:
#   $1 - selected disk, e.g. /dev/nvme0n1
#   $2 - partition suffix appended to the disk name, e.g. p2
#   $3 - filesystem type: swap, ext2, ext3, ext4 or vfat
#   $4... - extra options handed to the formatter
# Outputs:
#   a progress line, then the result via wwsuccess / wwfailure / wwskipped
# Exits the whole script with status 2 when formatting (or wwsuccess) fails.
# An unsupported type is logged and skipped.
run_mkfs() {
  local disk="${1}"
  local part="${2}"
  local fstype="${3}"
  shift 3
  local opts="${*:-}"
  local dev="${disk}${part}"
  local quiet=0

  # Wait (up to 15 s) for udev to settle, or until the device node exists
  if [ -x /usr/bin/udevadm ]; then
    /usr/bin/udevadm settle --timeout=15 --exit-if-exists="${dev}"
  fi

  echo "   * formatting ${dev}"

  # Build the formatter command line in the positional parameters.  The
  # options are left unquoted on purpose so they split into separate words.
  # Only the ext2 and ext3 formatters have their output discarded.
  case ${fstype} in
    swap)
      set -- /sbin/mkswap ${opts} "${dev}"
      ;;
    ext2)
      quiet=1
      set -- /sbin/mkfs.ext2 -q -F ${opts} "${dev}"
      ;;
    ext3)
      quiet=1
      set -- /sbin/mkfs.ext3 -q -F ${opts} "${dev}"
      ;;
    ext4)
      set -- /sbin/mkfs.ext4 -q -F ${opts} "${dev}"
      ;;
    vfat)
      set -- /sbin/mkfs.vfat ${opts} "${dev}"
      ;;
    *)
      wwlogger "Unsupported filesystem type in mkfs command: ${fstype}"
      wwskipped
      return
      ;;
  esac

  # Run the formatter and report; success ends the function here
  if [ "${quiet}" -eq 1 ]; then
    if "$@" >/dev/null 2>&1 && wwsuccess; then
      return 0
    fi
  else
    if "$@" && wwsuccess; then
      return 0
    fi
  fi

  wwfailure
  exit 2
}

# Record one fstab entry and mount it below the new root.
# Globals:
#   TEMPDIR (read) - directory holding the fstab destined for the new root
# Arguments:
#   $1 - selected disk, prepended to the entry unless it starts with
#        "UUID=" or "LABEL="
#   $2 - fstab entry: "<partition> <mountpoint> <type> <options> <freq> <passno>",
#        with ":" used in place of "," inside the options field
#   $3 - new root directory under which the mount point is created
# Outputs:
#   a progress line, then the result via wwsuccess / wwfailure / wwskipped
# Exits the whole script with status 2 when mkdir or mount fails.
update_fstab_and_mount() {
  local SELECT_DISK="${1}"
  local ENTRY="${2}"
  local NEWROOT="${3}"
  # Declared separately so a failing command substitution is not masked by
  # the exit status of "local", and so the name does not leak to the caller.
  local MOUNTPOINT

  # Prefix matching with case instead of ${var:0:n}, which is not POSIX sh
  case ${ENTRY} in
    UUID=*|LABEL=*) ;;
    *) ENTRY="${SELECT_DISK}${ENTRY}" ;;
  esac

  # Add entry to fstab that will be moved into the NEWROOT chroot, transform options field ":" delim to commas
  echo "${ENTRY}" | /usr/bin/awk '{ gsub(/:/,",",$4); print }' >> "${TEMPDIR}/fstab"

  MOUNTPOINT=$(echo "${ENTRY}" | /usr/bin/awk '{ print $2 }')
  echo "   * mounting ${MOUNTPOINT}"
  case ${MOUNTPOINT} in
    /*)
      # Prepend NEWROOT to mount points, transform options field ":" delim to fstab format with commas
      echo "${ENTRY}" | /usr/bin/awk -v NEWROOT="${NEWROOT}" '{ $2 = NEWROOT $2; gsub(/:/,",",$4); print }' >> /etc/fstab
      {
        mkdir -p "${NEWROOT}${MOUNTPOINT}" && \
        mount "${NEWROOT}${MOUNTPOINT}" && wwsuccess
      } || {
        wwfailure
        exit 2
      }
      ;;
    *)
      # Entries without an absolute mount point (e.g. swap) are only recorded
      wwskipped
      ;;
  esac
}

# Main driver: walk the comma-separated WWFS command list in order.
#   select <disk>   choose the disk that later commands apply to
#   parted commands accumulate in PARTED_CMDS and run as one batch
#   mkfs / fstab    first run the pending parted batch, then format / mount
# Any unknown command, or a command issued before a "select", aborts the
# script with status 2.
{
  SELECT_DISK=""
  PARTED_CMDS=""
  FIRST_DISK=1
  NEWROOT=${NEWROOT:=/newroot}
  TEMPDIR=${TEMPDIR:=/tmp}
  WWSELECTDISKWAIT=${WWSELECTDISKWAIT:=5}

  # Split WWFS on commas; individual commands keep their embedded spaces
  IFS=","


  if [ ! -e /etc/mtab ]; then
    ln -s /proc/mounts /etc/mtab
  fi

  for FSCMD in ${WWFS}; do
    case ${FSCMD} in

    # Don't pass select commands directly to parted as parted --script
    # needs a device specified on the CLI
    select*)
      # Compare numerically: a bare [ "${FIRST_DISK}" ] is also true for "0",
      # which meant the flush below never ran and commands queued for one
      # disk were carried over to the next.
      if [ "${FIRST_DISK}" -eq 1 ]; then
        FIRST_DISK=0
      else
        # Run any accumulated parted commands before changing selected disk
        run_parted "${SELECT_DISK}" "${PARTED_CMDS}"
        PARTED_CMDS=""
        SELECT_DISK=""
      fi
      SELECT_DISK=$(echo "${FSCMD}" | cut -f 2 -d' ')

      SELECT_RETRY=0

      # For /dev paths, wait up to WWSELECTDISKWAIT one-second retries for
      # the block device to appear
      if [ -z "${SELECT_DISK##*/dev/*}" ]; then
        while [ ! -b "${SELECT_DISK}" ]; do
          if [ "${SELECT_RETRY}" -lt "${WWSELECTDISKWAIT}" ]; then
            if [ "${SELECT_RETRY}" -eq 0 ]; then
              echo "   * ${SELECT_DISK} not ready, retrying."
            else
              echo "."
            fi
            SELECT_RETRY="$((SELECT_RETRY+1))"
            sleep 1
          else
            wwlogger "Disk: ${SELECT_DISK} not found!"
            exit 2
          fi
        done
        if [ "${SELECT_RETRY}" -gt 0 ]; then
          wwsuccess
        fi
      fi
    ;;

    fstab*)
      if [ -z "${SELECT_DISK}" ]; then
        wwlogger "No select disk set before fstab command: ${FSCMD:6}"
        exit 2
      fi
      # Run any accumulated parted commands before continuing with fstab
      run_parted "${SELECT_DISK}" "${PARTED_CMDS}"
      PARTED_CMDS=""
      update_fstab_and_mount "${SELECT_DISK}" "${FSCMD:6}" "${NEWROOT}"
    ;;

    mkfs*)
      if  [ -z "${SELECT_DISK}" ]; then
        wwlogger "No select disk set before mkfs command: ${FSCMD:5}"
        exit 2
      fi

      # Run any accumulated parted commands before continuing with mkfs
      run_parted "${SELECT_DISK}" "${PARTED_CMDS}"
      PARTED_CMDS=""

      # Need FSCMD to be split into individual arguments for run_mkfs
      unset IFS
      run_mkfs "${SELECT_DISK}" ${FSCMD:5}
      IFS=","
    ;;

    # All parted command except select
    align-check*|help*|mklabel*|mktable*|mkpart*|name*|print*|quit|rescue*|rm*|disk_set*|disk_toggle*|set*|toggle*|unit*|version)
      if [ -z "${SELECT_DISK}" ]; then
        wwlogger "No selected disk set before command: ${FSCMD}"
        exit 2
      fi

      # Quoted so the emptiness test does not depend on the current IFS
      if [ -z "${PARTED_CMDS}" ]; then
        PARTED_CMDS="${FSCMD}"
      else
        PARTED_CMDS="${PARTED_CMDS} ${FSCMD}"
      fi
    ;;

    *)
      wwlogger "Unknown parted command: ${FSCMD}"
      exit 2
    ;;
    esac

  done
  # NOTE(review): parted commands still pending in PARTED_CMDS at this point
  # (a list that does not end in mkfs/fstab) are never handed to run_parted.
  echo " * filesystems"
}

Matt

unread,
Mar 9, 2021, 1:52:56 PM3/9/21
to Warewulf, Matt, bsa...@alcf.anl.gov
sored/shared

So when we boot the stateful image we  are using this for --filesystem
[root@cedar-deploy filesystem]# cat fx700disk.cmds
select /dev/nvme0n1
mklabel gpt
mkpart ESP fat32 1MiB 513MiB
mkpart primary ext4 513MiB 1557MiB
mkpart primary linux-swap 1557MiB 17921MiB
mkpart primary ext4 17921MiB 120325MiB
mkpart primary ext4 120325MiB 100%
name 1 ESP
name 2 boot
name 3 swap
name 4 root
name 5 data
set 1 boot on
#mkfs p1 vfat -n ESP
#mkfs p2 ext4 -L boot
#mkfs p3 swap
#mkfs p4 ext4 -L root
#mkfs p5 ext4 -L data
# fstab NUMBER fs_file fs_vfstype fs_mntops fs_freq fs_passno
fstab p1 /boot/efi vfat defaults 0 0
fstab p2 /boot ext4 defaults 0 0
fstab p3 swap swap defaults 0 0
fstab p4 / ext4 defaults 0 0
fstab p5 /data ext4 defaults 0 0

Reply all
Reply to author
Forward
0 new messages