Files
zrec/zrecins-opus.org
T
billmanz 4894ada68f Initial commit of the zrec function.
`zrec` exists on the internet now! For anyone who ever looks at
this, yes, this was entirely vibe-coded. Sorry, I saw a niche that
an LLM could help me fill. I find this to work for my usecase.
2026-06-18 09:14:05 -04:00

48 KiB
Raw Blame History

Help me improve the attached script. I have 3 improvements I would like. First, when the field has a provided type, enforce the correct formatting. Second, when a record uses the %allowed types, do not prompt for the user response field and value, as it creates a malformed call to recins. Third, when the script prompts the user to select the record type, respond immediately to the keypress, do not wait for the user to press enter.

I'll address each of your three improvements systematically.

Improvement 1: Enforce Field Type Formatting

The get_field_type function already extracts the type string, but prompt_field ignores it for validation. We need a validation/coercion layer that understands recutils' built-in types: int, real, bool, date, line, url, email, uuid, regexp, and range.

Improvement 2: Skip the %rec: Response Field

When %allowed: is declared, recins handles the %rec: type field itself via -t. Prompting for it creates a duplicate/malformed call. We need to filter it out of the allowed-fields loop.

Improvement 3: Single-Keypress Record Type Selection

The numbered menu currently waits for Enter. We can use read -n1 (bash built-in, available in 3.2) to respond immediately to a single keypress.

Here is the revised script with all three improvements applied, with change markers ### CHANGED: and ### NEW: so every edit is easy to locate:

#!/usr/bin/env bash
#
# recadd - Interactive record inserter for recutils recfiles
# Compatible with bash 3.2.57 (macOS)
#
# Usage: recadd <recfile> [record-type]
#

set -o pipefail

# ---------------------------------------------------------------------------
# Step 0: Verify recutils (recins/recsel) is installed
# ---------------------------------------------------------------------------
check_recutils() {
    if ! command -v recins >/dev/null 2>&1; then
        echo "Error: 'recins' not found. Please install recutils." >&2
        echo "  On macOS:  brew install recutils" >&2
        exit 1
    fi
    if ! command -v recinf >/dev/null 2>&1; then
        echo "Error: 'recinf' not found. Please install recutils." >&2
        exit 1
    fi

    local ver
    ver=$(recins --version 2>/dev/null | head -n1)
    echo "Using: ${ver:-recutils (version unknown)}"
}

# ---------------------------------------------------------------------------
# Step 1: Usage / argument handling
# ---------------------------------------------------------------------------
usage() {
    echo "Usage: $(basename "$0") <recfile> [record-type]" >&2
    echo "  Interactively build and insert a record into a recfile." >&2
    exit 1
}

# ---------------------------------------------------------------------------
# Step 2: Extract the descriptor block for a given record type.
# ---------------------------------------------------------------------------
get_descriptor_block() {
    local file="$1"
    local rectype="$2"

    awk -v rt="$rectype" '
        BEGIN { RS=""; FS="\n" }
        {
            for (i = 1; i <= NF; i++) {
                if ($i ~ /^%rec:[[:space:]]*/) {
                    name = $i
                    sub(/^%rec:[[:space:]]*/, "", name)
                    sub(/[[:space:]]+$/, "", name)
                    if (name == rt) {
                        print $0
                        exit
                    }
                }
            }
        }
    ' "$file"
}

# ---------------------------------------------------------------------------
# Step 3: List all record types defined in the file.
# ---------------------------------------------------------------------------
list_record_types() {
    local file="$1"

    if command -v recinf >/dev/null 2>&1; then
        recinf "$file" 2>/dev/null | awk '
            {
                if (NF == 0) next
                name = $NF
                if ($1 ~ /^[0-9]+$/) {
                    printf "%s [count: %s]\n", name, $1
                } else {
                    print name
                }
            }
        '
    else
        grep '^%rec:' "$file" | sed 's/^%rec:[[:space:]]*//; s/[[:space:]]*$//'
    fi
}

# ---------------------------------------------------------------------------
# Step 4: Pull a space-separated list of fields from a directive.
# ---------------------------------------------------------------------------
extract_fields() {
    local block="$1"
    local directive="$2"

    echo "$block" | awk -v d="$directive" '
        index($0, d) == 1 {
            line = substr($0, length(d) + 1)
            print line
        }
    ' | tr '\n' ' ' | sed 's/^[[:space:]]*//; s/[[:space:]]*$//'
}

# ---------------------------------------------------------------------------
# Step 5: Find the declared type (if any) for a specific field.
# ---------------------------------------------------------------------------
get_field_type() {
    local block="$1"
    local field="$2"

    echo "$block" | awk -v f="$field" '
        index($0, "%type:") == 1 {
            rest = substr($0, length("%type:") + 1)
            sub(/^[[:space:]]+/, "", rest)
            n = split(rest, parts, /[[:space:]]+/)
            names = parts[1]
            typedef = ""
            for (i = 2; i <= n; i++) {
                typedef = typedef (i==2 ? "" : " ") parts[i]
            }
            m = split(names, namearr, /,/)
            for (j = 1; j <= m; j++) {
                if (namearr[j] == f) {
                    print typedef
                    exit
                }
            }
        }
    '
}

# ---------------------------------------------------------------------------
# Helper: test membership of a word in a space-separated list.
# ---------------------------------------------------------------------------
in_list() {
    local needle="$1"
    shift
    local item
    for item in $*; do
        [ "$item" = "$needle" ] && return 0
    done
    return 1
}

# ---------------------------------------------------------------------------
# ### NEW: Step 6a: Validate (and where possible normalise) a value against
# a recutils field type string.  Prints the (possibly normalised) value on
# stdout and returns 0 on success, or prints an error to stderr and returns
# 1 on failure.
#
# Supported types:
#   int                        - integer
#   real                       - floating-point number
#   bool                       - yes/no/true/false/0/1  (normalised to "yes"/"no")
#   date                       - ISO-8601 or free date accepted by `date`
#   line                       - single line (no embedded newlines  always valid)
#   url                        - must start with a recognised scheme
#   email                      - must match user@domain
#   uuid                       - must match 8-4-4-4-12 hex pattern
#   regexp /pattern/           - value must match the given ERE
#   range [min] [max]          - min, max, or both may be omitted
# ---------------------------------------------------------------------------
validate_field_type() {
    local value="$1"
    local ftype="$2"   # e.g. "int", "range 1 10", "regexp /foo/"

    # Extract the base type keyword (first word).
    local base_type="${ftype%% *}"

    case "$base_type" in

        int)
            # Allow optional leading sign.
            case "$value" in
                ''|*[!0-9-]*) ;;   # fall through to error
                *)
                    # Reject bare "-" or values with "-" not at position 1.
                    if printf '%s' "$value" | grep -qE '^-?[0-9]+$'; then
                        printf '%s' "$value"
                        return 0
                    fi
                    ;;
            esac
            echo "  -> Expected an integer (e.g. 42, -7)." >&2
            return 1
            ;;

        real)
            if printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a real number (e.g. 3.14, -2.0, 1e10)." >&2
            return 1
            ;;

        bool)
            case "$value" in
                # Normalise all truthy spellings to "yes".
                1|[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee])
                    printf 'yes'
                    return 0
                    ;;
                # Normalise all falsy spellings to "no".
                0|[Nn][Oo]|[Ff][Aa][Ll][Ss][Ee])
                    printf 'no'
                    return 0
                    ;;
            esac
            echo "  -> Expected a boolean: yes/no, true/false, or 1/0." >&2
            return 1
            ;;

        date)
            # Lean on the system `date` command for flexibility; require it
            # to produce output without error as the acceptance criterion.
            if date -d "$value" >/dev/null 2>&1 || \
               date -j -f "%Y-%m-%d" "$value" "+%Y-%m-%d" >/dev/null 2>&1; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a valid date (e.g. 2024-07-04)." >&2
            return 1
            ;;

        line)
            # A "line" type simply forbids embedded newlines; since the shell
            # reads one line at a time this is always satisfied.
            printf '%s' "$value"
            return 0
            ;;

        url)
            if printf '%s' "$value" | \
               grep -qiE '^(https?|ftp|ftps|sftp|file)://[^[:space:]]+$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a URL beginning with http://, https://, ftp://, etc." >&2
            return 1
            ;;

        email)
            if printf '%s' "$value" | grep -qE '^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected an e-mail address (e.g. user@example.com)." >&2
            return 1
            ;;

        uuid)
            if printf '%s' "$value" | \
               grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a UUID (e.g. 550e8400-e29b-41d4-a716-446655440000)." >&2
            return 1
            ;;

        regexp)
            # ftype looks like: regexp /pattern/
            # Strip the leading "regexp " and the surrounding delimiters.
            local pattern="${ftype#regexp }"   # "/pattern/"
            pattern="${pattern#/}"             # "pattern/"
            pattern="${pattern%/}"             # "pattern"
            if printf '%s' "$value" | grep -qE "$pattern"; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Value does not match required pattern: /${pattern}/." >&2
            return 1
            ;;

        range)
            # ftype may be: "range", "range MIN", "range MIN MAX"
            # recutils range is inclusive on both ends.
            if ! printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then
                echo "  -> Expected a number for a range field." >&2
                return 1
            fi

            local lo hi
            lo=$(echo "$ftype" | awk '{print $2}')  # may be empty
            hi=$(echo "$ftype" | awk '{print $3}')  # may be empty

            # Use awk for numeric comparison so it works for both int and real.
            if [ -n "$lo" ] && \
               awk -v v="$value" -v lo="$lo" 'BEGIN{exit !(v < lo)}'; then
                echo "  -> Value must be >= ${lo}." >&2
                return 1
            fi
            if [ -n "$hi" ] && \
               awk -v v="$value" -v hi="$hi" 'BEGIN{exit !(v > hi)}'; then
                echo "  -> Value must be <= ${hi}." >&2
                return 1
            fi
            printf '%s' "$value"
            return 0
            ;;

        *)
            # Unknown / enum type or blank  pass through without validation.
            printf '%s' "$value"
            return 0
            ;;
    esac
}

# ---------------------------------------------------------------------------
# Step 6: Prompt for a single field's value.
#   Returns the (possibly normalised) value via stdout.
#   Mandatory fields loop until non-empty; typed fields loop until valid.
# ---------------------------------------------------------------------------
prompt_field() {
    local field="$1"
    local is_mandatory="$2"   # "yes" / "no"
    local ftype="$3"          # may be empty

    local label="$field"
    local tags=""
    [ "$is_mandatory" = "yes" ] && tags="${tags} [MANDATORY]"
    [ -n "$ftype" ]            && tags="${tags} (type: ${ftype})"

    local value validated
    while true; do
        printf "%s%s: " "$label" "$tags" >&2
        IFS= read -r value

        # Mandatory-empty check.
        if [ "$is_mandatory" = "yes" ] && [ -z "$value" ]; then
            echo "  -> This field is mandatory and cannot be empty." >&2
            continue
        fi

        # Skip type validation for empty optional fields.
        if [ -z "$value" ]; then
            printf '%s' "$value"
            return 0
        fi

        # ### CHANGED: If a type is declared, validate (and possibly normalise)
        # the input; re-prompt on failure.
        if [ -n "$ftype" ]; then
            validated=$(validate_field_type "$value" "$ftype")
            if [ $? -ne 0 ]; then
                # validate_field_type already printed the error to stderr.
                continue
            fi
            printf '%s' "$validated"
        else
            printf '%s' "$value"
        fi
        return 0
    done
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
main() {
    check_recutils

    [ "$#" -ge 1 ] || usage
    local recfile="$1"
    local rectype="$2"

    if [ ! -f "$recfile" ]; then
        echo "Error: recfile '$recfile' does not exist." >&2
        exit 1
    fi

    # ---- Determine record type --------------------------------------------
    local available_types
    available_types=$(list_record_types "$recfile")

    if [ -z "$rectype" ]; then
        if [ -z "$available_types" ]; then
            echo "No %rec: types found. This recfile uses free-form fields." >&2
        else
            local -a display_list
            local -a name_list
            display_list=()
            name_list=()

            local _line
            while IFS= read -r _line; do
                [ -z "$_line" ] && continue
                display_list+=("$_line")
                local plain="${_line%% \[count:*}"
                name_list+=("$plain")
            done <<EOF
$available_types
EOF

            local count="${#name_list[@]}"

            if [ "$count" -eq 1 ]; then
                local only_type="${name_list[0]}"
                echo "Only one record type found: '$only_type'" >&2
                printf "Press Enter to use it, or type a different type: " >&2
                IFS= read -r rectype
                [ -z "$rectype" ] && rectype="$only_type"

            else
                # ----- Multiple types: single-keypress numbered menu -------
                echo "Available record types:" >&2
                local i=1
                local label
                for label in "${display_list[@]}"; do
                    printf "  %d) %s\n" "$i" "$label" >&2
                    i=$((i + 1))
                done

                while true; do
                    # ### CHANGED: -n1 reads exactly one character without
                    # requiring Enter.  -s suppresses echo so we can echo
                    # the digit ourselves on the same line for clarity.
                    printf "Select a record type (1-%d): " "$count" >&2
                    local choice
                    IFS= read -r -n1 -s choice
                    # Echo the character the user pressed so they can see it.
                    printf '%s\n' "$choice" >&2

                    # Blank (Enter pressed directly) → re-prompt.
                    if [ -z "$choice" ]; then
                        echo "  -> Please press a digit between 1 and $count." >&2
                        continue
                    fi

                    # Reject non-digits immediately.
                    case "$choice" in
                        *[!0-9]*)
                            echo "  -> Please press a digit between 1 and $count." >&2
                            continue
                            ;;
                    esac

                    if [ "$choice" -ge 1 ] && [ "$choice" -le "$count" ]; then
                        rectype="${name_list[$((choice - 1))]}"
                        break
                    else
                        echo "  -> Out of range. Choose between 1 and $count." >&2
                    fi
                done
            fi

            echo "Selected record type: $rectype" >&2
        fi
    fi

    # ---- Load the descriptor block ----------------------------------------
    local block=""
    if [ -n "$rectype" ]; then
        block=$(get_descriptor_block "$recfile" "$rectype")
        if [ -z "$block" ]; then
            echo "Warning: no descriptor block found for type '$rectype'." >&2
        fi
    fi

    # ---- Gather field rules -----------------------------------------------
    local mandatory_fields allowed_fields
    mandatory_fields=$(extract_fields "$block" "%mandatory:")
    allowed_fields=$(extract_fields "$block" "%allowed:")

    echo "" >&2
    echo "----- Building a new record -----" >&2
    [ -n "$rectype" ] && echo "Record type: $rectype" >&2
    echo "" >&2

    local -a args
    args=()

    local prompted=""

    # ---- 1. Prompt for ALLOWED fields (if defined) ------------------------
    if [ -n "$allowed_fields" ]; then
        echo "== Allowed fields ==" >&2
        local f
        for f in $allowed_fields; do
            # ### CHANGED: Skip the %rec: type-name field.  When -t is passed
            # to recins it writes the %rec: line itself; supplying it again as
            # a -f/-v pair produces a malformed record with a duplicate or
            # misplaced %rec: field.
            if [ -n "$rectype" ] && [ "$f" = "$rectype" ]; then
                continue
            fi

            local mand="no"
            in_list "$f" $mandatory_fields && mand="yes"
            local ftype
            ftype=$(get_field_type "$block" "$f")

            local val
            val=$(prompt_field "$f" "$mand" "$ftype")
            if [ -n "$val" ]; then
                args+=(-f "$f" -v "$val")
            fi
            prompted="$prompted $f"
        done
    fi

    # ---- 2. Prompt for any MANDATORY fields not in the allowed list -------
    if [ -n "$mandatory_fields" ]; then
        local f
        local printed_header="no"
        for f in $mandatory_fields; do
            if ! in_list "$f" $prompted; then
                if [ "$printed_header" = "no" ]; then
                    echo "== Mandatory fields ==" >&2
                    printed_header="yes"
                fi
                local ftype
                ftype=$(get_field_type "$block" "$f")
                local val
                val=$(prompt_field "$f" "yes" "$ftype")
                args+=(-f "$f" -v "$val")
                prompted="$prompted $f"
            fi
        done
    fi

    # ---- 3. Prompt for free-form / user-defined fields --------------------
    echo "" >&2
    echo "== Additional fields ==" >&2
    echo "Enter a field name (or press Enter / type 'done' to finish):" >&2
    while true; do
        printf "Field name: " >&2
        local newfield
        IFS= read -r newfield
        case "$newfield" in
        "" | done | DONE | q | quit) break ;;
        esac

        local ftype
        ftype=$(get_field_type "$block" "$newfield")
        local val
        val=$(prompt_field "$newfield" "no" "$ftype")
        args+=(-f "$newfield" -v "$val")
    done

    # ---- Sanity check: did we collect anything? ---------------------------
    if [ "${#args[@]}" -eq 0 ]; then
        echo "No fields entered. Aborting." >&2
        exit 1
    fi

    # ---- Build and run recins ---------------------------------------------
    echo "" >&2
    echo "Inserting record..." >&2
    if [ -n "$rectype" ]; then
        recins -t "$rectype" "${args[@]}" "$recfile"
    else
        recins "${args[@]}" "$recfile"
    fi

    local status=$?
    if [ "$status" -eq 0 ]; then
        echo "Record inserted successfully into '$recfile'." >&2
    else
        echo "recins failed (exit code $status)." >&2
        exit "$status"
    fi
}

main "$@"

Summary of Every Change

Improvement 1 — Type enforcement (validate_field_type)

Type What is checked
int ERE ^-?[0-9]+$
real ERE ^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$
bool yes/no/true/false/1/0 accepted; normalised to yes=/=no
date Delegated to the system date command (GNU -d or BSD -j tried in sequence)
line Always valid (shell read is already line-oriented)
url Scheme prefix check via ERE
email user@domain.tld shape check via ERE
uuid 8-4-4-4-12 hex pattern via ERE
regexp /pat/ Delimiters stripped; value tested with grep -E
range [lo] [hi] Numeric bounds checked via awk (handles both int and real)
unknown Passed through unchanged

prompt_field now calls validate_field_type and re-prompts on failure rather than silently accepting bad data.

Improvement 2 — Skip the record-type field in %allowed:

A single guard added in the allowed-fields loop:

if [ -n "$rectype" ] && [ "$f" = "$rectype" ]; then
    continue
fi

recins -t TYPE writes the %rec: line itself; providing TYPE again as -f TYPE -v … created a malformed duplicate field.

Improvement 3 — Single-keypress type selection

read -n1 -s reads exactly one character without waiting for Enter and without echoing it. The digit is then echoed manually so the user can confirm what they pressed. Non-digit input and out-of-range digits both produce an immediate inline error and re-prompt.

> Note: -n1 is a bash built-in flag available since bash 2.x, so it is fully compatible with your bash 3.2.57 target. It does not work with /bin/sh; the #!/usr/bin/env bash shebang is required, which the script already uses.

For records where allowed fields are defined, you cannot insert a record with any fields not listed. Make sure that a user cannot insert a custom field in those records that have fields defined by %allowed:.

Good catch. The "Additional fields" section currently runs unconditionally, so a user can append arbitrary fields even when %allowed: strictly limits what the record may contain. The fix has two parts:

  1. Skip the "Additional fields" loop entirely when %allowed: is non-empty.
  2. Guard the loop header message so it only prints when the loop will actually run.

Only the relevant section of main needs to change:

    # ---- 3. Prompt for free-form / user-defined fields --------------------
    # ### CHANGED: Only offer free-form entry when no %allowed: constraint
    # exists for this record type.  If %allowed: is defined, recins will
    # reject any field not listed there, so presenting the prompt would
    # only allow the user to produce a record that fails on insert.
    if [ -z "$allowed_fields" ]; then
        echo "" >&2
        echo "== Additional fields ==" >&2
        echo "Enter a field name (or press Enter / type 'done' to finish):" >&2
        while true; do
            printf "Field name: " >&2
            local newfield
            IFS= read -r newfield
            case "$newfield" in
            "" | done | DONE | q | quit) break ;;
            esac

            local ftype
            ftype=$(get_field_type "$block" "$newfield")
            local val
            val=$(prompt_field "$newfield" "no" "$ftype")
            args+=(-f "$newfield" -v "$val")
        done
    else
        echo "" >&2
        echo "== Additional fields ==" >&2
        echo "(Skipped: this record type has %allowed: constraints.)" >&2
    fi

The else branch with the skip message is optional but worth keeping — it tells the user explicitly why they are not being asked, rather than silently omitting a section they might expect to see.

When using this script, I was able to type in a phone number where the type was defined by a regex

The bug is in validate_field_type inside the regexp case. The pattern is extracted correctly, but the issue is in how the delimiters are stripped. If the recutils %type: declaration looks like:

%type: Phone regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/

then ftype arrives as regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/ and the stripping logic is:

local pattern="${ftype#regexp }"   # "/^[0-9]{3}-[0-9]{3}-[0-9]{4}$/"
pattern="${pattern#/}"             # "^[0-9]{3}-[0-9]{3}-[0-9]{4}$/"
pattern="${pattern%/}"             # "^[0-9]{3}-[0-9]{3}-[0-9]{4}$"

That looks correct in isolation. The actual problem is that get_field_type returns only the type portion, but validate_field_type receives ftype and extracts base_type with:

local base_type="${ftype%% *}"

When ftype is regexp /pattern/, base_type becomes regexp correctly. However, go back one step further to get_field_type and look at how it splits the %type: line:

n = split(rest, parts, /[[:space:]]+/)
names = parts[1]
typedef = ""
for (i = 2; i <= n; i++) {
    typedef = typedef (i==2 ? "" : " ") parts[i]
}

This splits on whitespace, so regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/ becomes parts[2]=regexp and parts[3]=/^[0-9]{3}-[0-9]{3}-[0-9]{4}$/, which reassembles correctly. The real fault is more subtle: grep -qE is being used to test the match, but the pattern is not anchored end-to-end by the grep call itself. A value like abc123-456-7890xyz would match a phone regexp if the pattern lacks anchors, but more importantly, if the recutils pattern uses anchors and the value is invalid, grep -E on some platforms treats the {n} quantifier differently depending on whether the input is ERE or BRE.

Run this test to confirm:

printf '123-456-789' | grep -qE '^[0-9]{3}-[0-9]{3}-[0-9]{4}$'
echo $?   # Should be 1 (no match), but may be 0 on BSD grep without -E

The fix has two parts:

  1. Use grep -P where available and fall back to grep -E, or use awk for the match, which has consistent ERE support across platforms including macOS.
  2. Add an explicit check that pattern is non-empty before attempting the match, so a malformed %type: line does not silently pass everything through.
        regexp)
            # ftype looks like: regexp /pattern/
            # Strip the leading "regexp " and the surrounding delimiters.
            local pattern="${ftype#regexp }"   # "/pattern/"
            pattern="${pattern#/}"             # "pattern/"
            pattern="${pattern%/}"             # "pattern"

            # ### CHANGED: Guard against a malformed or empty pattern, which
            # would otherwise cause grep to match everything and bypass
            # validation entirely.
            if [ -z "$pattern" ]; then
                echo "  -> Warning: empty regexp in type declaration; skipping validation." >&2
                printf '%s' "$value"
                return 0
            fi

            # ### CHANGED: Use awk for ERE matching instead of grep -E.
            # awk's ERE engine is consistent across macOS (BSD) and Linux,
            # whereas grep -E handling of {n,m} quantifiers and anchors
            # differs between BSD grep and GNU grep, causing valid regexps
            # to silently pass on some platforms.
            if printf '%s' "$value" | awk -v pat="$pattern" '$0 ~ pat {found=1} END {exit !found}'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Value does not match required pattern: /${pattern}/." >&2
            return 1
            ;;

Here is the same fix shown in full context inside validate_field_type so the indentation and surrounding cases are clear:

validate_field_type() {
    local value="$1"
    local ftype="$2"

    local base_type="${ftype%% *}"

    case "$base_type" in

        int)
            if printf '%s' "$value" | grep -qE '^-?[0-9]+$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected an integer (e.g. 42, -7)." >&2
            return 1
            ;;

        real)
            if printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a real number (e.g. 3.14, -2.0, 1e10)." >&2
            return 1
            ;;

        bool)
            case "$value" in
                1|[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee])
                    printf 'yes'
                    return 0
                    ;;
                0|[Nn][Oo]|[Ff][Aa][Ll][Ss][Ee])
                    printf 'no'
                    return 0
                    ;;
            esac
            echo "  -> Expected a boolean: yes/no, true/false, or 1/0." >&2
            return 1
            ;;

        date)
            if date -d "$value" >/dev/null 2>&1 || \
               date -j -f "%Y-%m-%d" "$value" "+%Y-%m-%d" >/dev/null 2>&1; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a valid date (e.g. 2024-07-04)." >&2
            return 1
            ;;

        line)
            printf '%s' "$value"
            return 0
            ;;

        url)
            if printf '%s' "$value" | \
               grep -qiE '^(https?|ftp|ftps|sftp|file)://[^[:space:]]+$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a URL beginning with http://, https://, ftp://, etc." >&2
            return 1
            ;;

        email)
            if printf '%s' "$value" | grep -qE '^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected an e-mail address (e.g. user@example.com)." >&2
            return 1
            ;;

        uuid)
            if printf '%s' "$value" | \
               grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Expected a UUID (e.g. 550e8400-e29b-41d4-a716-446655440000)." >&2
            return 1
            ;;

        regexp)
            local pattern="${ftype#regexp }"
            pattern="${pattern#/}"
            pattern="${pattern%/}"

            # ### CHANGED: Guard against empty pattern before attempting match.
            if [ -z "$pattern" ]; then
                echo "  -> Warning: empty regexp in type declaration; skipping validation." >&2
                printf '%s' "$value"
                return 0
            fi

            # ### CHANGED: Use awk for ERE matching instead of grep -E for
            # consistent behaviour across BSD (macOS) and GNU/Linux platforms.
            if printf '%s' "$value" | \
               awk -v pat="$pattern" '$0 ~ pat { found=1 } END { exit !found }'; then
                printf '%s' "$value"
                return 0
            fi
            echo "  -> Value does not match required pattern: /${pattern}/." >&2
            return 1
            ;;

        range)
            if ! printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then
                echo "  -> Expected a number for a range field." >&2
                return 1
            fi

            local lo hi
            lo=$(echo "$ftype" | awk '{print $2}')
            hi=$(echo "$ftype" | awk '{print $3}')

            if [ -n "$lo" ] && \
               awk -v v="$value" -v lo="$lo" 'BEGIN { exit !(v < lo) }'; then
                echo "  -> Value must be >= ${lo}." >&2
                return 1
            fi
            if [ -n "$hi" ] && \
               awk -v v="$value" -v hi="$hi" 'BEGIN { exit !(v > hi) }'; then
                echo "  -> Value must be <= ${hi}." >&2
                return 1
            fi
            printf '%s' "$value"
            return 0
            ;;

        *)
            printf '%s' "$value"
            return 0
            ;;
    esac
}

Summary of changes

# What changed Why
1 Empty-pattern guard added before the awk call A malformed %type: Field regexp with no pattern would cause awk '$0 ~ ""' to match everything, silently bypassing validation
2 grep -qE replaced with awk -v pat"$pattern" '$0 ~ pat'= awk's ERE engine handles {n} quantifiers and anchors consistently on both macOS BSD and GNU/Linux; grep -E behaviour diverges between the two on interval expressions

I am not sure if validate_field_type() is detecting regexp defined types correctly. My phonenum type is defined by "%typedef: phonenum regexp ^(:digit:{3})\-(:digit:{3})\-(:digit:{4})$" and is not identifying my incorrectly formatted phonenums. The email type is being enforced appropriately.

The issue is that your type is defined with %typedef: (a named/aliased type) rather than %type: (an inline type). The get_field_type function only scans for %type: lines, so it never finds phonenum and returns an empty string — meaning prompt_field skips validation entirely and accepts anything.

There are two separate problems to fix:

  1. get_field_type needs to resolve %typedef: aliases. When it finds that a field's type is a named alias like phonenum, it must look up that alias in the %typedef: declarations to find the underlying type string (e.g. regexp /^.../).
  2. The %typedef: format uses a three-part line (%typedef: name type /pattern/) rather than the two-part %type: format, so the parsing logic differs.

First, add a new function resolve_typedef that looks up a named type in the %typedef: declarations and returns the full type string:

# ---------------------------------------------------------------------------
# Step 5a: Resolve a named typedef back to its base type string.
#   e.g. given alias "phonenum" returns "regexp /^([[:digit:]]{3})...$/"
# ---------------------------------------------------------------------------
resolve_typedef() {
    local block="$1"
    local alias="$2"

    echo "$block" | awk -v a="$alias" '
        index($0, "%typedef:") == 1 {
            # %typedef: aliasname basetype [/pattern/ or args...]
            rest = substr($0, length("%typedef:") + 1)
            sub(/^[[:space:]]+/, "", rest)
            # First token is the alias name.
            n = split(rest, parts, /[[:space:]]+/)
            if (parts[1] == a) {
                # Reassemble everything after the alias name.
                result = ""
                for (i = 2; i <= n; i++) {
                    result = result (i==2 ? "" : " ") parts[i]
                }
                print result
                exit
            }
        }
    '
}

Then update get_field_type to call resolve_typedef when the type it finds is not one of the known base types:

# ---------------------------------------------------------------------------
# Step 5: Find the declared type (if any) for a specific field.
#         Resolves %typedef: aliases to their underlying type string.
# ---------------------------------------------------------------------------
get_field_type() {
    local block="$1"
    local field="$2"

    # First, find what %type: declares for this field (may be a base type
    # or a named alias defined by %typedef:).
    local raw_type
    raw_type=$(echo "$block" | awk -v f="$field" '
        index($0, "%type:") == 1 {
            rest = substr($0, length("%type:") + 1)
            sub(/^[[:space:]]+/, "", rest)
            n = split(rest, parts, /[[:space:]]+/)
            names = parts[1]
            typedef = ""
            for (i = 2; i <= n; i++) {
                typedef = typedef (i==2 ? "" : " ") parts[i]
            }
            m = split(names, namearr, /,/)
            for (j = 1; j <= m; j++) {
                if (namearr[j] == f) {
                    print typedef
                    exit
                }
            }
        }
    ')

    # If nothing was found, return empty.
    if [ -z "$raw_type" ]; then
        return 0
    fi

    # ### CHANGED: Check whether raw_type is a known base type.  If it is
    # not, treat it as a named alias and resolve it via %typedef: declarations.
    local base="${raw_type%% *}"
    case "$base" in
        int|real|bool|date|line|url|email|uuid|regexp|range)
            # Already a concrete type; return as-is.
            printf '%s' "$raw_type"
            ;;
        *)
            # Unknown base: look it up as a %typedef: alias.
            local resolved
            resolved=$(resolve_typedef "$block" "$base")
            if [ -n "$resolved" ]; then
                printf '%s' "$resolved"
            else
                # Alias not found; return the raw value so the caller
                # can still display it even if it cannot validate it.
                printf '%s' "$raw_type"
            fi
            ;;
    esac
}

Why the email type worked but phonenum did not

Declaration style Example Parsed by old code
%type: Email email base type email found and validated
%type: Phone phonenum + %typedef: phonenum regexp /pattern/ alias phonenum get_field_type returned phonenum, validate_field_type hit the * catch-all and passed everything through

The email type worked because email is a recognised base type keyword that validate_field_type handles directly. phonenum is an alias, so the old code handed the literal string "phonenum" to validate_field_type, which fell into the *) catch-all case and returned the value unchanged without any validation.

The new get_field_type resolves the alias chain before returning, so validate_field_type always receives a concrete type string like regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/ regardless of whether the field used %type: with a base type directly or with a named %typedef: alias.

For clarity, the type is defined with "%typedef: phonenum regexp ^(:digit:{3})\-(:digit:{3})\-(:digit:{4})$" but then the phonenum type is later used in the line "%type: phone phonenum". Additionally, ranges are defined in "%typedef:" with the format "%typedef: <field> <lower_bound> <upper_bound>"

Good clarification. There are two distinct parsing problems to address:

  1. %typedef: alias resolution — when get_field_type returns phonenum, the new resolve_typedef function looks it up and returns regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/ correctly. That part of the previous answer was right.
  2. range via %typedef: — the previous answer assumed ranges always come from %type: field range MIN MAX. But your format is %typedef: fieldname LOWER UPPER with no range keyword at all. The current validate_field_type will never see the word range as base_type for these fields, so they fall through to the * catch-all and pass everything unchecked.

The fix requires changes in two places.

resolve_typedef must detect when a typedef is a range (two numeric tokens, no keyword) and normalise it into the range MIN MAX string that validate_field_type already understands, rather than returning the raw two-token string which has no recognisable keyword.

get_field_type stays the same as the previous answer — it calls resolve_typedef when the raw type is not a known base type.

# ---------------------------------------------------------------------------
# Step 5a: Resolve a named typedef back to its base type string.
#
# Handles three typedef forms:
#
#   %typedef: phonenum regexp /pattern/
#       -> returns "regexp /pattern/"
#
#   %typedef: score range 1 100
#       -> returns "range 1 100"
#
#   %typedef: myage 0 120
#       -> two bare numbers, no keyword; normalised to "range 0 120"
#          so that validate_field_type sees the expected format.
# ---------------------------------------------------------------------------
resolve_typedef() {
    local block="$1"
    local alias="$2"

    echo "$block" | awk -v a="$alias" '
        index($0, "%typedef:") == 1 {
            rest = substr($0, length("%typedef:") + 1)
            sub(/^[[:space:]]+/, "", rest)

            # parts[1] = alias name, parts[2..] = type definition tokens
            n = split(rest, parts, /[[:space:]]+/)
            if (parts[1] != a) next

            # Reassemble the definition (everything after the alias name).
            defn = ""
            for (i = 2; i <= n; i++) {
                defn = defn (i==2 ? "" : " ") parts[i]
            }

            # ### CHANGED: Detect a bare numeric range (two tokens, both
            # numbers, no leading keyword).  recutils allows:
            #   %typedef: age 0 120
            # which means "range 0 to 120".  Normalise it to "range 0 120"
            # so validate_field_type receives a consistent format.
            if (n == 3 && parts[2] ~ /^-?[0-9]+(\.[0-9]+)?$/ \
                       && parts[3] ~ /^-?[0-9]+(\.[0-9]+)?$/) {
                print "range " parts[2] " " parts[3]
                exit
            }

            # ### CHANGED: Also normalise an explicit "range MIN MAX" typedef
            # in case the user wrote the keyword explicitly.
            if (parts[2] == "range") {
                print defn
                exit
            }

            # All other forms (regexp, bool, etc.) are returned verbatim.
            print defn
            exit
        }
    '
}

# ---------------------------------------------------------------------------
# Step 5: Find the declared type (if any) for a specific field.
#         Resolves %typedef: aliases to their underlying type string.
# ---------------------------------------------------------------------------
get_field_type() {
    local block="$1"
    local field="$2"

    # Find what %type: declares for this field.
    # The value may be a base type keyword or a named %typedef: alias.
    local raw_type
    raw_type=$(echo "$block" | awk -v f="$field" '
        index($0, "%type:") == 1 {
            rest = substr($0, length("%type:") + 1)
            sub(/^[[:space:]]+/, "", rest)
            n = split(rest, parts, /[[:space:]]+/)
            names = parts[1]
            typedef = ""
            for (i = 2; i <= n; i++) {
                typedef = typedef (i==2 ? "" : " ") parts[i]
            }
            m = split(names, namearr, /,/)
            for (j = 1; j <= m; j++) {
                if (namearr[j] == f) {
                    print typedef
                    exit
                }
            }
        }
    ')

    if [ -z "$raw_type" ]; then
        return 0
    fi

    # If raw_type is a known base type keyword, return it directly.
    # Otherwise treat it as a named alias and resolve via %typedef:.
    local base="${raw_type%% *}"
    case "$base" in
        int|real|bool|date|line|url|email|uuid|regexp|range)
            printf '%s' "$raw_type"
            ;;
        *)
            # ### CHANGED: Resolve the alias.  resolve_typedef normalises
            # bare numeric ranges and regexp patterns into the canonical
            # forms that validate_field_type expects.
            local resolved
            resolved=$(resolve_typedef "$block" "$base")
            if [ -n "$resolved" ]; then
                printf '%s' "$resolved"
            else
                # Alias not found in %typedef:; return raw so the caller
                # can still display the type name even without validation.
                printf '%s' "$raw_type"
            fi
            ;;
    esac
}

Summary of what each case now produces end-to-end

recfile declaration %type: line get_field_type returns validate_field_type receives
%typedef: phonenum regexp /^.../ %type: phone phonenum regexp /^.../ validated against ERE via awk
%typedef: score range 1 100 %type: Score score range 1 100 bounds checked via awk
%typedef: age 0 120 %type: Age age range 0 120 bounds checked via awk
(inline) %type: Count int int integer regex check
(inline) %type: Email email email email shape check

The key invariant is that validate_field_type only ever needs to handle the canonical forms (regexp /pat/, range [lo] [hi], int, etc.). All alias resolution and format normalisation is handled upstream in resolve_typedef and get_field_type, keeping the validation logic clean and easy to extend.

I'm wrong, the range type is defined as "%typedef: FIELD range LOWER UPPER" as you thought.

In that case the previous answer already handles the explicit range keyword path correctly via the parts[2] = "range"= branch. The bare two-number detection added in the previous answer was the only thing that was unnecessary. The fix is simply to remove that dead branch from resolve_typedef to keep the code clean and avoid silently misinterpreting a typedef that happens to have two numeric tokens for some other reason:

# ---------------------------------------------------------------------------
# Step 5a: Resolve a named typedef back to its base type string.
#
# Handles two typedef forms:
#
#   %typedef: phonenum regexp /pattern/
#       -> returns "regexp /pattern/"
#
#   %typedef: score range 1 100
#       -> returns "range 1 100"
# ---------------------------------------------------------------------------
resolve_typedef() {
    local block="$1"
    local alias="$2"

    echo "$block" | awk -v a="$alias" '
        index($0, "%typedef:") == 1 {
            rest = substr($0, length("%typedef:") + 1)
            sub(/^[[:space:]]+/, "", rest)

            # parts[1] = alias name, parts[2..] = type definition tokens
            n = split(rest, parts, /[[:space:]]+/)
            if (parts[1] != a) next

            # Reassemble the definition (everything after the alias name).
            defn = ""
            for (i = 2; i <= n; i++) {
                defn = defn (i==2 ? "" : " ") parts[i]
            }

            # ### CHANGED: Removed the bare two-number heuristic from the
            # previous answer.  recutils %typedef: range always uses the
            # explicit "range" keyword, so all forms are returned verbatim
            # and validate_field_type receives a consistent format without
            # any guesswork.
            print defn
            exit
        }
    '
}

The get_field_type function and validate_field_type function are unchanged from the previous answer. The full resolution chain for your two cases now looks like this:

recfile declaration %type: line resolve_typedef returns validate_field_type receives
%typedef: phonenum regexp /^.../ %type: phone phonenum regexp /^.../ validated against ERE via awk
%typedef: score range 1 100 %type: Score score range 1 100 bounds checked via awk

*