From 4894ada68f5e9705b655b878579c01ad3f9d9c76 Mon Sep 17 00:00:00 2001 From: billmanz Date: Thu, 18 Jun 2026 09:14:05 -0400 Subject: [PATCH] Initial commit of the zrec function. `zrec` exists on the internet now! For anyone who ever looks at this, yes, this was entirely vibe-coded. Sorry, I saw a niche that an LLM could help me fill. I find this to work for my usecase. --- zrec | 667 +++++++++++++++++++++ zrecins-opus-again.org | 600 +++++++++++++++++++ zrecins-opus.org | 1241 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 2508 insertions(+) create mode 100755 zrec create mode 100644 zrecins-opus-again.org create mode 100644 zrecins-opus.org diff --git a/zrec b/zrec new file mode 100755 index 0000000..04779ff --- /dev/null +++ b/zrec @@ -0,0 +1,667 @@ +#!/usr/bin/env bash +# +# zrec - Interactive record inserter for recutils recfiles +# Compatible with bash 3.2.57 (macOS) +# +# Usage: zrec [record-type] +# + +TAB=$(printf '\t') + +set -o pipefail + +# --------------------------------------------------------------------------- +# Step 0: Verify recutils (recins/recsel) is installed +# --------------------------------------------------------------------------- +check_recutils() { + if ! command -v recins >/dev/null 2>&1; then + echo "Error: 'recins' not found. Please install recutils." >&2 + echo " On macOS: brew install recutils" >&2 + exit 1 + fi + if ! command -v recinf >/dev/null 2>&1; then + echo "Error: 'recinf' not found. Please install recutils." >&2 + exit 1 + fi + + local ver + ver=$(recins --version 2>/dev/null | head -n1) + echo "Using: ${ver:-recutils (version unknown)}" +} + +# --------------------------------------------------------------------------- +# Step 1: Usage / argument handling +# --------------------------------------------------------------------------- +usage() { + echo "Usage: $(basename "$0") [record-type]" >&2 + echo " Interactively build and insert a record into a recfile." >&2 + exit 1 +} + +# --------------------------------------------------------------------------- +# Step 2: Extract the descriptor block for a given record type. +# --------------------------------------------------------------------------- +get_descriptor_block() { + local file="$1" + local rectype="$2" + + awk -v rt="$rectype" ' + BEGIN { RS=""; FS="\n" } + { + for (i = 1; i <= NF; i++) { + if ($i ~ /^%rec:[[:space:]]*/) { + name = $i + sub(/^%rec:[[:space:]]*/, "", name) + sub(/[[:space:]]+$/, "", name) + if (name == rt) { + print $0 + exit + } + } + } + } + ' "$file" +} + +# --------------------------------------------------------------------------- +# Step 3: List all record types defined in the file. +# --------------------------------------------------------------------------- +list_record_types() { + local file="$1" + + if command -v recinf >/dev/null 2>&1; then + recinf "$file" 2>/dev/null | awk ' + { + if (NF == 0) next + name = $NF + if ($1 ~ /^[0-9]+$/) { + printf "%s [count: %s]\n", name, $1 + } else { + print name + } + } + ' + else + grep '^%rec:' "$file" | sed 's/^%rec:[[:space:]]*//; s/[[:space:]]*$//' + fi +} + +# --------------------------------------------------------------------------- +# Step 4: Pull a space-separated list of fields from a directive. +# --------------------------------------------------------------------------- +extract_fields() { + local block="$1" + local directive="$2" + + echo "$block" | awk -v d="$directive" ' + index($0, d) == 1 { + line = substr($0, length(d) + 1) + print line + } + ' | tr '\n' ' ' | sed 's/^[[:space:]]*//; s/[[:space:]]*$//' +} + +# --------------------------------------------------------------------------- +# Step 5a: Resolve a named typedef back to its base type string. +# +# Handles two typedef forms: +# +# %typedef: phonenum regexp /pattern/ +# -> returns "regexp /pattern/" +# +# %typedef: score range 1 100 +# -> returns "range 1 100" +# --------------------------------------------------------------------------- +resolve_typedef() { + local block="$1" + local alias="$2" + + echo "$block" | awk -v a="$alias" ' + index($0, "%typedef:") == 1 { + rest = substr($0, length("%typedef:") + 1) + sub(/^[[:space:]]+/, "", rest) + + # parts[1] = alias name, parts[2..] = type definition tokens + n = split(rest, parts, /[[:space:]]+/) + if (parts[1] != a) next + + # Reassemble the definition (everything after the alias name). + defn = "" + for (i = 2; i <= n; i++) { + defn = defn (i==2 ? "" : " ") parts[i] + } + + print defn + exit + } + ' +} + +# --------------------------------------------------------------------------- +# Step 5b: Find the declared type (if any) for a specific field. +# Resolves %typedef: aliases to their underlying type string. +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +# Look up a field's type ONCE and return two tab-separated values: +# +# +# +# - resolved-type : the fully-resolved type string used for VALIDATION +# (e.g. "regexp /^.../", "int", "range 1 100"). +# - display-label : a human-friendly label used for the PROMPT +# (e.g. the alias "phonenum", or "regexp" for an +# inline pattern, or the base type name). +# +# Both fields may be empty if the field has no declared type. +# This performs a single awk pass over the descriptor block. +# --------------------------------------------------------------------------- +get_field_type_info() { + local block="$1" + local field="$2" + + # Single awk pass: pull the raw %type: token(s) for this field. + local raw_type + raw_type=$(echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ') + + # No declared type: emit an empty resolved/label pair. + if [ -z "$raw_type" ]; then + printf '\t' + return 0 + fi + + local base="${raw_type%% *}" + local resolved label + + case "$base" in + int | real | bool | date | line | url | email | uuid | regexp | range) + # Concrete inline type. + resolved="$raw_type" + # Hide the regexp pattern in the label; show just the keyword. + label="$base" + ;; + *) + # Named alias: resolve it (one typedef lookup, only when needed). + local typedef + typedef=$(resolve_typedef "$block" "$base") + if [ -n "$typedef" ]; then + resolved="$typedef" # validate against the underlying type + label="$base" # but display the friendly alias name + else + # Unknown alias: fall back to raw for both. + resolved="$raw_type" + label="$raw_type" + fi + ;; + esac + + printf '%s\t%s' "$resolved" "$label" +} + +# --------------------------------------------------------------------------- +# Helper: test membership of a word in a space-separated list. +# --------------------------------------------------------------------------- +in_list() { + local needle="$1" + shift + local item + for item in "$@"; do + [ "$item" = "$needle" ] && return 0 + done + return 1 +} + +# --------------------------------------------------------------------------- +# ### NEW: Step 6a: Validate (and where possible normalise) a value against +# a recutils field type string. Prints the (possibly normalised) value on +# stdout and returns 0 on success, or prints an error to stderr and returns +# 1 on failure. +# +# Supported types: +# int - integer +# real - floating-point number +# bool - yes/no/true/false/0/1 (normalised to "yes"/"no") +# date - ISO-8601 or free date accepted by `date` +# line - single line (no embedded newlines – always valid) +# url - must start with a recognised scheme +# email - must match user@domain +# uuid - must match 8-4-4-4-12 hex pattern +# regexp /pattern/ - value must match the given ERE +# range [min] [max] - min, max, or both may be omitted +# --------------------------------------------------------------------------- +validate_field_type() { + local value="$1" + local ftype="$2" + + local base_type="${ftype%% *}" + + case "$base_type" in + + int) + if printf '%s' "$value" | grep -qE '^-?[0-9]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected an integer (e.g. 42, -7)." >&2 + return 1 + ;; + + real) + if printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a real number (e.g. 3.14, -2.0, 1e10)." >&2 + return 1 + ;; + + bool) + case "$value" in + 1 | [Yy][Ee][Ss] | [Tt][Rr][Uu][Ee]) + printf 'yes' + return 0 + ;; + 0 | [Nn][Oo] | [Ff][Aa][Ll][Ss][Ee]) + printf 'no' + return 0 + ;; + esac + echo " -> Expected a boolean: yes/no, true/false, or 1/0." >&2 + return 1 + ;; + + date) + if date -d "$value" >/dev/null 2>&1 || + date -j -f "%Y-%m-%d" "$value" "+%Y-%m-%d" >/dev/null 2>&1; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a valid date (e.g. 2024-07-04)." >&2 + return 1 + ;; + + line) + printf '%s' "$value" + return 0 + ;; + + url) + if printf '%s' "$value" | + grep -qiE '^(https?|ftp|ftps|sftp|file)://[^[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a URL beginning with http://, https://, ftp://, etc." >&2 + return 1 + ;; + + email) + if printf '%s' "$value" | grep -qE '^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected an e-mail address (e.g. user@example.com)." >&2 + return 1 + ;; + + uuid) + if printf '%s' "$value" | + grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a UUID (e.g. 550e8400-e29b-41d4-a716-446655440000)." >&2 + return 1 + ;; + + regexp) + local pattern="${ftype#regexp }" + pattern="${pattern#/}" + pattern="${pattern%/}" + + # ### CHANGED: Guard against empty pattern before attempting match. + if [ -z "$pattern" ]; then + echo " -> Warning: empty regexp in type declaration; skipping validation." >&2 + printf '%s' "$value" + return 0 + fi + + # ### CHANGED: Use awk for ERE matching instead of grep -E for + # consistent behaviour across BSD (macOS) and GNU/Linux platforms. + if printf '%s' "$value" | + awk -v pat="$pattern" '$0 ~ pat { found=1 } END { exit !found }'; then + printf '%s' "$value" + return 0 + fi + echo " -> Value does not match required pattern: /${pattern}/." >&2 + return 1 + ;; + + range) + if ! printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then + echo " -> Expected a number for a range field." >&2 + return 1 + fi + + local lo hi + lo=$(echo "$ftype" | awk '{print $2}') + hi=$(echo "$ftype" | awk '{print $3}') + + if [ -n "$lo" ] && + awk -v v="$value" -v lo="$lo" 'BEGIN { exit !(v < lo) }'; then + echo " -> Value must be >= ${lo}." >&2 + return 1 + fi + if [ -n "$hi" ] && + awk -v v="$value" -v hi="$hi" 'BEGIN { exit !(v > hi) }'; then + echo " -> Value must be <= ${hi}." >&2 + return 1 + fi + printf '%s' "$value" + return 0 + ;; + + *) + printf '%s' "$value" + return 0 + ;; + esac +} + +# --------------------------------------------------------------------------- +# Step 6: Prompt for a single field's value. +# Returns the (possibly normalised) value via stdout. +# Mandatory fields loop until non-empty; typed fields loop until valid. +# --------------------------------------------------------------------------- +prompt_field() { + local field="$1" + local is_mandatory="$2" # "yes" / "no" + local ftype="$3" # resolved type, used for validation + local type_label="$4" # friendly label, used for display + + local label="$field" + local tags="" + [ "$is_mandatory" = "yes" ] && tags="${tags} [MANDATORY]" + [ -n "$type_label" ] && tags="${tags} (type: ${type_label})" + + local value validated + while true; do + printf "%s%s: " "$label" "$tags" >&2 + IFS= read -r value + + # Mandatory-empty check. + if [ "$is_mandatory" = "yes" ] && [ -z "$value" ]; then + echo " -> This field is mandatory and cannot be empty." >&2 + continue + fi + + # Skip type validation for empty optional fields. + if [ -z "$value" ]; then + printf '%s' "$value" + return 0 + fi + + # ### CHANGED: If a type is declared, validate (and possibly normalise) + # the input; re-prompt on failure. + if [ -n "$ftype" ]; then + if validated=$(validate_field_type "$value" "$ftype"); then + printf '%s' "$validated" + else + continue + fi + else + printf '%s' "$value" + fi + return 0 + done +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- +main() { + check_recutils + + [ "$#" -ge 1 ] || usage + local recfile="$1" + local rectype="$2" + + if [ ! -f "$recfile" ]; then + echo "Error: recfile '$recfile' does not exist." >&2 + exit 1 + fi + + # ---- Determine record type -------------------------------------------- + local available_types + available_types=$(list_record_types "$recfile") + + if [ -z "$rectype" ]; then + if [ -z "$available_types" ]; then + echo "No %rec: types found. This recfile uses free-form fields." >&2 + else + local -a display_list + local -a name_list + display_list=() + name_list=() + + local _line + while IFS= read -r _line; do + [ -z "$_line" ] && continue + display_list+=("$_line") + local plain="${_line%% \[count:*}" + name_list+=("$plain") + done <&2 + printf "Press Enter to use it, or type a different type: " >&2 + IFS= read -r rectype + [ -z "$rectype" ] && rectype="$only_type" + + else + # ----- Multiple types: single-keypress numbered menu ------- + echo "Available record types:" >&2 + local i=1 + local label + for label in "${display_list[@]}"; do + printf " %d) %s\n" "$i" "$label" >&2 + i=$((i + 1)) + done + + while true; do + # ### CHANGED: -n1 reads exactly one character without + # requiring Enter. -s suppresses echo so we can echo + # the digit ourselves on the same line for clarity. + printf "Select a record type (1-%d): " "$count" >&2 + local choice + IFS= read -r -n1 -s choice + # Echo the character the user pressed so they can see it. + printf '%s\n' "$choice" >&2 + + # Blank (Enter pressed directly) → re-prompt. + if [ -z "$choice" ]; then + echo " -> Please press a digit between 1 and $count." >&2 + continue + fi + + # Reject non-digits immediately. + case "$choice" in + *[!0-9]*) + echo " -> Please press a digit between 1 and $count." >&2 + continue + ;; + esac + + if [ "$choice" -ge 1 ] && [ "$choice" -le "$count" ]; then + rectype="${name_list[$((choice - 1))]}" + break + else + echo " -> Out of range. Choose between 1 and $count." >&2 + fi + done + fi + + echo "Selected record type: $rectype" >&2 + fi + fi + + # ---- Load the descriptor block ---------------------------------------- + local block="" + if [ -n "$rectype" ]; then + block=$(get_descriptor_block "$recfile" "$rectype") + if [ -z "$block" ]; then + echo "Warning: no descriptor block found for type '$rectype'." >&2 + fi + fi + + # ---- Gather field rules ----------------------------------------------- + local mandatory_fields allowed_fields + mandatory_fields=$(extract_fields "$block" "%mandatory:") + allowed_fields=$(extract_fields "$block" "%allowed:") + + echo "" >&2 + echo "----- Building a new record -----" >&2 + [ -n "$rectype" ] && echo "Record type: $rectype" >&2 + echo "" >&2 + + local -a args + args=() + + local prompted="" + + # ---- 1. Prompt for ALLOWED fields (if defined) ------------------------ + if [ -n "$allowed_fields" ]; then + echo "== Allowed fields ==" >&2 + local f + for f in $allowed_fields; do + # ### CHANGED: Skip the %rec: type-name field. When -t is passed + # to recins it writes the %rec: line itself; supplying it again as + # a -f/-v pair produces a malformed record with a duplicate or + # misplaced %rec: field. + if [ -n "$rectype" ] && [ "$f" = "$rectype" ]; then + continue + fi + + local mand="no" + in_list "$f" $mandatory_fields && mand="yes" + local info ftype label + info=$(get_field_type_info "$block" "$f") + ftype="${info%%"$TAB"*}" # everything before the tab -> resolved type + label="${info#*"$TAB"}" # everything after the tab -> display label + + local val + val=$(prompt_field "$f" "$mand" "$ftype" "$label") + if [ -n "$val" ]; then + args+=(-f "$f" -v "$val") + fi + prompted="$prompted $f" + done + fi + + # ---- 2. Prompt for any MANDATORY fields not in the allowed list ------- + if [ -n "$mandatory_fields" ]; then + local f + local printed_header="no" + for f in $mandatory_fields; do + if ! in_list "$f" $prompted; then + if [ "$printed_header" = "no" ]; then + echo "== Mandatory fields ==" >&2 + printed_header="yes" + fi + local info ftype label + info=$(get_field_type_info "$block" "$f") + ftype="${info%%"$TAB"*}" # everything before the tab -> resolved type + label="${info#*"$TAB"}" # everything after the tab -> display label + + local val + val=$(prompt_field "$f" "$mand" "$ftype" "$label") + args+=(-f "$f" -v "$val") + prompted="$prompted $f" + fi + done + fi + + # ---- 3. Prompt for free-form / user-defined fields -------------------- + # ### CHANGED: Only offer free-form entry when no %allowed: constraint + # exists for this record type. If %allowed: is defined, recins will + # reject any field not listed there, so presenting the prompt would + # only allow the user to produce a record that fails on insert. + if [ -z "$allowed_fields" ]; then + echo "" >&2 + echo "== Additional fields ==" >&2 + echo "Enter a field name (or press Enter / type 'done' to finish):" >&2 + while true; do + printf "Field name: " >&2 + local newfield + IFS= read -r newfield + case "$newfield" in + "" | done | DONE | q | quit) break ;; + esac + + local info ftype label + info=$(get_field_type_info "$block" "$newfield") + ftype="${info%%"$TAB"*}" # everything before the tab -> resolved type + label="${info#*"$TAB"}" # everything after the tab -> display label + + local val + val=$(prompt_field "$newfield" "$mand" "$ftype" "$label") + + args+=(-f "$newfield" -v "$val") + done + else + echo "" >&2 + echo "== Additional fields ==" >&2 + echo "(Skipped: this record type has %allowed: constraints.)" >&2 + fi + + # ---- Sanity check: did we collect anything? --------------------------- + if [ "${#args[@]}" -eq 0 ]; then + echo "No fields entered. Aborting." >&2 + exit 1 + fi + + # ---- Build and run recins --------------------------------------------- + echo "" >&2 + echo "Inserting record..." >&2 + + local status + if [ -n "$rectype" ]; then + recins -t "$rectype" "${args[@]}" "$recfile" + status=$? + else + recins "${args[@]}" "$recfile" + status=$? + fi + + if [ "$status" -eq 0 ]; then + echo "Record inserted successfully into '$recfile'." >&2 + else + echo "recins failed (exit code $status)." >&2 + exit "$status" + fi + +} + +main "$@" diff --git a/zrecins-opus-again.org b/zrecins-opus-again.org new file mode 100644 index 0000000..5c11da1 --- /dev/null +++ b/zrecins-opus-again.org @@ -0,0 +1,600 @@ +:PROPERTIES: +:GPTEL_MODEL: claude-opus-4-8 +:GPTEL_BACKEND: Claude +:GPTEL_SYSTEM: Act as a Code Review Specialist. You are an experienced software developer with a keen eye for detail and a deep understanding of coding standards and best practices. \n\nYour task is to review the code provided by the user, focusing on areas such as:\n- Code quality and readability\n- Adherence to coding standards\n- Potential bugs and security vulnerabilities\n- Performance optimization\n\nYou will:\n- Provide constructive feedback on the code\n- Suggest improvements and refactoring where necessary\n- Highlight any security concerns\n- Ensure the code follows best practices\n\nRules:\n- Be objective and professional in your feedback\n- Prioritize clarity and maintainability in your suggestions\n- Consider the specific context and requirements provided with the code +:GPTEL_TOOLS: +:GPTEL_MAX_TOKENS: 100000 +:GPTEL_BOUNDS: ((response (1503 7598) (7606 13674) (13724 13821) (13856 19454) (19485 19519) (19522 23716))) +:END: + +*** Help me update this script. It functions well, but I want to change one aspect. When prompted for a field that is a regexp type, I would like the type name to appear in parentheses instead of the long regex. For example, if I define a the phonenum type with "%typedef: phonenum regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/" then use "%type: phone phonenum" I would prefer the displayed type to say (phonenum). The long regex is distracting and not helpful to me. + +* Code Review & Solution + +This is a well-structured script with good documentation. Your requested change is reasonable—displaying the typedef alias name is far more user-friendly than dumping a raw regex into the prompt. + +** The Core Issue + +Currently, =get_field_type()= resolves the alias to its underlying type string and discards the alias name. By the time =prompt_field()= receives the type, the friendly name (=phonenum=) is gone—only the regex remains. + +To fix this cleanly, we need to *preserve the alias name* for display purposes while *keeping the resolved type* for validation. These are two separate concerns, so I recommend separating them. + +** Recommended Approach + +I suggest adding a small helper that returns the /display label/ for a field, and passing both a display label and the validation type into =prompt_field()=. This keeps validation logic untouched (lower risk) while giving you the display you want. + +*** Step 1: Add a display-name resolver + +Add this function near =get_field_type()=: + +#+begin_src bash +# --------------------------------------------------------------------------- +# Determine a human-friendly type label for display purposes. +# - If %type: names a base type, show that base type. +# - If %type: names a %typedef: alias, show the alias name (NOT its +# expanded definition), which is far more readable for things like +# long regexp patterns. +# Prints the label (may be empty if no type is declared). +# --------------------------------------------------------------------------- +get_field_type_label() { + local block="$1" + local field="$2" + + # Find the raw token(s) %type: declares for this field. + local raw_type + raw_type=$(echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ') + + [ -z "$raw_type" ] && return 0 + + local base="${raw_type%% *}" + case "$base" in + int | real | bool | date | line | url | email | uuid | regexp | range) + # Concrete inline type: show it as-is (already concise). + printf '%s' "$raw_type" + ;; + *) + # Named alias: prefer the alias name itself for display. + if [ -n "$(resolve_typedef "$block" "$base")" ]; then + printf '%s' "$base" + else + printf '%s' "$raw_type" + fi + ;; + esac +} +#+end_src + +*** Step 2: Update =prompt_field()= to accept a display label + +Add a fourth parameter used only for the prompt text: + +#+begin_src bash +prompt_field() { + local field="$1" + local is_mandatory="$2" # "yes" / "no" + local ftype="$3" # resolved type, used for validation + local display_type="$4" # friendly label, used only in the prompt + + # Fall back to the validation type if no display label was provided. + [ -z "$display_type" ] && display_type="$ftype" + + local label="$field" + local tags="" + [ "$is_mandatory" = "yes" ] && tags="${tags} [MANDATORY]" + [ -n "$display_type" ] && tags="${tags} (type: ${display_type})" + + # ... rest unchanged ... +#+end_src + +The validation block inside still uses =$ftype= (the resolved type), so =phonenum= validates against the underlying regex exactly as before. + +*** Step 3: Update the three call sites in =main()= + +In each of the three prompt locations, compute the label alongside the type: + +#+begin_src bash +# --- Allowed fields loop --- +local ftype ftype_label +ftype=$(get_field_type "$block" "$f") +ftype_label=$(get_field_type_label "$block" "$f") +val=$(prompt_field "$f" "$mand" "$ftype" "$ftype_label") +#+end_src + +#+begin_src bash +# --- Mandatory fields loop --- +local ftype ftype_label +ftype=$(get_field_type "$block" "$f") +ftype_label=$(get_field_type_label "$block" "$f") +val=$(prompt_field "$f" "yes" "$ftype" "$ftype_label") +#+end_src + +#+begin_src bash +# --- Additional free-form fields loop --- +local ftype ftype_label +ftype=$(get_field_type "$block" "$newfield") +ftype_label=$(get_field_type_label "$block" "$newfield") +val=$(prompt_field "$newfield" "no" "$ftype" "$ftype_label") +#+end_src + +** Result + +With your example: + +#+begin_src +%typedef: phonenum regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/ +%type: phone phonenum +#+end_src + +The prompt now reads: + +#+begin_src +phone (type: phonenum): +#+end_src + +…while validation still enforces the underlying regex. + +--- + +** Additional Observations (optional, not required for your change) + +A few items worth noting for future maintenance: + +1. *Lost exit status after command substitution* (existing bug): + #+begin_src bash + validated=$(validate_field_type "$value" "$ftype") + if [ $? -ne 0 ]; then + #+end_src + =local validated=$(...)= would mask =$?=, but here you've correctly split the declaration from assignment in =prompt_field=—so this is actually fine. Worth keeping that pattern consistent if you ever refactor. + +2. *Redundant awk invocations.* =get_field_type= and the new =get_field_type_label= share nearly identical awk logic to extract the raw type. If this script grows, consider a single helper that returns the raw =%type:= token, with thin wrappers for "resolved" vs "display" forms—reducing duplication and keeping the two in sync. + +3. *=range= display* is already concise (=range 1 100=), so I deliberately left it as a concrete type in the label resolver. If you later add named range typedefs (e.g. =%typedef: score range 1 100=), the alias-name logic will handle them automatically and show =(type: score)=. + +These are minor and don't block your change. The three-step modification above achieves exactly what you asked with minimal risk to the working validation logic. + +*** + +* Code Review: Displaying Type Aliases Instead of Resolved Regex + +Good request — showing =(phonenum)= is far more user-friendly than dumping a raw regex at the prompt. The challenge is that your current architecture *resolves the alias too early*: =get_field_type= returns the /resolved/ base type (e.g. =regexp /^.../=), so by the time you build the prompt label, the original alias name is already lost. + +** The Core Issue + +You have two competing needs: + +1. *Validation* needs the /resolved/ type string (e.g. =regexp /pattern/=). +2. *Display* wants the /alias/ name (e.g. =phonenum=) when one exists. + +The cleanest fix is to separate the *display name* from the *validation type*, rather than overloading a single return value. + +** Recommended Approach + +*** 1. Add a helper to get the raw (unresolved) declared type + +Extract the lookup logic that's currently buried inside =get_field_type= so you can reuse it: + +#+begin_src bash +# Returns the raw type string as declared in %type: (may be an alias). +get_field_raw_type() { + local block="$1" + local field="$2" + + echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ' +} +#+end_src + +Then refactor =get_field_type= to call it (DRY): + +#+begin_src bash +get_field_type() { + local block="$1" + local field="$2" + + local raw_type + raw_type=$(get_field_raw_type "$block" "$field") + [ -z "$raw_type" ] && return 0 + + local base="${raw_type%% *}" + case "$base" in + int | real | bool | date | line | url | email | uuid | regexp | range) + printf '%s' "$raw_type" + ;; + *) + local resolved + resolved=$(resolve_typedef "$block" "$base") + if [ -n "$resolved" ]; then + printf '%s' "$resolved" + else + printf '%s' "$raw_type" + fi + ;; + esac +} +#+end_src + +*** 2. Compute a display label + +The friendliest behavior: show the alias when one exists, otherwise show the base type (and notably, *avoid showing regexes for inline =regexp= types too*, since you said the long regex is distracting). Add a small helper: + +#+begin_src bash +# Produce a human-friendly type label for prompting. +# - If %type: uses a named alias -> show the alias name. +# - If the resolved type is regexp -> show "regexp" (hide the pattern). +# - Otherwise -> show the base type name. +display_type_label() { + local block="$1" + local field="$2" + + local raw resolved base + raw=$(get_field_raw_type "$block" "$field") + [ -z "$raw" ] && return 0 + + base="${raw%% *}" + case "$base" in + int | real | bool | date | line | url | email | uuid | regexp | range) + # Inline/base type: hide regexp pattern, show just the keyword. + printf '%s' "$base" + ;; + *) + # It's an alias name — show it verbatim if it resolves. + resolved=$(resolve_typedef "$block" "$base") + if [ -n "$resolved" ]; then + printf '%s' "$base" + else + printf '%s' "$raw" + fi + ;; + esac +} +#+end_src + +*** 3. Pass both values into =prompt_field= + +=prompt_field= should validate against the resolved type but /display/ the label. Add a fourth parameter: + +#+begin_src bash +prompt_field() { + local field="$1" + local is_mandatory="$2" # "yes" / "no" + local ftype="$3" # resolved type, used for validation + local type_label="$4" # friendly label, used for display + + local label="$field" + local tags="" + [ "$is_mandatory" = "yes" ] && tags="${tags} [MANDATORY]" + [ -n "$type_label" ] && tags="${tags} (type: ${type_label})" + # ... rest unchanged ... +#+end_src + +*** 4. Update the three call sites + +In each of the three prompting loops in =main=, compute the label alongside the type: + +#+begin_src bash +local ftype label +ftype=$(get_field_type "$block" "$f") +label=$(display_type_label "$block" "$f") + +local val +val=$(prompt_field "$f" "$mand" "$ftype" "$label") +#+end_src + +Apply the same pattern to the mandatory-fields loop and the free-form loop. + +** Result + +With your example: + +#+begin_src +%typedef: phonenum regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/ +%type: phone phonenum +#+end_src + +The prompt now reads: + +#+begin_src +phone (type: phonenum): +#+end_src + +…while validation still runs against the fully resolved =regexp /.../= pattern. Best of both worlds. + +** Additional Observations (not blocking your change) + +1. *=$?= capture bug* — In =prompt_field=: + #+begin_src bash + validated=$(validate_field_type "$value" "$ftype") + if [ $? -ne 0 ]; then + #+end_src + This works /only because/ nothing runs between the assignment and the test. It's fragile. Prefer: + #+begin_src bash + if validated=$(validate_field_type "$value" "$ftype"); then + printf '%s' "$validated" + else + continue + fi + #+end_src + +2. *Repeated =awk= parsing* — =get_field_raw_type= is now called twice per field (once via =get_field_type=, once via =display_type_label=). For small recfiles this is negligible, but if you ever profile this, consider computing both the resolved type and the label in a single helper that returns both (e.g. via a delimiter or two global vars). + +3. *=recins= exit-code capture* — Same =$?= fragility at the end of =main=: + #+begin_src bash + recins -t "$rectype" "${args[@]}" "$recfile" + local status=$? + #+end_src + This is fine as written, but worth keeping consistent with the =if recins ...; then= idiom for robustness. + +These are optional cleanups. The four changes above fully satisfy your stated requirement. + +*** I think you have raised a great point. Let's modify this to compute both the resolved type and the label in a single helper that returns both to prevent repeated awk parsing. + +- Consolidating the Type Lookup into a Single AWK Pass + +Good call. Let's eliminate the redundant parsing by having one helper do the =awk= work once and return /both/ the validation type and the display label. The cleanest, bash-3.2-safe way to "return two values" is to print them on one line with a delimiter the data can't contain, then split with parameter expansion. + +* The Combined Helper + +A tab is the safest delimiter here — recfile field types are whitespace-separated tokens, but a literal /tab/ won't appear inside =%type:= declarations in practice, and we control both halves. + +#+begin_src bash +# --------------------------------------------------------------------------- +# Look up a field's type ONCE and return two tab-separated values: +# +# +# +# - resolved-type : the fully-resolved type string used for VALIDATION +# (e.g. "regexp /^.../", "int", "range 1 100"). +# - display-label : a human-friendly label used for the PROMPT +# (e.g. the alias "phonenum", or "regexp" for an +# inline pattern, or the base type name). +# +# Both fields may be empty if the field has no declared type. +# This performs a single awk pass over the descriptor block. +# --------------------------------------------------------------------------- +get_field_type_info() { + local block="$1" + local field="$2" + + # Single awk pass: pull the raw %type: token(s) for this field. + local raw_type + raw_type=$(echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ') + + # No declared type: emit an empty resolved/label pair. + if [ -z "$raw_type" ]; then + printf '\t' + return 0 + fi + + local base="${raw_type%% *}" + local resolved label + + case "$base" in + int | real | bool | date | line | url | email | uuid | regexp | range) + # Concrete inline type. + resolved="$raw_type" + # Hide the regexp pattern in the label; show just the keyword. + label="$base" + ;; + *) + # Named alias: resolve it (one typedef lookup, only when needed). + local typedef + typedef=$(resolve_typedef "$block" "$base") + if [ -n "$typedef" ]; then + resolved="$typedef" # validate against the underlying type + label="$base" # but display the friendly alias name + else + # Unknown alias: fall back to raw for both. + resolved="$raw_type" + label="$raw_type" + fi + ;; + esac + + printf '%s\t%s' "$resolved" "$label" +} +#+end_src + +* Splitting the Result at the Call Sites + +Use a literal tab in the parameter expansion. To keep it readable and avoid embedding a raw tab in the source, define a tab once near the top of the script: + +#+begin_src bash +# Near the top of the script, after the shebang/comments: +TAB=$(printf '\t') +#+end_src + +Then each call site becomes: + +#+begin_src bash +local info ftype label +info=$(get_field_type_info "$block" "$f") +ftype="${info%%"$TAB"*}" # everything before the tab -> resolved type +label="${info#*"$TAB"}" # everything after the tab -> display label + +local val +val=$(prompt_field "$f" "$mand" "$ftype" "$label") +#+end_src + +Apply the identical three-line split in all three loops (allowed, mandatory, free-form), substituting =$f= / =$newfield= as appropriate. + +#+begin_quote +/Note on quoting:/ =${info%%"$TAB"*}= quotes =$TAB= so the tab is treated literally rather than as a glob pattern. This is correct in bash 3.2. Since both halves always exist (the helper always prints exactly one tab), =%%= and =#= behave predictably even when one side is empty. +#+end_quote + +* Retiring the Now-Redundant Helpers + +With =get_field_type_info= in place, you can /delete/ both: + +- =get_field_type= +- =display_type_label= (the earlier proposal) + +…and you no longer need a standalone =get_field_raw_type=, since its logic now lives inline in the single pass. =resolve_typedef= stays — it's still called (lazily, only for actual aliases). + +* Why This Is Better + +| Aspect | Before | After | +|--------+--------+-------| +| =awk= passes over block (per field) | 2 | 1 | +| =resolve_typedef= calls (per field) | up to 2 | at most 1, only for aliases | +| Source of truth for "raw type" | duplicated in 2 functions | single location | +| Risk of label/type drifting out of sync | real | eliminated | + +The validation behavior is unchanged — =phonenum= still validates against its underlying regex — while the prompt shows the clean =(type: phonenum)= label, and we've halved the parsing work per field. + +* One Caveat Worth Flagging + +If a descriptor block could ever legitimately contain a /tab character/ inside a =%type:= line, this delimiter scheme would break. In standard recutils files that doesn't happen (type tokens are space-separated), so a tab is safe. If you wanted to be bulletproof, you could split on a control character like =$'\x1f'= (ASCII Unit Separator) instead — same mechanism, even less chance of collision. For this script, tab is perfectly adequate. + +*** Finally, help me fix the $?= fragility at the end of =main=. + +- Fixing the =$?= Capture at the End of =main= + +The current end-of-=main= pattern relies on =$?= surviving from one statement to the next, which works here but is fragile — any future edit that slips a command between the =recins= call and the capture will silently break the error handling. Folding the call directly into the =if= condition removes that footgun entirely. + +- The Current Code + +#+begin_src bash + # ---- Build and run recins --------------------------------------------- + echo "" >&2 + echo "Inserting record..." >&2 + if [ -n "$rectype" ]; then + recins -t "$rectype" "${args[@]}" "$recfile" + else + recins "${args[@]}" "$recfile" + fi + + local status=$? + if [ "$status" -eq 0 ]; then + echo "Record inserted successfully into '$recfile'." >&2 + else + echo "recins failed (exit code $status)." >&2 + exit "$status" + fi +#+end_src + +The =$?= here actually reflects the =if=/=fi= block's exit status (which is the exit status of whichever =recins= branch ran), so it /works/ — but only by happy coincidence of nothing intervening. + +- The Fix + +Capture the status immediately after each branch, so it's bound tightly to the command that produced it: + +#+begin_src bash + # ---- Build and run recins --------------------------------------------- + echo "" >&2 + echo "Inserting record..." >&2 + + local status + if [ -n "$rectype" ]; then + recins -t "$rectype" "${args[@]}" "$recfile" + status=$? + else + recins "${args[@]}" "$recfile" + status=$? + fi + + if [ "$status" -eq 0 ]; then + echo "Record inserted successfully into '$recfile'." >&2 + else + echo "recins failed (exit code $status)." >&2 + exit "$status" + fi +#+end_src + +- Why This Is Better + +The =status=$?= now sits on the line /immediately/ following each =recins= invocation, inside the same branch. There is no possibility of an intervening command — and crucially, the intent is now self-evident to anyone editing later: "this captures the exit code of /this/ command." + +#+begin_quote +/Note:/ =local status= is declared once before the =if= so the variable has function scope, but the /assignment/ happens inside each branch right after the relevant call. Splitting the declaration from the assignment also avoids the classic bash pitfall where =local status=$(...)= would mask the command's exit code — though that doesn't apply to a plain =$?= capture, it's a good habit to keep declaration and value-capture separate when =$?= matters. +#+end_quote + +- An Even Cleaner Alternative + +If you'd rather avoid the explicit =status= variable altogether, you can branch on =recins= directly. This requires duplicating the success/failure messaging slightly, or factoring it out: + +#+begin_src bash + # ---- Build and run recins --------------------------------------------- + echo "" >&2 + echo "Inserting record..." >&2 + + run_recins() { + if [ -n "$rectype" ]; then + recins -t "$rectype" "${args[@]}" "$recfile" + else + recins "${args[@]}" "$recfile" + fi + } + + if run_recins; then + echo "Record inserted successfully into '$recfile'." >&2 + else + status=$? + echo "recins failed (exit code $status)." >&2 + exit "$status" + fi +#+end_src + +Here the =if= consumes =recins='s exit status directly, and =$?= inside the =else= branch correctly reflects the failed command (the =else= branch runs /because/ the condition was non-zero, and =$?= still holds that value at the first line of the branch). + +#+begin_quote +/Caveat for the alternative:/ =run_recins= would need access to =rectype=, =args=, and =recfile=. Since these are =local= to =main=, defining =run_recins= /inside/ =main= (as shown) keeps them in scope. If you'd prefer not to nest a function definition inside =main=, stick with the first fix. +#+end_quote + +- Recommendation + +Go with the /first fix/. It's the smallest possible change, requires no nested function or scope juggling, reads clearly, and directly addresses the fragility you identified. The =status=$?= immediately after each call is the idiomatic, defensive pattern for capturing exit codes in bash. + +*** diff --git a/zrecins-opus.org b/zrecins-opus.org new file mode 100644 index 0000000..dc43a91 --- /dev/null +++ b/zrecins-opus.org @@ -0,0 +1,1241 @@ +:PROPERTIES: +:GPTEL_MODEL: claude-sonnet-4-6 +:GPTEL_BACKEND: Claude +:GPTEL_SYSTEM: Act as a Code Review Specialist. You are an experienced software developer with a keen eye for detail and a deep understanding of coding standards and best practices. \n\nYour task is to review the code provided by the user, focusing on areas such as:\n- Code quality and readability\n- Adherence to coding standards\n- Potential bugs and security vulnerabilities\n- Performance optimization\n\nYou will:\n- Provide constructive feedback on the code\n- Suggest improvements and refactoring where necessary\n- Highlight any security concerns\n- Ensure the code follows best practices\n\nRules:\n- Be objective and professional in your feedback\n- Prioritize clarity and maintainability in your suggestions\n- Consider the specific context and requirements provided with the code +:GPTEL_TOOLS: +:GPTEL_MAX_TOKENS: 100000 +:GPTEL_BOUNDS: ((response (1461 23617) (23832 25622) (25728 34443) (34757 39949) (40269 46582) (46681 49037))) +:END: + +*** Help me improve the attached script. I have 3 improvements I would like. First, when the field has a provided type, enforce the correct formatting. Second, when a record uses the %allowed types, do not prompt for the user response field and value, as it creates a malformed call to recins. Third, when the script prompts the user to select the record type, respond immediately to the keypress, do not wait for the user to press enter. + +I'll address each of your three improvements systematically. + +--- + +** Improvement 1: Enforce Field Type Formatting + +The =get_field_type= function already extracts the type string, but =prompt_field= ignores it for validation. We need a validation/coercion layer that understands recutils' built-in types: =int=, =real=, =bool=, =date=, =line=, =url=, =email=, =uuid=, =regexp=, and =range=. + +** Improvement 2: Skip the =%rec:= Response Field + +When =%allowed:= is declared, =recins= handles the =%rec:= type field itself via =-t=. Prompting for it creates a duplicate/malformed call. We need to filter it out of the allowed-fields loop. + +** Improvement 3: Single-Keypress Record Type Selection + +The numbered menu currently waits for =Enter=. We can use =read -n1= (bash built-in, available in 3.2) to respond immediately to a single keypress. + +--- + +Here is the revised script with all three improvements applied, with change markers =### CHANGED:= and =### NEW:= so every edit is easy to locate: + +#+begin_src bash +#!/usr/bin/env bash +# +# recadd - Interactive record inserter for recutils recfiles +# Compatible with bash 3.2.57 (macOS) +# +# Usage: recadd [record-type] +# + +set -o pipefail + +# --------------------------------------------------------------------------- +# Step 0: Verify recutils (recins/recsel) is installed +# --------------------------------------------------------------------------- +check_recutils() { + if ! command -v recins >/dev/null 2>&1; then + echo "Error: 'recins' not found. Please install recutils." >&2 + echo " On macOS: brew install recutils" >&2 + exit 1 + fi + if ! command -v recinf >/dev/null 2>&1; then + echo "Error: 'recinf' not found. Please install recutils." >&2 + exit 1 + fi + + local ver + ver=$(recins --version 2>/dev/null | head -n1) + echo "Using: ${ver:-recutils (version unknown)}" +} + +# --------------------------------------------------------------------------- +# Step 1: Usage / argument handling +# --------------------------------------------------------------------------- +usage() { + echo "Usage: $(basename "$0") [record-type]" >&2 + echo " Interactively build and insert a record into a recfile." >&2 + exit 1 +} + +# --------------------------------------------------------------------------- +# Step 2: Extract the descriptor block for a given record type. +# --------------------------------------------------------------------------- +get_descriptor_block() { + local file="$1" + local rectype="$2" + + awk -v rt="$rectype" ' + BEGIN { RS=""; FS="\n" } + { + for (i = 1; i <= NF; i++) { + if ($i ~ /^%rec:[[:space:]]*/) { + name = $i + sub(/^%rec:[[:space:]]*/, "", name) + sub(/[[:space:]]+$/, "", name) + if (name == rt) { + print $0 + exit + } + } + } + } + ' "$file" +} + +# --------------------------------------------------------------------------- +# Step 3: List all record types defined in the file. +# --------------------------------------------------------------------------- +list_record_types() { + local file="$1" + + if command -v recinf >/dev/null 2>&1; then + recinf "$file" 2>/dev/null | awk ' + { + if (NF == 0) next + name = $NF + if ($1 ~ /^[0-9]+$/) { + printf "%s [count: %s]\n", name, $1 + } else { + print name + } + } + ' + else + grep '^%rec:' "$file" | sed 's/^%rec:[[:space:]]*//; s/[[:space:]]*$//' + fi +} + +# --------------------------------------------------------------------------- +# Step 4: Pull a space-separated list of fields from a directive. +# --------------------------------------------------------------------------- +extract_fields() { + local block="$1" + local directive="$2" + + echo "$block" | awk -v d="$directive" ' + index($0, d) == 1 { + line = substr($0, length(d) + 1) + print line + } + ' | tr '\n' ' ' | sed 's/^[[:space:]]*//; s/[[:space:]]*$//' +} + +# --------------------------------------------------------------------------- +# Step 5: Find the declared type (if any) for a specific field. +# --------------------------------------------------------------------------- +get_field_type() { + local block="$1" + local field="$2" + + echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ' +} + +# --------------------------------------------------------------------------- +# Helper: test membership of a word in a space-separated list. +# --------------------------------------------------------------------------- +in_list() { + local needle="$1" + shift + local item + for item in $*; do + [ "$item" = "$needle" ] && return 0 + done + return 1 +} + +# --------------------------------------------------------------------------- +# ### NEW: Step 6a: Validate (and where possible normalise) a value against +# a recutils field type string. Prints the (possibly normalised) value on +# stdout and returns 0 on success, or prints an error to stderr and returns +# 1 on failure. +# +# Supported types: +# int - integer +# real - floating-point number +# bool - yes/no/true/false/0/1 (normalised to "yes"/"no") +# date - ISO-8601 or free date accepted by `date` +# line - single line (no embedded newlines – always valid) +# url - must start with a recognised scheme +# email - must match user@domain +# uuid - must match 8-4-4-4-12 hex pattern +# regexp /pattern/ - value must match the given ERE +# range [min] [max] - min, max, or both may be omitted +# --------------------------------------------------------------------------- +validate_field_type() { + local value="$1" + local ftype="$2" # e.g. "int", "range 1 10", "regexp /foo/" + + # Extract the base type keyword (first word). + local base_type="${ftype%% *}" + + case "$base_type" in + + int) + # Allow optional leading sign. + case "$value" in + ''|*[!0-9-]*) ;; # fall through to error + *) + # Reject bare "-" or values with "-" not at position 1. + if printf '%s' "$value" | grep -qE '^-?[0-9]+$'; then + printf '%s' "$value" + return 0 + fi + ;; + esac + echo " -> Expected an integer (e.g. 42, -7)." >&2 + return 1 + ;; + + real) + if printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a real number (e.g. 3.14, -2.0, 1e10)." >&2 + return 1 + ;; + + bool) + case "$value" in + # Normalise all truthy spellings to "yes". + 1|[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]) + printf 'yes' + return 0 + ;; + # Normalise all falsy spellings to "no". + 0|[Nn][Oo]|[Ff][Aa][Ll][Ss][Ee]) + printf 'no' + return 0 + ;; + esac + echo " -> Expected a boolean: yes/no, true/false, or 1/0." >&2 + return 1 + ;; + + date) + # Lean on the system `date` command for flexibility; require it + # to produce output without error as the acceptance criterion. + if date -d "$value" >/dev/null 2>&1 || \ + date -j -f "%Y-%m-%d" "$value" "+%Y-%m-%d" >/dev/null 2>&1; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a valid date (e.g. 2024-07-04)." >&2 + return 1 + ;; + + line) + # A "line" type simply forbids embedded newlines; since the shell + # reads one line at a time this is always satisfied. + printf '%s' "$value" + return 0 + ;; + + url) + if printf '%s' "$value" | \ + grep -qiE '^(https?|ftp|ftps|sftp|file)://[^[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a URL beginning with http://, https://, ftp://, etc." >&2 + return 1 + ;; + + email) + if printf '%s' "$value" | grep -qE '^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected an e-mail address (e.g. user@example.com)." >&2 + return 1 + ;; + + uuid) + if printf '%s' "$value" | \ + grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a UUID (e.g. 550e8400-e29b-41d4-a716-446655440000)." >&2 + return 1 + ;; + + regexp) + # ftype looks like: regexp /pattern/ + # Strip the leading "regexp " and the surrounding delimiters. + local pattern="${ftype#regexp }" # "/pattern/" + pattern="${pattern#/}" # "pattern/" + pattern="${pattern%/}" # "pattern" + if printf '%s' "$value" | grep -qE "$pattern"; then + printf '%s' "$value" + return 0 + fi + echo " -> Value does not match required pattern: /${pattern}/." >&2 + return 1 + ;; + + range) + # ftype may be: "range", "range MIN", "range MIN MAX" + # recutils range is inclusive on both ends. + if ! printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then + echo " -> Expected a number for a range field." >&2 + return 1 + fi + + local lo hi + lo=$(echo "$ftype" | awk '{print $2}') # may be empty + hi=$(echo "$ftype" | awk '{print $3}') # may be empty + + # Use awk for numeric comparison so it works for both int and real. + if [ -n "$lo" ] && \ + awk -v v="$value" -v lo="$lo" 'BEGIN{exit !(v < lo)}'; then + echo " -> Value must be >= ${lo}." >&2 + return 1 + fi + if [ -n "$hi" ] && \ + awk -v v="$value" -v hi="$hi" 'BEGIN{exit !(v > hi)}'; then + echo " -> Value must be <= ${hi}." >&2 + return 1 + fi + printf '%s' "$value" + return 0 + ;; + + *) + # Unknown / enum type or blank – pass through without validation. + printf '%s' "$value" + return 0 + ;; + esac +} + +# --------------------------------------------------------------------------- +# Step 6: Prompt for a single field's value. +# Returns the (possibly normalised) value via stdout. +# Mandatory fields loop until non-empty; typed fields loop until valid. +# --------------------------------------------------------------------------- +prompt_field() { + local field="$1" + local is_mandatory="$2" # "yes" / "no" + local ftype="$3" # may be empty + + local label="$field" + local tags="" + [ "$is_mandatory" = "yes" ] && tags="${tags} [MANDATORY]" + [ -n "$ftype" ] && tags="${tags} (type: ${ftype})" + + local value validated + while true; do + printf "%s%s: " "$label" "$tags" >&2 + IFS= read -r value + + # Mandatory-empty check. + if [ "$is_mandatory" = "yes" ] && [ -z "$value" ]; then + echo " -> This field is mandatory and cannot be empty." >&2 + continue + fi + + # Skip type validation for empty optional fields. + if [ -z "$value" ]; then + printf '%s' "$value" + return 0 + fi + + # ### CHANGED: If a type is declared, validate (and possibly normalise) + # the input; re-prompt on failure. + if [ -n "$ftype" ]; then + validated=$(validate_field_type "$value" "$ftype") + if [ $? -ne 0 ]; then + # validate_field_type already printed the error to stderr. + continue + fi + printf '%s' "$validated" + else + printf '%s' "$value" + fi + return 0 + done +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- +main() { + check_recutils + + [ "$#" -ge 1 ] || usage + local recfile="$1" + local rectype="$2" + + if [ ! -f "$recfile" ]; then + echo "Error: recfile '$recfile' does not exist." >&2 + exit 1 + fi + + # ---- Determine record type -------------------------------------------- + local available_types + available_types=$(list_record_types "$recfile") + + if [ -z "$rectype" ]; then + if [ -z "$available_types" ]; then + echo "No %rec: types found. This recfile uses free-form fields." >&2 + else + local -a display_list + local -a name_list + display_list=() + name_list=() + + local _line + while IFS= read -r _line; do + [ -z "$_line" ] && continue + display_list+=("$_line") + local plain="${_line%% \[count:*}" + name_list+=("$plain") + done <&2 + printf "Press Enter to use it, or type a different type: " >&2 + IFS= read -r rectype + [ -z "$rectype" ] && rectype="$only_type" + + else + # ----- Multiple types: single-keypress numbered menu ------- + echo "Available record types:" >&2 + local i=1 + local label + for label in "${display_list[@]}"; do + printf " %d) %s\n" "$i" "$label" >&2 + i=$((i + 1)) + done + + while true; do + # ### CHANGED: -n1 reads exactly one character without + # requiring Enter. -s suppresses echo so we can echo + # the digit ourselves on the same line for clarity. + printf "Select a record type (1-%d): " "$count" >&2 + local choice + IFS= read -r -n1 -s choice + # Echo the character the user pressed so they can see it. + printf '%s\n' "$choice" >&2 + + # Blank (Enter pressed directly) → re-prompt. + if [ -z "$choice" ]; then + echo " -> Please press a digit between 1 and $count." >&2 + continue + fi + + # Reject non-digits immediately. + case "$choice" in + *[!0-9]*) + echo " -> Please press a digit between 1 and $count." >&2 + continue + ;; + esac + + if [ "$choice" -ge 1 ] && [ "$choice" -le "$count" ]; then + rectype="${name_list[$((choice - 1))]}" + break + else + echo " -> Out of range. Choose between 1 and $count." >&2 + fi + done + fi + + echo "Selected record type: $rectype" >&2 + fi + fi + + # ---- Load the descriptor block ---------------------------------------- + local block="" + if [ -n "$rectype" ]; then + block=$(get_descriptor_block "$recfile" "$rectype") + if [ -z "$block" ]; then + echo "Warning: no descriptor block found for type '$rectype'." >&2 + fi + fi + + # ---- Gather field rules ----------------------------------------------- + local mandatory_fields allowed_fields + mandatory_fields=$(extract_fields "$block" "%mandatory:") + allowed_fields=$(extract_fields "$block" "%allowed:") + + echo "" >&2 + echo "----- Building a new record -----" >&2 + [ -n "$rectype" ] && echo "Record type: $rectype" >&2 + echo "" >&2 + + local -a args + args=() + + local prompted="" + + # ---- 1. Prompt for ALLOWED fields (if defined) ------------------------ + if [ -n "$allowed_fields" ]; then + echo "== Allowed fields ==" >&2 + local f + for f in $allowed_fields; do + # ### CHANGED: Skip the %rec: type-name field. When -t is passed + # to recins it writes the %rec: line itself; supplying it again as + # a -f/-v pair produces a malformed record with a duplicate or + # misplaced %rec: field. + if [ -n "$rectype" ] && [ "$f" = "$rectype" ]; then + continue + fi + + local mand="no" + in_list "$f" $mandatory_fields && mand="yes" + local ftype + ftype=$(get_field_type "$block" "$f") + + local val + val=$(prompt_field "$f" "$mand" "$ftype") + if [ -n "$val" ]; then + args+=(-f "$f" -v "$val") + fi + prompted="$prompted $f" + done + fi + + # ---- 2. Prompt for any MANDATORY fields not in the allowed list ------- + if [ -n "$mandatory_fields" ]; then + local f + local printed_header="no" + for f in $mandatory_fields; do + if ! in_list "$f" $prompted; then + if [ "$printed_header" = "no" ]; then + echo "== Mandatory fields ==" >&2 + printed_header="yes" + fi + local ftype + ftype=$(get_field_type "$block" "$f") + local val + val=$(prompt_field "$f" "yes" "$ftype") + args+=(-f "$f" -v "$val") + prompted="$prompted $f" + fi + done + fi + + # ---- 3. Prompt for free-form / user-defined fields -------------------- + echo "" >&2 + echo "== Additional fields ==" >&2 + echo "Enter a field name (or press Enter / type 'done' to finish):" >&2 + while true; do + printf "Field name: " >&2 + local newfield + IFS= read -r newfield + case "$newfield" in + "" | done | DONE | q | quit) break ;; + esac + + local ftype + ftype=$(get_field_type "$block" "$newfield") + local val + val=$(prompt_field "$newfield" "no" "$ftype") + args+=(-f "$newfield" -v "$val") + done + + # ---- Sanity check: did we collect anything? --------------------------- + if [ "${#args[@]}" -eq 0 ]; then + echo "No fields entered. Aborting." >&2 + exit 1 + fi + + # ---- Build and run recins --------------------------------------------- + echo "" >&2 + echo "Inserting record..." >&2 + if [ -n "$rectype" ]; then + recins -t "$rectype" "${args[@]}" "$recfile" + else + recins "${args[@]}" "$recfile" + fi + + local status=$? + if [ "$status" -eq 0 ]; then + echo "Record inserted successfully into '$recfile'." >&2 + else + echo "recins failed (exit code $status)." >&2 + exit "$status" + fi +} + +main "$@" +#+end_src + +--- + +** Summary of Every Change + +*** Improvement 1 — Type enforcement (=validate_field_type=) + +| Type | What is checked | +|---|---| +| =int= | ERE =^-?[0-9]+$= | +| =real= | ERE =^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$= | +| =bool= | yes/no/true/false/1/0 accepted; normalised to =yes=/=no= | +| =date= | Delegated to the system =date= command (GNU =-d= or BSD =-j= tried in sequence) | +| =line= | Always valid (shell read is already line-oriented) | +| =url= | Scheme prefix check via ERE | +| =email= | =user@domain.tld= shape check via ERE | +| =uuid= | 8-4-4-4-12 hex pattern via ERE | +| =regexp /pat/= | Delimiters stripped; value tested with =grep -E= | +| =range [lo] [hi]= | Numeric bounds checked via =awk= (handles both int and real) | +| unknown | Passed through unchanged | + +=prompt_field= now calls =validate_field_type= and re-prompts on failure rather than silently accepting bad data. + +*** Improvement 2 — Skip the record-type field in =%allowed:= + +A single guard added in the allowed-fields loop: +#+begin_src bash +if [ -n "$rectype" ] && [ "$f" = "$rectype" ]; then + continue +fi +#+end_src +=recins -t TYPE= writes the =%rec:= line itself; providing =TYPE= again as =-f TYPE -v …= created a malformed duplicate field. + +*** Improvement 3 — Single-keypress type selection + +=read -n1 -s= reads exactly one character without waiting for =Enter= and without echoing it. The digit is then echoed manually so the user can confirm what they pressed. Non-digit input and out-of-range digits both produce an immediate inline error and re-prompt. + +> *Note:* =-n1= is a bash built-in flag available since bash 2.x, so it is fully compatible with your bash 3.2.57 target. It does *not* work with =/bin/sh=; the =#!/usr/bin/env bash= shebang is required, which the script already uses. + +*** For records where allowed fields are defined, you cannot insert a record with any fields not listed. Make sure that a user cannot insert a custom field in those records that have fields defined by %allowed:. + +Good catch. The "Additional fields" section currently runs unconditionally, so a user can append arbitrary fields even when =%allowed:= strictly limits what the record may contain. The fix has two parts: + +1. *Skip the "Additional fields" loop entirely* when =%allowed:= is non-empty. +2. *Guard the loop header message* so it only prints when the loop will actually run. + +Only the relevant section of =main= needs to change: + +#+begin_src bash + # ---- 3. Prompt for free-form / user-defined fields -------------------- + # ### CHANGED: Only offer free-form entry when no %allowed: constraint + # exists for this record type. If %allowed: is defined, recins will + # reject any field not listed there, so presenting the prompt would + # only allow the user to produce a record that fails on insert. + if [ -z "$allowed_fields" ]; then + echo "" >&2 + echo "== Additional fields ==" >&2 + echo "Enter a field name (or press Enter / type 'done' to finish):" >&2 + while true; do + printf "Field name: " >&2 + local newfield + IFS= read -r newfield + case "$newfield" in + "" | done | DONE | q | quit) break ;; + esac + + local ftype + ftype=$(get_field_type "$block" "$newfield") + local val + val=$(prompt_field "$newfield" "no" "$ftype") + args+=(-f "$newfield" -v "$val") + done + else + echo "" >&2 + echo "== Additional fields ==" >&2 + echo "(Skipped: this record type has %allowed: constraints.)" >&2 + fi +#+end_src + +The =else= branch with the skip message is optional but worth keeping — it tells the user explicitly why they are not being asked, rather than silently omitting a section they might expect to see. + +*** When using this script, I was able to type in a phone number where the type was defined by a regex + +The bug is in =validate_field_type= inside the =regexp= case. The pattern is extracted correctly, but the issue is in how the delimiters are stripped. If the recutils =%type:= declaration looks like: + +#+begin_src +%type: Phone regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/ +#+end_src + +then =ftype= arrives as =regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/= and the stripping logic is: + +#+begin_src bash +local pattern="${ftype#regexp }" # "/^[0-9]{3}-[0-9]{3}-[0-9]{4}$/" +pattern="${pattern#/}" # "^[0-9]{3}-[0-9]{3}-[0-9]{4}$/" +pattern="${pattern%/}" # "^[0-9]{3}-[0-9]{3}-[0-9]{4}$" +#+end_src + +That looks correct in isolation. The actual problem is that =get_field_type= returns only the type portion, but =validate_field_type= receives =ftype= and extracts =base_type= with: + +#+begin_src bash +local base_type="${ftype%% *}" +#+end_src + +When =ftype= is =regexp /pattern/=, =base_type= becomes =regexp= correctly. However, go back one step further to =get_field_type= and look at how it splits the =%type:= line: + +#+begin_src awk +n = split(rest, parts, /[[:space:]]+/) +names = parts[1] +typedef = "" +for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] +} +#+end_src + +This splits on whitespace, so =regexp /^[0-9]{3}-[0-9]{3}-[0-9]{4}$/= becomes =parts[2]=regexp= and =parts[3]=/^[0-9]{3}-[0-9]{3}-[0-9]{4}$/=, which reassembles correctly. The real fault is more subtle: =grep -qE= is being used to test the match, but the pattern is not anchored end-to-end by the =grep= call itself. A value like =abc123-456-7890xyz= would match a phone regexp if the pattern lacks anchors, but more importantly, *if the recutils pattern uses anchors and the value is invalid, =grep -E= on some platforms treats the ={n}= quantifier differently depending on whether the input is ERE or BRE*. + +Run this test to confirm: + +#+begin_src bash +printf '123-456-789' | grep -qE '^[0-9]{3}-[0-9]{3}-[0-9]{4}$' +echo $? # Should be 1 (no match), but may be 0 on BSD grep without -E +#+end_src + +The fix has two parts: + +1. Use =grep -P= where available and fall back to =grep -E=, *or* use =awk= for the match, which has consistent ERE support across platforms including macOS. +2. Add an explicit check that =pattern= is non-empty before attempting the match, so a malformed =%type:= line does not silently pass everything through. + +#+begin_src bash + regexp) + # ftype looks like: regexp /pattern/ + # Strip the leading "regexp " and the surrounding delimiters. + local pattern="${ftype#regexp }" # "/pattern/" + pattern="${pattern#/}" # "pattern/" + pattern="${pattern%/}" # "pattern" + + # ### CHANGED: Guard against a malformed or empty pattern, which + # would otherwise cause grep to match everything and bypass + # validation entirely. + if [ -z "$pattern" ]; then + echo " -> Warning: empty regexp in type declaration; skipping validation." >&2 + printf '%s' "$value" + return 0 + fi + + # ### CHANGED: Use awk for ERE matching instead of grep -E. + # awk's ERE engine is consistent across macOS (BSD) and Linux, + # whereas grep -E handling of {n,m} quantifiers and anchors + # differs between BSD grep and GNU grep, causing valid regexps + # to silently pass on some platforms. + if printf '%s' "$value" | awk -v pat="$pattern" '$0 ~ pat {found=1} END {exit !found}'; then + printf '%s' "$value" + return 0 + fi + echo " -> Value does not match required pattern: /${pattern}/." >&2 + return 1 + ;; +#+end_src + +Here is the same fix shown in full context inside =validate_field_type= so the indentation and surrounding cases are clear: + +#+begin_src bash +validate_field_type() { + local value="$1" + local ftype="$2" + + local base_type="${ftype%% *}" + + case "$base_type" in + + int) + if printf '%s' "$value" | grep -qE '^-?[0-9]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected an integer (e.g. 42, -7)." >&2 + return 1 + ;; + + real) + if printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a real number (e.g. 3.14, -2.0, 1e10)." >&2 + return 1 + ;; + + bool) + case "$value" in + 1|[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]) + printf 'yes' + return 0 + ;; + 0|[Nn][Oo]|[Ff][Aa][Ll][Ss][Ee]) + printf 'no' + return 0 + ;; + esac + echo " -> Expected a boolean: yes/no, true/false, or 1/0." >&2 + return 1 + ;; + + date) + if date -d "$value" >/dev/null 2>&1 || \ + date -j -f "%Y-%m-%d" "$value" "+%Y-%m-%d" >/dev/null 2>&1; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a valid date (e.g. 2024-07-04)." >&2 + return 1 + ;; + + line) + printf '%s' "$value" + return 0 + ;; + + url) + if printf '%s' "$value" | \ + grep -qiE '^(https?|ftp|ftps|sftp|file)://[^[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a URL beginning with http://, https://, ftp://, etc." >&2 + return 1 + ;; + + email) + if printf '%s' "$value" | grep -qE '^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected an e-mail address (e.g. user@example.com)." >&2 + return 1 + ;; + + uuid) + if printf '%s' "$value" | \ + grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then + printf '%s' "$value" + return 0 + fi + echo " -> Expected a UUID (e.g. 550e8400-e29b-41d4-a716-446655440000)." >&2 + return 1 + ;; + + regexp) + local pattern="${ftype#regexp }" + pattern="${pattern#/}" + pattern="${pattern%/}" + + # ### CHANGED: Guard against empty pattern before attempting match. + if [ -z "$pattern" ]; then + echo " -> Warning: empty regexp in type declaration; skipping validation." >&2 + printf '%s' "$value" + return 0 + fi + + # ### CHANGED: Use awk for ERE matching instead of grep -E for + # consistent behaviour across BSD (macOS) and GNU/Linux platforms. + if printf '%s' "$value" | \ + awk -v pat="$pattern" '$0 ~ pat { found=1 } END { exit !found }'; then + printf '%s' "$value" + return 0 + fi + echo " -> Value does not match required pattern: /${pattern}/." >&2 + return 1 + ;; + + range) + if ! printf '%s' "$value" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then + echo " -> Expected a number for a range field." >&2 + return 1 + fi + + local lo hi + lo=$(echo "$ftype" | awk '{print $2}') + hi=$(echo "$ftype" | awk '{print $3}') + + if [ -n "$lo" ] && \ + awk -v v="$value" -v lo="$lo" 'BEGIN { exit !(v < lo) }'; then + echo " -> Value must be >= ${lo}." >&2 + return 1 + fi + if [ -n "$hi" ] && \ + awk -v v="$value" -v hi="$hi" 'BEGIN { exit !(v > hi) }'; then + echo " -> Value must be <= ${hi}." >&2 + return 1 + fi + printf '%s' "$value" + return 0 + ;; + + *) + printf '%s' "$value" + return 0 + ;; + esac +} +#+end_src + +--- + +*Summary of changes* + +| # | What changed | Why | +|---|---|---| +| 1 | Empty-pattern guard added before the =awk= call | A malformed =%type: Field regexp= with no pattern would cause =awk '$0 ~ ""'= to match everything, silently bypassing validation | +| 2 | =grep -qE= replaced with =awk -v pat="$pattern" '$0 ~ pat'= | =awk='s ERE engine handles ={n}= quantifiers and anchors consistently on both macOS BSD and GNU/Linux; =grep -E= behaviour diverges between the two on interval expressions | + +*** I am not sure if validate_field_type() is detecting regexp defined types correctly. My phonenum type is defined by "%typedef: phonenum regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/" and is not identifying my incorrectly formatted phonenums. The email type is being enforced appropriately. + +The issue is that your type is defined with =%typedef:= (a named/aliased type) rather than =%type:= (an inline type). The =get_field_type= function only scans for =%type:= lines, so it never finds =phonenum= and returns an empty string — meaning =prompt_field= skips validation entirely and accepts anything. + +There are two separate problems to fix: + +1. *=get_field_type= needs to resolve =%typedef:= aliases.* When it finds that a field's type is a named alias like =phonenum=, it must look up that alias in the =%typedef:= declarations to find the underlying type string (e.g. =regexp /^.../=). +2. *The =%typedef:= format uses a three-part line* (=%typedef: name type /pattern/=) rather than the two-part =%type:= format, so the parsing logic differs. + +--- + +First, add a new function =resolve_typedef= that looks up a named type in the =%typedef:= declarations and returns the full type string: + +#+begin_src bash +# --------------------------------------------------------------------------- +# Step 5a: Resolve a named typedef back to its base type string. +# e.g. given alias "phonenum" returns "regexp /^([[:digit:]]{3})...$/" +# --------------------------------------------------------------------------- +resolve_typedef() { + local block="$1" + local alias="$2" + + echo "$block" | awk -v a="$alias" ' + index($0, "%typedef:") == 1 { + # %typedef: aliasname basetype [/pattern/ or args...] + rest = substr($0, length("%typedef:") + 1) + sub(/^[[:space:]]+/, "", rest) + # First token is the alias name. + n = split(rest, parts, /[[:space:]]+/) + if (parts[1] == a) { + # Reassemble everything after the alias name. + result = "" + for (i = 2; i <= n; i++) { + result = result (i==2 ? "" : " ") parts[i] + } + print result + exit + } + } + ' +} +#+end_src + +--- + +Then update =get_field_type= to call =resolve_typedef= when the type it finds is not one of the known base types: + +#+begin_src bash +# --------------------------------------------------------------------------- +# Step 5: Find the declared type (if any) for a specific field. +# Resolves %typedef: aliases to their underlying type string. +# --------------------------------------------------------------------------- +get_field_type() { + local block="$1" + local field="$2" + + # First, find what %type: declares for this field (may be a base type + # or a named alias defined by %typedef:). + local raw_type + raw_type=$(echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ') + + # If nothing was found, return empty. + if [ -z "$raw_type" ]; then + return 0 + fi + + # ### CHANGED: Check whether raw_type is a known base type. If it is + # not, treat it as a named alias and resolve it via %typedef: declarations. + local base="${raw_type%% *}" + case "$base" in + int|real|bool|date|line|url|email|uuid|regexp|range) + # Already a concrete type; return as-is. + printf '%s' "$raw_type" + ;; + *) + # Unknown base: look it up as a %typedef: alias. + local resolved + resolved=$(resolve_typedef "$block" "$base") + if [ -n "$resolved" ]; then + printf '%s' "$resolved" + else + # Alias not found; return the raw value so the caller + # can still display it even if it cannot validate it. + printf '%s' "$raw_type" + fi + ;; + esac +} +#+end_src + +--- + +*Why the email type worked but =phonenum= did not* + +| Declaration style | Example | Parsed by old code | +|---|---|---| +| =%type: Email email= | base type =email= | ✅ found and validated | +| =%type: Phone phonenum= + =%typedef: phonenum regexp /pattern/= | alias =phonenum= | ❌ =get_field_type= returned =phonenum=, =validate_field_type= hit the =*= catch-all and passed everything through | + +The =email= type worked because =email= is a recognised base type keyword that =validate_field_type= handles directly. =phonenum= is an alias, so the old code handed the literal string ="phonenum"= to =validate_field_type=, which fell into the =*)= catch-all case and returned the value unchanged without any validation. + +The new =get_field_type= resolves the alias chain before returning, so =validate_field_type= always receives a concrete type string like =regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/= regardless of whether the field used =%type:= with a base type directly or with a named =%typedef:= alias. + +*** For clarity, the type is defined with "%typedef: phonenum regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/" but then the phonenum type is later used in the line "%type: phone phonenum". Additionally, ranges are defined in "%typedef:" with the format "%typedef: " + +Good clarification. There are two distinct parsing problems to address: + +1. *=%typedef:= alias resolution* — when =get_field_type= returns =phonenum=, the new =resolve_typedef= function looks it up and returns =regexp /^([[:digit:]]{3})\-([[:digit:]]{3})\-([[:digit:]]{4})$/= correctly. That part of the previous answer was right. + +2. *=range= via =%typedef:=* — the previous answer assumed ranges always come from =%type: field range MIN MAX=. But your format is =%typedef: fieldname LOWER UPPER= with no =range= keyword at all. The current =validate_field_type= will never see the word =range= as =base_type= for these fields, so they fall through to the =*= catch-all and pass everything unchecked. + +--- + +The fix requires changes in two places. + +*=resolve_typedef=* must detect when a typedef is a range (two numeric tokens, no keyword) and normalise it into the =range MIN MAX= string that =validate_field_type= already understands, rather than returning the raw two-token string which has no recognisable keyword. + +*=get_field_type=* stays the same as the previous answer — it calls =resolve_typedef= when the raw type is not a known base type. + +--- + +#+begin_src bash +# --------------------------------------------------------------------------- +# Step 5a: Resolve a named typedef back to its base type string. +# +# Handles three typedef forms: +# +# %typedef: phonenum regexp /pattern/ +# -> returns "regexp /pattern/" +# +# %typedef: score range 1 100 +# -> returns "range 1 100" +# +# %typedef: myage 0 120 +# -> two bare numbers, no keyword; normalised to "range 0 120" +# so that validate_field_type sees the expected format. +# --------------------------------------------------------------------------- +resolve_typedef() { + local block="$1" + local alias="$2" + + echo "$block" | awk -v a="$alias" ' + index($0, "%typedef:") == 1 { + rest = substr($0, length("%typedef:") + 1) + sub(/^[[:space:]]+/, "", rest) + + # parts[1] = alias name, parts[2..] = type definition tokens + n = split(rest, parts, /[[:space:]]+/) + if (parts[1] != a) next + + # Reassemble the definition (everything after the alias name). + defn = "" + for (i = 2; i <= n; i++) { + defn = defn (i==2 ? "" : " ") parts[i] + } + + # ### CHANGED: Detect a bare numeric range (two tokens, both + # numbers, no leading keyword). recutils allows: + # %typedef: age 0 120 + # which means "range 0 to 120". Normalise it to "range 0 120" + # so validate_field_type receives a consistent format. + if (n == 3 && parts[2] ~ /^-?[0-9]+(\.[0-9]+)?$/ \ + && parts[3] ~ /^-?[0-9]+(\.[0-9]+)?$/) { + print "range " parts[2] " " parts[3] + exit + } + + # ### CHANGED: Also normalise an explicit "range MIN MAX" typedef + # in case the user wrote the keyword explicitly. + if (parts[2] == "range") { + print defn + exit + } + + # All other forms (regexp, bool, etc.) are returned verbatim. + print defn + exit + } + ' +} +#+end_src + +--- + +#+begin_src bash +# --------------------------------------------------------------------------- +# Step 5: Find the declared type (if any) for a specific field. +# Resolves %typedef: aliases to their underlying type string. +# --------------------------------------------------------------------------- +get_field_type() { + local block="$1" + local field="$2" + + # Find what %type: declares for this field. + # The value may be a base type keyword or a named %typedef: alias. + local raw_type + raw_type=$(echo "$block" | awk -v f="$field" ' + index($0, "%type:") == 1 { + rest = substr($0, length("%type:") + 1) + sub(/^[[:space:]]+/, "", rest) + n = split(rest, parts, /[[:space:]]+/) + names = parts[1] + typedef = "" + for (i = 2; i <= n; i++) { + typedef = typedef (i==2 ? "" : " ") parts[i] + } + m = split(names, namearr, /,/) + for (j = 1; j <= m; j++) { + if (namearr[j] == f) { + print typedef + exit + } + } + } + ') + + if [ -z "$raw_type" ]; then + return 0 + fi + + # If raw_type is a known base type keyword, return it directly. + # Otherwise treat it as a named alias and resolve via %typedef:. + local base="${raw_type%% *}" + case "$base" in + int|real|bool|date|line|url|email|uuid|regexp|range) + printf '%s' "$raw_type" + ;; + *) + # ### CHANGED: Resolve the alias. resolve_typedef normalises + # bare numeric ranges and regexp patterns into the canonical + # forms that validate_field_type expects. + local resolved + resolved=$(resolve_typedef "$block" "$base") + if [ -n "$resolved" ]; then + printf '%s' "$resolved" + else + # Alias not found in %typedef:; return raw so the caller + # can still display the type name even without validation. + printf '%s' "$raw_type" + fi + ;; + esac +} +#+end_src + +--- + +*Summary of what each case now produces end-to-end* + +| recfile declaration | =%type:= line | =get_field_type= returns | =validate_field_type= receives | +|---|---|---|---| +| =%typedef: phonenum regexp /^.../= | =%type: phone phonenum= | =regexp /^.../= | validated against ERE via =awk= | +| =%typedef: score range 1 100= | =%type: Score score= | =range 1 100= | bounds checked via =awk= | +| =%typedef: age 0 120= | =%type: Age age= | =range 0 120= | bounds checked via =awk= | +| /(inline)/ | =%type: Count int= | =int= | integer regex check | +| /(inline)/ | =%type: Email email= | =email= | email shape check | + +The key invariant is that =validate_field_type= only ever needs to handle the canonical forms (=regexp /pat/=, =range [lo] [hi]=, =int=, etc.). All alias resolution and format normalisation is handled upstream in =resolve_typedef= and =get_field_type=, keeping the validation logic clean and easy to extend. + +*** I'm wrong, the range type is defined as "%typedef: FIELD range LOWER UPPER" as you thought. + +In that case the previous answer already handles the explicit =range= keyword path correctly via the =parts[2] == "range"= branch. The bare two-number detection added in the previous answer was the only thing that was unnecessary. The fix is simply to remove that dead branch from =resolve_typedef= to keep the code clean and avoid silently misinterpreting a typedef that happens to have two numeric tokens for some other reason: + +#+begin_src bash +# --------------------------------------------------------------------------- +# Step 5a: Resolve a named typedef back to its base type string. +# +# Handles two typedef forms: +# +# %typedef: phonenum regexp /pattern/ +# -> returns "regexp /pattern/" +# +# %typedef: score range 1 100 +# -> returns "range 1 100" +# --------------------------------------------------------------------------- +resolve_typedef() { + local block="$1" + local alias="$2" + + echo "$block" | awk -v a="$alias" ' + index($0, "%typedef:") == 1 { + rest = substr($0, length("%typedef:") + 1) + sub(/^[[:space:]]+/, "", rest) + + # parts[1] = alias name, parts[2..] = type definition tokens + n = split(rest, parts, /[[:space:]]+/) + if (parts[1] != a) next + + # Reassemble the definition (everything after the alias name). + defn = "" + for (i = 2; i <= n; i++) { + defn = defn (i==2 ? "" : " ") parts[i] + } + + # ### CHANGED: Removed the bare two-number heuristic from the + # previous answer. recutils %typedef: range always uses the + # explicit "range" keyword, so all forms are returned verbatim + # and validate_field_type receives a consistent format without + # any guesswork. + print defn + exit + } + ' +} +#+end_src + +The =get_field_type= function and =validate_field_type= function are unchanged from the previous answer. The full resolution chain for your two cases now looks like this: + +| recfile declaration | =%type:= line | =resolve_typedef= returns | =validate_field_type= receives | +|---|---|---|---| +| =%typedef: phonenum regexp /^.../= | =%type: phone phonenum= | =regexp /^.../= | validated against ERE via =awk= | +| =%typedef: score range 1 100= | =%type: Score score= | =range 1 100= | bounds checked via =awk= | + +***