#!/bin/bash

## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## Strict mode.
## errexit: abort on the first unhandled non-zero exit status.
set -o errexit
## nounset: expanding an unset variable is an error.
set -o nounset
## errtrace: the ERR trap is inherited by functions, command substitutions
## and subshells.
set -o errtrace
## pipefail: a pipeline fails when any stage fails, not only the last one.
set -o pipefail
## inherit_errexit: command substitutions keep errexit enabled.
shopt -s inherit_errexit
## shift_verbose: 'shift' reports an error when the shift count exceeds the
## number of positional parameters.
shopt -s shift_verbose

## Helper libraries. Functions used below that are not defined in this file
## (for example log, log_run, die, has, stecho, stcat, check_variable_name,
## validate_safe_filename) presumably come from these -- verify there.
# shellcheck disable=SC1091
{
  source /usr/libexec/helper-scripts/log_run_die.sh
  source /usr/libexec/helper-scripts/has.sh
  source /usr/libexec/helper-scripts/not_as_root.sh
  source /usr/libexec/helper-scripts/strings.bsh
}

## Verify that every command named in the arguments is available, as
## decided by 'has'. All missing names are gathered first so that a single
## 'die' call reports the complete, space-separated list.
## Usage: collect_reqs COMMAND...
collect_reqs() {
  local candidate unavailable=""

  for candidate in "$@"; do
    if has "$candidate"; then
      continue
    fi
    ## Separate entries with a single space, no leading separator.
    unavailable+="${unavailable:+ }${candidate}"
  done

  [ -z "$unavailable" ] || die 1 "${FUNCNAME[0]}: command(s) unavailable: '$unavailable'" >&2
}
## Fail early when a required external command is missing. Not every name
## listed here is invoked in this file; presumably the remaining ones are
## needed by the scripts that source it -- verify against callers.
collect_reqs retry curl jq safe-rm str_replace stprint realpath sponge

## Assert that every named variable is set. An empty value counts as set.
## Usage: check_vars_exist VAR_NAME...
## Each name is validated with check_variable_name before it is used in an
## indirect expansion. Terminates the script with exit code 1 on the first
## invalid or unset name.
check_vars_exist() {
  local var_name
  for var_name in "$@"; do
    check_variable_name "${var_name}" || exit 1
    ## '+x' expands to 'x' for any set variable, including an empty one,
    ## so this detects "unset" only.
    if [ -z "${!var_name+x}" ]; then
      die 1 "Variable '$var_name' is not set."
      ## Safety net in case 'die' ever returns.
      exit 1
    fi
  done
}

## Dry-run guard for write operations. When DRY_RUN is "true", log what
## would have been done ($1 describes the action) and end the script with
## exit code 0, so the API call following this guard never runs.
## With any other DRY_RUN value (or none) this is a no-op.
dry_run_skip() {
  local mode="${DRY_RUN:-false}"

  [ "$mode" = "true" ] || return 0
  log info "[dry-run] would $1 -- skipping server write."
  exit 0
}

## Report missing variable(s) through 'log error' and terminate the script
## with exit code 1. All arguments are joined into a single message.
missing_variable() {
  local names="$*"

  log error "MISSING VARIABLE: ${names}"
  exit 1
}

## Dump debugging context to stderr: the given error message ($1, "none"
## when absent) followed by the current value of each tracked variable,
## or "unset" for variables that are not set. The whole report is passed
## to stecho as one argument and ends with a '###' marker line.
error_output() {
  local dump

  ## One "name: 'value'" line per entry; printf reuses the format for
  ## every argument, so each line is newline-terminated.
  printf -v dump '%s\n' \
    "error_msg: '${1:-none}'" \
    "WIKI_URL: '${WIKI_URL-unset}'" \
    "WIKI_INDEX: '${WIKI_INDEX-unset}'" \
    "TMPFOLDER: '${TMPFOLDER-unset}'" \
    "counter_currently: '${counter_currently-unset}'" \
    "counter_total: '${counter_total-unset}'" \
    "counter_chunk: '${counter_chunk-unset}'" \
    "TITLE: '${TITLE-unset}'" \
    "page_title: '${page_title-unset}'" \
    "page: '${page-unset}'" \
    "page_to_approve: '${page_to_approve-unset}'" \
    "item_from_all_pages: '${item_from_all_pages-unset}'" \
    "mw_file: '${mw_file-unset}'" \
    "file_path: '${file_path-unset}'" \
    "new_file: '${new_file-unset}'" \
    "output_file: '${output_file-unset}'"

  stecho "${dump}###" >&2
}

## ERR trap handler. Logs the failing command and its exit code, dumps the
## debugging context via error_output, then terminates with exit code 1.
error_handler() {
  ## Must be captured first, before any other command overwrites $?.
  local status="$?" message

  message="
BASH_COMMAND: '$BASH_COMMAND'
failed with exit code '$status'."
  log error "$message" >&2
  error_output "called-by-error_handler"
  exit 1
}

## Route every unhandled command failure through error_handler. Because
## errtrace is enabled, this also applies inside functions, command
## substitutions and subshells.
trap error_handler ERR

## Decide whether the current iteration item should be processed,
## based on a --continue-from value. Manages a caller-supplied state
## variable that flips to "yes" once the target is reached and stays
## "yes" for all subsequent iterations.
##
## Modes:
##   empty CONTINUE_FROM  -> always process (returns 0 immediately).
##   integer N            -> skip until index >= N (1-based, inclusive).
##                           N is always read as decimal, so "08" means 8.
##   other string         -> skip until title matches case-insensitively.
##
## A CONTINUE_FROM consisting only of digits is always treated as an index,
## so a page whose title is purely numeric cannot be targeted by title.
##
## Usage:
##   should_start_processing INDEX TITLE CONTINUE_FROM STATE_VAR_NAME
##   Caller must initialise STATE_VAR_NAME to "no" before the loop.
##   Returns 0 to process the item, 1 to skip it.
should_start_processing() {
  local index="$1" title="$2" continue_from="$3"
  ## Nameref to the caller's state variable, so the flip is visible there.
  local -n _ssp_state="$4"

  if [ -z "$continue_from" ]; then
    return 0
  fi

  ## Target already reached in an earlier iteration.
  if [ "$_ssp_state" = "yes" ]; then
    return 0
  fi

  if [[ "$continue_from" =~ ^[0-9]+$ ]]; then
    ## Force base 10. Without it a leading zero makes bash parse the
    ## value as octal, so "08"/"09" raise an arithmetic error and the
    ## target would never be reached.
    if (( index >= 10#$continue_from )); then
      _ssp_state="yes"
      return 0
    fi
    return 1
  fi

  ## Title mode: case-insensitive comparison via lowercase expansion.
  if [ "${title,,}" = "${continue_from,,}" ]; then
    _ssp_state="yes"
    return 0
  fi
  return 1
}

## --- resumable batch state -------------------------------------------
## Optional per-item done/fail tracking for the batch scripts, so a long
## run can (a) continue past per-item failures (--keep-going) instead of
## aborting on the first one, and (b) be resumed (--retry) after a partial
## run without redoing already-completed items.
##
## State lives in a TSV under TMPFOLDER (override with --state-file). One
## "<status>\t<title>" record per line, appended as items finish; on load
## the last record for a title wins. MediaWiki titles cannot contain a tab
## or a newline, so the format is unambiguous, and the file is parsed with
## 'IFS=$'\t' read -r' only (never source/eval/glob), so a hostile title in
## the file cannot lead to code execution.
##
## Defaults below are overridden by each script's option parser.
## "true": continue with the next item after a per-item failure.
KEEP_GOING="false"
## "true": skip items whose last recorded status is "done".
RETRY="false"
## "true": delete an existing state file in state_setup before loading.
STATE_RESET="false"
## Path of the state TSV; when empty, state_setup derives it from TMPFOLDER.
STATE_FILE=""
## title -> last recorded status ("done" or "fail").
declare -A STATE_DONE_SET=()
## Counters and failed titles for the current run only (see state_finish).
state_done_count=0
state_fail_count=0
state_skipped_count=0
state_failed_titles=()

## Tell whether state tracking is active. Returns 0 only when the caller
## opted in through --keep-going or --retry (KEEP_GOING / RETRY is "true");
## otherwise returns 1, which turns the state_* helpers into no-ops and
## keeps the default fail-fast behaviour.
## A dry run always disables tracking: a preview never writes to the
## server, so marking items "done" would poison a later real --retry run.
state_enabled() {
  if [[ "${DRY_RUN:-false}" == "true" ]]; then
    return 1
  fi
  [[ "${KEEP_GOING}" == "true" || "${RETRY}" == "true" ]]
}

## Resolve STATE_FILE (unless given via --state-file), honour --reset, and
## load any prior state. Call once, after wiki-config is sourced.
## Usage: state_setup KEY   (KEY is a stable per-target string, e.g. WIKI_URL)
##
## Globals read:    TMPFOLDER, STATE_RESET, KEEP_GOING, RETRY
## Globals written: STATE_FILE (only when empty on entry), STATE_DONE_SET
## Returns 0 without doing anything when state tracking is not enabled.
state_setup() {
  local key sanitized status title file_done file_not_done state_dir

  state_enabled || return 0

  key="${1:-default}"
  if [ -z "${STATE_FILE}" ]; then
    ## Reduce KEY to a filename-safe token: every byte outside
    ## [A-Za-z0-9._-] becomes '_'.
    sanitized="$(printf '%s' "${key}" | LC_ALL=C tr -c 'A-Za-z0-9._-' '_')"
    STATE_FILE="${TMPFOLDER}/state/${0##*/}__${sanitized}.tsv"
  fi
  ## dirname rather than "${STATE_FILE%/*}": for a --state-file value
  ## without any slash the expansion yields the file name itself (mkdir
  ## would then create a directory in place of the state file), and for
  ## "/name" it yields an empty string.
  state_dir="$(dirname -- "${STATE_FILE}")"
  mkdir --parents -- "${state_dir}"

  if [ "${STATE_RESET}" = "true" ] && [ -f "${STATE_FILE}" ]; then
    safe-rm -f -- "${STATE_FILE}"
    log info "STATE | --reset: cleared prior state file '${STATE_FILE}'."
  fi

  ## Load "<status>\t<title>" records; a later record for the same title
  ## overwrites the earlier one. Lines without a title are ignored.
  if [ -f "${STATE_FILE}" ]; then
    while IFS=$'\t' read -r status title; do
      [ -n "${title}" ] || continue
      STATE_DONE_SET["${title}"]="${status}"
    done < "${STATE_FILE}"
  fi

  ## Compact the loaded last-wins state back to disk so the file does not
  ## grow without bound across repeated --retry runs. sponge reads its
  ## input fully before writing, so replacing the same file is safe.
  if [ "${#STATE_DONE_SET[@]}" -gt 0 ]; then
    for title in "${!STATE_DONE_SET[@]}"; do
      printf '%s\t%s\n' "${STATE_DONE_SET[${title}]}" "${title}"
    done | sponge "${STATE_FILE}"
  fi

  ## Every status other than "done" is counted as not-done.
  file_done=0
  file_not_done=0
  if [ "${#STATE_DONE_SET[@]}" -gt 0 ]; then
    for status in "${STATE_DONE_SET[@]}"; do
      if [ "${status}" = "done" ]; then
        file_done=$((file_done + 1))
      else
        file_not_done=$((file_not_done + 1))
      fi
    done
  fi

  log info "STATE | enabled (keep_going=${KEEP_GOING} retry=${RETRY})"
  log info "STATE | file: ${STATE_FILE}"
  if [ "${file_done}" -gt 0 ] || [ "${file_not_done}" -gt 0 ]; then
    log info "STATE | prior state: ${file_done} done, ${file_not_done} not-done."
    if [ "${RETRY}" = "true" ]; then
      log info "STATE | --retry: will skip the ${file_done} already-done item(s)."
    else
      log info "STATE | no --retry: all items processed (pass --retry to skip done items)."
    fi
  fi
}

## Skip decision for one item.
## Returns 0 (skip) only when --retry is active and the last recorded
## status of TITLE is "done"; each such skip is counted in
## state_skipped_count. Returns 1 (process the item) in every other case.
## Usage: state_should_skip TITLE
state_should_skip() {
  local title="$1"

  if [[ "${RETRY}" != "true" ]]; then
    return 1
  fi

  ## Titles without a record expand to the empty string here.
  [[ "${STATE_DONE_SET[${title}]-}" == "done" ]] || return 1

  state_skipped_count=$((state_skipped_count + 1))
  return 0
}

## Record a successfully processed item.
## The per-run counter state_done_count is always incremented. Only when
## state tracking is enabled is a "done" record appended to STATE_FILE
## and mirrored into STATE_DONE_SET.
## Usage: state_record_done TITLE
state_record_done() {
  local title="$1"

  : "$(( state_done_count += 1 ))"

  state_enabled || return 0

  printf 'done\t%s\n' "${title}" >> "${STATE_FILE}"
  STATE_DONE_SET["${title}"]="done"
}

## Record a failed item.
## Always updates the per-run bookkeeping (state_fail_count and
## state_failed_titles). Only when state tracking is enabled is a "fail"
## record appended to STATE_FILE and mirrored into STATE_DONE_SET.
## A non-empty CONTEXT is additionally logged as a warning.
## Usage: state_record_fail TITLE [CONTEXT]
state_record_fail() {
  local title="$1" context="${2:-}"

  : "$(( state_fail_count += 1 ))"
  state_failed_titles+=("${title}")

  if state_enabled; then
    printf 'fail\t%s\n' "${title}" >> "${STATE_FILE}"
    STATE_DONE_SET["${title}"]="fail"
  fi

  [ -z "${context}" ] || log warn "STATE | fail: '${title}' (${context})"
}

## Per-item failure handler.
## The failure is always recorded first. With --keep-going (KEEP_GOING is
## "true") a warning is logged and 0 is returned so the caller can move on
## to the next item. Otherwise the run is aborted fail-fast: the error is
## logged, the summary is printed (so the operator sees the state-file
## path for --retry) and the script exits with code 1.
## Usage: state_handle_failure TITLE [CONTEXT]
state_handle_failure() {
  local title="$1" context="${2:-}" detail=""

  state_record_fail "${title}" "${context}"

  case "${KEEP_GOING}" in
    "true")
      log warn "STATE | keep-going past failure on '${title}'."
      return 0
      ;;
  esac

  ## Optional " (CONTEXT)" suffix for the error message.
  if [ -n "${context}" ]; then
    detail=" (${context})"
  fi
  log error "Failed on '${title}'${detail}. Pass --keep-going to continue past per-item failures."
  state_finish
  exit 1
}

## End-of-run summary.
## Logs the per-run counters; when at least one item failed, also lists
## the failed titles, the state file path and the --retry hint.
## Does nothing (returns 0) unless state tracking is enabled.
state_finish() {
  local failed_title

  if ! state_enabled; then
    return 0
  fi

  log info "STATE | summary: done=${state_done_count} failed=${state_fail_count} skipped-already-done=${state_skipped_count}"

  ## Nothing more to report for a run without failures.
  [ "${state_fail_count}" -gt 0 ] || return 0

  log warn "STATE | ${state_fail_count} item(s) failed this run:"
  for failed_title in "${state_failed_titles[@]}"; do
    log warn "STATE |   fail: '${failed_title}'"
  done
  log warn "STATE | state file: ${STATE_FILE}"
  log warn "STATE | re-run the same command with --retry to retry failed/unfinished items."
}

## Handler for EXIT and the INT/HUP/ABRT/QUIT signals. Logs whether the
## script ends with a zero or non-zero status, then exits with that same
## status.
exit_handler() {
  ## Must be captured first, before any other command overwrites $?.
  local status="$?"

  ## Uninstall the traps so the 'exit' below does not re-enter this handler.
  trap - INT HUP ABRT QUIT EXIT

  case "$status" in
    "0")
      log info "END: with OK exit code: '$status'"
      ;;
    *)
      log error "END: with ERROR exit code: '$status'"
      ;;
  esac

  exit "$status"
}

## Choose the default curl executable and store its name in the global
## variable 'curl': 'scurl' when 'has' reports it as available, plain
## 'curl' otherwise.
set_curl_binary_default() {
  local chosen="curl"

  if has scurl; then
    chosen="scurl"
  fi
  curl="$chosen"
}

## Convert a wiki page name into a filesystem-safe filename component.
## Usage: set_backup_page_item PAGE_NAME
## Output: the encoded name on stdout. stdout carries ONLY the result, so
##         callers can capture it with $(...); all diagnostics go to stderr.
## Returns: 1 when the encoded name does not pass validate_safe_filename.
##          Terminates via 'die' when PAGE_NAME is empty.
set_backup_page_item() {
  local backup_page_item

  backup_page_item="$(stecho "$1")"
  if [ -z "$backup_page_item" ]; then
    die 1 "${FUNCNAME[0]}: 1 is empty!"
  fi

  ## Encode page name into a filesystem-safe filename component using
  ## standard percent-encoding via Python's urllib.parse.quote.
  ## - Spaces become underscores (MediaWiki convention, same as git-mediawiki).
  ## - '/' becomes '%2F' (preserves subpage structure for round-trip).
  ## - '%', '&', '#', '?' etc. are percent-encoded.
  ## - '_', '.', '-', '~', ':' are left as-is (safe in filenames).
  ## Round-trips with decode_backup_page_item / mw-urlencode --decode-filename-to-page.
  backup_page_item="$(mw-urlencode --encode-page-to-filename "$backup_page_item")"

  ## Both helpers receive the variable NAME, not its value.
  check_is_not_empty_and_only_one_line backup_page_item
  if ! validate_safe_filename backup_page_item; then
    log error "${FUNCNAME[0]}: validate_safe_filename backup_page_item failed. Checking '$backup_page_item' for unicode using unicode-show for debugging purposes."
    ## Debug dump goes to stderr: on stdout it would be captured by the
    ## caller's command substitution and never reach the operator.
    ## '|| true': the dump is a debugging aid only; its exit status must
    ## not pre-empt the 'return 1' below under errexit/pipefail.
    {
      printf '%s\n' "--- unicode-show start:"
      printf '%s\n' "$backup_page_item" | unicode-show || true
      printf '%s\n' "--- End of unicode-show."
    } >&2
    return 1
  fi

  stecho "$backup_page_item"
}

## Turn an encoded page item into the backup filename by appending '.mw'.
## Usage: set_backup_filename_item ENCODED_PAGE_ITEM
## Output: the filename on stdout. stdout carries ONLY the result, so
##         callers can capture it with $(...); all diagnostics go to stderr.
## Returns: 1 when the filename does not pass validate_safe_filename.
##          Terminates via 'die' when the argument is empty.
set_backup_filename_item() {
  local backup_filename_item

  backup_filename_item="$1"

  if [ -z "$backup_filename_item" ]; then
    die 1 "${FUNCNAME[0]}: 1 is empty!"
  fi

  ## Same format as git-mediawiki: add '.mw' file extension.
  ## '/' is already encoded as '%2F' by set_backup_page_item /
  ## mw-urlencode --encode-page-to-filename, so no extra replacement needed.
  backup_filename_item="${backup_filename_item}.mw"

  ## Both helpers receive the variable NAME, not its value.
  check_is_not_empty_and_only_one_line backup_filename_item
  if ! validate_safe_filename backup_filename_item; then
    log error "${FUNCNAME[0]}: validate_safe_filename backup_filename_item failed."
    ## Debug dump goes to stderr: on stdout it would be captured by the
    ## caller's command substitution and never reach the operator.
    ## '|| true': the dump is a debugging aid only; its exit status must
    ## not pre-empt the 'return 1' below under errexit/pipefail.
    { printf '%s\n' "$backup_filename_item" | unicode-show || true; } >&2
    return 1
  fi

  stecho "$backup_filename_item"
}

## Reverse of set_backup_page_item: turn an encoded filename component
## back into the page name and print it on stdout.
##
## WARNING: the decoded output contains '/' characters again. Use the
## result exclusively as a MediaWiki API page title parameter and NEVER
## as part of a local file path (directory traversal risk).
##
## Usage: decode_backup_page_item ENCODED_ITEM
decode_backup_page_item() {
  local encoded_item

  encoded_item="$(stecho "$1")"
  [ -n "$encoded_item" ] || die 1 "${FUNCNAME[0]}: 1 is empty!"

  ## Standard percent-decoding (Python's urllib.parse.unquote); the
  ## counterpart of mw-urlencode --encode-page-to-filename.
  mw-urlencode --decode-filename-to-page "$encoded_item"
}

## Turn a backup filename back into the page name: strip one trailing
## '.mw' extension (if present) and hand the rest to
## decode_backup_page_item, whose output is printed on stdout.
## Usage: decode_backup_filename_item FILENAME
decode_backup_filename_item() {
  local filename

  filename="$(stecho "$1")"
  [ -n "$filename" ] || die 1 "${FUNCNAME[0]}: 1 is empty!"

  decode_backup_page_item "${filename%.mw}"
}

## Defense-in-depth guard against directory traversal.
## Confirms that TARGET_PATH lies strictly below BASE_DIR, whatever '..'
## components, symlinks or encoding tricks the path contains.
##
## Method: canonicalize both paths, then compare by prefix. realpath
## resolves '..', symlinks and redundant slashes into an absolute path.
## The target is resolved with '-m' (--canonicalize-missing) because it
## typically does not exist yet when this check runs.
##
## Returns 0 when the target is inside the base directory; terminates via
## 'die' otherwise, and also when either argument is empty. A target that
## resolves to the base directory itself is rejected.
##
## Usage: assert_path_within_dir BASE_DIR TARGET_PATH
assert_path_within_dir() {
  local base_dir="$1" target_path="$2"
  local resolved_base resolved_target

  [ -n "$base_dir" ] || die 1 "${FUNCNAME[0]}: base_dir is empty!"
  [ -n "$target_path" ] || die 1 "${FUNCNAME[0]}: target_path is empty!"

  resolved_base="$(realpath -- "$base_dir")"
  resolved_target="$(realpath -m -- "$target_path")"

  ## The quoted base is matched literally; only the trailing '/*' is a
  ## pattern, so the target needs the base plus a path separator as prefix.
  if [[ "$resolved_target" == "$resolved_base"/* ]]; then
    return 0
  fi

  die 1 "${FUNCNAME[0]}: path traversal blocked! target: '$resolved_target' is outside base: '$resolved_base'"
}

## Scan a curl argument list for the output file option and store its
## value in the variable 'output_file' (not declared local here, so it is
## written to the caller's local 'output_file' when one exists, otherwise
## to the global). 'output_file' is empty when no such option is present.
##
## Recognized forms: --output FILE, --output=FILE, -o FILE, -oFILE.
## When the option occurs more than once, the last occurrence wins.
## Scanning stops at '--'. Terminates via 'die' when '--output' or '-o' is
## the last argument or is followed by an empty value.
##
## The loop is driven by the argument count, so an empty-string argument
## (e.g. the value of '--data ""') does not end the scan early and the
## length of the argument list is not limited.
##
## Usage: curl_get_output_parameter CURL_ARG...
curl_get_output_parameter() {
  output_file=""

  while (( $# > 0 )); do
    case "$1" in
      --)
        break
        ;;
      --output|-o)
        if [ -z "${2-}" ]; then
          die 1 "${FUNCNAME[0]}: $1 given but missing value"
        fi
        output_file="$2"
        shift 2
        ;;
      --output=*)
        output_file="${1#--output=}"
        shift
        ;;
      -o*)
        ## supports: -oFILE
        output_file="${1#-o}"
        shift
        ;;
      *)
        shift
        ;;
    esac
  done
}

## Run "$curl" with the given arguments. The LAST argument is the URL; it
## is passed through mw-urlencode first unless curl_run_no_encode is "true".
##
## Output handling:
## - With --output/-o FILE: the file is removed and re-created empty
##   before the transfer; nothing is printed on stdout.
## - With an output target of stdout ('-', /dev/stdout, ...): curl writes
##   to stdout directly.
## - Without an output option: the response is captured in a temp file,
##   printed on stdout and the temp file is removed afterwards.
##
## On a non-zero curl exit code the error is logged, any captured response
## is dumped to stderr and the script terminates via 'die'.
##
## Usage: curl_run CURL_ARG... URL
curl_run() {
  ## NOTE(review): curl_run_no_encode is declared local, so a value set
  ## globally by the caller is shadowed; presumably callers pass it in the
  ## call's temporary environment ('curl_run_no_encode=true curl_run ...')
  ## -- confirm against callers.
  local curl_run_no_encode args url curl_exit_code encoded_url output_file
  local created_temp_output output_is_stdout cat_exit_code

  if (( $# == 0 )); then
    die 1 "${FUNCNAME[0]}: no arguments given, the URL is required as last argument"
  fi

  ## Sets the local 'output_file' (empty when no output option was given).
  curl_get_output_parameter "$@"

  created_temp_output="false"
  output_is_stdout="false"

  ## If caller didn't provide an output file, capture to a temp file
  if [ -z "${output_file-}" ]; then
    created_temp_output="true"
    output_file="$(mktemp -t curl_run.XXXXXXXX)" || die 1 "${FUNCNAME[0]}: mktemp failed"
  else
    ## If caller explicitly targets stdout, don't try to rm/touch/stcat it
    case "$output_file" in
      "-"|/dev/stdout|/dev/fd/1|/proc/self/fd/1)
        output_is_stdout="true"
        ;;
    esac
  fi

  ## Start from an empty file so stale content is never mistaken for the
  ## response of this request.
  if [ "$output_is_stdout" != "true" ]; then
    safe-rm -f -- "$output_file"
    touch -- "$output_file"
  fi

  ## The URL is the last positional parameter (at least one exists here).
  url="${!#}"

  [[ -v curl_run_no_encode ]] || curl_run_no_encode=""

  if [ "$curl_run_no_encode" = "true" ]; then
    encoded_url="$url"
  else
    encoded_url="$(mw-urlencode "$url")"
  fi

  curl_exit_code=0

  ## Rebuild args: exclude last param (url), then append encoded url.
  ## If we created a temp output, inject --output <temp> so we can capture it.
  if [ "$created_temp_output" = "true" ]; then
    args=("${@:1:$#-1}" "--output" "$output_file" "$encoded_url")
  else
    args=("${@:1:$#-1}" "$encoded_url")
  fi

  log_run debug "$curl" "${args[@]}" || curl_exit_code="$?"

  if [ "$curl_exit_code" != "0" ]; then
    log error "curl failed with exit code '$curl_exit_code': url: '$encoded_url' output_file: '${output_file-}'"
    if [ "$output_is_stdout" != "true" ] && [ -n "${output_file-}" ]; then
      stcat "$output_file" >&2 || true
    fi
    ## Its content was just dumped; do not leave our temp file behind.
    if [ "$created_temp_output" = "true" ]; then
      safe-rm -f -- "$output_file" || true
    fi
    die 1 'curl failure, cannot continue.'
  fi

  # If caller didn't specify --output, behave like curl and print to stdout
  if [ "$created_temp_output" = "true" ]; then
    cat_exit_code=0
    cat -- "$output_file" || cat_exit_code="$?"
    ## The temp file may hold API responses; remove it once printed.
    safe-rm -f -- "$output_file" || true
    return "$cat_exit_code"
  fi
}

## Log the final exit status when the script exits and when it receives
## INT, HUP, ABRT or QUIT (see exit_handler).
trap exit_handler INT HUP ABRT QUIT EXIT

## Defaults. Each one is applied only when the variable is not already
## set, so a caller or the environment can pre-set any of them.
[[ -v TMPFOLDER ]] || TMPFOLDER="$HOME/mediawiki-shell-temp"
[[ -v USERDOMAIN ]] || USERDOMAIN=""
[[ -v cookie_jar ]] || cookie_jar="$TMPFOLDER/wiki-cookiejar"
[[ -v curl ]] || set_curl_binary_default
## Default curl options: treat HTTP errors as failures, stay quiet except
## for errors, and retry (including refused connections) up to 10 times
## with a 5 second delay.
[[ -v curl_opts ]] || curl_opts=(
  "--fail"
  "--no-progress-meter"
  "--show-error"
  "--retry-connrefused"
  "--retry" "10"
  "--retry-delay" "5"
  "--cookie" "nocache=true"
  "--user-agent" "mediawiki-shell"
)

## Files and directories created from here on are accessible by the owner
## only. TMPFOLDER may already exist with wider permissions, so read and
## write access for group and others is removed explicitly.
umask 077
mkdir --parents -- "$TMPFOLDER"
chmod og-rw -- "$TMPFOLDER"
