#!/usr/bin/bash
# Copyright 2015-2025 Chad D. Lemmen https://www.lemmen.com
#
################################################################################
# File:     govtalk 
# Date:     2015-08-07
# Author:   Chad Lemmen https://www.stansoft.org
# Purpose:  UK Real Time Information (RTI)
#           Submit XML files to HMRC transaction engine
#           https://www.gov.uk/government/collections/real-time-information-online-internet-submissions-support-for-software-developers
#
# History:  2025-08-04 CDL - When doing a re-poll, search in all sub-directories.
#

# The XML filename is mandatory; a CorrelationID is optional.
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 filename [CorrelationID]"
    exit 1
fi

xf="$1"
cid="$2"
tmp=/tmp
# Per-user working directory that holds every file this script writes
fpath="${tmp}/ss-govtalk-${USER}"
# One timestamp shared by the acknowledgement, response and message copies
timestamp="$(date +%Y%m%d-%H%M%S)"
ack="${fpath}/ack${timestamp}.xml"
res="${fpath}/res${timestamp}.xml"
msg="${fpath}/msg${timestamp}.xml"

# Pick the gateway: live UK Government Gateway submissions for PAYE by
# default, the test system when SSDEVGW is exported (export SSDEVGW=1).
if [[ -z "$SSDEVGW" ]]; then
  gwurl='https://transaction-engine.tax.service.gov.uk/submission'
else
  gwurl='https://test-transaction-engine.tax.service.gov.uk/submission'
fi

mkdir -p "$fpath"

# Requirements to generate an IRmark:
# https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/366490/irmark_step_by_step_govtalk.pdf
# The IRmark is generated from everything inside and including
# the <Body></Body> node. When you extract the body you must
# "inherit" any and all namespace declarations in the <GovTalkMessage>
# node and place them in the <Body> node.
# e.g. <Body xmlns="http://www.govtalk.gov.uk/CM/envelope">
# Before generating the IRmark, the IRmark node must be removed from the
# <Body>. Any data around the IRmark opening and closing tags e.g. white space,
# line-endings, tabs etc must be preserved.
# * canonicalise (c14n) the XML document
#   xmllint --c14n file.xml
# * generate a 160-bit binary secure hash from the canonicalised XML
#   using the SHA-1 algorithm
# * encode the binary data using base-64 to produce a 28 character string
#   openssl dgst -binary -sha1 file | openssl enc -base64


#######################################
# Poll the gateway for the outcome of a submission, store the receipt and
# terminate the script with a status that describes the result.
# Globals read:
#   ack       - submission_acknowledgement XML file; supplies the poll URL,
#               the PollInterval and the message Class
#   res       - file that receives each formatted poll response
#   cid       - CorrelationID placed in the submission_poll message
#   fpath     - working directory; the receipt text goes to $fpath/gtrec
#   xf, msg   - submitted XML file and the name of its saved copy
#   SSXMLSAVE - when non-empty, $xf is copied to $msg before exiting
# Outputs:
#   One "Attempt N ..." line per poll on stdout.
# Exit status (this function never returns to its caller):
#   0 - response code 0000, submission was successful
#   1 - the gateway answered with any other code or error number
#   2 - no poll result could be obtained
#######################################
submit_poll() {
  local interval pollurl class receipt ret
  local rescode=
  local attempt=1
  local -r max_attempts=4

  # Get the PollInterval attribute value, the poll url and the message class
  interval=$(xmllint --xpath "string(//*[local-name()='ResponseEndPoint']/@PollInterval)" "$ack")
  pollurl=$(xmllint --xpath "//*[local-name()='ResponseEndPoint']/text()" "$ack")
  class=$(xmllint --xpath "//*[local-name()='MessageDetails']/*[local-name()='Class']/text()" "$ack")

  # sleep rejects an empty or non-numeric operand; fall back to a short
  # one-second pause instead of failing and polling with no delay at all.
  if [[ ! "$interval" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    interval=1
  fi

  # Without an endpoint there is nothing to poll; skip the loop so the
  # empty result code below turns into exit status 2.
  if [[ -z "$pollurl" ]]; then
    echo "No ResponseEndPoint found in $ack, unable to poll" >&2
  fi

  while [[ -n "$pollurl" && -z "$rescode" ]]; do
    echo "Attempt $attempt to poll server: $pollurl"

    # Wait the value of PollInterval before sending the submission_poll message
    sleep "$interval"

    # Send submission_poll message to get the status of the submission
    curl -s -d @- "$pollurl" <<EOD | xmllint --format - > "$res"
<?xml version="1.0"?>
<GovTalkMessage xmlns="http://www.govtalk.gov.uk/CM/envelope">
  <EnvelopeVersion>2.0</EnvelopeVersion>
  <Header>
    <MessageDetails>
      <Class>$class</Class>
      <Qualifier>poll</Qualifier>
      <Function>submit</Function>
      <CorrelationID>$cid</CorrelationID>
      <Transformation>XML</Transformation>
    </MessageDetails>
  </Header>
  <GovTalkDetails>
    <Keys/>
  </GovTalkDetails>
</GovTalkMessage>
EOD

    # Success response: the code attribute of the IRmarkReceipt message
    rescode=$(xmllint --xpath "string(//*[local-name()='IRmarkReceipt']/*[local-name()='Message']/@code)" "$res")

    if [[ -z "$rescode" ]]; then
      # Error response: the error number reported by the gateway
      rescode=$(xmllint --xpath "string(//*[local-name()='GovTalkErrors']/*[local-name()='Error']/*[local-name()='Number']/text())" "$res")
    fi

    # Give up once the maximum number of attempts has been made
    if (( attempt >= max_attempts )); then
      break
    fi

    attempt=$(( attempt + 1 ))
  done

  if [[ -n "$rescode" ]]; then
    # Get the receipt text
    receipt=$(xmllint --xpath "//*[local-name()='IRmarkReceipt']/*[local-name()='Message']/text()" "$res")
    printf '%s\n' "$receipt" > "$fpath"/gtrec
  fi

  case "$rescode" in
    0000)
      # Submission was successful
      ret=0
      ;;
    "")
      # Unable to get poll result
      ret=2
      ;;
    *)
      # There was some error
      ret=1
      ;;
  esac

  # Save a copy of the xml message file, which will include the IRmark
  if [[ -n "$SSXMLSAVE" ]]; then
    cp -- "$xf" "$msg"
  fi

  exit "$ret"
}

# A CorrelationID on the command line means this is a re-poll request for an
# earlier submission: find its saved acknowledgement and poll again.
if [[ -n "$cid" ]]; then
  # Search in the directories of all users, not just the current user
  # (2025-08-04 CDL). Only the trailing user name is stripped, literally, so
  # a user name that also occurs earlier in the path or contains regex
  # characters cannot produce a wrong prefix.
  allpath=${fpath%"$USER"}

  # Get the acknowledgement filename.
  #   -F  match the CorrelationID literally, not as a regular expression
  #   --  keep a value starting with '-' from being read as an option
  # head keeps a single path should more than one file mention the ID;
  # several newline-separated names would not be a usable filename.
  ack=$(grep -l -F -- "$cid" "$allpath"*/ack*.xml | head -n 1)

  if [[ -z "$ack" ]]; then
    echo "Unable to get acknowledgement filename"
    sleep 2
    # 255 is the status bash reports for "exit -1", spelled in range
    exit 255
  fi

  submit_poll
fi

# Calculate the IRmark
#
# The hash covers the <Body> element only, with the IRmark element removed
# (leading white space preserved). The extracted Body must 'inherit' the
# namespace declared on the <GovTalkMessage> root, so that namespace is
# written onto the <Body> tag before extraction.
#   (xmlstarlet can also do this: xmlstarlet sel -t -c "//*[local-name()='Body']")
ns="$(xmllint --xpath "namespace-uri(/*)" "$xf")"

# Pipeline stages:
#   sed      - add the namespace to <Body>, drop the IRmark element
#   xmllint  - extract the Body and canonicalise it (c14n)
#   printf   - there must be no newline at the end of the data being hashed;
#              with libxml 2.9.9 the end of file newline is not stripped by
#              xmllint, so printf %s "$(cat -)" removes it
#   openssl  - binary SHA-1 digest, then base-64 encoding
irmark=$(
  sed -e "s%<Body>.*$%<Body xmlns=\"$ns\">%" -e 's/<IRmark.*//' "$xf" \
    | xmllint --xpath "//*[local-name()='Body']" --c14n - \
    | printf %s "$(cat -)" \
    | openssl dgst -binary -sha1 \
    | openssl enc -base64
)

# Insert the generated IRmark into the empty IRmark element; % is the sed
# separator in case $irmark contains a slash
sed -i "s%\(<IRmark Type=\"generic\">\)\(<\/IRmark>\)%\1${irmark}\2%" "$xf"

# Send the submission_request xml file to HMRC and store the formatted
# submission_acknowledgement in $ack
echo "Sending data to server: $gwurl"
curl -s --data-binary @"$xf" "$gwurl" | xmllint --format - > "$ack"
# Capture the status of both pipeline stages right away: $? alone only
# reports xmllint, and PIPESTATUS is overwritten by the next command.
send_status=("${PIPESTATUS[@]}")

if [[ "${send_status[0]}" -ne 0 || "${send_status[1]}" -ne 0 ]]; then
  # error sending data (curl failed) or the reply was not parseable XML
  exit 7
fi

# Get the CorrelationID from the submission_acknowledgement message
cid=$(xmllint --xpath "//*[local-name()='CorrelationID']/text()" "$ack")


# Save CorrelationID for 4GL import to re-poll if needed
if [ -d /tmp ]; then
  printf '%s\n' "$cid" > "$fpath"/gtcid
else
  echo "Directory /tmp does not exist, you must create it"
  sleep 1
fi

# Delete files older than 60 days. -type f keeps the directory itself from
# being handed to rm; "+" removes the matches in batches.
find "$fpath" -type f -mtime +60 -exec rm -f -- {} +

# Send submission_poll
submit_poll

