#!/bin/sh
# Name: print-to-pdf
# Type: create-pdf-with-ghostscript
# DPI: 600
#
#############################################################################
#
# This is a work in progress. Its basic aim is to provide a mechanism to
# feed HylaFax without fax client software by just printing "into it"
# and using a special PPD to choose options.
#
# It may be either called as a notification script in EtherShare 2.5 or
# above, as a filter in a lpd compatible spooling system (so it can be used
# with CAP, Netatalk or remote LPR printing from macs), as a CUPS backend
# filter (so you can use it locally on MacOS X, any other CUPS platform or
# on a CUPS server to provide services network wide) and finally with Netatalk
# directly from papd. To do the latter, add some lines to your papd.conf, eg.:
#
#     Print to PDF:\
#         :pr=|/usr/local/bin/print-to-pdf.sh:\
#         :pd=/usr/local/share/ghostscript/7.31/lib/destiller.ppd:\
#         :op=lp:
# 
# (Don't know whether you can run CAP's lwsrv in a similar fashion with eg.
# 'lwsrv [...] -n "Print to PDF" -p |/usr/local/bin/print-to-pdf.sh')
#
# If the script will be called from EtherShare < 3.1 then $2 will be treated
# as source PS file. When using EtherShare 3.1 or above then it will look for
# the file $HELIOS_DFFILE is referring to. Otherwise it assumes, that the job
# is on stdin:
#
# The postscript job (stdin, $xy) will be saved in $TMP_FILE, some information
# from the header will be saved in $HEADER_FILE, some of that information will
# be converted from MacRoman encoding to Latin1 (you will need GNU recode for
# this -- adjust the path below) and the job will be piped through GhostScript
# and, after the conversion to PDF, saved to a file in $TARGET_DIR. The name is
# derived from the '%%Title' comment, truncated to $MAX_FILENAME_LENGTH and,
# in case a file with that name already exists, extended with a numerical
# suffix.
#
# Note: In an EtherShare environment we simply rely on the capabilities
# of the spooling system's backend and we just use the name of the input
# file and change the extension to .pdf only (so we don't have to deal
# with UTF-8 or hex-style namespace differentiation)
#
# Actually, on Netatalk systems we have to deal with different encodings,
# but we don't do that -- instead we make an assumption... ;-)
# 
# To see correct filenames on the mac clients the volume containing 
# $TARGET_DIR should be set to Latin1 encoding, that means your 
# AppleVolumes file should read like
#
# 	/raid/hotfolder     codepage:maccode.iso8859-1     "Hotfolder"
#
# To get correct file type / creator code combinations you should have a
# proper configuration in your AppleVolumes.system, eg.:
#
# 	.pdf	"PDF "	"CARO"
#
# In case you want to test what this script does, adjust the paths to fit
# your needs, set DEBUG=TRUE and pipe a postscript file, created on a mac,
# through the script, eg.:
#
#	cat test.ps | /usr/local/bin/print-to-pdf.sh
#
# If $MAIL=TRUE and the %%Routing DSC comment in the PostScript header
# isn't empty (Laserwriter 8.5.1 to 8.7.1 will fill in the email address 
# of the active 'Internet Config' configuration) then the PDF will be
# emailed to this address. This functionality needs the mimencode program
# so ensure that it is available and the $MIMEENCODE path points to it.
#
# If you have the excellent xpdf package installed then you might want to
# define $PDF2TEXT so that it points to pdftotext from this package. In
# such case the complete text of your printjob will be included in the 
# body of the email so you can do a full-text search in each of your FAX
# documents from within your email client.
#
#############################################################################
#
# author: Thomas Kaiser <mailto:print-to-pdf@kaiser-edv.de>
# license: GPL - http://www.gnu.org/copyleft/gpl.html
# url: http://users.phg-online.de/tk/netatalk/scripts/print-to-pdf.sh.gz
# date: 18 Oct 2002 (v.0.0.6)
#
#############################################################################
#
# Changes:
# 0.0.1 initial release
# 0.0.2 added support for MIME encoding, email submission and text
#       extraction
# 0.0.3 added support for EtherShare
# 0.0.4 added support for win/unix lineendings. Improved GS error handling
# 0.0.5 removed creation of date header when sending mail due to LOCALE
#       problems. Added support for recent HELIOS versions. Added a switch
#       DEL_WHEN_MAILED to force the deletion of temporary files if the 
#       resulting PDF has been successfully emailed to the creator
# 0.0.6 added basic support for CUPS. Modified the script to be able
#       to add other backends more easily in the future
#
#############################################################################
#
# Non-Warranty: 
# This script comes with absolutely no warranty.
#
# As there is only very limited checking in the code and nearly no error
# handling at the moment, this script will open several security holes
# (handling temporary files, etc.) Use in a production environment isn't
# recommended at all.
# 
# Use at your own risk!
#
#############################################################################
 
# External tools and the way they are invoked -- adjust to your system

# recode call converting MacRoman --> Latin1
MAC2LATIN1='/usr/bin/recode applemac..latin1'
# recode call converting Latin1 --> QuotedPrintable
QP='/usr/bin/recode latin1..latin1/QP'
# grep; '-a' means 'treat binary files like text'
GREP='/usr/bin/grep -a'
# path to the GhostScript binary
GS='/usr/bin/gs'
# ask GhostScript for its version, which is embedded in the DOCINFO later on
GS_VERSION=`$GS --version`
# where the GhostScript fonts live; exported into the environment
# (in case you use another shell, you might use 'setenv')
GS_FONTPATH='/usr/share/ghostscript/fonts'
export GS_FONTPATH

# GS_OPTIONS holds the parameters GhostScript is called with. With an older
# GhostScript version you might want to drop the advanced settings
# -dPDFSETTINGS, -dProcessColorModel and -dCompatibilityLevel.
# For further info: http://www.ghostscript.com/doc/gnu/7.05/Ps2pdf.htm

GS_OPTIONS='-q -dNOPAUSE -dBATCH -r600 -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress -dProcessColorModel=/DeviceCMYK -dCompatibilityLevel=1.3'

# Mail related settings -- these four are only needed if you set $MAIL=TRUE

# how the MTA will be called
SENDMAIL='/usr/sbin/sendmail -t -i'
# path to the mimencode binary
MIMEENCODE='/usr/bin/mimencode'
# path to the pdftotext binary
PDF2TEXT='/usr/bin/pdftotext'
# adjust this!
ADMIN_ADDRESS='root@localhost'

# Locations of the temporary and final files -- adjust to your needs!
# ($$ expands to the process id of this script)

# the spooled PostScript job
TMP_FILE="/tmp/temporary-ps-$$.ps"
# extracted header, later reused for the mail; should not be inside an
# EtherShare volume
HEADER_FILE="/tmp/$$.header"
# directory the finished PDF is written to
TARGET_DIR="/Users/tk/Documents"

# User definable switches

# longer filenames will be truncated to this value
MAX_FILENAME_LENGTH=31
# TRUE: send the PDF as a base64 encoded MIME-attachment to $MAILTO
# (use MAIL=FALSE to switch mailing off)
MAIL=TRUE
# TRUE: remove the local pdf file when successfully emailed
DEL_WHEN_MAILED=TRUE
# set to TRUE to get some debug output
DEBUG=TRUE
# where to write debug messages to
DEBUG_LOG="/Users/tk/Documents/logs/debug.txt"
# where should GS write stderr to?
GS_ERRORS="/tmp/gs-errors-$$.txt"

# How are we called? What is our environment?
#
# Work out which spool system invoked us and store the result in
# $SPOOLSYSTEM:
#
#   HELIOS -- $HELIOS_PRINTERNAME is set (HELIOS notification script)
#   LPD    -- $HELIOS_PRINTERNAME is empty or unset
#   CUPS   -- the directory this script was started from ends in
#             /cups/backend; this check comes last and overrides the others

if [ "X$HELIOS_PRINTERNAME" = "X" ]; then
	SPOOLSYSTEM=LPD
else
	SPOOLSYSTEM=HELIOS
fi

# "$0" is quoted so that an installation path containing blanks is still
# recognised correctly
SCRIPT_DIR=`dirname "$0"`
case "$SCRIPT_DIR" in
	*/cups/backend)
		SPOOLSYSTEM=CUPS
		;;
esac

# Preprocessing per spool system: make sure $TMP_FILE names a file holding
# the print job, pick the file utilities ($CP, $MV, $RM, $CHMOD) and note in
# $DEL_AFTER_PRINT whether we have to remove the job file ourselves.

case "${SPOOLSYSTEM}" in
	CUPS)
		# the number of arguments tells us what CUPS wants from us
		case ${#} in
			0)
				# device discovery: announce the services we can provide
				echo 'direct hylafax "Unknown" "HylaFax FAX Queuer Agent"'
				echo 'direct hylafax://localhost?dpi=196 "Unknown" "Local FAX Queuer - high resolution"'
				echo 'direct hylafax://localhost?dpi=98 "Unknown" "Local FAX Queuer - low resolution"'
				exit 0
				;;
			5)
				# the job arrives on stdin (copies have already been
				# handled); spool it into $TMP_FILE
				cat >"$TMP_FILE"
				copies=1   # not referenced anywhere else in this script
				# data read from stdin means we own the temporary file
				DEL_AFTER_PRINT=TRUE
				;;
			6)
				# the last argument names the job file; it is not ours to delete
				TMP_FILE="$6"
				DEL_AFTER_PRINT=FALSE
				;;
			*)
				exit 1
				;;
		esac
		CP=/bin/cp
		MV=/bin/mv
		RM=/bin/rm
		CHMOD=/bin/chmod
		;;
	HELIOS)
		# Installations providing /etc/HELIOSInstallPath (4th generation
		# EtherShare) tell us the job file in $HELIOS_DFFILE; otherwise
		# the second argument is taken as the job file.
		if [ ! -r /etc/HELIOSInstallPath ]; then
			ESDIR=/usr/local/es   # adjust if necessary!
			DT="${ESDIR}/dt"
			TMP_FILE="${2}"
		else
			read HELIOSDIR </etc/HELIOSInstallPath
			DT="${HELIOSDIR}/bin/dt"
			TMP_FILE="$HELIOS_DFFILE"
		fi
		# all file operations go through the 'dt' tool
		CP="${DT} cp"
		MV="${DT} mv -k"
		RM="${DT} rm"
		CHMOD="${DT} chmod"
		DEL_AFTER_PRINT=TRUE
		;;
	LPD)
		# plain filter mode: spool stdin into $TMP_FILE
		cat >"$TMP_FILE"
		CP=/bin/cp
		MV=/bin/mv
		RM=/bin/rm
		CHMOD=/bin/chmod
		DEL_AFTER_PRINT=TRUE
		;;
esac

# Environment test: find out how this shell's 'echo' treats backslash
# escapes. If 'echo -e "\060"' prints a plain "0", the -e switch is needed
# (and understood) to let 'echo' deal correctly with quoted text, so $EE
# becomes '-e'; in any other case $EE stays empty.

EE=''
case "`echo -e "\060"`" in
	0)
		EE='-e'
		;;
esac

# check whether we can process $TMP_FILE to extract job metadata;
# without a readable job file there is nothing we can do

if [ ! -r "${TMP_FILE}" ] ; then
	exit 1
fi

# Extract the PostScript header into $HEADER_FILE and pull the relevant
# information out of it:
#
# DSC_VERSION -- the version of Adobe(R) Document Structure
#                Conventions the file claims to be
# TITLE -- the title of the document
# PAGES -- count of pages (according to the relevant DSC comment)
# MAILTO -- to whom should the PDF and/or error messages be mailed?
# CREATOR -- with which app has the PostScript been generated?
# CREATIONDATE -- when has it been printed (local time on the mac)
# OWNER -- who has created it (owner name from filesharing control
#          panel or full name when EtherShare is able to resolve it)

if [ "${SPOOLSYSTEM}" = HELIOS ]; then
	# page count and user are taken from the HELIOS environment
	head "$TMP_FILE" | sed 's/)$//g' >"$HEADER_FILE"
	PAGES=$HELIOS_JOBPAGES
	OWNER="$HELIOS_USER"
else
	# turn CR line endings into LF and strip a ')' at the end of each line
	$GREP "^%\!PS" "$TMP_FILE" | tr "\015" "\012" | sed 's/)$//g' >"$HEADER_FILE"
	# if that produced 3 lines or less, use the first lines of the job
	# (with CRs removed) as header instead
	HEADER_LINES=`wc -l "$HEADER_FILE" | awk '{print $1}'`
	if [ "$HEADER_LINES" -le 3 ]; then
		head "$TMP_FILE" | tr -d "\015" >"$HEADER_FILE"
	fi
	PAGES=`$GREP "^%%Pages" "$HEADER_FILE" | cut -b9- | sed 's/^(//g'`
	# check whether the %%Pages DSC comment is written in the header
	# or in the trailer (at the end of the printjob): anything left
	# after removing the digits means the header held no plain number.
	# $PAGES is deliberately unquoted so that 'echo' drops surrounding
	# blanks; the character class is quoted to protect it from globbing.
	if [ "X`echo $PAGES | tr -d '[:digit:]'`" != "X" ] ; then
		PAGES=`tail "$TMP_FILE" | tr "\015" "\012" | sed 's/)$//g' | $GREP "^%%Pages" | cut -b10-`
	fi
	OWNER=`$GREP "^%%For" "$HEADER_FILE" | cut -b8- | sed 's/^(//g'`
fi

DSC_VERSION=`$GREP "^%\!PS" "$HEADER_FILE"`
MAILTO=`$GREP "^%%Routing" "$HEADER_FILE" | cut -b23- | sed 's/^(//g'`
# if there is no %%Routing DSC comment then use the ADMIN_ADDRESS instead
# (quoted -z test: a value containing blanks must not break the check)
if [ -z "$MAILTO" ] ; then
	MAILTO=$ADMIN_ADDRESS
fi
RAW_CREATOR=`$GREP "^%%Creator" "$HEADER_FILE" | cut -b12- | sed 's/^(//g'`
CREATOR=`echo $EE "$RAW_CREATOR" | $MAC2LATIN1 | tr -d "\015"`
RAW_CREATIONDATE=`$GREP "^%%CreationDate" "$HEADER_FILE" | cut -b17- | sed 's/^(//g'`
CREATIONDATE=`echo $EE "$RAW_CREATIONDATE" | $MAC2LATIN1 | tr -d "\015"`

# the source of the job title depends on the spool system
case "${SPOOLSYSTEM}" in
	CUPS)
		# third command line argument
		TITLE="$3"
		;;
	HELIOS)
		RAW_TITLE="$HELIOS_JOBTITLE"
		TITLE=`echo $EE "$RAW_TITLE" | $MAC2LATIN1 | tr -d "\015"`
		;;
	LPD)
		RAW_TITLE=`$GREP "^%%Title" "$HEADER_FILE" | cut -b10- | sed 's/^(//g'`
		TITLE=`echo $EE "$RAW_TITLE" | $MAC2LATIN1 | tr -d "\015"`
		;;
esac

# With DEBUG=TRUE, start a fresh $DEBUG_LOG ('>' truncates the file) that
# lists the extracted job metadata. The first field reports $SPOOLSYSTEM;
# it used to print $HELIOS, a variable this script never assigns.
if [ "$DEBUG" = TRUE ]; then
	echo $EE "\nExtracted information: \n\n\tSpool system: $SPOOLSYSTEM \n\tRouting: $MAILTO \n\tTitle: $TITLE ($HELIOS_JOBTITLE)\n\tFor: $OWNER ($HELIOS_JOBFOR/$HELIOS_USER)\n\tPages: $PAGES \n\tCreator: $CREATOR \n\tCreationDate: $CREATIONDATE\n" >"$DEBUG_LOG"
fi

# Create a DOCINFO pdfmark and an annotation on the front page with
# relevant submission information
#
# $DOCINFO is a PostScript fragment that gets prepended to the job when it
# is piped to GhostScript. In order it contains:
#   - a guard that defines 'pdfmark' as 'cleartomark' when the interpreter
#     does not know it
#   - a /DOCINFO pdfmark with Title, Author, Creator and Producer
#   - /DOCVIEW and {Catalog} pdfmarks (PageMode, PageLayout, FitWindow)
#   - an /ANN pdfmark whose /Contents holds title, sender, print time,
#     queueing time (`date` on `hostname`, evaluated right here) and page count
# The "\r" sequences are stored literally in the variable and are left for
# the later 'echo $EE' to interpret; inside /Contents "\\\r" is stored as
# backslash-backslash-r.
# NOTE(review): $TITLE, $OWNER, $CREATOR, $MAILTO etc. are inserted unescaped
# between PostScript string parentheses -- a value containing an unbalanced
# '(' or ')' or a backslash presumably breaks the fragment; TODO confirm and
# escape if needed.

DOCINFO="/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse \r[ /Title ($TITLE)\r/Author ($OWNER)\r/Creator ($CREATOR)\r/Producer (GhostScript $GS_VERSION)\r/DOCINFO pdfmark \r[ /PageMode /UseNone /DOCVIEW pdfmark \r[ {Catalog} << /PageLayout /SinglePage >> /PUT pdfmark \r[ {Catalog} <</ViewerPreferences << /FitWindow true >> >> /PUT pdfmark\r[ /Rect [75 500 375 750 ]\r/Open true\r/Title (Submission information)\r/Contents (Faksimile status for job $TITLE\\\r\\\rSender: $OWNER -- mailto:$MAILTO\\\rRecipient: \\\rPrinting time: $CREATIONDATE\\\rQueueing time: `date` (on `hostname`)\\\r\\\rA total of $PAGES pages have been queued)\r/Color [1 0 0 ]\r/ANN pdfmark"

# Clear error log and create PDF file at the proper location
#
# The job is fed to GhostScript as: DSC version line, $DOCINFO, then the
# content of $TMP_FILE. Everything GhostScript prints (stdout AND stderr)
# is collected in $GS_ERRORS -- note the redirection order: the file first,
# then '2>&1'. If GhostScript exits non-zero, PS_ERRORS is set to TRUE;
# this is the variable the mail section checks, for every spool system.
#
# Result: $TARGET_FILE names the PDF that has been written.

PS_ERRORS=FALSE
test -f "$GS_ERRORS" && $RM "$GS_ERRORS"
if [ "${SPOOLSYSTEM}" = HELIOS ]; then
	# EtherShare: reuse the name of the job file, only swap .ps for .pdf
	TARGET_FILE="$TARGET_DIR"/`basename "$TMP_FILE" .ps`.pdf
	$DT touch "$TARGET_FILE"
	# set type/creator and the -ai attribute via 'dt' before writing
	$DT set -t "PDF " -c "CARO" -ai "$TARGET_FILE"
	(echo $EE "$DSC_VERSION\r$DOCINFO\r\c" ; cat "$TMP_FILE" ) | $GS $GS_OPTIONS -sOutputFile="$TARGET_FILE" - >"$GS_ERRORS" 2>&1 || PS_ERRORS=TRUE
	$CHMOD 666 "$TARGET_FILE"
	# done writing: take the attribute back and let 'dt' update the file
	$DT set -a-i "$TARGET_FILE"
	$DT upd "$TARGET_FILE"
	test "$DEBUG" = TRUE && echo $EE "filename $TARGET_FILE used. Finished" >>"$DEBUG_LOG"
else
	SUFFIX=""
	COUNTER=0
	while true   # we loop until we find a filename that doesn't already exist
	do
		# 'wc -c' also counts the newline added by echo, so SUFFIX_LENGTH
		# is one more than the suffix itself; together with the "- 3"
		# below this reserves the 4 bytes needed for ".pdf"
		SUFFIX_LENGTH=`echo $SUFFIX | wc -c`
		MAX_LENGTH=`expr $MAX_FILENAME_LENGTH - 3 - $SUFFIX_LENGTH`
		# $TITLE is deliberately unquoted: echo then joins a multi-line
		# title into a single line before it is cut to length
		TARGET_FILE="$TARGET_DIR"/`echo $TITLE | tr "[/][:]" "_" | cut -b-$MAX_LENGTH`"$SUFFIX".pdf
		test "$DEBUG" = TRUE && echo $EE "filename $TARGET_FILE\c" >>"$DEBUG_LOG"
		if [ -f "$TARGET_FILE" ] ; then   # check whether target file exists
			COUNTER=`expr $COUNTER + 1`
			SUFFIX=".$COUNTER"
			test "$DEBUG" = TRUE && echo $EE " already exists. Skipping" >>"$DEBUG_LOG"
		else    # let's go: insert the DOCINFO and annotation and pipe the stuff to GS
			(echo $EE "$DSC_VERSION\r$DOCINFO\r\c" ; cat "$TMP_FILE" ) | $GS $GS_OPTIONS -sOutputFile="$TARGET_FILE" - >"$GS_ERRORS" 2>&1 || PS_ERRORS=TRUE
			$CHMOD 666 "$TARGET_FILE"
			test "$DEBUG" = TRUE && echo $EE " used. Finished" >>"$DEBUG_LOG"
			break
		fi
	done
fi

# check whether the PDF should be emailed to the sender...

# According to RFCs 821/822 and 2045-2049 we will have to do some
# formatting to get SMTP/MIME compliance:
# Some strings have to be converted to QuotedPrintable encoding and
# the PDF should be encoded into Base64 via mimencode
#
# The complete message (headers, text part, base64 PDF part) is assembled
# in $HEADER_FILE, overwriting the extracted PostScript header, and is then
# piped to $SENDMAIL.
# NOTE(review): $MAILTO comes from the print job and is written into the
# "To:" header unchecked -- confirm whether it needs sanitising.

if [ "$MAIL" = TRUE ] ; then
	QP_TITLE="=?ISO-8859-1?Q?`echo $EE "$TITLE" | $QP`?="
	echo $EE "X-Mailer: print-to-pdf\nFrom: \"Print to PDF\" <$ADMIN_ADDRESS>\nTo: <$MAILTO>\nMime-version: 1.0\nX-Priority: 3\nSubject: [FAX] $QP_TITLE\nContent-type: multipart/mixed;\n   boundary=\"Print-to-PDF_3057413257_738494_MIME_Part\"\n" >"$HEADER_FILE"
	echo $EE "This is a multi-part message in MIME format. If your mailer doesn't support MIME you should upgrade to a more recent version.\n\n--Print-to-PDF_3057413257_738494_MIME_Part\nContent-type: text/plain; charset=\"ISO-8859-1\"\nContent-transfer-encoding: 8bit\n" >>"$HEADER_FILE"

	# check whether errors occured while running GhostScript

	if [ "$PS_ERRORS" = TRUE ]; then
		echo $EE "*** WARNING ***\n\nA problem occured with your printjob. See error-messages below:\n\n-----------------------------------------------\n" >>"$HEADER_FILE"
		cat "$GS_ERRORS" >>"$HEADER_FILE"
		echo $EE "\n-----------------------------------------------\n\nWe try to continue with text extraction..." >>"$HEADER_FILE"
	fi

	# check whether we can extract the text with pdftotext, too
	# ($PDF2TEXT is quoted: an unquoted empty value would turn the test
	# into '[ -x ]', which is true)

	if [ -x "$PDF2TEXT" ] ; then
		$PDF2TEXT "$TARGET_FILE" - | uniq | tr "\014" "\012" >>"$HEADER_FILE"
	else   # if $PDF2TEXT isn't executable we write a lame message in the body instead
		echo $EE "Attached your print job \"$TITLE\"\n" >>"$HEADER_FILE"
	fi
	echo $EE "\n--Print-to-PDF_3057413257_738494_MIME_Part\nContent-type: application/pdf; name=\""$QP_TITLE".pdf\";\n x-mac-creator=\"4341524F\";\n x-mac-type=\"50444620\";\nContent-disposition: attachment\nContent-transfer-encoding: base64\n" >>"$HEADER_FILE"
	cat "$TARGET_FILE" | $MIMEENCODE >>"$HEADER_FILE"
	echo $EE "\n--Print-to-PDF_3057413257_738494_MIME_Part--\n" >>"$HEADER_FILE"
	cat "$HEADER_FILE" | $SENDMAIL
	STATUS=$?
	# remove the local PDF only if the MTA accepted the mail AND the user
	# asked for it. The comparison needs blanks around '=': written as one
	# word it is a non-empty string and therefore always true.
	if [ "$STATUS" -eq 0 ] && [ "$DEL_WHEN_MAILED" = TRUE ]; then
		$RM "$TARGET_FILE"
	fi
fi

# Final cleanup: the job file is only removed when we created it ourselves
# ($DEL_AFTER_PRINT is TRUE); the header/mail file and the GhostScript
# error log are always removed. The script then ends with exit status 0.

if [ ${DEL_AFTER_PRINT} = TRUE ]; then
	$RM "$TMP_FILE"
fi
for LEFTOVER in "$HEADER_FILE" "$GS_ERRORS"
do
	$RM "$LEFTOVER"
done
exit 0

