#!/bin/sh

# Author: John Eikenberry <jae@zhar.net>
# License: CC0 <http://creativecommons.org/publicdomain/zero/1.0/>

# (27.08.07) Fixed problem detecting html from munpack info
# (01.10.07) Didn't remove tmp files on error condition.
# (09.10.07) Replaced 'dc' subshell with shell arithmetic
# (31.10.16) Fix issue with content-id spanning multiple lines

# Given an html email (either as body or attached as multi-part) it will unpack
# the html and all the attachments. It rewrites the Content-ID resource
# locators with the unpacked filenames so you can see attached images and such.
#
# Also accepts 'safe' argument. When received it kills all external URLs (for
# images, javascript, etc). I use this when I want to see the html but don't
# trust the source.
#
# All temp files are cleaned up.
#
# This was developed on a Debian system and might have some Debian assumptions
# baked in (like the x-www-browser alternative).
#
# It uses things from the mpack and procmail debian packages.
#
# To use from mutt include this keybinding in the appropriate config file.
# Replace <F> keys with whatever you like.
#
## view an html email with firefox
#macro  index  <F12>  "<copy-message>/tmp/tmpbox\n<enter><shell-escape>mutt-view-html\n" "View HTML in browser"
#macro  pager  <F12>  "<copy-message>/tmp/tmpbox\n<enter><shell-escape>mutt-view-html\n" "View HTML in browser"
#
## safe version (no external images loaded)
#macro  index  <F11>  "<copy-message>/tmp/tmpbox\n<enter><shell-escape>mutt-view-html safe\n" "View (safe) HTML in browser"
#macro  pager  <F11>  "<copy-message>/tmp/tmpbox\n<enter><shell-escape>mutt-view-html safe\n" "View (safe) HTML in browser"

# Fixed locations: $mbox is the file the mutt macros above <copy-message>
# into, $tmpdir is the scratch directory the message parts are unpacked to.
tmpdir=/tmp/mutthtmltmpdir
mbox=/tmp/tmpbox

# Everything further down works with relative file names, so stop right here
# if the scratch directory cannot be created or entered instead of creating,
# renaming and deleting files in whatever directory the script was started
# from. The copied message is dropped as on every other exit path.
if ! mkdir -p "$tmpdir" || ! cd "$tmpdir"; then
    rm -f "$mbox"
    exit 1
fi

# munpack lists what it unpacked as "name (mime/type)" entries.
# -q: quiet, -t: also unpack text parts, -C: directory to unpack into
file_info=`munpack -q -t -C "$tmpdir" "$mbox"`

# Exit handler: removes the unpacked files listed in $files, the copied
# message $mbox and finally the (then empty) scratch directory $tmpdir.
cleanup () {
    # $? is still the status the script is exiting with
    if [ $? -eq 0 ]; then
        # give browser a chance to run
        sleep 2
    fi
    # $files is a whitespace separated list and is split on purpose
    rm -f $files $mbox
    [ -d $tmpdir ] && rmdir $tmpdir
}
# run the handler on every exit from the script (signal 0 == EXIT)
trap cleanup 0

# munpack reported that there was nothing to unpack: check whether the
# message itself is declared as html and, if so, fake the munpack listing
# for a single html part and write the message body out under that name.
if [ "$file_info" = "Did not find anything to unpack from $mbox" ]; then
    # MIME type names are case-insensitive, so also accept e.g. "Text/HTML"
    # (the html check further down is case-insensitive as well)
    if formail -x content-type: < "$mbox" | grep -qi text/html; then
        file_info="part1 (text/html)"
        # sed here strips out mail headers (everything up to and including
        # the first empty line).
        # NOTE(review): the body is written as-is; a transfer-encoded body
        # (base64 / quoted-printable) is not decoded here.
        sed -e '1,/^$/ d' "$mbox" > part1
    fi
fi

# Give up unless one of the listed parts is html.
if ! echo $file_info | grep -iq text/html; then
    echo "No html found!"
    # munpack may already have written non-html parts into $tmpdir. Remove
    # them first; otherwise they are left behind and rmdir fails on the
    # non-empty directory. Listing entries look like "name (mime/type)";
    # anything without parentheses (e.g. munpack's "nothing to unpack"
    # message) names no files.
    case $file_info in
        *\(*\)*)
            rm -f -- `echo $file_info | sed 's/\([^()]\+\) ([^()]\+)/\1/g'`
            ;;
    esac
    rm -f $mbox
    rmdir $tmpdir
    exit 1
fi

# The unquoted echo flattens the munpack listing onto a single line; from it
# take the name of the html part, e.g. "part1" out of "part1 (text/html)".
file_html=$(echo $file_info | sed 's/.*\(part[0-9]\+\) (text\/html).*/\1/g')

# need list of datafiles in case attachments are unnamed
# (listing lines mentioning text/ are dropped, the rest reduced to the name)
data_files=$(echo "$file_info" | grep -v "\(text/.*\)" \
        | sed 's/\([^()]\+\) ([^()]\+)/\1/g')

# Names of all unpacked parts: every "name (type)" entry reduced to "name".
files=$(echo $file_info | sed 's/\([^()]\+\) ([^()]\+)/\1/g')

# Rename the html part so it carries an .html extension and keep the list of
# files in step with the new name.
html_file=$file_html.html
mv $file_html $html_file
files=$(echo $files | sed "s/$file_html/$html_file/")

#/usr/bin/tidy -mq -wrap 0 $html_file 2> /dev/null

# Rewrite the html so it can be viewed locally, then open it in the browser.
if [ -n "$html_file" ]; then
    # Point "cid:" resource locators at the unpacked attachment files.
    if grep -qi src=\"cid: $html_file; then
        # Attachment names as given in the mail headers. Only header lines
        # that really carry a filename="..." produce output (-n ... p) and
        # parameters following the name are dropped.
        # tr there because munpack replaces ' ' with 'X'
        filenames=$(grep -i content-disposition $mbox \
            | tr ' ' 'X' \
            | sed -n -e 's/.*filename="\([^"]\+\)".*/\1/p')
        if [ -z "$filenames" ]; then
            # unnamed file attachments :P
            filenames="$data_files"
        fi
        # only fix image references
        # include 10 as content-disposition is sometimes long
        # first sed handles Content-IDs that are too long and put on next line
        cids=$(grep -A 10 -i "content-type: image/" $mbox \
            | sed -e '/^Content-ID:$/ {N;s|Content-ID:\n|Content-ID: |;}'\
            | grep Content-ID \
            | sed -e 's/Content-ID:\s\+<\([^<>]\+\)>/\1/')
        echo $cids

        # Pair the n-th file name with the n-th Content-ID.
        count=1
        for fn in $filenames; do
            cid=$(echo $cids | awk -v count=$count '{ print $count }')
            echo $fn $cid
            count=$(( 1 + $count ))
            # No Content-ID left for this file: an empty cid would turn the
            # substitution below into s/cid:/name/g and mangle every
            # remaining reference.
            [ -n "$cid" ] || continue
            # Both values come straight from the (untrusted) mail, so escape
            # whatever is special to sed before building the script from
            # them: regex characters and the delimiter in the pattern,
            # backslash, delimiter and & in the replacement.
            cid_re=$(printf '%s\n' "$cid" | sed -e 's|[][\\/.*^$]|\\&|g')
            fn_rep=$(printf '%s\n' "$fn" | sed -e 's|[\\/&]|\\&|g')
            sed -i "s/cid:$cid_re/$fn_rep/g" $html_file
        done
        # base tag screws with local cid references
        if grep -qi "<base.*href=" $html_file; then
            grep -vi "<base.*href=" $html_file > $html_file.tmp
            mv -f $html_file.tmp $html_file
        fi
    fi
    # safe mode, strip out remote image references
    if [ "$1" = "safe" ]; then
        # Always run the substitutions: gating them on a grep for src="http
        # let background-only, unquoted and upper-case attributes through.
        # The I flag (GNU sed) makes the match case-insensitive.
        # misses javascript placed images, disable js in browser if needed
        sed -i \
            -e "s/src\s*=\s*\"*https*:\/\/[^\">]*\"*/src=\"#\"/gI" \
            -e "s/background\s*=\s*\"*https*:\/\/[^\">]*\"*/background=\"#\"/gI" \
            $html_file
    fi
else
    # Safeguard only: html_file is built by appending ".html" to the part
    # name, so it is not expected to be empty here.
    rm $files $mbox
    rmdir $tmpdir
    exit 1
fi

# Started in the background so the script can go on to exit; the exit
# handler waits briefly before it deletes the files again.
x-www-browser --new-window $tmpdir/$html_file &

## vim: ft=sh