tools: rudimentary docx importer
Add a rudimentary importer for Microsoft Word docx files. It uses pandoc to convert the Word documents to Markdown and extracts the embedded images into static/img/. It also fixes the paths and names of the images so that they can be used generally. If an author, date or description is provided, the article metadata is patched to use it. A fotogrid section is also created. What's left is cleaning the generated Markdown of Word's image size constraints; I have not found a generally usable way that works without manual intervention. Resizing the images is also not yet done.
Signed-off-by: Johannes Thumshirn <jth@kernel.org>
pull/41/head
parent
7509cf0bd7
commit
19eaa46656
@ -0,0 +1,77 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-3.0
|
||||
# vim: set sw=4 ts=4 et:
|
||||
|
||||
# die MESSAGE...
#
# Print MESSAGE (all arguments joined by spaces) to stderr and terminate the
# script with exit status 1, so that callers and pipelines can detect the
# failure.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}
|
||||
|
||||
# usage
#
# Report the command-line synopsis through die(), which terminates the script.
# "$0" is quoted so that a script path containing whitespace is not split
# before basename sees it.
usage() {
    die "Usage: $(basename -- "$0") [-a author] [-d date ] [-D description] docx"
}
|
||||
|
||||
|
||||
# Defaults for the article metadata; each can be overridden on the command
# line (see parse_args below).
AUTHOR="FIXME"
DATE=$(date +"%d.%m.%Y")
DESCRIPTION="FIXME"

# parse_args ARGS...
#
# Parse the option flags in ARGS:
#   -a author       sets AUTHOR
#   -d date         sets DATE
#   -D description  sets DESCRIPTION
# Any other option (or a missing option argument) calls usage.
#
# OPTIND is deliberately left global: after this function returns it holds the
# index of the first non-option argument, which the script uses to shift the
# options away.
parse_args() {
    local opt

    # "D:" must be listed here, otherwise -D is treated as an unknown option.
    while getopts "a:d:D:" opt; do
        case "$opt" in
            a) AUTHOR=$OPTARG ;;
            d) DATE=$OPTARG ;;
            D) DESCRIPTION=$OPTARG ;;
            *) usage ;;
        esac
    done
}

parse_args "$@"
|
||||
|
||||
# Drop the options consumed by getopts; what remains is the docx file.
shift "$((OPTIND - 1))"

# Exactly one positional argument (the Word document) is required.
if [ $# -lt 1 ]; then
    usage
fi
DOCX=$1

# Fail early with a clear message instead of letting pandoc report it later.
[ -r "$DOCX" ] || die "Cannot read $DOCX"

# command -v is the portable way to test for a program; its output is
# discarded so the path of pandoc is not printed on every run.
command -v pandoc >/dev/null 2>&1 \
    || die "Please install pandoc to use $(basename -- "$0")"
|
||||
|
||||
# Article name: the file name without its directory part and without the
# trailing ".docx" suffix. It names the Markdown page and prefixes the images.
ARTICLE=${DOCX##*/}
ARTICLE=${ARTICLE%.docx}
MD="$ARTICLE.md"

# Scratch directory (created in the current directory) that receives the
# Markdown output and the media extracted by pandoc.
TEMPDIR=$(mktemp -d "$ARTICLE.XXXXXX") || die "Cannot create a temporary directory"

pandoc -f docx -t markdown -o "$TEMPDIR/$MD" --extract-media "$TEMPDIR" "$DOCX" \
    || die "pandoc could not convert $DOCX"

# Rewrite the image links from "<tempdir>/media/imageN" to
# "static/img/<article>N". Both sides are built from file names, so escape the
# characters that are special to sed (":" is the s-command delimiter here).
# NOTE(review): "sed -i" without a suffix argument is the GNU form.
media_re=$(printf '%s' "$TEMPDIR/media/image" | sed 's/[][\.*^$:]/\\&/g')
img_rep=$(printf '%s' "static/img/$ARTICLE" | sed 's/[\&:]/\\&/g')
sed -i "s:$media_re:$img_rep:g" "$TEMPDIR/$MD" \
    || die "Cannot fix the image paths in $MD"

# Move the extracted JPEGs to static/img/, renaming "imageN.jpeg" to
# "<article>N.jpeg" so the names match the rewritten links.
# NOTE(review): only *.jpeg files are imported, although the link rewrite
# above applies to every extracted image - confirm whether other formats
# need handling.
for f in "$TEMPDIR"/media/*.jpeg; do
    # Without a match the glob stays literal; skip it.
    [ -e "$f" ] || continue
    name=${f##*/}
    case "$name" in
        image*) name=$ARTICLE${name#image} ;;
    esac
    mv -- "$f" "static/img/$name" || die "Cannot move $f to static/img/"
done

mv -- "$TEMPDIR/$MD" pages/ || die "Cannot move $MD to pages/"
|
||||
|
||||
# Build the final page in a temporary file: the metadata header, the converted
# Markdown, and a fotogrid section listing the article's images. The result
# then replaces pages/$MD.
TMP=$(mktemp "$ARTICLE.XXXXXX") || die "Cannot create a temporary file"

{
    # printf keeps metadata values intact even if they start with "-" or
    # contain backslashes.
    printf 'title: %s\n' "$ARTICLE"
    printf 'date: %s\n' "$DATE"
    printf 'author: %s\n' "$AUTHOR"
    printf 'description: %s\n' "$DESCRIPTION"
    echo ""
    cat -- "pages/$MD"
    echo ""
    echo "<hr/>"
    echo "{{ fotogrid(["
    # NOTE(review): this prefix match also picks up images of other articles
    # whose name starts with $ARTICLE - confirm that is acceptable.
    for pic in "static/img/$ARTICLE"*.jpeg; do
        # Without a match the glob stays literal; do not list it.
        [ -e "$pic" ] || continue
        printf '"%s",\n' "$pic"
    done
    echo "]) | safe }}"
} > "$TMP" || die "Cannot assemble pages/$MD"

mv -- "$TMP" "pages/$MD" || die "Cannot write pages/$MD"

# ${TEMPDIR:?} aborts instead of running "rm -rf" on an empty path.
rm -rf -- "${TEMPDIR:?}"
|
Loading…
Reference in New Issue