Downloader for WikiLeaks' Leaked DNC Emails
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

70 lines
1.4 KiB

#!/bin/bash
##
# SPDX-License-Identifier: CC0-1.0
#
# curl -OJL https://wikileaks.org/dnc-emails//get/<id-here>
# IDs range from [1, 22456]
#
##
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# OUTPUT_FILE    progress log (also echoed to stdout)
# RETRY_TIMEOUT  seconds to sleep before retrying an ID
# RETRY_COUNT    retries granted to each ID when the server answers with
#                something other than an .eml file
# FIRST_ID/LAST_ID  inclusive range of message IDs to request
OUTPUT_FILE="dncdownload.log"
RETRY_TIMEOUT=1
RETRY_COUNT=2
FIRST_ID=1
LAST_ID=22456

# Working directories, all relative to the directory the script starts in.
# The script itself never changes directory; only the curl subshell does.
EML_DIR="eml"           # final, renamed .eml files
TMP_DIR="$EML_DIR/tmp"  # scratch directory curl downloads into
LOG_DIR="output"        # per-ID curl stdout/stderr captures

# Unmatched globs expand to nothing, and dotfiles are matched, so listing the
# scratch directory sees whatever name the server chose for the download.
shopt -s nullglob dotglob

# Print an error to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a progress message to stdout and append it to the log file.
log() {
  printf '%s\n' "$*" | tee -a "$FD_PATH"
}

# Empty the scratch directory by recreating it. ${TMP_DIR:?} aborts instead
# of expanding to an empty path if the variable is ever unset.
reset_tmp() {
  rm -rf -- "${TMP_DIR:?}" && mkdir -p -- "$TMP_DIR"
}

#######################################
# Download one message, retrying when the server returns a non-.eml file.
# Globals:   RETRY_COUNT, RETRY_TIMEOUT, TMP_DIR, EML_DIR, LOG_DIR (read)
# Arguments: $1 - numeric message ID
# Outputs:   progress messages via log(); curl output in $LOG_DIR/<id>.*
# Returns:   0 if the .eml was stored in $EML_DIR, 1 otherwise
#######################################
fetch_id() {
  local id=$1
  # Every ID starts with a full retry budget, whatever happened to earlier IDs.
  local attempts_left=$RETRY_COUNT
  local curl_ret eml_name ext out_name
  local -a downloaded

  while :; do
    log "Downloading $id..."
    reset_tmp || die "Cannot prepare scratch directory $TMP_DIR"

    # curl -OJ writes into the current directory under the server-supplied
    # name, so run it in a subshell whose cwd is the scratch directory. The
    # redirections are opened before the cd, i.e. relative to the start dir.
    (cd -- "$TMP_DIR" && curl -OJL "https://wikileaks.org/dnc-emails//get/$id") \
      > "$LOG_DIR/$id.stdout" 2> "$LOG_DIR/$id.stderr"
    curl_ret=$?
    if ((curl_ret != 0)); then
      log " * Failed to download: cURL returned $curl_ret. See output/$id.stderr for more information."
      return 1
    fi

    # Take the first file in the scratch directory, without parsing ls.
    downloaded=("$TMP_DIR"/*)
    eml_name=${downloaded[0]-}
    eml_name=${eml_name##*/}
    ext=${eml_name##*.}
    ext=${ext,,}

    if [[ "$ext" == "eml" ]]; then
      printf -v out_name '%05d_%s' "$id" "$eml_name"
      if ! mv -- "$TMP_DIR/$eml_name" "$EML_DIR/$out_name"; then
        log " * Failed to store $out_name."
        return 1
      fi
      return 0
    fi

    # This usually happens if we hit Wikileaks too fast, so just retry until
    # it lets us or the retry budget is used up.
    if ((attempts_left == 0)); then
      log " * Failed to download: File unavailable, exceeded retry count. Skipping..."
      return 1
    fi
    log " * Failed to download: File unavailable, trying again in $RETRY_TIMEOUT second(s)..."
    attempts_left=$((attempts_left - 1))
    sleep "$RETRY_TIMEOUT"
  done
}

# Start from a clean slate: drop results and the log of any previous run.
rm -rf -- "$EML_DIR" "$LOG_DIR" "$OUTPUT_FILE"

# Keep the log open on a dedicated descriptor; log() appends through it.
exec {FD}>"$OUTPUT_FILE" || die "Cannot open $OUTPUT_FILE for writing"
FD_PATH="/dev/fd/$FD"

mkdir -p -- "$TMP_DIR" "$LOG_DIR" || die "Cannot create working directories"

# Visit exactly FIRST_ID..LAST_ID inclusive.
for ((id = FIRST_ID; id <= LAST_ID; id++)); do
  fetch_id "$id"
done

# Remove the scratch directory and close the log descriptor.
rm -rf -- "${TMP_DIR:?}"
exec {FD}>&-