Replica; the primary development repository is https://github.com/jjg/preposter.us
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

385 lines
17 KiB

  1. #!/usr/bin/python
  2. import imaplib
  3. import email
  4. import os
  5. import hashlib
  6. import smtplib
  7. import sys
  8. import mimetypes
  9. import unicodedata
  10. import re
  11. import ConfigParser
  12. import shutil
  13. import traceback
  14. import humanhash
  15. import json
  16. import xml.etree.ElementTree as ET
  17. #import dateutil
  18. from email.mime.text import MIMEText
  19. # load config
  20. config = ConfigParser.RawConfigParser()
  21. config.read('preposter.us.cfg')
  22. IMAP_SERVER = config.get('mailserver', 'imap_server')
  23. SMTP_SERVER = config.get('mailserver', 'smtp_server')
  24. SMTP_PORT = config.get('mailserver', 'smtp_port')
  25. EMAIL_ADDRESS = config.get('mailserver', 'email_address')
  26. EMAIL_PASSWORD = config.get('mailserver', 'email_password')
  27. WEB_HOST = config.get('webserver', 'web_hostname')
  28. WEB_ROOT = config.get('webserver', 'web_filesystem_root')
  29. ADMIN_EMAIL = config.get('system', 'admin_email')
  30. class Post(object):
  31. title = ''
  32. slug = ''
  33. author = ''
  34. date = ''
  35. url = ''
  36. def unpack_message(uid, message, blog_dir):
  37. email_body = ''
  38. html_body = ''
  39. text_body = ''
  40. counter = 1
  41. audio_filename = None
  42. audio_length = 0
  43. for part in message.walk():
  44. if part.get_content_maintype() == 'multipart':
  45. continue
  46. # extract message body
  47. if part.get_content_type() == 'text/html':
  48. # TODO: remove any containing head/body tags
  49. html_body = part.get_payload(decode=True)
  50. if part.get_content_type() == 'text/plain':
  51. text_body += part.get_payload(decode=True)
  52. filename = part.get_filename()
  53. if not filename:
  54. ext = mimetypes.guess_extension(part.get_content_type())
  55. if not ext:
  56. # Use a generic bag-of-bits extension
  57. ext = '.bin'
  58. filename = 'part-%03d%s' % (counter, ext)
  59. filename = '%s-%s' % (uid, filename)
  60. # only store files we know what to do with
  61. store_file = False
  62. # caps just makes comparisons harder
  63. filename = filename.lower()
  64. # handle images
  65. if filename.find('.jpg') > 0 or filename.find('.jpeg') > 0 or filename.find('.png') > 0 or filename.find('.gif') > 0 or filename.find('.pdf') > 0:
  66. store_file = True
  67. if part.get('Content-ID'):
  68. cid = 'cid:%s' % part.get('Content-ID')[1:-1]
  69. # if we can find the file embedded, update the link
  70. if html_body.find(cid) > -1:
  71. # re-write CID img tag to use stored filename
  72. html_body = html_body.replace(cid, 'assets/%s' % filename)
  73. else:
  74. # otherwise, just embed the file
  75. email_body = email_body + '<a href=\'assets/%s\'><img src=\'assets/%s\'></a>' % (filename, filename)
  76. # handle video
  77. if filename.find('.mov') > 0 or filename.find('.mp4') > 0 or filename.find('.ogg') > 0 :
  78. store_file = True
  79. email_body = email_body + '<video controls><source src=\'assets/%s\'></video>' % filename
  80. # handle audio
  81. if filename.find('.mp3') > 0 or filename.find('.wav') > 0 or filename.find('.m4a') > 0:
  82. store_file = True
  83. email_body = email_body + '<audio controls><source src=\'assets/%s\'></audio>' % filename
  84. audio_filename = filename
  85. # There might be a better way to get this number...
  86. audio_length = len(part.get_payload(decode=True))
  87. if store_file:
  88. counter += 1
  89. fp = open(os.path.join(blog_dir, 'assets', filename), 'wb')
  90. fp.write(part.get_payload(decode=True))
  91. fp.close()
  92. if html_body:
  93. email_body = html_body + email_body
  94. else:
  95. email_body = text_body + email_body
  96. return {"email_body": email_body, "audio_filename": audio_filename, "audio_length": audio_length}
  97. def send_notification(destination_email, subject, message):
  98. # assemble email
  99. message = MIMEText(message)
  100. message['Subject'] = subject
  101. message['From'] = EMAIL_ADDRESS
  102. message['To'] = destination_email
  103. # send
  104. s = smtplib.SMTP(SMTP_SERVER + ':' + SMTP_PORT)
  105. s.ehlo()
  106. s.starttls()
  107. s.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
  108. s.sendmail(EMAIL_ADDRESS, destination_email, message.as_string())
  109. s.quit()
  110. # get messages
  111. imap_search = 'UNSEEN'
  112. suppress_notification = False
  113. if len(sys.argv) > 1:
  114. if sys.argv[1] == 'rebuild':
  115. shutil.copy('index.html', WEB_ROOT)
  116. shutil.copy('podcast.xml', WEB_ROOT)
  117. shutil.copytree('images', WEB_ROOT + '/images')
  118. shutil.copytree('css', WEB_ROOT + '/css')
  119. imap_search = 'ALL'
  120. suppress_notification = True
  121. mailbox = imaplib.IMAP4_SSL(IMAP_SERVER)
  122. mailbox.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
  123. mailbox.select()
  124. result, data = mailbox.uid('search', None, imap_search)
  125. uid_list = data.pop().split(' ')
  126. # if there's no valid uid in the list, skip it
  127. if uid_list[0] != '':
  128. for uid in uid_list:
  129. # global exception handlers like this are for bad programmers
  130. try:
  131. # fetch message
  132. latest_email_uid = uid
  133. result, data = mailbox.uid('fetch', latest_email_uid, '(RFC822)')
  134. raw_email = data[0][1]
  135. email_message = email.message_from_string(raw_email)
  136. email_from = email.utils.parseaddr(email_message['From'])
  137. email_address = email_from[1]
  138. # assemble post components
  139. post_author = email_address.split('@')[0]
  140. post_date = email_message['Date']
  141. post_title = email_message['Subject']
  142. post_slug = unicodedata.normalize('NFKD', unicode(post_title))
  143. post_slug = post_slug.encode('ascii', 'ignore').lower()
  144. post_slug = re.sub(r'[^a-z0-9]+', '-', post_slug).strip('-')
  145. post_slug = re.sub(r'[-]+', '-', post_slug)
  146. # check for blog subdir
  147. email_hash = hashlib.md5()
  148. email_hash.update(email_address)
  149. blog_directory = email_hash.hexdigest()
  150. blog_physical_path = WEB_ROOT + '/' + blog_directory
  151. humane_blog_name = humanhash.humanize(blog_directory)
  152. if not os.path.exists(WEB_ROOT + '/' + blog_directory):
  153. # create directory for new blog
  154. os.makedirs(blog_physical_path)
  155. os.makedirs(os.path.join(blog_physical_path, 'assets'))
  156. # copy over the default stylsheet
  157. shutil.copytree('css', blog_physical_path + '/css')
  158. # create human-readable link to blog directory
  159. os.symlink(blog_directory, os.path.join(WEB_ROOT, humane_blog_name))
  160. # create html blog post index
  161. template = open('postindextemplate.html', 'r').read()
  162. new_index = template
  163. new_index = new_index.replace('{0}', post_author)
  164. new_index = new_index.replace('{1}', blog_directory)
  165. blog_index = open(blog_physical_path + '/index.html', 'w')
  166. blog_index.write(new_index)
  167. blog_index.close()
  168. # create rss blog post index
  169. template = open('postrssindextemplate.xml', 'r').read()
  170. new_index = template
  171. new_index = new_index.replace('{0}', '%s\'s Preposter.us Blog' % post_author)
  172. new_index = new_index.replace('{1}', 'http://%s/%s' % (WEB_HOST, humane_blog_name))
  173. new_index = new_index.replace('{2}', '%s\'s blog on preposter.us' % post_author)
  174. blog_index = open(blog_physical_path + '/rss.xml', 'w')
  175. blog_index.write(new_index)
  176. blog_index.close()
  177. # podcast support - create individual podcast XML
  178. template = open('podcastrssindextemplate.xml', 'r').read()
  179. new_index = template
  180. new_index = new_index.replace('{0}', '%s\'s Preposter.us Podcast' % post_author)
  181. new_index = new_index.replace('{1}', 'http://%s/%s' % (WEB_HOST, humane_blog_name))
  182. new_index = new_index.replace('{2}', '%s\'s podcast on preposter.us' % post_author)
  183. blog_index = open(blog_physical_path + '/podcast.xml', 'w')
  184. blog_index.write(new_index)
  185. blog_index.close()
  186. # add new blog to site index
  187. blog_index_partial = open(WEB_ROOT + '/blogs.html', 'a')
  188. blog_index_partial.write('<li><a href=\'%s\'>%s</a></li>\n' % (humane_blog_name, post_author))
  189. blog_index_partial.close()
  190. if not suppress_notification:
  191. send_notification(email_address, 'Your new Preposter.us blog is ready!', 'You just created a Preposter.us blog, a list of your posts can be found here: http://%s/%s . Find out more about Preposter.us by visiting the project repository at https://github.com/jjg/preposter.us' % (WEB_HOST, humane_blog_name))
  192. post_physical_path = blog_physical_path + '/' + post_slug + '.html'
  193. # parse the actual message
  194. unpacked_message = unpack_message(uid, email_message, blog_physical_path)
  195. post_body = unpacked_message["email_body"]
  196. # if necessary, update post index
  197. if not os.path.exists(post_physical_path):
  198. # update post index partial
  199. post_index_partial = open(blog_physical_path + '/posts.html', 'a')
  200. post_index_partial.write('<li><a href=\'%s.html\'>%s</a> - %s</li>' % (post_slug, post_title, post_date))
  201. post_index_partial.close()
  202. # update post index json
  203. post = Post()
  204. post.title = post_title
  205. post.slug = post_slug
  206. post.author = post_author
  207. post.date = post_date
  208. post.url = 'http://' + WEB_HOST + '/' + humane_blog_name + '/' + post_slug + '.html'
  209. # create a new index or update an existing one
  210. json_index_physical_path = blog_physical_path + '/posts.json'
  211. post_index_obj = {'posts':[]}
  212. if os.path.exists(json_index_physical_path):
  213. post_index_json = open(json_index_physical_path, 'r')
  214. post_index_obj = json.loads(post_index_json.read())
  215. post_index_json.close()
  216. # TODO: find a more elegant way to do this than .__dict__
  217. post_index_obj['posts'].append({'post':post.__dict__})
  218. post_index_json = open(json_index_physical_path, 'w')
  219. post_index_json.write(json.dumps(post_index_obj))
  220. post_index_json.close()
  221. # update rss feed
  222. rss_physical_path = blog_physical_path + '/rss.xml'
  223. tree = ET.parse(rss_physical_path)
  224. root = tree.getroot()
  225. # add new post
  226. channel = root.find('channel')
  227. item = ET.SubElement(channel, 'item')
  228. item_title = ET.SubElement(item, 'title')
  229. item_link = ET.SubElement(item, 'link')
  230. item_guid = ET.SubElement(item, 'guid')
  231. item_pub_date = ET.SubElement(item, 'pubDate')
  232. item_description = ET.SubElement(item, 'description')
  233. item_title.text = post.title
  234. item_link.text = post.url
  235. item_guid.text = post.url
  236. item_pub_date.text = post.date
  237. item_description.text = 'a post about %s by %s' % (post.title, post.author)
  238. # save changes
  239. tree.write(rss_physical_path)
  240. # podcast support - add post to podcast XML if media is present
  241. if unpacked_message["audio_filename"]:
  242. # unpack media attributes
  243. audio_filename = unpacked_message["audio_filename"]
  244. audio_length = str(unpacked_message["audio_length"])
  245. audio_type = "audio/%s" % audio_filename.split(".")[-1]
  246. audio_url = "http://%s/%s/assets/%s" % (WEB_HOST, humane_blog_name, audio_filename)
  247. # update user's podcast
  248. podcast_physical_path = blog_physical_path + '/podcast.xml'
  249. tree = ET.parse(podcast_physical_path)
  250. root = tree.getroot()
  251. # add new episode
  252. channel = root.find('channel')
  253. item = ET.SubElement(channel, 'item')
  254. item_title = ET.SubElement(item, 'title')
  255. item_link = ET.SubElement(item, 'link')
  256. item_guid = ET.SubElement(item, 'guid')
  257. item_pub_date = ET.SubElement(item, 'pubDate')
  258. item_description = ET.SubElement(item, 'description')
  259. item_enclosure = ET.SubElement(item, 'enclosure')
  260. item_title.text = post.title
  261. item_link.text = post.url
  262. item_guid.text = post.url
  263. item_pub_date.text = post.date
  264. item_description.text = 'an episode about %s by %s' % (post.title, post.author)
  265. # TODO: add extended podcast attributes
  266. item_enclosure.set("url", audio_url)
  267. item_enclosure.set("type", audio_type)
  268. item_enclosure.set("length", audio_length)
  269. # save changes
  270. tree.write(podcast_physical_path)
  271. # update site-wide podcast
  272. # TODO: this could be DRY'd up
  273. podcast_physical_path = WEB_ROOT + '/podcast.xml'
  274. tree = ET.parse(podcast_physical_path)
  275. root = tree.getroot()
  276. # add new episode
  277. channel = root.find('channel')
  278. item = ET.SubElement(channel, 'item')
  279. item_title = ET.SubElement(item, 'title')
  280. item_link = ET.SubElement(item, 'link')
  281. item_guid = ET.SubElement(item, 'guid')
  282. item_pub_date = ET.SubElement(item, 'pubDate')
  283. item_description = ET.SubElement(item, 'description')
  284. item_enclosure = ET.SubElement(item, 'enclosure')
  285. item_title.text = post.title
  286. item_link.text = post.url
  287. item_guid.text = post.url
  288. item_pub_date.text = post.date
  289. item_description.text = 'an episode about %s by %s' % (post.title, post.author)
  290. # TODO: add extended podcast attributes
  291. item_enclosure.set("url", audio_url)
  292. item_enclosure.set("type", audio_type)
  293. item_enclosure.set("length", audio_length)
  294. # save changes
  295. tree.write(podcast_physical_path)
  296. # write post to disk
  297. post_template = open('posttemplate.html', 'r').read()
  298. new_post = post_template
  299. new_post = new_post.replace('{0}', post_title)
  300. new_post = new_post.replace('{1}', post_author)
  301. new_post = new_post.replace('{2}', post_body)
  302. # TODO: format this date to something prettier
  303. new_post = new_post.replace('{3}', post_date)
  304. post_file = open(post_physical_path, 'w')
  305. post_file.write(new_post)
  306. post_file.close()
  307. if not suppress_notification:
  308. send_notification(email_address, 'Preposter.us Post Posted!', 'Your post \"%s\" has been posted, you can view it here: http://%s/%s/%s.html' % (post_title, WEB_HOST, humane_blog_name, post_slug))
  309. except:
  310. print '****************************************'
  311. print traceback.format_exc()
  312. print raw_email
  313. print '****************************************'