You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

349 lines
9.1 KiB

  1. #!/usr/bin/env python
  2. __version__ = '1.0.0'
  3. _VERSION = __version__
  4. _VERSION_STRING = 'Fimf2XeTeX version {0}'.format(__version__)
  5. import json
  6. import re
  7. import sys
  8. import os
  9. import inspect
  10. import codecs
  11. import unicodedata
  12. import imghdr
  13. if sys.hexversion >= 0x3000000:
  14. import urllib.request as urllib2
  15. import urllib.parse as urlparse
  16. else:
  17. import urllib2
  18. from urllib2 import urlparse
  19. from sys import stderr as stderr, stdout as stdout
  20. FIMF_API = "https://www.fimfiction.net/api/story.php?story={0}"
  21. #FIMF_CHAPTERDL = "https://www.fimfiction.net/download_chapter.php?chapter={0}"
  22. #FIMF_CHAPTERDL_HTML = "https://www.fimfiction.net/download_chapter.php?chapter={0}&html"
  23. FIMF_CHAPTERDL = "https://www.fimfiction.net/chapters/download/{0}/txt"
  24. FIMF_CHAPTERDL_HTML = "https://www.fimfiction.net/chapters/download/{0}/html"
  25. USER_AGENT = "Mozilla/5.0"
  26. TEX_PREAMBLE_1 = \
  27. u"""\\documentclass[a4paper,12pt,twoside]{memoir}
  28. \\usepackage{ifxetex}
  29. \\RequireXeTeX
  30. \\usepackage[USenglish]{babel}
  31. \\usepackage{graphicx}
  32. \\usepackage{fontspec}
  33. % Convert non-breaking spaces into normal spaces
  34. \\usepackage{newunicodechar}
  35. \\newunicodechar{\u00a0}{ }
  36. \\PassOptionsToPackage{hyphens}{url}
  37. \\usepackage[linktoc=all]{hyperref}
  38. \\hypersetup{
  39. colorlinks,
  40. citecolor=black,
  41. filecolor=black,
  42. linkcolor=black,
  43. urlcolor=black
  44. }
  45. \\usepackage{calc}
  46. \\usepackage[autostyle]{csquotes}
  47. \\chapterstyle{dash}
  48. % Account for > 100 chapters and add ............ in the TOC
  49. \\renewcommand*{\\cftchapterdotsep}{\\cftdotsep}
  50. \\cftsetindents{chapter}{1em}{3em}
  51. \\newcommand{\\MLPFiM}{\\textsc{My Little Pony: Friendship is Magic}\\textsuperscript{\\textregistered}}
  52. \\newcommand{\\HasbroInc}{\\textsc{Hasbro, Inc.}}
  53. """
  54. TEX_PREAMBLE_2 = \
  55. u"""\\makeevenhead{headings}{\\thepage}{\\textsc{\\fimfAuthor}}{}
  56. \\makeoddhead{headings}{}{\\textsc{\\fimfTitle}}{\\thepage}
  57. \\makeevenfoot{plain}{}{}{}
  58. \\makeoddfoot{plain}{}{}{}
  59. \\title{\\fimfTitle}
  60. \\author{\\fimfAuthor}
  61. \\date{}
  62. """
  63. TEX_BACKTITLE = \
  64. u"""{\\setlength{\\parindent}{0cm}
  65. This is a work of fiction. All of the characters, organisation, and events portrayed in this work are either products of the author's imagination or are used fictitiously.\\\\
  66. \MakeUppercase{\\fimfTitle}, by \\\\
  67. \MakeUppercase{\\fimfAuthor}\\\\
  68. \\MLPFiM is a registered trademark of \\HasbroInc\\\\
  69. The author is not affiliated with \\HasbroInc\\\\
  70. Original Story URL:\\\\
  71. \\url{\\fimfUrl}
  72. }
  73. """
  74. def usage():
  75. stderr.write("Usage: {0} <storyID>\n".format(os.path.basename(sys.argv[0])))
  76. def get_image(url, base_name = None):
  77. if not base_name:
  78. base_name = urlparse.urlsplit(url).path.split('/')[-1].split('.')[0]
  79. image_data = urllib2.urlopen(urllib2.Request(url, headers={"User-Agent": USER_AGENT})).read()
  80. ext = imghdr.what(None, image_data)
  81. if ext == None:
  82. print("Image is in invalid format")
  83. return 1
  84. cover_file = base_name + "." + ext
  85. with open(cover_file, "wb") as f:
  86. f.write(image_data)
  87. return cover_file
  88. def main():
  89. if len(sys.argv) != 2:
  90. usage()
  91. return 1
  92. try:
  93. story_id = int(sys.argv[1])
  94. except ValueError:
  95. usage()
  96. return 1
  97. #story_id = 25966
  98. story_url = FIMF_API.format(story_id)
  99. story = json.loads(urllib2.urlopen(urllib2.Request(story_url, headers={"User-Agent": USER_AGENT})).read())["story"]
  100. print("Story URL: {0}".format(story["url"]))
  101. print("Story: {0} - {1}".format(story["title"], story["author"]["name"]))
  102. print("Story Image: {0}".format(story["full_image"]))
  103. cover_file = get_image(story["full_image"], "coverimage")
  104. """
  105. image_data = urllib2.urlopen(urllib2.Request(story["full_image"], headers={"User-Agent": USER_AGENT})).read()
  106. ext = imghdr.what(None, image_data)
  107. if ext == None:
  108. print("Cover image is in invalid format")
  109. return 1
  110. cover_file = "coverimage." + ext
  111. with open(cover_file, "wb") as f:
  112. f.write(image_data)
  113. """
  114. chapter_includes = []
  115. chapters = story["chapters"]
  116. for i in range(0, len(chapters)):
  117. #for i in range(43,44):
  118. chap = chapters[i]
  119. chapName = ''.join((c for c in unicodedata.normalize('NFD', chap["title"]) if unicodedata.category(c) != 'Mn')).replace(u"\u2018", "`").replace(u"\u2019", "'")
  120. stdout.write(" Chapter {0}: {1}...".format(i + 1, chapName))
  121. file_name = write_chapter_html(i + 1, chap)
  122. # file_name = write_chapter_txt(i + 1, chap)
  123. stdout.write("{0}\n".format(file_name))
  124. chapter_includes.append(file_name)
  125. file_name = write_latex(story, cover_file, chapter_includes)
  126. print("Output written to {0}".format(file_name))
  127. def write_latex(story, cover_file, chapter_includes):
  128. safe_title = re.sub("[^0-9a-zA-Z]+", "_", story["title"].lower())
  129. with codecs.open("backtitle.tex", "wb", encoding="utf-8") as f:
  130. f.write(TEX_BACKTITLE)
  131. file_name = "{0}.tex".format(safe_title)
  132. with codecs.open(file_name, "wb", encoding="utf-8") as f:
  133. f.write(TEX_PREAMBLE_1)
  134. f.write("\n")
  135. f.write("\\newcommand{{\\fimfTitle}}{{{0}}}\n".format(story["title"]))
  136. f.write("\\newcommand{{\\fimfAuthor}}{{{0}}}\n".format(story["author"]["name"]))
  137. f.write("\\newcommand{{\\fimfUrl}}{{{0}}}\n".format(story["url"]))
  138. f.write("\\newcommand{{\\fimfStoryID}}{{{0}}}\n".format(story["id"]))
  139. f.write("\n")
  140. f.write("\\newcommand{\\novelbreak}{\\fancybreak{* * *}}\n")
  141. f.write("\n")
  142. f.write(TEX_PREAMBLE_2)
  143. f.write("\n")
  144. f.write("\n\\begin{document}\n\n")
  145. f.write("\t\\pagestyle{empty}\n\n")
  146. f.write("\t\\frontmatter\n\n")
  147. f.write("\t\\begin{titlingpage}\n")
  148. f.write("\t\t\\maketitle\n")
  149. f.write("\t\t\\begin{center}\n")
  150. f.write("\t\t\t\\includegraphics[width=\\textwidth,height=\\textheight-\\pagetotal,keepaspectratio]{" + cover_file + "}\n")
  151. f.write("\t\t\\end{center}\n")
  152. f.write("\t\\end{titlingpage}\n")
  153. f.write("\t\\clearpage\n\n")
  154. f.write("\t\\include{backtitle}\n")
  155. f.write("\t\\clearpage\n\n")
  156. f.write("\t\\tableofcontents*\n")
  157. f.write("\t\\clearpage\n\n")
  158. f.write("\t\\pagestyle{headings}\n")
  159. f.write("\t\\mainmatter\n\n")
  160. for chap in chapter_includes:
  161. f.write("\t\\include{{{0}}}\n".format(chap))
  162. f.write("\n\\end{document}\n")
  163. return file_name
  164. def tex_escape(line):
  165. # For the love of all things holy, do the \\ escape first...
  166. line = line.replace("\\", "\\textbackslash ")
  167. line = line.replace("&", "\\&")
  168. line = line.replace("_", "\_")
  169. line = line.replace("#", "\\#")
  170. line = line.replace("$", "\\$")
  171. line = line.replace("%", "\\%")
  172. line = line.replace("{", "\\{")
  173. line = line.replace("}", "\\}")
  174. line = line.replace(u"~", u"\\textasciitilde ")
  175. line = line.replace(u"^", u"\\textasciicircum ")
  176. return line
  177. def write_tag(f, tag):
  178. from bs4 import NavigableString as NavigableString
  179. if type(tag) == NavigableString:
  180. f.write(tex_escape(tag))
  181. return
  182. if tag.name in [u'hr']:
  183. f.write("\\novelbreak\n\n")
  184. if tag.name in [u'img']:
  185. if "data-src" in tag.attrs:
  186. imgurl = tag.attrs["data-src"]
  187. else:
  188. imgurl = tag.attrs["src"]
  189. img = get_image(imgurl)
  190. f.write("\\begin{center}{\\includegraphics{" + tex_escape(img) + "}}\\end{center}")
  191. elif tag.name in [u'b', u'strong']:
  192. # f.write("\\textbf{")
  193. f.write("{\\bfseries ")
  194. elif tag.name in [u'i', u'em']:
  195. # f.write("\\textit{")
  196. f.write("{\\itshape ")
  197. elif tag.name in [u"center"]:
  198. f.write("\n\\begin{center}\n")
  199. elif tag.name in [u"blockquote"]:
  200. f.write("\n\\blockquote{")
  201. for text in tag.contents:
  202. write_tag(f, text)
  203. if tag.name in [u'b', u'strong']:
  204. f.write("}")
  205. elif tag.name in [u'i', u'em']:
  206. f.write("}")
  207. elif tag.name in [u"center"]:
  208. f.write("\n\\end{center}\n")
  209. elif tag.name in [u"blockquote"]:
  210. f.write("}\n")
  211. elif tag.name in [u'p']:
  212. f.write("\n\n")
  213. def write_chapter_html(num, chapter):
  214. from bs4 import BeautifulSoup as BeautifulSoup
  215. from bs4.builder import _html5lib as html5lib
  216. safe_title = re.sub("[^0-9a-zA-Z]+", "_", chapter["title"].lower())
  217. file_name = "{0:03d}-{1}.tex".format(num, safe_title)
  218. chapter_url = FIMF_CHAPTERDL_HTML.format(chapter["id"])
  219. chapter_html = urllib2.urlopen(urllib2.Request(chapter_url, headers={"User-Agent": USER_AGENT})).read()
  220. # Use BeautifulSoup to parse it. html5lib is used because we want valid HTML
  221. bs = BeautifulSoup(chapter_html, ["html5lib"], html5lib.HTML5TreeBuilder())
  222. with codecs.open(file_name, "wb", encoding="utf-8") as f:
  223. f.write(u"\\chapter{{{0}}}\n\n".format(tex_escape(chapter["title"])))
  224. current_tag = bs.find("h3").next_sibling
  225. while current_tag:
  226. write_tag(f, current_tag)
  227. current_tag = current_tag.next_sibling
  228. return file_name[:-4]
  229. # Not used anymore, but keeping around just in case.
  230. def write_chapter_txt(num, chapter):
  231. safe_title = re.sub("[^0-9a-zA-Z]+", "_", chapter["title"].lower())
  232. file_name = "{0:03d}-{1}.tex".format(num, safe_title)
  233. chapter_url = FIMF_CHAPTERDL.format(chapter["id"])
  234. chapter_txt = urllib2.urlopen(urllib2.Request(chapter_url, headers={"User-Agent": USER_AGENT})).read()
  235. with open(file_name, "wb") as f:
  236. f.write("\\chapter{{{0}}}\n\n".format(tex_escape(chapter["title"])))
  237. prev_line = ""
  238. i = 0
  239. for line in chapter_txt.split('\n'):
  240. line = line.strip()
  241. if line.startswith("//"):
  242. continue
  243. kek = 0
  244. if line == "":
  245. if prev_line == "":
  246. continue
  247. line = "\n"
  248. kek = 1
  249. line = tex_escape(line)
  250. if kek == 1:
  251. prev_line = ""
  252. else:
  253. prev_line = line
  254. f.write(line)
  255. f.write('\n')
  256. i += 1
  257. return file_name[:-4]
  258. if __name__ == "__main__":
  259. exit(main())