basic OPDS (and HTML) catalog provider for eBooks
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

175 lines
9.8 KiB

  1. <?php
  2. #############################################################################
  3. # miniCalOPe (c) 2010-2015 by Itzchak Rehberg #
  4. # written by Itzchak Rehberg <izzysoft AT qumran DOT org> #
  5. # http://www.izzysoft.de/ #
  6. # ------------------------------------------------------------------------- #
  7. # This program is free software; you can redistribute and/or modify it #
  8. # under the terms of the GNU General Public License (see doc/LICENSE) #
  9. # ------------------------------------------------------------------------- #
  10. # Scan for books and feed database #
  11. #############################################################################
  12. # $Id$
  13. require_once('./lib/class.logging.php'); // must come first as it also defines some CONST
  14. require_once('./config.php');
  15. if ( $scan_cli_only && php_sapi_name() != 'cli' ) { // protect agains access by visitors
  16. header('HTTP/1.0 403 Forbidden');
  17. echo "<h1>You are not allowed to be here.</h1>\n<p>The gatekeeper won't let you see this page. Please point your browser to a different one.</p>\n";
  18. trigger_error('Web access to scan scripts denied by config', E_USER_ERROR);
  19. exit;
  20. }
  21. require_once('./lib/common.php');
  22. require_once('./lib/class.filefuncs.php');
  23. $filefuncs = new filefuncs($logger,$use_markdown,$bookformats,$bookdesc_ext,$bookmeta_ext,$check_xml,$skip_broken_xml);
  24. require_once('./lib/db_sqlite3.php');
  25. require_once('./lib/class.db.php');
  26. if ( $autoExtract ) {
  27. require_once('./lib/class.epubdesc.php');
  28. if ( in_array('all',$extract2desc) || in_array('desc',$extract2desc) ) { // for trans of terms in bookdesc
  29. require_once('./lib/class.csv.php');
  30. $csv = new csv(";",'"',TRUE,FALSE);
  31. } else {
  32. $csv = NULL;
  33. }
  34. } else {
  35. $csv = NULL;
  36. }
  37. $db = new db($dbfile);
  38. $pubdate = date('c');
  39. $books = array();
  40. $genres = array(); $allGenres = array();
  41. $authors = array();
  42. $series = array();
  43. $publisher = array();
  44. // directory structure for books directory:
  45. // <lang>/<genre>/<author>/<books>
  46. #===========================================================[ Collect data ]===
  47. // Go for the languages available
  48. $logger->info("Scanning $bookroot [MODE=$scan_dbmode]",'SCAN');
  49. $logger->debug("use_lang: $use_lang",'SCAN');
  50. $logger->debug("Languages: ".implode(', ',$uselangs),'SCAN');
  51. $logger->debug("DBFile: $dbfile",'SCAN');
  52. $langs = $filefuncs->scanFolder($bookroot);
  53. // Now collect the genres
  54. foreach($langs as $lang) {
  55. GLOBAL $use_markdown;
  56. if ( !empty($uselangs) && !in_array($lang,$uselangs) ) {
  57. $logger->debug("* Skipping langDir '$lang'",'SCAN');
  58. continue;
  59. }
  60. $logger->info("* Scanning langDir '$lang'",'SCAN');
  61. if ( $csv !== NULL && file_exists("./lang/ebookterms.${lang}") ) {
  62. $csv = new csv(";",'"',TRUE,FALSE);
  63. $logger->debug('# Loaded terms for ebook desc from '.__DIR__."./lang/ebookterms.${lang}",'SCAN');
  64. $csv->import("./lang/ebookterms.${lang}");
  65. }
  66. $genres[$lang] = $filefuncs->scanFolder($bookroot . DIRECTORY_SEPARATOR . $lang);
  67. // Now come the authors
  68. foreach($genres[$lang] as $gidx => $genre) {
  69. $logger->info(" + Scanning genre dir '$genre'",'SCAN');
  70. $gdir = $genre;
  71. if ( $use_markdown && !file_exists($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $genre . DIRECTORY_SEPARATOR . '.nomarkdown') ) $gmarkdown = 1;
  72. else $gmarkdown = 0;
  73. if ( in_array('genre',$dotname_overrides) && file_exists($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $genre . DIRECTORY_SEPARATOR . '.name') ) {
  74. $genre = trim(file_get_contents($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $genre . DIRECTORY_SEPARATOR . '.name'));
  75. $genres[$lang][$gidx] = $genre;
  76. }
  77. $allGenres = array_merge($allGenres,array($genre));
  78. $tauthors = $filefuncs->scanFolder($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir);
  79. // Guess what - they wrote books!
  80. foreach($tauthors as $aidx => $author) {
  81. $logger->debug(" - Scanning author dir '$author'",'SCAN');
  82. $adir = $author;
  83. if ( in_array('author',$dotname_overrides) && file_exists($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir . DIRECTORY_SEPARATOR . $adir . DIRECTORY_SEPARATOR . '.name') ) {
  84. $author = trim(file_get_contents($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir . DIRECTORY_SEPARATOR . $adir . DIRECTORY_SEPARATOR . '.name'));
  85. $tauthors[$aidx] = $author;
  86. }
  87. if ( $gmarkdown && !file_exists($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir . DIRECTORY_SEPARATOR . $adir . DIRECTORY_SEPARATOR . '.nomarkdown') ) {
  88. $tbooks = $filefuncs->scanFolder($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir . DIRECTORY_SEPARATOR . $adir, 'files', $gmarkdown);
  89. } else {
  90. $tbooks = $filefuncs->scanFolder($bookroot . DIRECTORY_SEPARATOR . $lang . DIRECTORY_SEPARATOR . $gdir . DIRECTORY_SEPARATOR . $adir, 'files', 0);
  91. }
  92. //array[name] with [files][ext], [desc] ([series],[series_index],[rating],[publisher],[isbn], [author],[tag]
  93. foreach($tbooks as $book=>$dummy) {
  94. $tbooks[$book]['lang'] = $lang;
  95. $tbooks[$book]['genre'] = $genre;
  96. if ( empty($tbooks[$book]['author']) ) { // no author defined in .data
  97. $authors = array_merge($authors,array($author));
  98. $tbooks[$book]['author'][] = $author;
  99. } else {
  100. $authors = array_merge($authors,$tbooks[$book]['author']);
  101. if ( !in_array('author',$data_overrides) ) { // in no-override mode, merge in author from dirname
  102. $authors = array_merge($authors,$author);
  103. if ( !(is_array($tbooks[$book]['author']) && in_array($author,$tbooks[$book]['author'])) ) $tbooks[$book]['author'][] = $author;
  104. }
  105. }
  106. if ( $GLOBALS['autoExtract'] && isset($tbooks[$book]['files']['epub']) ) {
  107. $epub = new epubdesc($tbooks[$book]['files']['epub']);
  108. $pathinfo = pathinfo($tbooks[$book]['files']['epub']);
  109. $cover = $filefuncs->getCover($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename']);
  110. if ( $extractCover > 0 && $GLOBALS['cover_mode']!='off' && empty($cover) ) {
  111. if ( $rc = $epub->writeCover($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename']) ) {
  112. $cover = $filefuncs->getCover($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename']);
  113. $logger->info(" - extracted cover: '${cover}'",'SCAN');
  114. }
  115. if ( $extractCover > 1 && $rc ) {
  116. $filefuncs->resizeCover($cover,$cover_width);
  117. }
  118. }
  119. if ( !empty($extract2data) && !file_exists($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookmeta_ext) ) {
  120. $logger->info(" - extracting Metadata: '".$pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookmeta_ext."'",'SCAN');
  121. $epub->setExtract2data($extract2data);
  122. $epub->setDataExt($bookmeta_ext);
  123. $epub->writeData($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename']);
  124. if ( file_exists($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookmeta_ext) )
  125. $filefuncs->readData($tbooks[$book],$pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookmeta_ext);
  126. }
  127. if ( !empty($extract2desc) && !$filefuncs->file_exists_glob($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'],$bookdesc_ext) ) {
  128. $logger->info(" - extracting Descdata: '".$pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookdesc_ext[0]."'",'SCAN');
  129. if ( !empty($csv->data) ) $epub->setTerms($csv->data);
  130. $epub->setExtract2desc($extract2desc);
  131. $epub->setDescExt($bookdesc_ext[0]);
  132. $epub->writeDesc($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename']);
  133. if ( file_exists($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookdesc_ext[0]) ) {
  134. $tbooks[$book]['desc'] = trim(file_get_contents($pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookdesc_ext[0]));
  135. $filefuncs->formatDesc($tbooks[$book]['desc'],$gmarkdown);
  136. if ( $check_xml && !empty($tbooks[$book]['desc']) ) $filefuncs->validateXML($tbooks[$book]['desc'], $pathinfo['dirname'].DIRECTORY_SEPARATOR.$pathinfo['filename'].'.'.$bookdesc_ext[0]);
  137. }
  138. }
  139. }
  140. if ( !empty($tbooks[$book]['tag']) ) $allGenres = array_merge($allGenres,$tbooks[$book]['tag']); // from *.data file
  141. if ( !empty($tbooks[$book]['series']) ) $series[] = $tbooks[$book]['series']; // from *.data file
  142. if ( !empty($tbooks[$book]['publisher']) ) $publisher[] = $tbooks[$book]['publisher']; // from *.data file
  143. }
  144. $books = array_merge($books,$tbooks);
  145. }
  146. }
  147. }
  148. sort($allGenres);
  149. #======================================================[ Feed the database ]===
  150. $logger->info('* Updating database','SCAN');
  151. $db->truncAll();
  152. $db->make_genres($allGenres);
  153. if (!empty($publisher)) $db->make_publisher($publisher);
  154. if (!empty($series)) $db->make_series($series);
  155. $db->make_authors($authors);
  156. $db->make_books($books);
  157. $logger->info('* Cleaning up database','SCAN');
  158. $db->query('VACUUM');
  159. $db->query('REINDEX');
  160. $logger->info("* Done",'SCAN');
  161. exit;
  162. ?>