super-quick note taking tool
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

212 lines
6.8 KiB

#!/usr/bin/env python3
"""
Identifies broken links in a markdown vimwiki and suggests/applies fixes
Assumptions:
1. Your vimwiki is in markdown syntax
2. There is no whitespace/newlines between the link and the parenthesis
- This is OK: [some link](blub.md)
- This fails: [some link]( blub.md)
Dependency:
1. mistune (https://github.com/lepture/mistune)
"""
import os.path
import sys
import re
import time
import argparse
import urllib.parse
import mistune
# Default wiki directory; main() uses it when -r/--root is not given.
VIMWIKI_ROOT_DEFAULT = os.path.expanduser('~/me/notes/wiki')
# Default for -x/--exclude: comma-separated path prefixes, relative to the wiki
# root. Matching files are still listed as link targets but are not read.
EXCLUDE_DEFAULT = 'projects/blog,projects/archive'
class VimWiki(object):
    """A markdown vimwiki on disk whose broken local links can be repaired.

    Attributes:
        root: Directory that contains the wiki.
        exclude: Iterable of path prefixes (relative to ``root``). Files whose
            relative path starts with one of them are known as link targets
            but their content is never read or modified.
        files: Maps each ``.md`` path relative to ``root`` to its text, or to
            ``None`` for excluded files. Filled by :meth:`readfiles`.
        quiet: Suppress the "Replacing in ..." progress lines on stdout.
        quiet_warnings: Suppress warnings on stderr.
        relative: Suggest relative link targets instead of root-absolute
            ones (``/path/from/root.md``).
    """

    def __init__(self, root, exclude, quiet=False, quiet_warnings=False, relative=False):
        self.root = root
        self.exclude = exclude
        self.files = {}
        self.quiet = quiet
        self.quiet_warnings = quiet_warnings
        self.relative = relative

    def warn(self, message, path=None):
        """Write a warning to stderr unless warnings are silenced.

        Args:
            message: Text of the warning.
            path: Optional wiki-relative file the warning refers to.
        """
        if self.quiet_warnings:
            return
        if path:
            sys.stderr.write("Warning in %s: %s\n" % (path, message))
        else:
            sys.stderr.write("Warning: %s\n" % message)
        sys.stderr.flush()

    def readfiles(self):
        """Walk ``root`` (following symlinks) and populate ``self.files``.

        Only files ending in ``.md`` are recorded. Excluded files are stored
        with ``None`` as content so they still count as valid link targets.
        """
        # str.startswith accepts a tuple, so build it once outside the loops.
        excluded = tuple(self.exclude)
        for dirpath, _dirs, files in os.walk(self.root, followlinks=True):
            for filename in files:
                if not filename.endswith('.md'):
                    continue
                fullpath = os.path.join(dirpath, filename)
                # relpath strips the root exactly once and normalises the
                # result; a plain str.replace would also remove later
                # occurrences of the root string inside the path.
                shortpath = os.path.relpath(fullpath, self.root)
                if shortpath.startswith(excluded):
                    content = None
                else:
                    with open(fullpath, 'r', encoding='utf-8') as f:
                        content = f.read()
                self.files[shortpath] = content

    def fixlinks(self, dry_run):
        """Find broken local links in every loaded file and replace them.

        For each broken link a fix is requested from :meth:`_suggest_fix`;
        when none (or more than one) is found a warning is emitted and the
        link is left untouched.

        Args:
            dry_run: When true, report the replacements but do not write any
                file back to disk.
        """
        for path in sorted(self.files):
            content = self.files[path]
            if not content:
                # Excluded (None) and empty files have nothing to repair.
                continue
            dirname = os.path.dirname(path)
            needs_save = False
            bucket = []
            extractor = mistune.Markdown(renderer=LinkExtractor(bucket))
            extractor(content)
            # Sorted so that the order of replacements (and of the printed
            # report) is the same on every run; set order is not.
            for link in sorted(set(bucket)):
                if self._check_link(self.files, dirname, link):
                    continue
                try:
                    fix = self._suggest_fix(self.files, dirname, link)
                except UserWarning as e:
                    self.warn(e.args[0], path)
                    continue
                if not self.quiet:
                    print("Replacing in %s: %s -> %s" % (path, link, fix))
                # Only the "(target)" part of "[text](target)" is rewritten.
                new_content = content.replace('(%s)' % link, '(%s)' % fix)
                if new_content == content:
                    self.warn("replacing %s with %s didn't work." % (link, fix), path)
                    continue
                content = new_content
                needs_save = True
            if needs_save and not dry_run:
                with open(os.path.join(self.root, path), 'w', encoding='utf-8') as f:
                    f.write(content)

    @staticmethod
    def _check_link(allfiles, dirname, link_target):
        """Return True if ``link_target`` points at a known wiki file.

        Args:
            allfiles: Container of wiki-relative file paths.
            dirname: Wiki-relative directory of the file containing the link.
            link_target: Raw link as written in the markdown source.
        """
        link = Link(link_target)
        # absolute links are resolved against the wiki root
        if link.target.startswith("/"):
            return link.target[1:] in allfiles
        # relative links are resolved against the linking file's directory
        normalized_target = os.path.normpath(os.path.join(dirname, link.target))
        return normalized_target in allfiles

    def _suggest_fix(self, allfiles, dirname, link_target):
        """Return a replacement for the broken link ``link_target``.

        Two strategies are tried in order: appending ``.md`` to the target,
        then looking for exactly one wiki file with the same basename.

        Args:
            allfiles: Container of wiki-relative file paths.
            dirname: Wiki-relative directory of the file containing the link.
            link_target: Raw link as written in the markdown source.

        Returns:
            The corrected link, including any ``#anchor`` suffix.

        Raises:
            UserWarning: If no candidate or several candidates were found.
        """
        link = Link(link_target)
        if VimWiki._check_link(allfiles, dirname, link.target + '.md'):
            link.target += '.md'
            return str(link)
        link_base = os.path.basename(link.target)
        suggestions = [
            filename for filename in allfiles
            if os.path.basename(filename) == link_base
        ]
        if not suggestions:
            raise UserWarning("No fix found for link %s" % link_target)
        if len(suggestions) > 1:
            raise UserWarning("Multiple possible fixes for link %s" % link_target)
        target = suggestions[0]
        if os.path.dirname(target) == dirname:
            # Same directory: the bare file name is already a correct
            # relative link, so it must not be passed through relpath again
            # (that would yield a wrong "../name.md").
            target = os.path.basename(target)
        elif self.relative:
            target = os.path.relpath(target, dirname or os.curdir)
        else:
            target = '/' + target
        link.target = os.path.normpath(target)
        return str(link)
class Link(object):
    """
    Deconstructs links like "/foo/bar.md#Header1" into:
    1. target ("/foo/bar.md")
    2. suffix ("Header1")
    Both parts are stored percent-decoded. It allows you to change the
    individual components and convert it back to a string by calling str()
    on this object.
    """

    def __init__(self, raw):
        self.target, self.suffix = self._deconstruct(raw)

    def __str__(self):
        # TODO: encode/quote url again for Markor?
        if self.suffix:
            return self.target + '#' + self.suffix
        return self.target

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, str(self))

    @staticmethod
    def _deconstruct(link):
        """Split a raw link into ``(target, suffix)``.

        ``suffix`` is ``None`` when the link contains no ``#``. The split is
        done on the raw text before percent-decoding, so an encoded ``%23``
        inside a file name is not mistaken for the anchor separator.
        """
        target, separator, suffix = link.partition('#')
        target = urllib.parse.unquote(target)
        suffix = urllib.parse.unquote(suffix) if separator else None
        return target, suffix
class LinkExtractor(mistune.Renderer):
    """Renderer that collects link targets into a caller-supplied list.

    Links containing ``//`` and links that start with ``#`` are skipped;
    every other link target is appended to ``bucket`` as it is rendered.
    """

    def __init__(self, bucket):
        super().__init__()
        # Caller-owned list that receives the collected targets.
        self.bucket = bucket

    def link(self, link, title, text):
        """Record ``link`` unless it contains ``//`` or starts with ``#``."""
        has_double_slash = '//' in link
        is_anchor_only = link.startswith('#')
        if not (has_double_slash or is_anchor_only):
            self.bucket.append(link)
def main():
    """Command-line entry point: parse options, load the wiki, then act.

    With ``-d/--duplicates`` the basenames that occur more than once are
    printed (sorted) and nothing is modified; otherwise broken links are
    fixed, or only reported when ``-n/--dry-run`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--dry-run', action='store_true')
    parser.add_argument('-q', '--quiet', action='store_true')
    parser.add_argument('-R', '--relative', action='store_true')
    parser.add_argument('-W', '--quiet-warnings', action='store_true')
    parser.add_argument('-d', '--duplicates', action='store_true',
                        help="Find duplicate basenames and quit")
    parser.add_argument('-r', '--root', type=str,
                        metavar='VIMWIKI_ROOT', default=VIMWIKI_ROOT_DEFAULT)
    parser.add_argument('-x', '--exclude', type=str,
                        metavar='COMMA_SEPARATED_PATHS', default=EXCLUDE_DEFAULT)
    args = parser.parse_args()

    # Blank items (e.g. from a trailing comma) are dropped: an empty prefix
    # matches every path and would silently exclude the whole wiki.
    exclude = [
        prefix.strip()
        for prefix in (args.exclude or '').split(',')
        if prefix.strip()
    ]

    vimwiki = VimWiki(
        root=args.root,
        exclude=exclude,
        quiet=args.quiet,
        quiet_warnings=args.quiet_warnings,
        relative=args.relative,
    )
    vimwiki.readfiles()

    if args.duplicates:
        # Single pass with two sets instead of list.count() per element.
        seen = set()
        duplicates = set()
        for path in vimwiki.files:
            name = os.path.basename(path)
            if name in seen:
                duplicates.add(name)
            else:
                seen.add(name)
        for dup in sorted(duplicates):
            print(dup)
    else:
        vimwiki.fixlinks(args.dry_run)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()