Command-line Markdown to HTML converter written in C
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

811 lines
14 KiB

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "markdown.h"
#include "inputfile.h"
// prettyprinting
// nonzero makes pp_newline and pp_indent write their output; zero silences both
uint8_t prettyprint = 0;
// number of tab characters pp_indent writes in front of a line
int pp_tabs = 0;
// writes a newline to stdout, but only when prettyprint is enabled
void
pp_newline()
{
    if (!prettyprint) return;
    putchar('\n');
}
// writes pp_tabs tab characters to stdout, but only when prettyprint is enabled
void
pp_indent()
{
    int remaining;
    if (prettyprint) {
        // one tab per indentation level
        for (remaining = pp_tabs; remaining > 0; remaining--) putchar('\t');
    }
}
// tags used for bold and italic
// element names only, without angle brackets; the brackets are added when printed
// defaults give <b> and <i>; strong_em_tags() switches them to <strong> and <em>
char* tag_bold = "b";
char* tag_italic = "i";
// switches bold/italic output from the default b/i elements to strong/em
void
strong_em_tags()
{
    // the globals are simply repointed at string literals, nothing is copied
    tag_italic = "em";
    tag_bold = "strong";
}
// called when only printing text in first heading
// finds the first line whose first character is '#', then prints that
// heading's text (with inline markdown converted) followed by a newline
// prints nothing when no line starts with '#'
void
skim_for_heading()
{
    long int index = 0;
    // find first line that starts with #
    // every read is preceded by a bounds check, so empty input is never touched
    for (;;) {
        if (index >= input_file.size) return;
        if (input_file.buf[index] == '#') break;
        // skip to next line
        while (input_file.buf[index] != '\n') {
            index++;
            if (index >= input_file.size) return;
        }
        // step past the newline
        index++;
    }
    // skip # and whitespace
    while (index < input_file.size && input_file.buf[index] == '#') index++;
    index = skip_whitespace(index);
    // print heading text
    mdconvert('\n', 1, index); // parse markdown in heading
    putchar('\n');
}
// walks the whole input buffer and converts it element by element
// term: conversion stops as soon as the first non-blank character of an
//       element equals this character
void
mdconvert_all(char term)
{
    uint8_t emitted_any = 0;
    long int pos;
    for (pos = 0; pos < input_file.size; pos++) {
        char first;
        pos = skip_whitespace(pos);
        first = input_file.buf[pos];
        if (first == term) break;
        // blank line: nothing to convert
        if (first == '\n') continue;
        // every element after the first is preceded by a prettyprint newline
        if (emitted_any) pp_newline();
        pos = mdconvert_line(pos);
        emitted_any = 1;
    }
}
// converts the block-level element that starts at index, choosing the
// handler from the first character of the line
// returns the index reported by the handler (for a horizontal rule, the
// index three characters past the first dash)
long int
mdconvert_line(long int index)
{
    int first;
    pp_indent();
    first = input_file.buf[index];
    // heading
    if (first == '#')
        return md_heading(index);
    // blockquote
    if (first == '>')
        return md_blockquote(index);
    // horizontal rule, unordered list item, or plain text starting with dashes
    if (first == '-') {
        uint8_t kind = is_hr_or_li(index);
        if (kind == is_hr) {
            printf("<hr/>");
            pp_newline();
            return index + 3;
        }
        if (kind == is_li)
            return md_unordered_list(index);
        return md_paragraph(index);
    }
    // a leading digit only starts an ordered list if "." follows the number
    if (first >= '0' && first <= '9' && is_ordered_li(index))
        return md_ordered_list(index);
    // a leading ~ or ` only starts a code block if it is a fence
    if ((first == '~' || first == '`') && is_code_block(index))
        return md_code_block(index);
    // everything else is a paragraph
    return md_paragraph(index);
}
// converts inline syntax starting at index until the terminator is found
// term:     terminator character
// min_term: how many times term must appear in a row to end the run
// returns the index of the last terminator character of that run, or the
// index where scanning stopped when the input ends first
long int
mdconvert(char term, int8_t min_term, long int index)
{
    for (; index < input_file.size; index++) {
        const char c = input_file.buf[index];
        if (c == term) {
            int8_t run = 0;
            while (input_file.buf[index] == term) {
                run++;
                index++;
                if (run >= min_term) return index - 1;
            }
            // too few terminators in a row: rewind and handle c as normal syntax
            index -= run;
        }
        switch (c) {
        case '*':
        case '_': // bold, italic, bold and italic
            index = md_bold_italic(index);
            break;
        case '~': // subscript or strikethrough
            index = md_sub_strike(index);
            break;
        case '^': // superscript
            index = md_superscript(index);
            break;
        case '`': // code
            index = md_code(index);
            break;
        case '[': // link, or a literal bracket
            if (is_link(index))
                index = md_link(index);
            else
                printf("[");
            break;
        case '!': // image, or a literal exclamation mark
            // an image is a link with ! in front, so the link test is
            // applied to the character after the !
            if (is_link(index + 1))
                index = md_image(index);
            else
                printf("!");
            break;
        case ' ': // could be line break
            index = md_spaces(index);
            break;
        case '\t': // a run of whitespace starting with a tab becomes one space
            putchar(' ');
            index = skip_whitespace(index) - 1;
            break;
        default: // any other character is copied through
            putchar(c);
            break;
        }
    }
    return index;
}
// converts emphasis opened by a run of the character at index
// one marker is italic, two is bold, three is bold and italic; any other
// run length is printed literally
// returns the index the caller should continue after
long int
md_bold_italic(long int index)
{
    const char marker = input_file.buf[index];
    int run = 0;
    // measure the opening run and step past it
    for (; input_file.buf[index] == marker; index++) run++;
    if (run == 1) {
        printf("<%s>", tag_italic);
        index = mdconvert(marker, 1, index);
        printf("</%s>", tag_italic);
    } else if (run == 2) {
        printf("<%s>", tag_bold);
        index = mdconvert(marker, 2, index);
        printf("</%s>", tag_bold);
    } else if (run == 3) {
        printf("<%s><%s>", tag_bold, tag_italic);
        index = mdconvert(marker, 3, index);
        printf("</%s></%s>", tag_italic, tag_bold);
    } else {
        // no formatting: print the markers as they are
        while (run-- > 0) putchar(marker);
        // back up onto the last marker printed
        index--;
    }
    return index;
}
// converts text opened by a run of ~ at index
// one ~ is subscript, two is strikethrough; any other run length is
// printed literally
// returns the index the caller should continue after
long int
md_sub_strike(long int index)
{
    int tildes;
    // measure the opening run and step past it
    for (tildes = 0; input_file.buf[index] == '~'; tildes++) index++;
    if (tildes == 1) {
        printf("<sub>");
        index = mdconvert('~', 1, index);
        printf("</sub>");
    } else if (tildes == 2) {
        printf("<s>");
        index = mdconvert('~', 2, index);
        printf("</s>");
    } else {
        // no formatting: print the tildes as they are
        while (tildes-- > 0) putchar('~');
        // back up onto the last tilde printed
        index--;
    }
    return index;
}
// converts superscript text; index is the position of the opening ^
// returns whatever mdconvert reports for the closing ^
long int
md_superscript(long int index)
{
    long int closing;
    printf("<sup>");
    // content starts right after the opening ^
    closing = mdconvert('^', 1, index + 1);
    printf("</sup>");
    return closing;
}
// paragraph stops at two returns in a row
// converts the paragraph starting at index; following lines are joined to
// the first with a single space until a blank line or the end of input
// returns the index where the paragraph stopped
long int
md_paragraph(long int index)
{
    // parse first line of paragraph
    printf("<p>");
    index = mdconvert('\n', 1, skip_whitespace(index));
    // parse paragraph lines until two returns in a row reached
    while (index < input_file.size) {
        // step past the newline that ended the previous line
        index++;
        if (index < input_file.size) index = skip_whitespace(index);
        // input ended right after that newline: there is no further line,
        // so no joining space is printed and nothing past the end is read
        if (index >= input_file.size) break;
        // blank line ends the paragraph
        if (input_file.buf[index] == '\n') break;
        putchar(' ');
        index = mdconvert('\n', 1, index);
    }
    printf("</p>");
    pp_newline();
    return index;
}
// treats blockquote like md document within md document
// copies every consecutive line that starts with > (minus the >) into a
// temporary buffer, swaps it in as input_file, converts it with
// mdconvert_all, then restores the original buffer
// the temporary buffer is NUL-terminated so that scans looking for '\0'
// stop at its end
// exits the program with an error message if memory runs out
// returns the index of the last character consumed
long int
md_blockquote(long int index)
{
    mdstring original_buf;
    size_t capacity = 64;
    void *grown;
    char add_char;
    printf("<blockquote>");
    pp_newline();
    pp_tabs++;
    // save original buffer size and memory location
    original_buf.size = input_file.size;
    original_buf.buf = input_file.buf;
    // load blockquote contents into new buffer
    input_file.buf = malloc(capacity);
    input_file.size = 0;
    if (input_file.buf == NULL) goto out_of_memory;
    // stop when there isn't > at beginning of line
    while (index < original_buf.size && original_buf.buf[index] == '>') {
        // skip >
        index++;
        // read rest of line into buffer
        while (index < original_buf.size) {
            // get character to append
            add_char = original_buf.buf[index];
            // grow geometrically, always keeping one spare byte for the terminator
            if ((size_t)input_file.size + 2 > capacity) {
                capacity *= 2;
                // keep the old pointer until realloc is known to have succeeded
                grown = realloc(input_file.buf, capacity);
                if (grown == NULL) goto out_of_memory;
                input_file.buf = grown;
            }
            // append character
            input_file.buf[input_file.size] = add_char;
            input_file.size++;
            index++;
            if (add_char == '\n') break;
        }
    }
    input_file.buf[input_file.size] = '\0';
    // parse markdown in buffer
    mdconvert_all('\0');
    // destroy buffer, restore original buffer
    free(input_file.buf);
    input_file.size = original_buf.size;
    input_file.buf = original_buf.buf;
    // end blockquote
    pp_tabs--;
    pp_indent();
    printf("</blockquote>");
    pp_newline();
    return index - 1;
out_of_memory:
    // put the original buffer back before giving up
    free(input_file.buf);
    input_file.size = original_buf.size;
    input_file.buf = original_buf.buf;
    fputs("error: out of memory while parsing blockquote\n", stderr);
    exit(EXIT_FAILURE);
}
// converts consecutive unordered list items starting at index into one <ul>
// returns the index one before the first line that is not a list item
long int
md_unordered_list(long int index)
{
    printf("<ul>");
    pp_newline();
    // items are indented one level deeper than the list tags
    pp_tabs++;
    while (index < input_file.size) {
        index = skip_whitespace(index);
        if (is_hr_or_li(index) != is_li) break;
        // item text starts after the dash and any whitespace behind it;
        // +1 moves past the position md_list_item returns
        index = md_list_item(skip_whitespace(index + 1)) + 1;
    }
    pp_tabs--;
    pp_indent();
    printf("</ul>");
    pp_newline();
    return index - 1;
}
// converts consecutive ordered list items starting at index into one <ol>
// index must be on the first digit of the first item's number
// the first item's number becomes the start attribute unless it is 1
// numbers longer than nine digits are clamped to 999999999 so the
// accumulation can never overflow
// returns the index one before the first line that is not a list item
long int
md_ordered_list(long int index)
{
    const long start_limit = 999999999L;
    long start = 0;
    long int scan;
    // read the start number left to right; index stays on the first digit
    for (scan = index; is_number(scan); scan++) {
        if (start > start_limit / 10)
            start = start_limit; // saturate instead of overflowing
        else
            start = start * 10 + (input_file.buf[scan] - '0');
    }
    // start ol
    if (start == 1)
        printf("<ol>");
    else
        printf("<ol start=\"%ld\">", start);
    pp_newline();
    // loop through each list item
    pp_tabs++;
    while (index < input_file.size) {
        index = skip_whitespace(index);
        if (is_ordered_li(index)) {
            // item text starts after the "N." marker and the whitespace behind it
            index = md_list_item(skip_whitespace(skip_no_whitespace(index))) + 1;
        } else break;
    }
    pp_tabs--;
    index--;
    // ol closing tag
    pp_indent();
    printf("</ol>");
    pp_newline();
    return index;
}
// prints one <li> element whose content is the inline markdown starting at
// index and running to the end of the line
// returns the index mdconvert stopped at
long int
md_list_item(long int index)
{
    long int line_end;
    pp_indent();
    printf("<li>");
    line_end = mdconvert('\n', 1, index);
    printf("</li>");
    pp_newline();
    return line_end;
}
// converts a heading line; index is on its first #
// the number of # gives the level (h1 to h6); a line with seven or more #
// is handed to md_paragraph instead
// returns the index the heading (or paragraph) conversion stopped at
long int
md_heading(long int index)
{
    const long int line_start = index;
    int8_t level;
    // count number of #
    for (level = 0; input_file.buf[index] == '#'; index++) {
        // no headings smaller than h6
        if (++level == 7) return md_paragraph(line_start);
    }
    printf("<h%d>", level);
    // heading text starts after the whitespace behind the #s
    index = mdconvert('\n', 1, skip_whitespace(index));
    printf("</h%d>", level);
    pp_newline();
    return index;
}
// converts a link of the form [text](url); index is on the opening [
// the caller has already confirmed the syntax with is_link
// returns the index of the closing )
long int
md_link(long int index)
{
    // text begins after [, url begins two characters after the matching ]
    const long int text_start = index + 1;
    const long int url_start = find_close_bracket(index) + 2;
    long int url_end = url_start;
    // find the ) that ends the url
    while (input_file.buf[url_end] != ')') url_end++;
    // the ) is briefly overwritten with a terminator so the url can be
    // printed as a string straight from the input buffer
    input_file.buf[url_end] = '\0';
    printf("<a href=\"%s\">", (char*)(input_file.buf + url_start));
    input_file.buf[url_end] = ')';
    // link text may itself contain inline markdown
    mdconvert(']', 1, text_start);
    printf("</a>");
    return url_end;
}
// converts an image of the form ![alt](src); index is on the !
// the alt text is printed as it is, without inline conversion
// returns the index of the closing )
long int
md_image(long int index)
{
    // alt text begins after "![" and ends at the matching ]
    const long int alt_start = index + 2;
    const long int alt_end = find_close_bracket(index + 1);
    // src begins two characters after the ]
    const long int src_start = alt_end + 2;
    long int src_end = src_start;
    // find the ) that ends the src
    while (input_file.buf[src_end] != ')') src_end++;
    // ] and ) are briefly overwritten with terminators so both pieces can
    // be printed as strings straight from the input buffer
    input_file.buf[alt_end] = '\0';
    input_file.buf[src_end] = '\0';
    printf("<img src=\"%s\" alt=\"%s\"/>",
        (char*)(input_file.buf + src_start),
        (char*)(input_file.buf + alt_start)
    );
    // put ] and ) back
    input_file.buf[src_end] = ')';
    input_file.buf[alt_end] = ']';
    return src_end;
}
// converts an inline code span; index is on the opening backtick
// content is copied without markdown conversion, with &, < and > written
// as HTML entities so code cannot be mistaken for markup
// returns the index of the closing backtick, or one before the end of
// input if the span is never closed
long int
md_code(long int index)
{
    char c;
    printf("<code>");
    // step past the opening backtick
    index++;
    // print code until end of code reached
    // stops at the buffer size as well as at a NUL
    while (index < input_file.size && (c = input_file.buf[index]) != '\0') {
        if (c == '`') {
            printf("</code>");
            return index;
        }
        switch (c) {
        case '&':
            printf("&amp;");
            break;
        case '<':
            printf("&lt;");
            break;
        case '>':
            printf("&gt;");
            break;
        default:
            putchar(c);
            break;
        }
        index++;
    }
    // input ended before the closing backtick
    printf("</code>");
    return index - 1;
}
// converts a fenced code block; index is on the opening fence
// the rest of the opening fence line is ignored; following lines are
// copied without markdown conversion until a line that is_code_block
// accepts as a fence, or until the input ends
// &, < and > are written as HTML entities
// every scan is bounded by the buffer size, so a fence on the last line
// without a trailing newline does not run past the end
// returns the index of the newline ending the closing fence line, the end
// of input if that line has no newline, or one before the end of input
// if the block is never closed
long int
md_code_block(long int index)
{
    char c;
    // skip to next line
    while (index < input_file.size && input_file.buf[index] != '\n') index++;
    printf("<pre><code>");
    // opening fence was the last line: empty, unterminated block
    if (index >= input_file.size) goto code_block_eof;
    index++;
    // print code until end of code block reached
    for (;;) {
        while (index < input_file.size && (c = input_file.buf[index]) != '\n') {
            if (c == '\0') goto code_block_eof;
            switch (c) {
            case '&':
                printf("&amp;");
                break;
            case '<':
                printf("&lt;");
                break;
            case '>':
                printf("&gt;");
                break;
            default:
                putchar(c);
                break;
            }
            index++;
        }
        if (index >= input_file.size) goto code_block_eof;
        // step past the newline
        index++;
        if (index < input_file.size && is_code_block(index)) break;
        // the newline belongs to the code
        putchar('\n');
    }
    printf("</code></pre>");
    pp_newline();
    // skip the rest of the closing fence line
    while (index < input_file.size && input_file.buf[index] != '\n') index++;
    return index;
code_block_eof:
    printf("</code></pre>");
    pp_newline();
    return index - 1;
}
// handles a run of spaces starting at index
// two or more spaces directly before a newline become a <br/> line break;
// any other run is collapsed to a single space
// returns the index of the newline after a line break, otherwise the
// index of the last space in the run
long int
md_spaces(long int index)
{
    int8_t count = 0;
    // count if there are two or more spaces (the count saturates at 2)
    while (input_file.buf[index] == ' ') {
        count += (count < 2);
        index++;
    }
    // linebreak only if at end of line and at least two spaces came before it
    if (count >= 2 && input_file.buf[index] == '\n') {
        printf("<br/>");
        pp_newline();
        pp_indent();
        return index;
    }
    putchar(' ');
    return index - 1;
}
// advances index to the next space, tab or newline
// also stops at the end of the buffer, so a token at the very end of the
// input or directly before a newline cannot carry the scan into the next
// line or past the buffer
// returns the index it stopped at
long int
skip_no_whitespace(long int index)
{
    while (index < input_file.size) {
        char c = input_file.buf[index];
        if (c == ' ' || c == '\t' || c == '\n') break;
        index++;
    }
    return index;
}
// advances index past spaces and tabs; newlines are not skipped
// also stops at the end of the buffer, so the scan does not depend on a
// non-blank byte being stored after the data
// returns the index of the first character that is not a space or tab
long int
skip_whitespace(long int index)
{
    while (index < input_file.size &&
           (input_file.buf[index] == ' ' || input_file.buf[index] == '\t'))
        index++;
    return index;
}
// classifies a line that starts with dashes
// returns is_li for exactly one dash, is_hr for exactly three dashes
// directly followed by a newline, and 0 for anything else
uint8_t
is_hr_or_li(long int index)
{
    int dashes;
    // count number of -
    for (dashes = 0; input_file.buf[index] == '-'; dashes++) index++;
    // list item
    if (dashes == 1) return is_li;
    // hr must end with return
    if (dashes == 3 && input_file.buf[index] == '\n') return is_hr;
    return 0;
}
// tells whether the text at index is an ordered list marker:
// one or more digits directly followed by .
// returns nonzero if it is, zero if not
uint8_t
is_ordered_li(long int index)
{
    long int after_digits = index;
    // skip numbers
    while (is_number(after_digits)) after_digits++;
    // no digit at all: cannot be an ordered list item
    if (after_digits == index) return 0;
    // number in list item always ends with .
    return (input_file.buf[after_digits] == '.');
}
// returns 1 if the character at index is a decimal digit, 0 otherwise
uint8_t
is_number(long int index)
{
    const int ch = input_file.buf[index];
    // the digits 0 to 9 have consecutive character codes in C
    return (ch >= '0' && ch <= '9');
}
// tells whether the text at index has link syntax: [ ... ] directly
// followed by ( with a ) somewhere later in the input
// returns nonzero if it does, zero if not
uint8_t
is_link(long int index)
{
    long int cursor;
    // must start with a bracket
    if (input_file.buf[index] != '[') return 0;
    // the bracket must close
    cursor = find_close_bracket(index);
    if (cursor == -1) return 0;
    // links have ( right after text in brackets
    if (input_file.buf[cursor + 1] != '(') return 0;
    // the ( must close before the input ends
    for (cursor += 2; cursor < input_file.size; cursor++) {
        if (input_file.buf[cursor] == ')') return 1;
    }
    return 0;
}
// tells whether the text at index is a code fence: exactly three ` or
// exactly three ~ in a row
// returns nonzero if it is, zero if not
uint8_t
is_code_block(long int index)
{
    const char fence = input_file.buf[index];
    int8_t run;
    if (!(fence == '`' || fence == '~')) return 0;
    // count how many fence characters follow each other
    for (run = 0; input_file.buf[index + run] == fence; run++) {
        // three already counted and a fourth just matched: too many
        if (run == 3) return 0;
    }
    // code block if 3 in a row
    return (run == 3);
}
// finds the ] that matches the [ at index, allowing nested brackets
// returns the index of that ], or -1 if the end of the data is reached
// first (this includes the case where the ] is the very last character)
long int
find_close_bracket(long int index)
{
    // nesting depth: brackets can sit inside brackets, so opens and closes
    // are counted until the depth returns to zero
    int depth = 0;
    do {
        if (input_file.buf[index] == '[')
            depth++;
        else if (input_file.buf[index] == ']')
            depth--;
        index++;
        // return -1 if end of data reached
        if (index >= input_file.size) return -1;
    } while (depth > 0);
    // index is one past the closing bracket
    return index - 1;
}