#!/usr/bin/env python3

# Copyright (c) 2024 John Gabriele <jgabriele@fastmail.fm>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use, copy,
# modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys, os, os.path, re, io, subprocess

# Help text that main() prints when the script is given any
# command-line argument. The backslash in the `find ... \\;` example
# is written doubled so that it isn't an invalid escape sequence; the
# text the user sees still shows a single backslash.
usage_msg = """rippledoc.py

This script generates html files from a directory (including
nested subdirs) of ordered markdown-formatted text files named
like "042-stuff.md". A toc (table of contents) file is generated
as well. Start your doc project like this:

    mkdir my-doc-proj
    cd my-doc-proj
    touch index.md _copyright 010-getting-started.md
    # edit, edit, edit (index.md is the overview)
    rippledoc.py

You'll need to have Pandoc installed for this script to work,
since pandoc is what does the heavy lifting.

The top level of your doc project directory must contain:

  * an index.md file,
  * a _copyright file (described below), and
  * some *.md files named like "123-foo.md".

The .md files will be processed into corresponding .html files
(without the "123-" prefixes in the names). For example, name
your doc source files like:

    index.md
    001-intro.md
    005-rationale.md
    008-overview.md
    010-details-A.md
    020-details-B.md
    080-summary.md
    100-cheatsheet.md
    other-stuff/
        010-foo.md
        020-bar.md
    reference/
        010-baz.md

The index.md file becomes your doc project's front page
(index.html). In your index.md, you must yourself manually
provide a link to toc.html. (That is, write in "[toc](toc.html)"
somewhere).

There must also be a _copyright file present at the top level
of your docs (so that its content can be added at the footer of
every page). This file is usually one line (which can contain
html) that looks something like:

    <a href="http://your-site.com/">Your Name</a>, 20xx – 20xx

Again, this program expects to find both an index.md file and a
_copyright file in the top-level dir where you run it. That's how
it knows its cwd is at the top level and not in some subdir of your
doc project.

All of your md files must start with a line like:

    % Title of Doc

and may also have two more lines right after it:

    % Your Name
    % YYYY-MM-DD

The first line is the only one that's strictly required, as that
title will be used in the ToC. Also, the first line of index.md
will be the main title of your doc project.

Note, this script:

  * doesn't take any options. If you pass any, it prints this
    help message.

  * ignores everything else besides the NNN-foo.md files (well,
    and the index.md, _copyright file). Feel free to have images,
    .txt files, or whatever scattered about; they will be ignored
    by this program.

  * only regenerates html if that html file is older than its
    corresponding .md file.

  * (re)creates the toc.txt and toc.html files every time it's run.

  * creates the styles.css file if it's not present. Feel free
    to modify yours if you like.

Also note:

  * Your filenames and subdir names should not contain any spaces.
    This script will complain if it doesn't like your filenames or
    dirnames.

  * If you rename an md file (after its "nnn-"), you must manually
    delete its corresponding html file, lest it become detritus,
    as this script won't delete the old-named one.

    Note, it's fine to *renumber* your md files with wild abandon
    (since the generated html filenames don't contain the "nnn-"
    numbers).

  * If you want to remove everything created by this script,
    leaving your doc project clean as the driven snow, from the
    top level of your doc project, do:

        find . -type f -name '*.html' -exec rm {} \\;
        rm toc.txt styles.css

Now get writing!
"""

# TODO:
#
#   * XXX put a nav bar at the top of index.html
#
#   * look into option to have this use djot.
#
#   * maybe have a way to specify the order of dirs in the ToC.

# Release stamp shown in the banner that main() prints.
VERSION = "2024-12-26"

# Project-wide text; both start empty and are assigned in main().
doc_proj_title = ""
copyright_content = ""

# Parallel lists, one entry per NNN-foo.md file, appended to in
# lockstep by main() while it walks the project directory.
all_md_fnms, all_dirnames, all_html_fnms = [], [], []
all_titles, all_depths = [], []

# Leading part of every pandoc invocation. It ends with "-o", so
# callers append, in this order: the output .html filename, any
# further options, and finally the input .md filename.
pandoc_cmd = [
    "pandoc",
    "-s",
    "--mathjax=https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
    "-o",
]

#-------------------------------------------------------------------
def _is_out_of_date(src_fnm, dest_fnm):
    """Return True if dest_fnm is missing or is older than src_fnm."""
    if not os.path.exists(dest_fnm):
        return True
    return os.path.getmtime(src_fnm) > os.path.getmtime(dest_fnm)


def main():
    """Build the html pages and the ToC for the doc project in the cwd.

    Steps, in order:

      1. Print the usage message and exit if any argument was given.
      2. Require index.md and _copyright in the cwd; read the project
         title and the copyright text from them.
      3. Write styles.css if it isn't there yet.
      4. Walk the tree and record every NNN-foo.md file in the
         module-level all_* lists.
      5. Regenerate index.html if it's out of date.
      6. Always regenerate toc.txt and toc.html.
      7. Regenerate each page whose html is missing or older than
         its md file.

    Exits with status 1 when a required file is missing (the helper
    functions called from here also exit on bad names or titles).
    A failing pandoc run raises subprocess.CalledProcessError.
    """
    global doc_proj_title, copyright_content

    print(f"================= Rippledoc, version {VERSION} =================")

    if len(sys.argv) > 1:
        print(usage_msg)
        sys.exit()

    if not os.path.exists("index.md"):
        print("""\
There's no index.md file here. Are you at the top-level of your
doc project dir? If so, please create an index.md file here.
Exiting.""")
        sys.exit(1)

    doc_proj_title = get_title_line("index.md")

    if not os.path.exists("_copyright"):
        print("""\
There's no _copyright file here. Are you at the top-level of your
doc project dir? If so, please create a _copyright file here that
contains just one line; something like:

    <a href="/index.html">Your Name</a>, 20YY

Exiting.""")
        sys.exit(1)
    # pandoc reads and writes UTF-8, so every text file this script
    # touches is handled as UTF-8 rather than in the locale encoding.
    with io.open("_copyright", encoding="utf-8") as f:
        copyright_content = f.read().strip()

    if not os.path.exists("styles.css"):
        print("No styles.css present. Generating one...")
        with io.open("styles.css", "w", encoding="utf-8") as f:
            f.write(styles_content)

    # Make the big list of all NNN-foo.md filenames.
    for dir_path, dirs_here, files_here in os.walk("."):
        # Sorting in place makes os.walk descend into the subdirs in
        # name order, so the ToC and the prev/next chain come out the
        # same on every run and on every filesystem.
        dirs_here.sort()
        for dir_here in dirs_here:
            # exits if suboptimal dirname
            check_dirname(dir_here, dir_path)
        for fnm in sorted(files_here):
            if not fnm.endswith(".md") or fnm in ("index.md", "toc.md"):
                continue
            check_fnm(fnm, dir_path) # exits if suboptimal filename
            # fnm      is like "123-foo.md"
            # dir_path is like "./foo/bar"
            full_md_fnm = dir_path + "/" + fnm
            all_dirnames.append(dir_path)
            all_md_fnms.append(full_md_fnm)
            all_depths.append(dir_path.count("/"))
            all_titles.append(get_title_line(full_md_fnm))
            # Drop the "NNN-" prefix and swap ".md" for ".html".
            all_html_fnms.append(dir_path + "/" + fnm[4:-3] + ".html")

    # Ok, let's process some md files into html...
    # First do the index.md file.
    if _is_out_of_date("index.md", "index.html"):
        print("Generating index.html...")
        subprocess.check_call(
            pandoc_cmd +
            ["index.html"] +
            ["--css", "./styles.css"] +
            ["index.md"]
        )

    print("Generating toc.txt...")
    with io.open("toc.txt", "w", encoding="utf-8") as f:
        f.write("% " + doc_proj_title + "\n\n")
        f.write("## Table of Contents\n\n")
        # The doubled backslashes put a literal "\[" and "\]" into the
        # markdown, so the brackets around the link render as text.
        f.write("\\[[home](index.html)\\]\n\n")
        this_dirname = ""
        for dirname, title, html_fnm in zip(
                all_dirnames, all_titles, all_html_fnms):
            # Emit a bold dir heading each time we enter a new subdir
            # (top-level pages get no heading).
            if dirname != this_dirname:
                if dirname != ".":
                    f.write("\n**" + dirname[2:] + "/**\n\n")
                this_dirname = dirname
            f.write("  * [" + title + "](" + html_fnm + ")\n")
        f.write("\n")

    print("Generating toc.html...")
    subprocess.check_call(
        pandoc_cmd +
        ["toc.html"] +
        ["--css", "./styles.css"] +
        ["toc.txt"]
    )

    for idx, (md_fnm, html_fnm) in enumerate(zip(all_md_fnms, all_html_fnms)):
        if not _is_out_of_date(md_fnm, html_fnm):
            continue
        print("*", md_fnm, "-->", html_fnm, "...")
        # md_fnm is like "./foo/bar/123-baz.md"; one "../" per subdir
        # level gets us from the page back up to styles.css.
        sty_prfx = "../" * (md_fnm.count("/") - 1)
        mk_html_before_and_after_files_for(idx)
        subprocess.check_call(
            pandoc_cmd +
            [html_fnm] +
            ["--css", sty_prfx + "./styles.css"] +
            ["-B", "/tmp/before.html", "-A", "/tmp/after.html"] +
            [md_fnm]
        )

    print("All done. Point your browser to ./index.html.")

#-------------------------------------------------------------------
def check_dirname(dir_name, dir_path):
    """Exit with an error message unless dir_name is acceptable.

    An acceptable name consists solely of letters, digits,
    underscores, dots, commas, and dashes. dir_path is the parent
    directory and is used only to show the full path in the message.

    Returns None if the name is fine; otherwise prints the complaint
    and exits with status 1.
    """
    # fullmatch (rather than match with a trailing "$") also rejects
    # a name that ends in a newline.
    if re.fullmatch(r'[\w\.,-]+', dir_name):
        return
    print("Directory names should be named only with letters,")
    print("numbers, dots, dashes, commas, underscores. Please")
    print("rename dir name:", "\"" + dir_path + "/" + dir_name + "\".")
    print("Exiting.")
    sys.exit(1)

def check_fnm(fnm, dir_path):
    """Exit with an error message unless fnm is named like "123-foo.md".

    An acceptable name is three digits, a dash, and then one or more
    letters, digits, underscores, dots, commas, or dashes. dir_path
    is the containing directory and is used only to show the full
    path in the message.

    Returns None if the name is fine; otherwise prints the complaint
    and exits with status 1.
    """
    # fullmatch (rather than match with a trailing "$") also rejects
    # a name that ends in a newline.
    if re.fullmatch(r'\d{3}-[\w\.,-]+', fnm):
        return
    print("md filenames should be named like \"123-foo.md\",")
    print("and only contain letters, numbers, dashes, dots,")
    print("commas, and underscores. No spaces. Please fix")
    print("filename:", "\"" + dir_path + "/" + fnm + "\".", "Exiting.")
    sys.exit(1)

#-------------------------------------------------------------------
def get_title_line(fnm):
    """Return the title given on the first line of the md file fnm.

    The first line must look like "% Some Title". The text after
    the "% " is returned with surrounding whitespace removed.

    If the first line doesn't start with "% ", or has nothing but
    whitespace after it, prints a complaint and exits with status 1.
    """
    # pandoc expects UTF-8 input, so read the file as UTF-8 instead
    # of whatever the locale's default encoding happens to be.
    with io.open(fnm, encoding="utf-8") as f:
        line = f.readline()
    title = line[2:].strip() if line.startswith("% ") else ""
    if not title:
        print(fnm, "must have a first line containing its title,")
        print('for example, "% The Life of Sunflowers". Please fix.')
        print("Exiting.")
        sys.exit(1)
    return title

#-------------------------------------------------------------------
# Make the /tmp/before.html and /tmp/after.html files.
def mk_html_before_and_after_files_for(idx):
    """Write the header and footer html fragments for page number idx.

    idx is an index into the module-level all_* lists. Two files are
    (over)written:

      * /tmp/before.html -- project title, nav bar, and the name of
        the page's directory, followed by the opening tag of the
        article div;
      * /tmp/after.html  -- the closing tag of the article div, the
        nav bar again, and the copyright footer.

    The nav bar links to the previous and next pages in all_html_fnms
    and to toc.html. All hrefs are prefixed with one "../" per level
    of directory depth so that they resolve from the page's own dir.
    """
    depth = all_depths[idx]
    prefix = "../" * depth
    no_link = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'

    # Work out each link on its own, so that a project with just one
    # page (which is both the first and the last) gets two blanks
    # instead of indexing past the end of all_html_fnms.
    if idx > 0:
        prev_link = '<a href="' + prefix + all_html_fnms[idx-1] + '">← prev</a>'
    else:
        prev_link = no_link
    if idx < len(all_html_fnms) - 1:
        next_link = '<a href="' + prefix + all_html_fnms[idx+1] + '">next →</a>'
    else:
        next_link = no_link

    nb_content = nav_bar_content.replace("{{prev-link}}", prev_link)
    nb_content =      nb_content.replace("{{next-link}}", next_link)
    nb_content =      nb_content.replace("{{prefix}}", prefix)

    if depth == 0:
        # Top-level pages show an empty (but still visible) dir bar.
        this_dirname = "&nbsp;"
    else:
        # all_dirnames entries look like "./foo/bar"; drop the "./".
        this_dirname = "∈ " + all_dirnames[idx][2:] + "/"

    htb = html_before.replace("{{project-name}}", doc_proj_title)
    htb =         htb.replace("{{nav-bar-content}}", nb_content)
    htb =         htb.replace("{{prefix}}",  prefix)
    htb =         htb.replace("{{this-dir-name}}", this_dirname)

    hta = html_after.replace("{{nav-bar-content}}", nb_content)
    hta =        hta.replace("{{copyright-info}}", copyright_content)

    # Close (and so flush) both files before returning, since pandoc
    # reads them right afterwards. Write UTF-8 explicitly: the arrows
    # and the "∈" can't be encoded in some locale encodings, and
    # pandoc expects UTF-8 anyway.
    with io.open("/tmp/before.html", "w", encoding="utf-8") as f:
        f.write(htb)
    with io.open("/tmp/after.html", "w", encoding="utf-8") as f:
        f.write(hta)

#-------------------------------------------------------------------
# Fragment placed ahead of each page's body. The {{...}} markers
# are filled in by mk_html_before_and_after_files_for(). It opens
# the article-content div that html_after closes.
html_before = "\n".join([
    "",
    '<div id="top-proj-title-header">',
    '  <a href="{{prefix}}index.html">{{project-name}}</a>',
    "</div>",
    "",
    '<div id="nav-bar">',
    "{{nav-bar-content}}",
    "</div>",
    "",
    '<div id="this-dir-name">',
    "{{this-dir-name}}",
    "</div>",
    "",
    '<div id="article-content">',
    "",
])

# The three cells of a nav bar: prev link, ToC link, next link.
# Substituted for {{nav-bar-content}} in both fragments.
nav_bar_content = "\n".join([
    "<div>{{prev-link}}</div>",
    '<div><a href="{{prefix}}toc.html">ToC</a></div>',
    "<div>{{next-link}}</div>",
])

# Fragment placed after each page's body: closes the article div,
# repeats the nav bar, and ends with the copyright footer.
html_after = "\n".join([
    "",
    "</div> <!-- article-content -->",
    "",
    '<div id="nav-bar">',
    "{{nav-bar-content}}",
    "</div>",
    "",
    '<div id="closing-footer">',
    "{{copyright-info}}<br/>",
    "</div>",
    "",
])
#-------------------------------------------------------------------

# Default stylesheet. main() writes this text to styles.css, but
# only when no styles.css exists yet, so a user's edited copy is
# left alone. The id selectors (#top-proj-title-header, #nav-bar,
# #this-dir-name, #article-content, #closing-footer) match the ids
# used in the html_before and html_after templates.
styles_content = """
html {
    width: 100%;
}

body {
    margin: 1em;
    background-color: #fff;
    line-height: 1.5;
    font-family: "Clear Sans", sans-serif;
}

#top-proj-title-header {
    font-weight: bold;
    font-size: large;
    padding: 4px 10px 10px 10px;
}

#nav-bar {
    font-size: small;
    display: flex;
    justify-content: space-between;
    padding: 6px 6px 10px 10px;
    background-color: #ddd;
}

#this-dir-name {
    background-color: #bbb;
    padding: 6px 6px 6px 10px;
    font-size: small;
    font-weight: bold;
}

#article-content {
    /* padding: 20px; */
}

/* Pandoc automatically puts title, subtitle, author, and date
   into a header element at the top of the page. Comment these
   out if you'd rather have them displayed.
*/
header  .author { display: none; }
header  .date   { display: none; }

#closing-footer {
    clear: both;
    padding: 10px;
    font-style: italic;
    font-size: small;
}

a:link {
    color: #3A4089;
}

a:visited {
    color: #875098;
}

table {
    background-color: #eee;
    padding-left: 2px;
    border: 2px solid #d4d4d4;
    border-collapse: collapse;
}

th {
    background-color: #d4d4d4;
    padding-right: 4px;
}

tr, td, th {
    border: 2px solid #d4d4d4;
    padding-left: 4px;
    padding-right: 4px;
}

dt {
    font-weight: bold;
}

code {
    background-color: #eee;
    font-size: large;
}

pre {
    background-color: #eee;
    border: 1px solid #ddd;
    padding-left: 6px;
    padding-right: 2px;
    padding-bottom: 5px;
    padding-top: 5px;
    font-size: large;
}

blockquote {
    background-color: #d8deea;
    border: 1px solid #c6d1e7;
    border-radius: 6px;
    padding-top: 2px;
    padding-bottom: 2px;
    padding-left: 16px;
    padding-right: 16px;
}

blockquote code, blockquote pre {
    background-color: #cad2e4;
    border-style: none;
}

h1, h2, h3, h4, h5, h6 {
    color: #567EB5;
}

h3, h5 {
    font-style: italic;
}
"""

#-------------------------------------------------------------------
# Run the build only when this file is executed as a script, so that
# importing it as a module (e.g. to reuse or test one of its helper
# functions) doesn't kick off a build in the current directory.
if __name__ == "__main__":
    main()
