commit a7c5ede45301f2d00603b2be23f1dbb4cd91450e
parent 49b6cd3461b6146e5b2c74f18c278271b377b226
Author: krasjet
Date: 2020-07-27 11:42Z

add usage for all commands

Diffstat:
M pdftocgen/__init__.py | 2 +-
M pdftocgen/app.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
M pdftocio/app.py | 2 +-
M pdfxmeta/app.py | 43 ++++++++++++++++++++++++++++++-------------
4 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/pdftocgen/__init__.py b/pdftocgen/__init__.py @@ -1,3 +1,3 @@ -"""Generate table of contents for pdf based on recipe file""" +"""Generate table of contents for pdf based on a recipe file""" __version__ = '0.1.0' diff --git a/pdftocgen/app.py b/pdftocgen/app.py @@ -7,16 +7,58 @@ import sys from argparse import Namespace from fitzutils import open_pdf, dump_toc, pprint_toc from .tocgen import gen_toc +from textwrap import dedent def getargs() -> Namespace: """parse commandline arguments""" - app_desc = "pdftocgen: generate pdf table of contents from a recipe file." - parser = argparse.ArgumentParser(description=app_desc) + app_desc = dedent(""" + pdftocgen: generate pdf table of contents from a recipe file. + + This command automatically generates a table of contents for a pdf file + based on the font attributes and position of headings, which are specified + in a TOML recipe file. See the README for an introduction to the recipe + file. + + To generate the table of contents for a pdf, use input redirection or pipes + to supply the recipe file + + $ pdftocgen in.pdf < recipe.toml + + or alternatively use the -r flag + + $ pdftocgen -r recipe.toml in.pdf + + The output of this command can be directly piped into pdftocio to generate + a new pdf file using the generated table of contents + + $ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf + + or you could save the output of this command to a file for further + tweaking using output redirection + + $ pdftocgen -r recipe.toml in.pdf > toc + + or the -o flag + + $ pdftocgen -r recipe.toml -o toc in.pdf + + If you only need a readable format of the table of contents, use the -H + flag + + $ pdftocgen -r recipe.toml -H in.pdf + + This format cannot be parsed by pdftocio, but it is slightly more readable. 
+ + """) + parser = argparse.ArgumentParser( + description=app_desc, + formatter_class=argparse.RawDescriptionHelpFormatter + ) parser.add_argument('input', - metavar='doc.pdf', + metavar='in.pdf', help="path to the input pdf document") parser.add_argument('-r', '--recipe', metavar='recipe.toml', @@ -37,9 +79,9 @@ def getargs() -> Namespace: metavar="file", type=argparse.FileType('w'), default='-', - help="path to the output file. " - "if this flag is not specified, " - "the default is stdout") + help="""path to the output file. + if this flag is not specified, + the default is stdout""") parser.add_argument('-g', '--debug', action='store_true', help="enable debug mode") diff --git a/pdftocio/app.py b/pdftocio/app.py @@ -19,7 +19,7 @@ def getargs() -> Namespace: app_desc = dedent(""" pdftocio: manipulate the table of contents of a pdf file. - This program can operate in two ways: it can either be used to extract the + This command can operate in two ways: it can either be used to extract the table of contents of a pdf, or add table of contents to a pdf using the output of pdftocgen. diff --git a/pdfxmeta/app.py b/pdfxmeta/app.py @@ -6,36 +6,54 @@ import sys from argparse import Namespace from fitzutils import open_pdf -from textwrap import indent +from textwrap import indent, dedent def getargs() -> Namespace: """parse commandline arguments""" - app_desc = "pdfxmeta: extract metadata for a string in a pdf document." - parser = argparse.ArgumentParser(description=app_desc) + app_desc = dedent(""" + pdfxmeta: extract metadata for a string in a pdf document. + + To use this command, first open up the pdf file in your favorite pdf reader + and find the string you want to search for. Then use + + $ pdfxmeta -p 1 in.pdf "Subsection One" + + to find the metadata, mainly the font attributes and bounding box, of lines + containing the query "Subsection One" on page 1. 
Specifying a page number + is optional but highly recommended, since it greatly reduces the ambiguity + of matches and execution time. + + The output of this command can be directly copy-pasted to build a recipe + file for pdftocgen. + """) + parser = argparse.ArgumentParser( + description=app_desc, + formatter_class=argparse.RawDescriptionHelpFormatter + ) parser.add_argument('input', - metavar='doc.pdf', + metavar='in.pdf', help="path to the input pdf file") parser.add_argument('needle', - help="the string to search for") + help="the query string to search for") parser.add_argument('-p', '--page', action='store', type=int, - help="specify the page in which the string " - "occurs (1-based index)") + help="""specify the page in which the string + occurs (1-based index)""") parser.add_argument('-i', '--ignore-case', action='store_true', - help="when flag is set, search will be " - "case-insensitive") + help="""when flag is set, search will be + case-insensitive""") parser.add_argument('-o', '--out', metavar="file", type=argparse.FileType('w'), default='-', - help="path to the output file. " - "if this flag is not specified, " - "the default is stdout") + help="""path to the output file. + if this flag is not specified, + the default is stdout""") return parser.parse_args() @@ -58,7 +76,6 @@ def main(): with open_pdf(args.input) as doc: meta = pdfxmeta.extract_meta(doc, args.needle, args.page, args.ignore_case) - # nothing found if len(meta) == 0: sys.exit(1)