pdf.tocgen - add usage for all commands

commit a7c5ede45301f2d00603b2be23f1dbb4cd91450e
parent 49b6cd3461b6146e5b2c74f18c278271b377b226
Author: krasjet
Date: 2020-07-27 11:42Z

add usage for all commands

Diffstat:
M pdftocgen/__init__.py  | 2 +-
M pdftocgen/app.py  | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
M pdftocio/app.py  | 2 +-
M pdfxmeta/app.py  | 43 ++++++++++++++++++++++++++++++-------------

4 files changed, 80 insertions(+), 21 deletions(-)
diff --git a/pdftocgen/__init__.py b/pdftocgen/__init__.py
@@ -1,3 +1,3 @@
-"""Generate table of contents for pdf based on recipe file"""
+"""Generate table of contents for pdf based on a recipe file"""
 
 __version__ = '0.1.0'
diff --git a/pdftocgen/app.py b/pdftocgen/app.py
@@ -7,16 +7,58 @@ import sys
 from argparse import Namespace
 from fitzutils import open_pdf, dump_toc, pprint_toc
 from .tocgen import gen_toc
+from textwrap import dedent
 
 
 def getargs() -> Namespace:
     """parse commandline arguments"""
 
-    app_desc = "pdftocgen: generate pdf table of contents from a recipe file."
-    parser = argparse.ArgumentParser(description=app_desc)
+    app_desc = dedent("""
+    pdftocgen: generate pdf table of contents from a recipe file.
+
+    This command automatically generates a table of contents for a pdf file
+    based on the font attributes and position of headings, which are specified
+    in a TOML recipe file. See the README for an introduction to the recipe
+    file.
+
+    To generate the table of contents for a pdf, use input redirection or pipes
+    to supply the recipe file
+
+        $ pdftocgen in.pdf < recipe.toml
+
+    or alternatively use the -r flag
+
+        $ pdftocgen -r recipe.toml in.pdf
+
+    The output of this command can be directly piped into pdftocio to generate
+    a new pdf file using the generated table of contents
+
+        $ pdftocgen -r recipe.toml in.pdf | pdftocio -o out.pdf in.pdf
+
+    or you could save the output of this command to a file for further
+    tweaking using output redirection
+
+        $ pdftocgen -r recipe.toml in.pdf > toc
+
+    or the -o flag
+
+        $ pdftocgen -r recipe.toml -o toc in.pdf
+
+    If you only need a readable format of the table of contents, use the -H
+    flag
+
+        $ pdftocgen -r recipe.toml -H in.pdf
+
+    This format cannot be parsed by pdftocio, but it is slightly more readable.
+
+    """)
+    parser = argparse.ArgumentParser(
+        description=app_desc,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
 
     parser.add_argument('input',
-                        metavar='doc.pdf',
+                        metavar='in.pdf',
                         help="path to the input pdf document")
     parser.add_argument('-r', '--recipe',
                         metavar='recipe.toml',
@@ -37,9 +79,9 @@ def getargs() -> Namespace:
                         metavar="file",
                         type=argparse.FileType('w'),
                         default='-',
-                        help="path to the output file. "
-                        "if this flag is not specified, "
-                        "the default is stdout")
+                        help="""path to the output file.
+                        if this flag is not specified,
+                        the default is stdout""")
     parser.add_argument('-g', '--debug',
                         action='store_true',
                         help="enable debug mode")
diff --git a/pdftocio/app.py b/pdftocio/app.py
@@ -19,7 +19,7 @@ def getargs() -> Namespace:
     app_desc = dedent("""
     pdftocio: manipulate the table of contents of a pdf file.
 
-    This program can operate in two ways: it can either be used to extract the
+    This command can operate in two ways: it can either be used to extract the
     table of contents of a pdf, or add table of contents to a pdf using the
     output of pdftocgen.
 
diff --git a/pdfxmeta/app.py b/pdfxmeta/app.py
@@ -6,36 +6,54 @@ import sys
 
 from argparse import Namespace
 from fitzutils import open_pdf
-from textwrap import indent
+from textwrap import indent, dedent
 
 
 def getargs() -> Namespace:
     """parse commandline arguments"""
 
-    app_desc = "pdfxmeta: extract metadata for a string in a pdf document."
-    parser = argparse.ArgumentParser(description=app_desc)
+    app_desc = dedent("""
+    pdfxmeta: extract metadata for a string in a pdf document.
+
+    To use this command, first open up the pdf file in your favorite pdf
+    reader and find the string you want to search for. Then use
+
+        $ pdfxmeta -p 1 in.pdf "Subsection One"
+
+    to find the metadata, mainly the font attributes and bounding box, of lines
+    containing the query "Subsection One" on page 1. Specifying a page number
+    is optional but highly recommended, since it greatly reduces the ambiguity
+    of matches and execution time.
+
+    The output of this command can be directly copy-pasted to build a recipe
+    file for pdftocgen.
+    """)
+    parser = argparse.ArgumentParser(
+        description=app_desc,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
 
     parser.add_argument('input',
-                        metavar='doc.pdf',
+                        metavar='in.pdf',
                         help="path to the input pdf file")
     parser.add_argument('needle',
-                        help="the string to search for")
+                        help="the query string to search for")
     parser.add_argument('-p', '--page',
                         action='store',
                         type=int,
-                        help="specify the page in which the string "
-                             "occurs (1-based index)")
+                        help="""specify the page in which the string
+                             occurs (1-based index)""")
     parser.add_argument('-i', '--ignore-case',
                         action='store_true',
-                        help="when flag is set, search will be "
-                        "case-insensitive")
+                        help="""when flag is set, search will be
+                        case-insensitive""")
     parser.add_argument('-o', '--out',
                         metavar="file",
                         type=argparse.FileType('w'),
                         default='-',
-                        help="path to the output file. "
-                        "if this flag is not specified, "
-                        "the default is stdout")
+                        help="""path to the output file.
+                        if this flag is not specified,
+                        the default is stdout""")
 
     return parser.parse_args()
 
@@ -58,7 +76,6 @@ def main():
     with open_pdf(args.input) as doc:
         meta = pdfxmeta.extract_meta(doc, args.needle,
                                      args.page, args.ignore_case)
-
         # nothing found
         if len(meta) == 0:
             sys.exit(1)

M	pdftocgen/__init__.py	\|	2	+-
M	pdftocgen/app.py	\|	54	++++++++++++++++++++++++++++++++++++++++++++++++------
M	pdftocio/app.py	\|	2	+-
M	pdfxmeta/app.py	\|	43	++++++++++++++++++++++++++++++-------------