From 8e5b259956ca7325cd7a69426a736b2a7ef1d0e1 Mon Sep 17 00:00:00 2001 From: Moritz Schubotz <wiki@physikerwelt.de> Date: Sat, 17 Nov 2018 15:26:54 +0100 Subject: [PATCH] Add python GitHub interface * create a test issue * fix typo in encrypted github token --- .travis.yml | 7 +- README.md | 2 +- support/commitUp.sh | 2 +- support/issue.py | 65 ++++ support/parseTeXlog.py | 774 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 847 insertions(+), 3 deletions(-) create mode 100644 support/issue.py create mode 100755 support/parseTeXlog.py diff --git a/.travis.yml b/.travis.yml index 6fff389..66f8bee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,9 @@ +language: python +python: +- '3.6' install: - source ./support/texlive_install.sh +- pip install PyGithub cache: directories: - "/tmp/texlive" @@ -24,7 +28,8 @@ deploy: repo: ag-gipp/bib env: global: - secure: eAJppYPl3FzB2+ntMmSLRh45pJpGSSUx/pWLl0sNbGy8bhVLBFMa9DiURVWJ1YKpKDPhjy/7hYn0BmAq8/fDkpp5DMdZu5E0dCk5Ay+AsZR4aikpWyhGJXHc2WEmxbHF/xTV0/9G/jmAhlznRw0CR6E9S5RB0PFOunBlRDmMPSeICL+V5K/2Yc/7Ha/ljZS/gJlSIwZcVHhlaZSLxdi/CRFSx7VgoTIEgzRBYb82y7VbfwpYC0lTknv5Opg82mpbSIb7AgNzGCjLnoHtXntsGQIS8Jcq/oUyNYPT1wocwH/UPNq29/IKQuczEZzxZp/nJq8D4xNmryRd4iNiL7TdA2nqPz7oxdXZTJZRSWP3EZYw5iubYnN8KeoFrRMMFhT91KrRR6wdUq1WoH0bR7I3EwAR/9VuJ1azAFfA1w/wfDeQuuxmHl5kqNerIXRA/gQIpbDNdRz6piic04PFmXTfy5rRCVc6RHj6+sSaZedokkLq2NG7FgOaFGxaYDPWopWKcPoS0L0vArKHjw/moifYW6mDW3zu0DffUeF8sSXtTCbP99tp8hMUmyFPGCTit4MMECu5zTgSraO3E+lle8KxTvw592juCyELXWRsQvXuoMmt8L/mUFpnapHApLA48yBHDBGYW8Cj0xWEacui/utXGFTVyEFY0wKOnuQxELIvXrI= + - secure: 
eAJppYPl3FzB2+ntMmSLRh45pJpGSSUx/pWLl0sNbGy8bhVLBFMa9DiURVWJ1YKpKDPhjy/7hYn0BmAq8/fDkpp5DMdZu5E0dCk5Ay+AsZR4aikpWyhGJXHc2WEmxbHF/xTV0/9G/jmAhlznRw0CR6E9S5RB0PFOunBlRDmMPSeICL+V5K/2Yc/7Ha/ljZS/gJlSIwZcVHhlaZSLxdi/CRFSx7VgoTIEgzRBYb82y7VbfwpYC0lTknv5Opg82mpbSIb7AgNzGCjLnoHtXntsGQIS8Jcq/oUyNYPT1wocwH/UPNq29/IKQuczEZzxZp/nJq8D4xNmryRd4iNiL7TdA2nqPz7oxdXZTJZRSWP3EZYw5iubYnN8KeoFrRMMFhT91KrRR6wdUq1WoH0bR7I3EwAR/9VuJ1azAFfA1w/wfDeQuuxmHl5kqNerIXRA/gQIpbDNdRz6piic04PFmXTfy5rRCVc6RHj6+sSaZedokkLq2NG7FgOaFGxaYDPWopWKcPoS0L0vArKHjw/moifYW6mDW3zu0DffUeF8sSXtTCbP99tp8hMUmyFPGCTit4MMECu5zTgSraO3E+lle8KxTvw592juCyELXWRsQvXuoMmt8L/mUFpnapHApLA48yBHDBGYW8Cj0xWEacui/utXGFTVyEFY0wKOnuQxELIvXrI= + - secure: IT1vMl49aqp6eNlJ8XM+Gcio9gg5IwR+278UNuznScaQZ9DyFTmDp75KBNS2PQXMcwcUHFKRTS24pMYSrquN8NakQx7qQtTSTO09dRE0QGSVgeKnGC4kuQTN2Gqgj06Wg+TVJI2f2k7DPPHXZ8FdbcnMUn7vO1pcjGI1Z54hZwOpzJbWV8Qkbz9WzCrg+RZfoDJnw5NTVU3vfvMziVP+NkH1j0hDKl9yPl8FUdcuknnTqhYYElO0AcEB0CtWxxA3S06uCbDTjqd6uY4yGH4SjurMjxGdjQvphp+rd3cnlBLXZojzPC4GqnsnZ7r68NYfeV0UUAtA/qa7JqBPG7XpD6t+2Al4eLvY/kbJzi4BQCZwBKTLa9654eFn1htkQQLwCz3rQiUMA547z7Ob4Y632fm9gG63D5El15sA1LYuuhC1E7gxlfFOFDkPyBWIsy6PsreK+ss6mgnlwBeDYi0TsVA3FrxVSxqIq3BV85dxGW6RbKgBzaReKDXpfm0gy7mCISQiJoT/sBOapWYvutUMXWph1VxRK8GIAFyCdwkVOEGaSOe3uiYoHzKEi+fT3Q+BEDOMMcnBxv1a+VQjrvM7J/9WNMTpUKmPnpX8bwD8/0he6C9kiKNrCpZXXnTDjJmDb5tOuBjuiqB9M6AciD/YiVmDRecJ/PTRDDqldbJOsy8= before_install: - openssl aes-256-cbc -K $encrypted_125c18bb9738_key -iv $encrypted_125c18bb9738_iv -in bibbot.asc.enc -out bibbot.asc -d diff --git a/README.md b/README.md index 9ac7080..b15b4ea 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,6 @@ For the interaction with GitHub independently from travis, we create a [personal access token](https://github.com/settings/tokens) and make it available as environment variable ```bash -travis encrypt $GITHUB_TOKEN=<paste PAT> --add +travis encrypt GITHUB_TOKEN=<paste PAT> --add ``` diff --git a/support/commitUp.sh b/support/commitUp.sh index e6d2fc2..953f5d9 
100755 --- a/support/commitUp.sh +++ b/support/commitUp.sh @@ -5,5 +5,5 @@ git config --global user.email "bibbot@ckurs.de" git config --global user.name "BibBot" git config --global user.signingkey "1617C27854592471" git commit -m -S "Reformat bib file" gipp.bib -git remote add http https://bibbot:${GITHUB_TOKEN}@github.com/ag-gipp/bib.git > /dev/null 2>&1 +git remote add http https://${GITHUB_TOKEN}@github.com/ag-gipp/bib.git git push http HEAD:$TRAVIS_BRANCH diff --git a/support/issue.py b/support/issue.py new file mode 100644 index 0000000..f22689b --- /dev/null +++ b/support/issue.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# ST2/ST3 compat +from __future__ import print_function +from parseTeXlog import * +import re +import sys +import os +import os.path +import subprocess +import urllib3 + +urllib3.disable_warnings() +from github import Github + +token = os.environ['GITHUB_TOKEN'] + +# To accommodate both Python 2 and 3 +if sys.version_info >= (3,): + advance_iterator = next +else: + def _advance_iterator(it): + return it.next() + + + advance_iterator = _advance_iterator + +# If invoked from the command line, parse provided log file + +if __name__ == '__main__': + try: + logfilename = '../src/tex/allRefs.log' + if len(sys.argv) == 3: + extra_file_ext = sys.argv[2].split(" ") + data = open(logfilename, 'rb').read() + root_dir = os.path.dirname(logfilename) + errors, warnings, badboxes = parse_tex_log(data, root_dir) + print("") + print("Errors:") + for err in errors: + print(err) + print("") + print("Warnings:") + unrefs = {} + lastfile = '' + issue_content = '' + for warn in warnings: + matchObj = re.match( r'(.*?).tex:(\d+): Package natbib Warning: Citation `(.*?)\' on page (\d+) undefined on input line (\d+).', warn, re.M|re.I) + if matchObj: + label = "\n- [ ] [" + matchObj.group(3) + "](https://github.com/usnistgov/DRMF/blob/drmf/" + matchObj.group(1) + ".tex#L" + matchObj.group(2) + ")" + if (lastfile != matchObj.group(1) ): + issue_content += "\n## " + 
matchObj.group(1) + " \n" + lastfile = matchObj.group(1); + issue_content += label + unrefs[matchObj.group(3)] = label + # od = collections.OrderedDict(sorted(unrefs.items())) + if token: + g = Github(token) + repo = g.get_repo("ag-gipp/bib") + repo.create_issue("Issue title", issue_content) + # print(unrefs) + print("") + except Exception as e: + import traceback + + traceback.print_exc() diff --git a/support/parseTeXlog.py b/support/parseTeXlog.py new file mode 100755 index 0000000..3f46713 --- /dev/null +++ b/support/parseTeXlog.py @@ -0,0 +1,774 @@ +# ST2/ST3 compat +from __future__ import print_function + +import re +import sys +import os.path + + +# To accommodate both Python 2 and 3 +if sys.version_info >= (3,): + advance_iterator = next +else: + def _advance_iterator(it): + return it.next() + advance_iterator = _advance_iterator + +print_debug = False +interactive = False +extra_file_ext = [] + +def debug(s): + if print_debug: + print(u"parseTeXlog: {0}".format(s)) + +# The following function is only used when debugging interactively. +# +# If file is not found, ask me if we are debugging +# Rationale: if we are debugging from the command line, perhaps we are parsing +# a log file from a user, so apply heuristics and / or ask if the file not +# found is actually legit +# +# Return value: the question is, "Should I skip this file?" 
Hence: +# True means YES, DO SKIP IT, IT IS NOT A FILE +# False means NO, DO NOT SKIP IT, IT IS A FILE +def debug_skip_file(f, root_dir): + # If we are not debugging, then it's not a file for sure, so skip it + # if not (print_debug or interactive): + if not (interactive or print_debug): + return True + debug("debug_skip_file: " + f) + f_ext = os.path.splitext(f)[1].lower()[1:] + # Heuristic: TeXlive on Mac or Linux (well, Ubuntu at least) or Windows / MiKTeX + # Known file extensions: + known_file_exts = ['tex','sty','cls','cfg','def','mkii','fd','map','clo', 'dfu', + 'ldf', 'bdf', 'bbx','cbx','lbx','dict'] + if (f_ext in known_file_exts) and \ + (("/usr/local/texlive/" in f) or ("/usr/share/texlive/" in f) or ("Program Files\\MiKTeX" in f) \ + or re.search(r"\\MiKTeX(?:\\| )\d\.\d+\\tex",f)) or ("\\MiKTeX\\tex\\" in f): + print ("TeXlive / MiKTeX FILE! Don't skip it!") + return False + if (f_ext in known_file_exts and re.search(r'(\\|/)texmf\1', f, re.I)): + print ("File in TEXMF tree! Don't skip it!") + return False + # Heuristic: "version 2010.12.02" + if re.match(r"version \d\d\d\d\.\d\d\.\d\d", f): + print ("Skip it!") + return True + # Heuristic: TeX Live line + if re.match(r"TeX Live 20\d\d(/Debian)?\) \(format", f): + print ("Skip it!") + return True + # Heuristic: MiKTeX line + if re.match("MiKTeX \d\.\d\d?",f): + print ("Skip it!") + return True + # Heuristic: no two consecutive spaces in file name + if " " in f: + print ("Skip it!") + return True + # Heuristic: various diagnostic messages + if f=='e.g.,' or "ext4): destination with the same identifier" in f or "Kristoffer H. Rose" in f: + print ("Skip it!") + return True + # Heuristic: file in local directory with .tex ending + file_exts = extra_file_ext + ['tex', 'aux', 'bbl', 'cls', 'sty', 'out', 'toc', 'dbx'] + if (f.startswith(root_dir) or f[0:2] in ['./', '.\\', '..']) and f_ext in file_exts: + print ("File! 
Don't skip it") + return False + + # Heuristic: absolute path that looks like home directory + if f[0] == '/': + if f.split('/')[1] in ['home', 'Users']: + print("Assuming home directory file. Don't skip!") + return False + # N.B. this is not a good technique for detecting the user folder + # on Windows, but is hopefully "good enough" for the common configuration + # (given that this will not usually be run on the computer that generated + # the log) + elif re.match(r'^[A-Z]:\\(?:Documents and Settings|Users)\\', f): + print("Assuming home directory file. Don't skip!") + return False + + if not interactive: + print("Automatically skipping") + return True + + if sys.version_info < (3,): + choice = raw_input() + else: + choice = input() + + if choice == "": + print ("Skip it") + return True + else: + print ("FILE! Don't skip it") + return False + + +# More robust parsing code: October / November 2012 +# Input: tex log file, read in **binary** form, unprocessed +# Output: content to be displayed in output panel, split into lines + +def parse_tex_log(data, root_dir): + debug("Parsing log file") + errors = [] + warnings = [] + badboxes = [] + parsing = [] + + guessed_encoding = 'UTF-8' # for now + + # Split data into lines while in binary form + # Then decode using guessed encoding + # We need the # of bytes per line, not the # of chars (codepoints), to undo TeX's line breaking + # so we construct an array of tuples: + # (decoded line, length of original byte array) + + try: + log = [(l.decode(guessed_encoding, 'ignore'), len(l)) for l in data.splitlines()] + except UnicodeError: + debug("log file not in UTF-8 encoding!") + errors.append("ERROR: your log file is not in UTF-8 encoding.") + errors.append("Sorry, I can't process this file") + return (errors, warnings, badboxes) + + # loop over all log lines; construct error message as needed + # This will be useful for multi-file documents + + # some regexes + # file_rx = re.compile(r"\(([^)]+)$") # OLD + # Structure (+ 
means captured, - means not captured) + # + maybe " (for Windows) + # + maybe a drive letter and : (for Windows) + # + maybe . NEW: or ../ or ..\, with repetitions + # + then any char, matched NON-GREEDILY (avoids issues with multiple files on one line?) + # + then . + # + then any char except for whitespace or " or ); at least ONE such char + # + then maybe " (on Windows/MikTeX) + # - then whitespace or ), or end of line + # + then anything else, captured for recycling + # This should take care of e.g. "(./test.tex [12" or "(./test.tex (other.tex" + # NOTES: + # 1. we capture the initial and ending " if there is one; we'll need to remove it later + # 2. we define the basic filename parsing regex so we can recycle it + # 3. we allow for any character besides "(" before a file name starts. This gives a lot of + # false positives but we kill them with os.path.isfile + file_basic = r"\"?(?:[a-zA-Z]\:)?(?:\.|(?:\.\./)|(?:\.\.\\))*.+?\.[^\s\"\)\.]+\"?" + file_rx = re.compile(r"[^\(]*?\((" + file_basic + r")(\s|\"|\)|$)(.*)") + # Useless file #1: {filename.ext}; capture subsequent text + # Will avoid nested {'s as these can't really appear, except if file names have braces + # which is REALLY bad!!! + file_useless1_rx = re.compile(r"\{\"?(?:\.|\.\./)*[^\.]+\.[^\{\}]*\"?\}(.*)") + # Useless file #2: <filename.ext>; capture subsequent text + file_useless2_rx = re.compile(r"<\"?(?:\.|\.\./)*[^\.]+\.[^>]*\"?>(.*)") + # attempt to filter out log messages like this: + # (package) continued warning... + # from being considered files + file_badmatch_rx = re.compile(r"^\s*\([a-zA-Z]+\)\s{4,}.+") + pagenum_begin_rx = re.compile(r"\s*\[\d*(.*)") + line_rx = re.compile(r"^l\.(\d+)\s(.*)") # l.nn <text> + + warning_rx = re.compile(r"^(.*?) 
Warning: (.+)") # Warnings, first line + line_rx_latex_warn = re.compile(r"input line (\d+)\..*") # Warnings, line number + + badbox_rx = re.compile(r"^(.*?)Overfull (.*)") # Bad box warning + line_rx_latex_badbox = re.compile(r"lines (\d+)--(.*?)") # Bad box lines + matched_parens_rx = re.compile(r"\([^()]*\)") # matched parentheses, to be deleted (note: not if nested) + assignment_rx = re.compile(r"\\[^=]*=") # assignment, heuristics for line merging + # Special case: the xy package, which reports end of processing with "loaded)" or "not reloaded)" + xypic_begin_rx = re.compile(r"[^()]*?(?:not re)?loaded\)(.*)") + xypic_rx = re.compile(r".*?(?:not re)?loaded\)(.*)") + # Special case: the comment package, which prints ")" after some text + comment_rx = re.compile(r"Excluding comment '.*?'(.*)") + + files = [] + xypic_flag = False # If we have seen xypic, report a warning, not an error for incorrect parsing + + # Support function to handle warnings + def handle_warning(l): + if files==[]: + location = "[no file]" + parsing.append("PERR [handle_warning no files] " + l) + debug("PERR [handle_warning no files] (%d)" % (line_num,)) + else: + location = files[-1] + + warn_match_line = line_rx_latex_warn.search(l) + + if warn_match_line: + warn_line = warn_match_line.group(1) + warnings.append(location + ":" + warn_line + ": " + l) + else: + warnings.append(location + ": " + l) + + # Support function to handle bad boxes + def handle_badbox(l): + if files==[]: + location = "[no file]" + parsing.append("PERR [handle_badbox no files] " + l) + debug("PERR [handle_badbox no files] (%d)" % (line_num,)) + else: + location = files[-1] + + badbox_match_line = line_rx_latex_badbox.search(l) + + if badbox_match_line: + badbox_line = badbox_match_line.group(1) + badboxes.append(location + ":" + badbox_line + ": " + l) + else: + badboxes.append(location + ": " + l) + + # State definitions + STATE_NORMAL = 0 + STATE_SKIP = 1 + STATE_REPORT_ERROR = 2 + STATE_REPORT_WARNING = 3 + + 
state = STATE_NORMAL + + # Use our own iterator instead of for loop + log_iterator = log.__iter__() + line_num = 0 + line = "" + linelen = 0 + + recycle_extra = False # Should we add extra to newly read line? + reprocess_extra = False # Should we reprocess extra, without reading a new line? + emergency_stop = False # If TeX stopped processing, we can't pop all files + incomplete_if = False # Ditto if some \if... statement is not complete + + while True: + # first of all, see if we have a line to recycle (see heuristic for "l.<nn>" lines) + if recycle_extra: + line, linelen = extra, extralen + recycle_extra = False + line_num += 1 + elif reprocess_extra: + line = extra # NOTE: we must remember that we are reprocessing. See long-line heuristics + else: # we read a new line + # save previous line for "! File ended while scanning use of..." message + prev_line = line + try: + line, linelen = advance_iterator(log_iterator) # will fail when no more lines + line_num += 1 + except StopIteration: + break + # Now we deal with TeX's decision to truncate all log lines at 79 characters + # If we find a line of exactly 79 characters, we add the subsequent line to it, and continue + # until we find a line of less than 79 characters + # The problem is that there may be a line of EXACTLY 79 chars. We keep our fingers crossed but also + # use some heuristics to avoid disastrous consequences + # We are inspired by latexmk (which has no heuristics, though) + + # HEURISTIC: the first line is always long, and we don't care about it + # also, the **<file name> line may be long, but we skip it, too (to avoid edge cases) + # We make sure we are NOT reprocessing a line!!! + # Also, we make sure we do not have a filename match, or it would be clobbered by exending! 
+ if (not reprocess_extra) and line_num > 1 and linelen >= 79 and line[0:2] != "**": + debug ("Line %d is %d characters long; last char is %s" % (line_num, len(line), line[-1])) + # HEURISTICS HERE + extend_line = True + recycle_extra = False + # HEURISTIC: check first if we just have a long "(.../file.tex" (or similar) line + # A bit inefficient as we duplicate some of the code below for filename matching + file_match = file_rx.match(line) + if file_match: + if line.startswith('runsystem') or file_badmatch_rx.match(line): + debug("Ignoring possible file: " + line) + file_match = False + + if file_match: + debug("MATCHED (long line)") + file_name = file_match.group(1) + + # remove quotes if necessary, but first save the count for a later check + quotecount = file_name.count("\"") + file_name = file_name.replace("\"", "") + + # Normalize the file path + file_name = os.path.normpath(file_name) + if not os.path.isabs(file_name): + file_name = os.path.normpath(os.path.join(root_dir, file_name)) + + file_extra = file_match.group(2) + file_match.group(3) # don't call it "extra" + + # NOTE: on TL201X pdftex sometimes writes "pdfTeX warning" right after file name + # This may or may not be a stand-alone long line, but in any case if we + # extend, the file regex will fire regularly + if file_name[-6:] == "pdfTeX" and file_extra[:8] == " warning": + debug("pdfTeX appended to file name, extending") + # Else, if the extra stuff is NOT ")" or "", we have more than a single + # file name, so again the regular regex will fire + elif file_extra not in [")", ""]: + debug("additional text after file name, extending") + # If we have exactly ONE quote, we are on Windows but we are missing the final quote + # in which case we extend, because we may be missing parentheses otherwise + elif quotecount==1: + debug("only one quote, extending") + # Now we have a long line consisting of a potential file name alone + # Check if it really is a file name + elif (not os.path.isfile(file_name)) 
and debug_skip_file(file_name, root_dir): + debug("Not a file name") + else: + debug("IT'S A (LONG) FILE NAME WITH NO EXTRA TEXT") + extend_line = False # so we exit right away and continue with parsing + + while extend_line: + debug("extending: " + line) + try: + # different handling for Python 2 and 3 + extra, extralen = advance_iterator(log_iterator) + debug("extension? " + extra) + line_num += 1 # for debugging purposes + # HEURISTIC: if extra line begins with "Package:" "File:" "Document Class:", + # or other "well-known markers", + # we just had a long file name, so do not add + if extralen > 0 and ( + extra[0:5] == "File:" or + extra[0:8] == "Package:" or + extra[0:11] == "Dictionary:" or + extra[0:15] == "Document Class:" + ) or ( + extra[0:9] == "LaTeX2e <" or + assignment_rx.match(extra) + ): + extend_line = False + # no need to recycle extra, as it's nothing we are interested in + # HEURISTIC: when TeX reports an error, it prints some surrounding text + # and may use the whole line. Then it prints "...", and "l.<nn> <text>" on a new line + # pdftex warnings also use "..." at the end of a line. + # If so, do not extend + elif line[-3:]=="...": # and line_rx.match(extra): # a bit inefficient as we match twice + debug("Found [...]") + extend_line = False + recycle_extra = True # make sure we process the "l.<nn>" line! + # unsure about this... + # if the "extra" (next line) starts with a ( and we already have a + # valid file, this likely starts something else we need to + # process as a file, so add a space... + elif extralen > 0 and extra[0] == '(' and ( + os.path.isfile(file_name) or not debug_skip_file(file_name, root_dir) + ): + line += " " + extra + debug("Extended: " + line) + linelen += extralen + 1 + if extralen < 79: + extend_line = False + else: + line += extra + debug("Extended: " + line) + linelen += extralen + if extralen < 79: + extend_line = False + except StopIteration: + extend_line = False # end of file, so we must be done. 
This shouldn't happen, btw + + # NOW WE GOT OUR EXTENDED LINE, SO START PROCESSING + + # We may skip the above "if" because we are reprocessing a line, so reset flag: + reprocess_extra = False + # Check various states + if state==STATE_SKIP: + state = STATE_NORMAL + continue + if state==STATE_REPORT_ERROR: + # skip everything except "l.<nn> <text>" + debug("Reporting error in line: " + line) + # We check for emergency stops here, too, because it may occur before the l.nn text + if "! Emergency stop." in line: + emergency_stop = True + debug("Emergency stop found") + continue + err_match = line_rx.match(line) + if not err_match: + continue + # now we match! + # state = STATE_NORMAL + # TeX splits the error line in two, so we skip the + # second part. In the future we may want to capture that, too + # and figure out the column, perhaps. + state = STATE_SKIP + err_line = err_match.group(1) + err_text = err_match.group(2) + # err_msg is set from last time + if files==[]: + location = "[no file]" + parsing.append("PERR [STATE_REPORT_ERROR no files] " + line) + debug("PERR [STATE_REPORT_ERROR no files] (%d)" % (line_num,)) + else: + location = files[-1] + debug("Found error: " + err_msg) + errors.append(location + ":" + err_line + ": " + err_msg + " [" + err_text + "]") + continue + if state == STATE_REPORT_WARNING: + # add current line and check if we are done or not + current_warning += line + if len(line) == 0 or line[-1] == '.': + handle_warning(current_warning) + current_warning = None + state = STATE_NORMAL # otherwise the state stays at REPORT_WARNING + continue + if line=="": + continue + + # Sometimes an \if... 
is not completed; in this case some files may remain on the stack + # I think the same format may apply to different \ifXXX commands, so make it flexible + if len(line)>0 and line.strip()[:23]=="(\\end occurred when \\if" and \ + line.strip()[-15:]=="was incomplete)": + incomplete_if = True + debug(line) + + # Skip things that are clearly not file names, though they may trigger false positives + if len(line) > 0 and ( + line[0:5] == "File:" or + line[0:8] == "Package:" or + line[0:11] == "Dictionary:" or + line[0:15 ] == "Document Class:" + ) or ( + line[0:9] == "LaTeX2e <" or assignment_rx.match(line) + ): + continue + + # Are we done? Get rid of extra spaces, just in case (we may have extended a line, etc.) + if line.strip() == "Here is how much of TeX's memory you used:": + if len(files)>0: + if emergency_stop or incomplete_if: + debug("Done processing, files on stack due to known conditions (all is fine!)") + elif xypic_flag: + parsing.append("PERR [files on stack (xypic)] " + ";".join(files)) + else: + parsing.append("PERR [files on stack] " + ";".join(files)) + files=[] + # break + # We cannot stop here because pdftex may yet have errors to report. + + # Special error reporting for e.g. \footnote{text NO MATCHING PARENS & co + if "! 
File ended while scanning use of" in line: + scanned_command = line[35:-2] # skip space and period at end + # we may be unable to report a file by popping it, so HACK HACK HACK + file_name, linelen = advance_iterator(log_iterator) # <inserted text> + file_name, linelen = advance_iterator(log_iterator) # \par + file_name, linelen = advance_iterator(log_iterator) + file_name = file_name[3:] # here is the file name with <*> in front + errors.append("TeX STOPPED: " + line[2:-2]+prev_line[:-5]) + errors.append("TeX reports the error was in file:" + file_name) + continue + + # Here, make sure there was no uncaught error, in which case we do more special processing + # This will match both tex and pdftex Fatal Error messages + if "==> Fatal error occurred," in line: + debug("Fatal error detected") + if errors == []: + errors.append("TeX STOPPED: fatal errors occurred. Check the TeX log file for details") + continue + + # If tex just stops processing, we will be left with files on stack, so we keep track of it + if "! Emergency stop." in line: + state = STATE_SKIP + emergency_stop = True + debug("Emergency stop found") + continue + + # TOo many errors: will also have files on stack. For some reason + # we have to do differently from above (need to double-check: why not stop processing if + # emergency stop, too?) + if "(That makes 100 errors; please try again.)" in line: + errors.append("Too many errors. 
TeX stopped.") + debug("100 errors, stopping") + break + + # catch over/underfull + # skip everything for now + # Over/underfull messages end with [] so look for that + if line[0:8] == "Overfull" or line[0:9] == "Underfull": + + current_badbox = line; + if line[-2:]=="[]": # one-line over/underfull message + handle_badbox(current_badbox) + continue + + ou_processing = True + while ou_processing: + try: + line, linelen = advance_iterator(log_iterator) # will fail when no more lines + except StopIteration: + debug("Over/underfull: StopIteration (%d)" % line_num) + break + line_num += 1 + debug("Over/underfull: skip " + line + " (%d) " % line_num) + # Sometimes it's " []" and sometimes it's "[]"... +# if len(line)>0 and line[:3] == " []" or line[:2] == "[]": + # NO, it really should be just " []" + if len(line)>0 and line == " []": + ou_processing = False + else: + current_badbox += line + + if ou_processing: + warnings.append("Malformed LOG file: over/underfull") + warnings.append("Please let me know via GitHub") + break + else: + handle_badbox(current_badbox) + continue + + # Special case: the bibgerm package, which has comments starting and ending with + # **, and then finishes with "**)" + if len(line)>0 and line[:2] == "**" and line[-3:] == "**)" \ + and files and "bibgerm" in files[-1]: + debug("special case: bibgerm") + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + continue + + # Special case: the relsize package, which puts ")" at the end of a + # line beginning with "Examine \". Ah well! + if len(line)>0 and line[:9] == "Examine \\" and line[-3:] == ". 
)" \ + and files and "relsize" in files[-1]: + debug("special case: relsize") + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + continue + + # Special case: the comment package, which puts ")" at the end of a + # line beginning with "Excluding comment 'something'" + # Since I'm not sure, we match "Excluding comment 'something'" and recycle the rest + comment_match = comment_rx.match(line) + if comment_match and files and "comment" in files[-1]: + debug("special case: comment") + extra = comment_match.group(1) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + + # Special case: the numprint package, which prints a line saying + # "No configuration file... found.)" + # if there is no config file (duh!), and that (!!!) signals the end of processing :-( + + if len(line)>0 and line.strip() == "No configuration file `numprint.cfg' found.)" \ + and files and "numprint" in files[-1]: + debug("special case: numprint") + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + continue + + # Special case: xypic's "loaded)" at the BEGINNING of a line. Will check later + # for matches AFTER other text. + xypic_match = xypic_begin_rx.match(line) + if xypic_match: + debug("xypic match before: " + line) + # Do an extra check to make sure we are not too eager: is the topmost file + # likely to be an xypic file? 
Look for xypic in the file name + if files and "xypic" in files[-1]: + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + extra = xypic_match.group(1) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + else: + debug("Found loaded) but top file name doesn't have xy") + + # mostly these are caused by hyperref and re-using internal identifiers + if "pdfTeX warning (ext4): destination with the same identifier" in line: + # add warning + handle_warning(line[line.find("destination with the same identifier"):]) + continue + + line = line.strip() # get rid of initial spaces + # note: in the next line, and also when we check for "!", we use the fact that "and" short-circuits + # denotes end of processing of current file: pop it from stack + if len(line) > 0 and line[0] == ')': + if files: + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + extra = line[1:] + debug("Reprocessing " + extra) + reprocess_extra = True + continue + else: + parsing.append("PERR [')' no files]") + debug("PERR [')' no files] (%d)" % (line_num,)) + break + + # Opening page indicators: skip and reprocess + # Note: here we look for matches at the BEGINNING of a line. We check again below + # for matches elsewhere, but AFTER matching for file names. + pagenum_begin_match = pagenum_begin_rx.match(line) + if pagenum_begin_match: + extra = pagenum_begin_match.group(1) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + + # Closing page indicators: skip and reprocess + # Also, sometimes we have a useless file <file.tex, then a warning happens and the + # last > appears later. Pick up such stray >'s as well. + if len(line)>0 and line[0] in [']', '>']: + extra = line[1:] + debug("Reprocessing " + extra) + reprocess_extra = True + continue + + # Useless file matches: {filename.ext} or <filename.ext>. 
We just throw it out + file_useless_match = file_useless1_rx.match(line) or file_useless2_rx.match(line) + if file_useless_match: + extra = file_useless_match.group(1) + debug("Useless file: " + line) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + + + # this seems to happen often: no need to push / pop it + if line[:12]=="(pdftex.def)": + continue + + # Now we should have a candidate file. We still have an issue with lines that + # look like file names, e.g. "(Font) blah blah data 2012.10.3" but those will + # get killed by the isfile call. Not very efficient, but OK in practice + debug("FILE? Line:" + line) + file_match = file_rx.match(line) + if file_match: + if line.startswith('runsystem') or file_badmatch_rx.match(line): + debug("Ignoring possible file: " + line) + file_match = False + + if file_match: + debug("MATCHED") + file_name = file_match.group(1) + file_name = os.path.normpath(file_name.strip('"')) + + if not os.path.isabs(file_name): + file_name = os.path.normpath(os.path.join(root_dir, file_name)) + + extra = file_match.group(2) + file_match.group(3) + # remove quotes if necessary + file_name = file_name.replace("\"", "") + # on TL2011 pdftex sometimes writes "pdfTeX warning" right after file name + # so fix it + # TODO: report pdftex warning + if file_name[-6:]=="pdfTeX" and extra[:8]==" warning": + debug("pdfTeX appended to file name; removed") + file_name = file_name[:-6] + extra = "pdfTeX" + extra + # This kills off stupid matches + if (not os.path.isfile(file_name)) and debug_skip_file(file_name, root_dir): + #continue + # NOTE BIG CHANGE HERE: CONTINUE PROCESSING IF NO MATCH + pass + else: + debug("IT'S A FILE!") + files.append(file_name) + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + # Check if it's a xypic file + if (not xypic_flag) and "xypic" in file_name: + xypic_flag = True + debug("xypic detected, demoting parsing error to warnings") + # now we recycle the remainder of this line + debug("Reprocessing 
" + extra) + reprocess_extra = True + continue + + # Special case: match xypic's " loaded)" markers + # You may think we already checked for this. But, NO! We must check both BEFORE and + # AFTER looking for file matches. The problem is that we + # may have the " loaded)" marker either after non-file text, or after a loaded + # file name. Aaaarghh!!! + xypic_match = xypic_rx.match(line) + if xypic_match: + debug("xypic match after: " + line) + # Do an extra check to make sure we are not too eager: is the topmost file + # likely to be an xypic file? Look for xypic in the file name + if files and "xypic" in files[-1]: + debug(" "*len(files) + files[-1] + " (%d)" % (line_num,)) + f = files.pop() + debug(u"Popped file: {0} ({1})".format(f, line_num)) + extra = xypic_match.group(1) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + else: + debug("Found loaded) but top file name doesn't have xy") + + if len(line)>0 and line[0]=='!': # Now it's surely an error + debug("Error found: " + line) + # If it's a pdftex error, it's on the current line, so report it + if "pdfTeX error" in line: + err_msg = line[1:].strip() # remove '!' and possibly spaces + # This may or may not have a file location associated with it. + # Be conservative and do not try to report one. + errors.append(err_msg) + errors.append("Check the TeX log file for more information") + continue + # Now it's a regular TeX error + err_msg = line[2:] # skip "! " + # next time around, err_msg will be set and we'll extract all info + state = STATE_REPORT_ERROR + continue + + # Second match for opening page numbers. We now use "search" which matches + # everywhere, not just at the beginning. We do so AFTER matching file names so we + # don't miss any. 
+ pagenum_begin_match = pagenum_begin_rx.search(line) + if pagenum_begin_match: + debug("Matching [xx after some text") + extra = pagenum_begin_match.group(1) + debug("Reprocessing " + extra) + reprocess_extra = True + continue + + warning_match = warning_rx.match(line) + if warning_match: + # if last character is a dot, it's a single line + if line[-1] == '.': + handle_warning(line) + continue + # otherwise, accumulate it + current_warning = line + state = STATE_REPORT_WARNING + continue + + # If there were parsing issues, output them to debug + if parsing: + warnings.append("(Log parsing issues. Disregard unless something else is wrong.)") + print_debug = True + for l in parsing: + debug(l) + return (errors, warnings, badboxes) + + +# If invoked from the command line, parse provided log file + +if __name__ == '__main__': + print_debug = True + interactive = True + try: + logfilename = sys.argv[1] + if len(sys.argv) == 3: + extra_file_ext = sys.argv[2].split(" ") + data = open(logfilename, 'rb').read() + root_dir = os.path.dirname(logfilename) + errors, warnings, badboxes = parse_tex_log(data, root_dir) + print("") + print("Errors:") + for err in errors: + print(err) + print("") + print ("Warnings:") + for warn in warnings: + print(warn) + print("") + print("Bad boxes:") + for box in badboxes: + print(box) + except Exception as e: + import traceback + traceback.print_exc() \ No newline at end of file -- GitLab