"""
a klusterbox module: Klusterbox Converter for Employee Everything Reports from PDF to CSV format
this module contains the pdf converter which reads employee everything reports in the pdf format and converts them
into csv formatted employee everything reports which can be read by the automatic data entry, auto overmax finder and
the employee everything reader.
"""
from kbtoolbox import inquire, dir_filedialog, find_pp, PdfConverterFix, titlebar_icon, Convert, ProgressBarDe
# Standard Libraries
from tkinter import messagebox, filedialog, ttk, Label, Tk
from datetime import timedelta
import os
import csv
from io import StringIO  # change from cStringIO to io for py 3x
import time
import re
import fitz  # named PyMuPDF in requirements
# PDF Converter Libraries
from pdfminer.pdfparser import PDFParser  # named pdfminer.six in requirements
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, resolve1
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage


class PdfConverter:
    """ take a weekly employee everything report and convert it into the csv format. """

    def __init__(self):
        self.frame = None
        self.gen_error_report = None  # option to generate error report
        self.gen_raw_report = None  # option to generate raw text file from pdf miner output
        self.allow_txt_reader = None  # directs the pdf converter to read from text file instead of pdf file
        self.station = None
        self.starttime = None
        self.endtime = None
        self.text = None  # text generated by pdfminer or a text file of pdfminer output
        self.pages = None  # an array of pages.
        self.file_path = None  # the input file with path
        self.new_file_path = None  # the new output file name with the path
        self.short_file_name = None  # the new output file name without the path
        self.kbpc_rpt = None
        self.kbpc_rpt_file_path = None
        self.kbpc_raw_rpt_file_path = None
        self.writer = None
        self.validtext = False
        self.csv_doc = None
        self.csv_count = 1  # counts the number of csv files generated
        self.multi_csv = []  # an array of csv file names
        self.pb = None  # progress bar object
        self.pbi = 0  # progress bar count index
        self.movecode_holder = None
        self.date_holder = []
        self.underscore_slash_result = None
        self.yyppwk = None
        self.page_num = 1  # initialize var to count pages
        self.eid_count = 0  # initialize var to count underscore dash items
        self.daily_underscoreslash = []
        self.mv_holder = []
        self.time_holder = []
        self.timezone_holder = []
        self.finance_holder = []
        self.foundday_holder = []
        self.daily_array = []
        self.franklin_array = []
        self.mv_desigs = ("BT", "MV", "ET", "OT", "OL", "IL", "DG")
        self.days = ("Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
        self.saved_pp = ""  # hold the pp to identify if it changes
        self.pp_days = []  # array of date/time objs for each day in the week
        self.found_days = []  # array for holding days worked
        self.alt_founddays = []  # array for holding days worked - found by alternate method.
        self.base_time = []  # array for holding hours worked during the day
        self.eid = ""  # hold the employee id
        self.prime_info = ""  # the first part of the csv line
        self.lastname = ""  # holds the last name of the employee
        self.fi = ""
        self.jobs = []  # holds the d/a code
        self.jobs_alt = []  # holds the d/a code if it appears in an altered format
        self.routes = []  # holds the route
        self.level = []  # hold the level (one or two normally)
        self.base_temp = ("Base", "Temp")
        self.eid_label = False
        self.lookforname = False
        self.lookforfi = False
        self.lookforroute = False
        self.lookfor2route = False
        self.lookforlevel = False
        self.lookfor2level = False
        self.base_counter = 0
        self.base_chg = 0
        self.lookfortimes = False
        self.unprocessedrings = ""
        self.new_page = False
        self.unprocessed_counter = 0
        self.mcgrath_indicator = False
        self.mcgrath_carryover = ""
        self.rod_rpt = []  # error reports
        self.frank_rpt = []
        self.rose_rpt = []
        self.robert_rpt = []
        self.stevens_rpt = []
        self.carroll_rpt = []
        self.nguyen_rpt = []
        self.salih_rpt = []
        self.unruh_rpt = []
        self.mcgrath_rpt = []
        self.levelindexerror_rpt = []
        self.routesindexerror_rpt = []
        # denton error - employee id not showing up till end of page causes error with found days.
        self.denton_rpt = []
        self.unresolved = []
        self.basecounter_error = []
        self.failed = []
        self.daily_array_days = []  # build an array of formatted days with just month/ day
        self.csv_sat = []
        self.csv_sun = []
        self.csv_mon = []
        self.csv_tue = []
        self.csv_wed = []
        self.csv_thr = []
        self.csv_fri = []
        self.csv_output = []
        self.page = None  # the pdfminer text document of a page
        csv.register_dialect('myDialect',
                             delimiter=',',
                             quoting=csv.QUOTE_NONE,
                             skipinitialspace=True,
                             lineterminator="\r",
                             escapechar='!'
                             )

    def run(self, frame):
        """ this is a master method for running the other methods in the proper order. """
        self.frame = frame
        # run the process to get settings, create the csv, error report and check the text
        if not self.StartUp(self).run():
            return  # return if there no file selected or if there is an error
        if not self.PageAnalysis(self).run():  # will analyse groups and lines in loops
            return  # return if a file being written to is open.
        # this class finishes up by generating an summary at the end of the error report and showing one or two
        #         messageboxes.
        self.FinishUp(self).run()

    def get_path(self, add_on, extension):
        """ generate csv file name and path """
        file_parts = self.file_path.split("/")  # split path into folders and file
        file_name_xten = file_parts[len(file_parts) - 1]  # get the file name from the end of the path
        file_name = file_name_xten[:-4]  # remove the file extension from the file name
        file_name = file_name.replace("_raw_kbpc", "")
        path = self.file_path[:-len(file_name_xten)]  # get the path back to the source folder
        new_fname = file_name + add_on  # add suffix to to show converted pdf to csv
        new_file_path = path + new_fname + extension  # new path with modified file name
        return new_file_path

    def get_shortname(self):
        """ get the last part of the file name"""
        file_parts = self.new_file_path.split("/")  # split path into folders and file
        file_name_xten = file_parts[len(file_parts) - 1]  # get the file name from the end of the path
        return file_name_xten

    def pdf_converter_reorder_founddays(self):
        """ makes sure the days are in the proper order. """
        new_order = []
        correct_series = ("Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
        for cs in correct_series:
            if cs in self.found_days:
                new_order.append(cs)
        return new_order

    def create_new_file(self):
        """ create the new csv file that is generated by the pdf converter. """
        # create the csv file and write the first line
        line = ["TAC500R3 - Employee Everything Report"]
        try:  # handle error if the file is already open
            self.csv_doc = open(self.new_file_path, "w")
        except PermissionError:
            messagebox.showerror("PDF Converter",
                                 "The file written to must be closed. Close {}, then rerun "
                                 "Automatic Data Entry".format(self.new_file_path),
                                 parent=self.frame)
            try:  # handle error if the files don't exist
                self.csv_doc.close()  # close the csv document before returning
                if self.gen_error_report:
                    self.kbpc_rpt.close()  # close the error report before returning
            except (PermissionError, AttributeError):
                pass
            return False
        self.writer = csv.writer(self.csv_doc, dialect='myDialect')
        self.writer.writerow(line)
        # define csv writer parameters
        line = ["YrPPWk", "Finance No", "Organization Name", "Sub-Unit", "Employee Id", "Last Name",
                "FI", "MI", "Pay Loc/Fin Unit", "Var. EAS", "Borrowed", "Auto H/L", "Annual Lv Bal",
                "Sick Lv Bal", "LWOP Lv Bal", "FMLA Hrs", "FMLA Used", "SLDC Used", "Job", "D/A", "LDC",
                "Oper/Lu", "RSC", "Lvl", "FLSA", "Route #", "Loaned Fin #", "Effective Start",
                "Effective End", "Begin Tour", "End Tour", "Lunch Amt", "1261 Ind", "Lunch Ind",
                "Daily Sched Ind", "Time Zone", "FTF", "OOS", "Day", ]
        self.writer = csv.writer(self.csv_doc, dialect='myDialect')
        self.writer.writerow(line)
        return True

    def validation_error(self, _type):
        """ show an error messagebox after a failed validation.

        _type picks the message: "nostation" if no station was found, "nopayperiod" if no pay period was
        found on the current page and the general message for anything else. for the types "general" and
        "nostation" the files of the conversion are closed and destroyed as well. """
        if _type == "nopayperiod":
            msg = "No pay period could be indentified in this Employee Everything Report. \n\n" \
                  "The PDF Converter can not process page {} ".format(self.page_num)
        elif _type == "nostation":
            msg = "No station could be indentified in this Employee Everything Report. \n\n" \
                  "The PDF Converter will not generate a file "
        else:
            msg = "This file is either not an Employee Everything Report or is not a searchable pdf file. \n\n " \
                  "The PDF Converter will not generate a file "
        messagebox.showerror("Klusterbox PDF Converter", msg, parent=self.frame)
        if _type == "general" or _type == "nostation":
            self.close_destroy()  # the conversion has failed, so close and destroy the files.

    def solve_level_indexerror(self, i):
        """ this will handle rare cases were length of the jobs array is longer than the level array by
        adding to the level array. """
        try:
            if self.level[i]:  # if there is an index for the level array
                pass  # no nothing
        except IndexError:  # if there is not an index for the level array
            self.levelindexerror_rpt.append(self.lastname)
            self.level.append(self.level[0])  # use the first element of the level array

    def solve_routes_indexerror(self, i):
        """ this will handle rare cases were length of the jobs array is longer than the level array by
        adding to the level array. """
        try:
            if self.routes[i]:  # if there is an index for the level array
                pass  # no nothing
        except IndexError:  # if there is not an index for the level array
            self.routesindexerror_rpt.append(self.lastname)
            self.routes.append(self.routes[0])  # use the first element of the level array

    def close_destroy(self):
        """ close and destroy files if the conversion process fails. """
        if os.path.exists(self.new_file_path):  # destroy the new csv file
            self.csv_doc.close()
            os.remove(self.new_file_path)
        if self.gen_error_report:  # close and destroy the error report
            if self.kbpc_rpt:
                self.kbpc_rpt.close()
            if self.kbpc_rpt_file_path is not None:
                if os.path.exists(self.kbpc_rpt_file_path):
                    os.remove(self.kbpc_rpt_file_path)
        if self.gen_raw_report:  # destroy the raw pdf miner file
            if self.kbpc_raw_rpt_file_path is not None:
                if os.path.exists(self.kbpc_raw_rpt_file_path):
                    os.remove(self.kbpc_raw_rpt_file_path)

    class StartUp:
        """
        run the process to get settings, create the csv, error report and check the text
        """

        def __init__(self, parent):
            """ parent is the PdfConverter object. the methods of this class read and write its attributes. """
            self.parent = parent

        def run(self):
            """ master method for running other methods in proper sequence. """
            self.get_settings()  # gets preferences from tolerances table.
            if self.parent.allow_txt_reader:  # if you have opted to read from a text file instead of a pdf file...
                # allows the user to read the pdf output from a text file instead of a pdf
                if not self.ask_readfrom_rawtxt():
                    return False  # if the conversion fails, then end process
            else:
                if not self.readfrom_pdf():  # read from the pdf file without asking
                    return False  # end process if there is a failure.
            if self.parent.gen_raw_report:  # put the raw output from the pdf conversion into a text file
                if not self.create_raw_report():
                    return False
            if self.parent.gen_error_report:  # create text document for data extracted from the raw pdfminer output
                self.create_error_report()
            if not self.parent.create_new_file():  # create and start the csv file that is created by the pdf converter.
                return False
            self.split_document()  # split the document into pages.
            if not self.parent.validtext:  # if validate text has not been run in pdf to text.
                # search for a station name to make sure the doc is an ee report
                if not self.validate_text(self.parent.pages[0]):
                    return False
            if not self.get_station():  # find the station
                return False
            return True

        def get_settings(self):
            """ look up the options the user has chosen for the pdf converter in the tolerances table and
            store them on the parent. also starts the timer. """

            def option(category):
                """ fetch the tolerance of one category and run it through Convert.onoff_to_bool """
                sql = "SELECT tolerance FROM tolerances WHERE category ='%s'" % category
                return Convert(inquire(sql)[0][0]).onoff_to_bool()

            self.parent.gen_error_report = option("pdf_error_rpt")
            self.parent.gen_raw_report = option("pdf_raw_rpt")
            self.parent.starttime = time.time()  # start the timer
            # this option makes it possible for the user to select a text file
            self.parent.allow_txt_reader = option("pdf_text_reader")

        def ask_readfrom_rawtxt(self):
            """ ask the user if the output of pdfminer should be read from a text file instead of the pdf
            file, then read from the source which was chosen. returns True if reading succeeded and False
            if it did not. """
            msg = ('The "Read from text file" option is selected for the pdf converter. '
                   'This is an advanced setting used for debugging and development. Go to "Management > '
                   'Pdf Converter Settings" to disable it. \n\n'
                   'Select "Yes" to read from a text file generated by the pdf converter (this be a text '
                   'file with a suffix of "_raw_kbpc.txt").\n\n'
                   'Select "No" to read from a pdf file. Extra time will be needed to process the pdf file. ')
            wants_text_file = messagebox.askyesno("PDF Converter", msg, parent=self.parent.frame)
            # "Yes" reads the raw klusterbox pdf converter text file, "No" reads the pdf.
            reader = self.readfrom_rawtext if wants_text_file else self.readfrom_pdf
            return bool(reader())

        def readfrom_rawtext(self):
            """ let the user pick a text file holding the output of a klusterbox application of pdfminer (a
            file with the suffix '_raw_kbpc.txt') and load it into self.parent.text.
            returns False if no file is selected, if the user opts not to overwrite an existing csv file or
            if the file can not be found. returns True otherwise. """
            chosen = filedialog.askopenfilename(initialdir=dir_filedialog(),
                                                filetypes=[("text files", "*.txt")])  # get the text file
            self.parent.file_path = chosen
            # end the process if no file is selected, or if the csv file already exists and the user opts
            # to not overwrite it.
            if not chosen or not self.get_csvpath():
                return False
            self.parent.gen_raw_report = False  # a raw report is being read, so turn off the generator
            try:
                with open(self.parent.file_path, 'r') as file:
                    contents = file.read()
            except FileNotFoundError:
                return False
            self.parent.text = contents  # put the contents of the txt file in the text variable
            return True

        def readfrom_pdf(self):
            """ let the user pick a pdf file, then translate it with pdfminer into text which is stored in
            self.parent.text. returns True if that worked. returns False if no valid file is selected, the
            user opts not to overwrite an existing csv file, the user cancels or the translation fails. """
            try:
                self.parent.file_path = filedialog.askopenfilename(
                    initialdir=dir_filedialog(), filetypes=[("PDF files", "*.pdf")])  # get the pdf file
            except FileNotFoundError:  # the user failed to select a valid file.
                return False
            # end the process if no file is selected, or if the csv file already exists and the user opts
            # to not overwrite it.
            if not (self.parent.file_path and self.get_csvpath()):
                return False
            # warn user that the process can take several minutes
            proceed = messagebox.askokcancel("PDF Converter", "This process will take several minutes. "
                                                              "Did you want to proceed?",
                                             parent=self.parent.frame)
            if not proceed:
                return False
            self.parent.text = self.pdf_to_text()  # read the pdf with pdfminer
            return self.parent.text != "Fail"  # pdf to text returns "Fail" if the process was unsuccessful.

        def get_csvpath(self):
            """ work out the path of the csv file and its shortened file name and store both on the parent.
            if a file with that path already exists the user is asked before it gets overwritten.
            returns False if the user declines and True otherwise. """
            parent = self.parent
            parent.new_file_path = parent.get_path("_kbpc", ".csv")
            parent.short_file_name = parent.get_shortname()
            if not os.path.exists(parent.new_file_path):
                return True  # nothing will be overwritten, so there is nothing to confirm
            question = ("There is already a file named {}. "
                        "If you proceed, the file will be overwritten. "
                        "Did you want to proceed?").format(parent.short_file_name)
            confirmed = messagebox.askokcancel("Possible File Name Discrepancy", question, parent=parent.frame)
            return bool(confirmed)

        def pdf_to_text(self):
            """ Called by readfrom_pdf() to read the pdf with pdfminer.

            the pdf is read up to two times, each time with its own set of layout parameters. after each
            attempt the first page is checked with validate_text() and the first result to pass is used.
            a progress bar window is shown while the pages are read.

            returns the text produced by pdfminer. returns the string "Fail" if the pdf is unreadable, if the
            user declines the second attempt or if both attempts fail the validation. on success
            self.parent.validtext is set to True to show that the validation does not need to be repeated.
            """
            text = None
            codec = 'utf-8'
            password = ""
            maxpages = 0
            caching = (True, True)
            pagenos = set()
            laparams = (
                LAParams(
                    line_overlap=.1,  # best results
                    char_margin=2,
                    line_margin=.5,
                    word_margin=.5,
                    boxes_flow=0,
                    detect_vertical=True,
                    all_texts=True),
                LAParams(
                    line_overlap=.5,  # default settings
                    char_margin=2,
                    line_margin=.5,
                    word_margin=.5,
                    boxes_flow=.5  # detect_vertical=False (default), all_texts=False (default)
                )
            )
            last_attempt = len(laparams) - 1
            for i, layout in enumerate(laparams):
                retstr = StringIO()
                rsrcmgr = PDFResourceManager()
                device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=layout)
                interpreter = PDFPageInterpreter(rsrcmgr, device)
                page_count = self.get_pagecount()  # get page count
                # create progressbar
                pb_root = Tk()  # create a window for the progress bar
                pb_root.geometry("%dx%d+%d+%d" % (450, 75, 200, 300))
                pb_root.title("Klusterbox PDF Converter - reading pdf")
                titlebar_icon(pb_root)  # place icon in titlebar
                Label(pb_root, text="This process takes several minutes. Please wait for results.\n"
                                    "Don't use Klusterbox until the process is finished") \
                    .grid(row=0, column=0, columnspan=2, sticky="w")
                pb_label = Label(pb_root, text="Reading PDF: ")  # make label for progress bar
                pb_label.grid(row=1, column=0, sticky="w")
                pb = ttk.Progressbar(pb_root, length=350, mode="determinate")  # create progress bar
                pb.grid(row=1, column=1, sticky="w")
                pb_text = Label(pb_root, text="", anchor="w")
                pb_text.grid(row=2, column=0, columnspan=2, sticky="w")
                pb["maximum"] = page_count  # set length of progress bar
                pb.start()
                unreadable = False
                try:
                    with open(self.parent.file_path, 'rb') as filein:
                        count = 0
                        for page in PDFPage.get_pages(filein, pagenos, maxpages=maxpages, password=password,
                                                      caching=caching[i]):
                            interpreter.process_page(page)
                            pb["value"] = count  # increment progress bar
                            pb_text.config(text="Reading page: {}/{}".format(count, page_count))
                            pb_root.update()
                            count += 1
                    text = retstr.getvalue()
                except TypeError:  # raised while reading the pages - the pdf is treated as unreadable
                    unreadable = True
                finally:
                    # the progress bar window and the pdfminer buffers are cleaned up on every path, including
                    # exceptions which are not handled here, so no window is left behind.
                    pb.stop()  # stop and destroy the progress bar
                    pb_label.destroy()  # destroy the label for the progress bar
                    pb.destroy()
                    pb_root.destroy()
                    device.close()
                    retstr.close()
                if unreadable:
                    self.error_unreadable()
                    return "Fail"
                # test the results
                # NOTE(review): the character removed by this replace is not visible in the reviewed copy of
                # the source (probably a non printing character) - as written the call changes nothing.
                # confirm the intended character against the repository.
                text = text.replace("", "")
                # pdfminer's TextConverter writes a form feed at the end of every page
                page = text.split("\f")  # split the document into page
                if self.validate_text(page[0]):
                    break
                if i < last_attempt:
                    msg = "PDF Conversion has failed and will not generate a file.  \n\n" \
                          "We will try again."
                    result = messagebox.askokcancel("Klusterbox PDF Converter", msg, parent=self.parent.frame)
                    if not result:
                        return "Fail"
                else:  # if the last attempt failed
                    messagebox.showerror("Klusterbox PDF Converter",
                                         "PDF Conversion has failed and will not generate a file.  \n\n"
                                         "You will either have to obtain the Employee Everything Report "
                                         "in the csv format from management or manually enter in the "
                                         "information",
                                         parent=self.parent.frame)
                    # the text never passed validation, so it must not be returned as if it was valid.
                    return "Fail"
            self.parent.validtext = True  # shows the test does not need to be repeated.
            return text  # return the product of pdf miner if successful

        def get_pagecount(self):
            """ gives a page count for pdf_to_text.

            the count is read from the 'Pages' entry of the pdf's catalog. if it can not be read (TypeError or
            a missing key) 200 is returned as a stand in. the pdf file is closed again before returning. """
            with open(self.parent.file_path, 'rb') as file:
                parser = PDFParser(file)
                try:
                    document = PDFDocument(parser)
                    # This will give you the count of pages
                    return resolve1(document.catalog['Pages'])['Count']
                except (TypeError, KeyError):
                    return 200

        def create_raw_report(self):
            """ put the raw output from the pdf conversion into a text file.

            the path of the file is stored in self.parent.kbpc_raw_rpt_file_path. returns True if the file is
            written. if the text holds characters which can not be encoded an error message is shown and False
            is returned. the file is closed in both cases. """
            # generate the file name and path of the raw report
            self.parent.kbpc_raw_rpt_file_path = self.parent.get_path("_raw_kbpc", ".txt")
            with open(self.parent.kbpc_raw_rpt_file_path, "w") as kbpc_raw_rpt:
                kbpc_raw_rpt.write("KLUSTERBOX PDF CONVERSION REPORT \n\n")
                kbpc_raw_rpt.write("Raw output from pdf miner\n\n")
                datainput = "subject file: {}\n\n".format(self.parent.file_path)
                kbpc_raw_rpt.write(datainput)
                try:
                    kbpc_raw_rpt.write(self.parent.text)
                except UnicodeEncodeError:
                    msg = "Klusterbox PDF Converter has encountered characters which can not be translated and must " \
                          "end the conversion process.\n\n\n" \
                          "Troubleshooting: \n\n" \
                          "1. Make sure this is a Employee Everything Report generated by TACS. " \
                          "Klusterbox PDF Converter can not read Employee Everything Reports scanned from " \
                          "paper hardcopies. Other kinds of USPS reports, such as Overtime Alert Reports, " \
                          "can not be read by Klusterbox Automatic Data Entry. \n" \
                          "2. You will either have to obtain the Employee Everything Report " \
                          "in the csv format from management or manually enter in the " \
                          "information"
                    messagebox.showerror("Klusterbox PDF Converter", msg, parent=self.parent.frame)
                    return False  # leaving the with block closes the file
            return True

        def create_error_report(self):
            """ open the text document for data extracted from the raw pdfminer output and write its heading.
            the path is kept in self.parent.kbpc_rpt_file_path and the file, which is left open, in
            self.parent.kbpc_rpt. """
            parent = self.parent
            parent.kbpc_rpt_file_path = parent.get_path("_kbpc", ".txt")
            parent.kbpc_rpt = open(parent.kbpc_rpt_file_path, "w")
            heading = (
                "KLUSTERBOX PDF CONVERSION REPORT \n\n",
                "Data extracted from pdfminer output and error reports\n\n",
                "subject file: {}\n\n".format(parent.file_path),
            )
            for part in heading:
                parent.kbpc_rpt.write(part)
        def split_document(self):
            """ split the document into pages and store them in self.parent.pages. The character which
            indicates a new page is the form feed, which pdfminer's TextConverter writes at the end of every
            page. it often appears as an up arrow in the text. the form feed is written as an escape sequence
            here so that it can not get lost when the source file is edited or copied. """
            # NOTE(review): the character removed by this replace is not visible in the reviewed copy of the
            # source (probably a non printing character) - as written the call changes nothing. confirm the
            # intended character against the repository.
            self.parent.text = self.parent.text.replace("", "")
            self.parent.pages = self.parent.text.split("\f")  # split the document into pages

        def validate_text(self, field):
            """ make sure the doc is an ee report by searching the field for the key terms Restricted USPS T&A
            Information and Employee Everything Report. returns True if both are found. the first term which
            is missing causes the general validation error to be shown and False to be returned. """
            key_terms = ('Restricted USPS T&A Information', 'Employee Everything Report')
            for term in key_terms:
                if re.search(term, field, re.DOTALL):
                    continue
                self.parent.validation_error("general")  # show error message and terminate process.
                return False
            return True

        def get_station(self):
            """ find the station on the first page and store it in self.parent.station.
            the station is the first line of the text between 'Restricted USPS T&A Information' and 'Employee
            Everything Report'. if that turns out empty, the text between 'Employee Everything Report' and
            'Weekly' is used. returns True if a station is found. otherwise the nostation validation error is
            shown and False is returned. """

            def first_line(pattern, page):
                """ first line of the stripped text captured by the pattern. raises AttributeError if the
                pattern is not found on the page. """
                return re.search(pattern, page, re.DOTALL).group(1).strip().split('\n')[0]

            try:
                self.parent.station = first_line("Restricted USPS T&A Information(.*)Employee Everything Report",
                                                 self.parent.pages[0])
                if not self.parent.station:
                    self.parent.station = first_line("Employee Everything Report(.*)Weekly",
                                                     self.parent.pages[0])
            except AttributeError:  # the page has no station information
                found = False
            else:
                found = bool(self.parent.station)
            if not found:
                self.parent.validation_error("nostation")  # show error message and terminate process.
            return found

        def error_unreadable(self):
            """ tell the user with a message box that the pdf is not readable, then close and destroy the
            files of the failed conversion. """
            message = ("The pdf file you are attempting to convert is unreadable. \n\n"
                       "\nTroubleshooting: \n\n"
                       "1. Make sure this is a Employee Everything Report generated by TACS. "
                       "Klusterbox PDF Converter can not read Employee Everything Reports scanned from paper hardcopies. "
                       "Other kinds of USPS reports, such as Overtime Alert Reports, can not be read by "
                       "Klusterbox Automatic Data Entry. \n"
                       "2. Try opening the pdf in Adobe Acrobat and resaving."
                       "This might overwrite corrupted data.\n"
                       "3. Contact the developer if those steps do not work. \n"
                       "4. You will either have to obtain the Employee Everything Report "
                       "in the csv format from management or manually enter in the "
                       "information")
            messagebox.showerror("PDF Converter", message, parent=self.parent.frame)
            self.parent.close_destroy()  # the conversion process has failed, so close and destroy files.

    class PageAnalysis:
        """ this class breaks the document into pages and then loops to anaylse each page"""

        def __init__(self, parent):
            """ parent is the PdfConverter object. the methods of this class read and write its attributes. """
            self.parent = parent

        def run(self):
            """ master method: start the progress bar, analyse the document page by page and stop the progress
            bar again. returns True if the analysis succeeded and False if it did not. """
            parent = self.parent
            # start the progress bar
            parent.pb = ProgressBarDe(title="Klusterbox PDF Converter - translating pdf",
                                      label="Translating PDF: ",
                                      text="This process will finish shortly. Please wait for results.")
            parent.pb.max_count(len(parent.pages) - 1)  # set length of progress bar
            parent.pb.start_up()
            parent.pbi = 1
            parent.pb.move_count(parent.pbi)  # increment progress bar
            parent.pb.change_text("This process is underway. Please wait for results.")
            succeeded = bool(self.start_analysis())  # go page by page for analysis.
            parent.pb.stop()  # stop and destroy the progress bar, whatever the outcome
            return succeeded

        def get_payperiod(self):
            """
            find the pay period. this is a number formatted as ####-##-# """
            self.parent.yyppwk = None
            # find pay period using search - criteria one
            start = "YrPPWk:\nSub-Unit:\n\n"
            end = "\n"
            if self.find_pp_search(start, end):
                return True
            # find pay period using search - criteria two
            start = "YrPPWk:\n"
            end = "\nSub-Unit:\n"
            if self.find_pp_search(start, end):
                return True
            # find pay period using findall - critera three
            if self.find_pp_findall():
                return True
            if not self.parent.yyppwk:
                self.parent.validation_error("nopayperiod")
                return False
            # find pay period using search - criteria four
            start = "YrPPWk:\n\n"
            end = "\n\nFin. #:"
            if self.find_pp_search(start, end):
                return True
            return True

        def find_pp_search(self, start, end):
            """ look for the pay period using re.search.
            start arg is where the search begins. end arg is where the search ends"""
            try:
                result = re.search('%s(.*)%s' % (start, end), self.parent.page).group(1)
            except AttributeError:
                return False
            result = result.strip()  # remove any white space surrounding the result
            if re.match(r'[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9]', result):  # if the format is correct
                self.parent.yyppwk = result  # save the result as the pay period
                return True
            return False

        def find_pp_findall(self):
            """ look for the pay period using re"""
            multiple_pp = False  # does that report cover more than one pay period
            result = re.findall(r'[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9] to [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9]',
                                self.parent.page)
            if result:  # if there is a result, there are multiple pay periods.
                multiple_pp = True
            result = re.findall(r'[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9]', self.parent.page)
            try:
                if multiple_pp and len(result) >= 3:
                    self.parent.yyppwk = result[2]  # select the third one
                else:
                    self.parent.yyppwk = result[0]  # select the first one
            except (AttributeError, IndexError):
                return False
            return True

        def start_analysis(self):
            """ check that there is information on the page, otherwise skip it. """
            for page in self.parent.pages:
                if page:  # analyse the page if it contains something
                    self.parent.page = page
                    if not self.analyse_page():  # send for page analysis
                        return False
                self.parent.page_num += 1  # increment the page number
            self.parent.endtime = time.time()  # record the end time for the error report
            return True

        def analyse_page(self):
            """ anaylse individual pages sent by start_analysis(). """
            if not self.get_payperiod():  # find the pay period
                return
            if self.parent.saved_pp != self.parent.yyppwk:  # if there is a change in the pay period
                if self.parent.saved_pp:
                    if not self.new_csv():  # create a new file path and create a new csv file.
                        return False  # if the document being written to is open, return False
                self.set_saved_pp()  # set/update the dates if the pay period is changed
            if self.parent.gen_error_report:  # write to the error report
                self.parent.kbpc_rpt.write(
                    "\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")
            if self.parent.page[0:6] == "Report" or self.parent.page[0:6] == "":
                pass
            else:
                if self.parent.gen_error_report:
                    self.parent.kbpc_rpt.write("Out of Sequence Problem!\n")
                self.parent.eid_count = 0
            if self.parent.gen_error_report:
                datainput = "Page: {}\n".format(self.parent.page_num)
                self.parent.kbpc_rpt.write(datainput)
            self.parent.GroupAnalysis(self.parent).run()  # analysis groups of text on a page
            self.parent.WriteCSV(self.parent).run()
            return True

        def new_csv(self):
            """ create a new csv file when the pay period changes. """
            self.parent.csv_doc.close()
            self.parent.csv_count += 1
            add_on = "_kbpc_{}".format(self.parent.csv_count)
            new_path = self.parent.get_path(add_on, ".csv")  # get a new name for the new file path
            self.parent.new_file_path = new_path  # update the self.new_file_path
            if not self.parent.create_new_file():  # create the headers for the new file with self.create_new_file()
                return False
            # add shortened file name to an array for completion messagebox
            self.parent.multi_csv.append(self.parent.get_shortname())
            return True

        def set_saved_pp(self):
            """ if the pay period changes update the dates """
            exploded = self.parent.yyppwk.split("-")  # break up the year/pp string from the ee rpt pdf
            year = exploded[0]  # get the year
            if self.parent.gen_error_report:
                datainput = "Year: {}\n".format(year)
                self.parent.kbpc_rpt.write(datainput)
            pp = exploded[1]  # get the pay period
            if self.parent.gen_error_report:
                datainput = "Pay Period: {}\n".format(pp)
                self.parent.kbpc_rpt.write(datainput)
            pp_wk = exploded[2]  # get the week of the pay period
            if self.parent.gen_error_report:
                datainput = "Pay Period Week: {}\n".format(pp_wk)
                self.parent.kbpc_rpt.write(datainput)
            pp += pp_wk  # join the pay period and the week
            first_date = find_pp(int(year), pp)  # get the first day of the pay period
            if self.parent.gen_error_report:
                datainput = "{}\n".format(str(first_date))
                self.parent.kbpc_rpt.write(datainput)
            self.parent.pp_days = []  # build an array of date/time objects for each day in the pay period
            self.parent.daily_array_days = []  # build an array of formatted days with just month/ day
            for _ in range(7):
                self.parent.pp_days.append(first_date)
                self.parent.daily_array_days.append(first_date.strftime("%m/%d"))
                first_date += timedelta(days=1)
            if self.parent.gen_error_report:
                datainput = "Days in Pay Period: {}\n".format(self.parent.pp_days)
                self.parent.kbpc_rpt.write(datainput)
            self.parent.saved_pp = self.parent.yyppwk  # hold the year/pp to check if it changes

    class GroupAnalysis:
        """
        this class will separate the page into groups separated by empty lines and then analyse those groups
        """

        def __init__(self, parent):
            """ parent is the converter object which holds the page and all of the shared state. """
            self.parent = parent

        def run(self):
            """ split the page into groups of text and process each group in turn. """
            for c in self.parent.page.split("\n\n"):
                # find, categorize and record daily times
                if self.parent.lookfortimes:
                    self.get_times(c)
                self.find_base(c)  # resets the lookfortimes to True if Base is read
                self.find_stevens(c)  # solve for stevens problem / H/L base times not being read
                self.build_underscoreslasharray(c)  # build daily underscoreslash array
                self.parent.LineAnalysis(self.parent).run(c)

        def find_base(self, c):
            """ resets the lookfortimes to True if the group begins with Base """
            if c.startswith("Base"):
                self.parent.lookfortimes = True  # resets the lookfortimes to True

        def get_times(self, c):
            """ if lookfortimes is true, then process the input to look for times information.
            a group holding one time, or several times separated by line breaks, is added to base_time
            together with the current base counter. any other group ends the search: the base counter is
            incremented and lookfortimes is set to False. """
            if re.match(r"0[0-9]{4}:\s0[0-9]{2}\.[0-9]{2}$", c):
                times = [c]  # a single time in the group
            # solve for robertson basetime problem / Base followed by H/L
            elif re.match(r"0[0-9]{4}:\s0[0-9]{2}\.[0-9]{2}\n0[0-9]{4}:\s0[0-9]{2}\.[0-9]{2}", c):
                # NOTE(review): robert_rpt was only ever appended to from a branch of this method that could
                # not run (it required a group without a line break, while this pattern requires one).
                # confirm whether groups with multiple times should be recorded in robert_rpt.
                times = c.split("\n")  # split the times by the line break
            else:
                self.parent.base_counter += 1
                self.parent.lookfortimes = False
                return
            for entry in times:  # add each time individually, combined with the base counter
                self.parent.base_time.append([self.parent.base_counter, entry])
            self.parent.base_chg = self.parent.base_counter  # value to check for errors

        def find_stevens(self, c):
            """ find the stevens problem and fix it """
            if len(self.parent.finance_holder) == 0 and re.match(r"H/L\s", c):  # set trap to catch daily times
                self.parent.lookfortimes = True
                self.parent.stevens_rpt.append(self.parent.lastname)

        def build_underscoreslasharray(self, c):
            """ build daily underscoreslash array.
            the group is added only if it has more than one line and every line is either a date
            (##/##) or the place holder __/__ """
            underscore_slash = c.split("\n")
            if len(underscore_slash) < 2:
                return
            if all(re.match(r"[0-1][0-9]/[0-9][0-9]", us) or us == "__/__" for us in underscore_slash):
                self.parent.daily_underscoreslash.append(underscore_slash)

    class LineAnalysis:
        """
        this class will separate the groups into individual lines and then analyse those lines
        """

        def __init__(self, parent):
            """ store the parent object. the methods of this class read and update its attributes. """
            self.parent = parent

        def run(self, c):
            """ analyse c, one group of text, by breaking it down into individual lines.

            every line is passed, in a fixed order, through the holder methods, the write loop (only when
            the line reads "Employee ID") and the finder methods. the order of the calls matters: a holder
            method resets or hands over an array on the parent which the method called just before it has
            read. """
            d = c.split("\n")
            for e in d:
                # build the daily array
                self.get_routeholder(e)  # get the route following the chain
                self.get_movecode(e)  # get the move code following the chain
                self.get_financeholder(e)  # get the finance number following the chain
                self.get_timeholder(e)  # look for the time zone following chain
                self.get_dateholder(e)  # look for time following date/mv desig
                self.find_franklin(e)  # look for items in franklin array to solve for franklin problem
                self.fix_rodriguez(e)  # solve for rodriguez problem / multiple consecutive mv desigs
                self.get_dateholder2(e)  # look for date following move desig
                self.fix_franklin(e)  # solve for franklin problem: two mv desigs appear consecutively
                self.find_movedesig(e)  # look for move desig and add to mv_holder
                self.find_rose(e)  # solve for rose problem: mv desig and date appearing on same line
                self.find_days(e)  # find and record all days on the report
                if e == "Processed Clock Rings":
                    self.parent.eid_count = 0
                # when "Employee ID" is read, activate the write loop
                if e == "Employee ID":
                    self.parent.eid_label = True
                    self.writeloop_errorreport()  # write to the klusterbox pdf converter report
                    self.bind_underscores()  # bind all underscore slash items in one array
                    self.write_primeinfo()  # write the first part of the csv line
                    self.add_underscoreslash()  # adds the underscore slash results to the daily array
                    self.detect_basecounter_error()  # finds basecounter errors and writes the error to a report.
                    self.init_csvdayarray()  # set up array for each day in the week
                    # reorder the found days to ensure the correct order
                    self.parent.found_days = self.parent.pdf_converter_reorder_founddays()
                    self.fix_basetimes()  # fix problem with miscounted base times
                    self.get_founddays()  # load the multi array with array for each day
                    self.order_founddays()  # reorder the found days to ensure the correct order
                    self.errorrpt_eidcount()  # write the employee id count to the error report
                    self.build_csv_output()  # build the csv output array
                    self.csvoutput_time()  # write csv output containing times or moves
                    self.csvoutput_base()  # write csv output containing base or temp lines
                    self.writeloop_init()  # initialize arrays
                    self.report_eid(e)  # write employee id to error report
                    self.parent.eid_count = 0  # reset eid count to zero
                self.find_fi(e)  # look for first initial
                self.find_name(e)  # look for the last name
                self.find_job(e)  # find the job or d/a code - there might be two
                self.find_job_alt(e)  # same method as find_job, except looks for alternate format
                self.find_temproute(e)  # look for temp route
                self.find_mainroute(e)  # look for the main route
                self.trap_route(e)  # set trap to catch route # on the next line
                self.find_secondlevel(e)  # intercept the second level
                self.find_level(e)  # intercept the level
                self.trap_lvl(e)  # set trap to catch Lvl on the next line
                self.find_underscore(e)  # find the underscore dash string
                self.find_unprocessed(e)  # after unprocessed rings label, add no new rings to daily array
                self.find_eid(e)  # find the emp id / it is the first 8 digit number on the page

        def get_routeholder(self, e):
            """ finish the chain: a six digit route which follows a move code completes one ring.
            the finished ring goes into the daily array, unless unprocessed rings are being read. """
            chain = self.parent.movecode_holder
            if not chain or re.match(r"[0-9]{6}$", e) is None:
                return  # no chain is in progress or the line is not a route
            chain.append(e)
            if self.parent.unprocessedrings != "":
                self.parent.unprocessed_counter += 1  # handle carroll problem
                self.parent.carroll_rpt.append(self.parent.lastname)  # append carroll report
                return
            self.parent.daily_array.append(chain)

        def get_movecode(self, e):
            """ continue the chain with the move code which follows the finance number.
            the move code holder is always emptied first. a higher level (H/L) move code carries no
            route, so zeros are inserted and the ring is completed right away. """
            self.parent.movecode_holder = []
            chain = self.parent.finance_holder
            if not chain:
                return  # no chain is in progress
            if re.match(r"[0-9]{4}-[0-9]{2}$", e):
                chain.append(e)
                self.parent.movecode_holder = chain
            # solve for robertson problem / "H/L" is in move code
            if e.startswith("H/L"):  # if the move code is a higher level assignment
                chain.extend((e, "000000"))  # the move code, then zeros for the route number
                if self.parent.unprocessedrings != "":
                    self.parent.unprocessed_counter += 1  # handle carroll problem
                    self.parent.carroll_rpt.append(self.parent.lastname)  # append carroll report
                else:
                    # skip getting the route and add the ring to the daily array
                    self.parent.daily_array.append(chain)

        def get_financeholder(self, e):
            """ continue the chain with the line which follows the time zone (the finance number).
            if no chain is in progress the finance holder is emptied. """
            chain = self.parent.timezone_holder
            if chain:
                chain.append(e)
                self.parent.finance_holder = chain
            else:
                self.parent.finance_holder = []

        def get_timeholder(self, e):
            """ continue the chain with the time zone which follows the time.
            both the time holder and the time zone holder are emptied on every line. the time zone holder
            is only filled again when a chain is in progress and the line is a time zone. """
            chain = self.parent.time_holder
            self.parent.time_holder = []
            self.parent.timezone_holder = []
            if not chain:
                return  # no chain is in progress
            if re.match(r"[A-Z]{2}T", e):  # look for the time zone following chain
                chain.append(e)
                self.parent.timezone_holder = chain
            # solve for salih problem / missing time zone in ...
            elif self.parent.unprocessedrings != "":
                self.parent.unprocessed_counter += 1  # unprocessed rings
                self.parent.salih_rpt.append(self.parent.lastname)

        def get_dateholder(self, e):
            """ continue the chain with the time (a space followed by ##.##) which follows the date. """
            chain = self.parent.date_holder
            if chain and re.match(r" [0-2][0-9]\.[0-9][0-9]$", e):
                chain.append(e)
                self.parent.time_holder = chain

        def find_franklin(self, e):
            """ solve for the franklin problem: when a date is read while move desigs are waiting in the
            franklin array, take out the earliest move desig and start a new chain with it. """
            waiting = self.parent.franklin_array
            if waiting and re.match(r"[0-1][0-9]/[0-3][0-9]$", e):
                self.parent.mv_holder = [self.parent.eid, waiting.pop(0)]

        def fix_rodriguez(self, e):
            """ solve for rodriguez problem / multiple consecutive mv desigs.
            while move desigs are waiting in the franklin array, any further move desig is added to it. """
            if not self.parent.franklin_array:
                return
            # a move desig is a 5 or 3 digit number starting with zero, or one of the known mv desigs
            if re.match(r"0(?:[0-9]{4}|[0-9]{2})$", e) or e in self.parent.mv_desigs:
                self.parent.franklin_array.append(e)
                self.parent.rod_rpt.append(self.parent.lastname)

        def get_dateholder2(self, e):
            """ continue the chain with the date (##/##) which follows the move desig.
            if there is no chain or the line is not a date, the date holder is emptied. """
            chain = self.parent.mv_holder
            if chain and re.match(r"[0-1][0-9]/[0-3][0-9]$", e):  # look for date following move desig
                chain.append(e)
                self.parent.date_holder = chain
            else:
                self.parent.date_holder = []

        def fix_franklin(self, e):
            """ solve for franklin problem: two mv desigs appear consecutively.
            when a move desig is read while another one is still held in mv_holder, both are placed in
            the franklin array. """
            holder = self.parent.mv_holder
            if not holder:
                return
            # a move desig is a 5 or 3 digit number starting with zero, or one of the known mv desigs
            if re.match(r"0(?:[0-9]{4}|[0-9]{2})$", e) or e in self.parent.mv_desigs:
                self.parent.franklin_array.extend((holder[1], e))
                self.parent.frank_rpt.append(self.parent.lastname)

        def find_movedesig(self, e):
            """ look for a move desig and start a new chain in mv_holder with the employee id and the
            move desig, so the next line can be checked for a date. mv_holder is emptied on every other
            line, and also while move desigs are waiting in the franklin array. """
            is_desig = False
            if not self.parent.franklin_array:
                # a move desig is a 5 or 3 digit number starting with zero, or one of the known mv desigs
                is_desig = bool(re.match(r"0(?:[0-9]{4}|[0-9]{2})$", e) or e in self.parent.mv_desigs)
            self.parent.mv_holder = [self.parent.eid, e] if is_desig else []

        def find_rose(self, e):
            """ solve for rose problem: mv desig and date appearing on same line.
            the employee id, the move desig and the date are added to mv_holder, which is then handed
            over to date_holder so the chain can continue with the time on the next line.
            the two parts are taken from the pattern itself, so any single white space character
            between them (not only a space) is handled. """
            found = re.match(r"(0[0-9]{4})\s([0-2][0-9]/[0-9][0-9])$", e)
            if found is None:
                return
            self.parent.mv_holder.append(self.parent.eid)  # add the emp id to the daily array
            self.parent.mv_holder.append(found.group(1))  # add the mv desig to the daily array
            self.parent.mv_holder.append(found.group(2))  # add the date to the mv desig array
            self.parent.date_holder = self.parent.mv_holder  # transfer array items to date holder
            self.parent.rose_rpt.append(self.parent.lastname)

        def find_days(self, e):
            """ find and record all days on the report. a day read after the employee id label goes into
            found_days, a day read before the label goes into foundday_holder. """
            if e not in self.parent.days:
                return
            if self.parent.eid_label:
                self.parent.found_days.append(e)
            else:
                self.parent.foundday_holder.append(e)

        # the write loop

        def writeloop_errorreport(self):
            """ write the jobs, routes, levels and base times held for the employee to the klusterbox pdf
            converter report. nothing is written unless the error report option is on. """
            if not self.parent.gen_error_report:
                return
            report = self.parent.kbpc_rpt
            jobs = self.parent.jobs
            levels = self.parent.level
            if jobs:
                report.write("Jobs: {}\n".format(jobs))
            if self.parent.routes:
                report.write("Routes: {}\n".format(self.parent.routes))
            if levels:
                report.write("Levels: {}".format(levels))
                if len(jobs) != len(levels):  # if there are not enough level elements
                    report.write("   LEVEL INDEXERROR DETECTED!!!")  # report the index error
                report.write("\n")  # insert new line after Levels:
            if self.parent.base_time:
                report.write("Base / Times:")
                for base_entry in self.parent.base_time:
                    report.write("{}\n".format(base_entry))

        def bind_underscores(self):
            """ flatten the daily underscore slash groups into one array, underscore_slash_result.
            the result is left untouched when there are no groups. """
            groups = self.parent.daily_underscoreslash
            if groups:
                self.parent.underscore_slash_result = [item for group in groups for item in group]

        def write_primeinfo(self):
            """ build the prime info: the first columns of a csv line, which are the same for every line
            written for one carrier (pay period, station, employee id, last name, first initial and
            fixed filler values). the result is stored in self.parent.prime_info. """

            def quoted(value):
                """ wrap the value in double quotes """
                return '"{}"'.format(value)

            pay_period = self.parent.yyppwk.replace("-", "")
            carrier = [quoted(value) for value in ("000000", self.parent.station, "0000", self.parent.eid,
                                                   self.parent.lastname, self.parent.fi[:1])]
            filler = ['"_"', '"010/0000"'] + ['"N"'] * 3 + ['"0"'] * 6
            self.parent.prime_info = [pay_period] + carrier + filler

        def add_underscoreslash(self):
            """ adds the underscore slash results to the daily array: each ring in the daily array
            receives the next two items of underscore_slash_result. """
            results = self.parent.underscore_slash_result
            for position, ring in enumerate(self.parent.daily_array):
                first = 2 * position  # two results are used up for every ring
                ring.append(results[first])
                ring.append(results[first + 1])

        def detect_basecounter_error(self):
            """ finds basecounter errors and records them in the basecounter error array.
            it is an error when days were found and their number differs from base_chg + 1. """
            day_count = len(self.parent.found_days)
            if day_count > 0 and self.parent.base_chg + 1 != day_count:
                self.parent.basecounter_error.append((self.parent.lastname, self.parent.base_chg, day_count))

        def init_csvdayarray(self):
            """ set up an empty array for each day in the week, saturday through friday.
            csv_output holds those same seven arrays in order. """
            week = [[] for _ in range(7)]
            names = ("csv_sat", "csv_sun", "csv_mon", "csv_tue", "csv_wed", "csv_thr", "csv_fri")
            for name, day_array in zip(names, week):
                setattr(self.parent, name, day_array)
            self.parent.csv_output = week

        def fix_basetimes(self):
            """ fix problem with miscounted base times.
            if the base counters in base_time skip any number between zero and the highest counter,
            the counters above each skipped number are lowered so that no numbers are skipped. """
            counters = [bt[0] for bt in self.parent.base_time]
            if not counters:
                return
            # the numbers up to the highest counter which no base time uses
            skipped = [num for num in range(max(counters) + 1) if num not in counters]
            for gap in reversed(skipped):  # start with the highest skipped number
                for bt in self.parent.base_time:
                    if bt[0] > gap:
                        bt[0] -= 1

        def get_founddays(self):
            """ solve for nguyen problem / day of week occurs prior to "employee id" label.
            the days held in foundday_holder are joined to found_days, and found_days is then rebuilt
            in saturday through friday order. nothing is done when foundday_holder is empty. """
            held_days = self.parent.foundday_holder
            if not held_days:
                return
            self.parent.found_days += held_days
            week = ["Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
            for day in self.parent.days:  # take out every day which was not found
                if day not in self.parent.found_days:
                    week.remove(day)
            self.parent.found_days = week
            self.parent.nguyen_rpt.append(self.parent.lastname)

        def order_founddays(self):
            """ reorder the found days to ensure the correct order and write them to the error report.
            nothing is done when no days were found. """
            if not self.parent.found_days:
                return
            self.parent.found_days = self.parent.pdf_converter_reorder_founddays()
            if self.parent.gen_error_report:  # write out found days
                self.parent.kbpc_rpt.write("Found days: {}\n".format(self.parent.found_days))

        def alt_founddays(self):
            """ an alternate method for getting found days: found_days is rebuilt, in saturday through
            friday order, from the names of the days which appear anywhere on the page. """
            week = ("Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
            self.parent.found_days = [day for day in week if day in self.parent.page]

        def errorrpt_eidcount(self):
            """ write the employee id count to the error report, if the error report option is on. """
            if not self.parent.gen_error_report:
                return
            self.parent.kbpc_rpt.write("proto emp id counter: {}\n".format(self.parent.eid_count))

        def build_csv_output(self):
            """ build the csv output array with data from base time and daily array.
            for each of the seven days, the base times whose found day is that day are added to the
            array of the day in csv_output, followed by the rings of the daily array with that date.
            if the base counter of a base time has no found day, the found days are built again by
            alternate means and the employee is noted in denton_rpt. if the base time can still not be
            matched to a day, it is left out rather than stopping the whole conversion. """
            for i in range(7):
                day_name = self.parent.days[i]
                for bt in self.parent.base_time:
                    try:
                        found_day = self.parent.found_days[bt[0]]
                    except IndexError:
                        # this handles an error where the found days can not be detected by the normal means
                        self.alt_founddays()  # get the found days by alternate means.
                        self.parent.denton_rpt.append(self.parent.lastname)
                        try:
                            found_day = self.parent.found_days[bt[0]]
                        except IndexError:
                            # no day can be found for this base time. the employee is already noted in
                            # denton_rpt, so skip the base time.
                            continue
                    if found_day == day_name:
                        self.parent.csv_output[i].append(bt)
                for da in self.parent.daily_array:
                    if da[2] == self.parent.pp_days[i].strftime("%m/%d"):
                        self.parent.csv_output[i].append(da)

        def csvoutput_time(self):
            """ write the csv lines for the daily times (ie 5200 time, 5500 time, etc) and for the moves
            (ie ET, MV, BT, 093, etc) held in csv_output. an array of two items is a base/time line, an
            array of ten items is a ring from the daily array. arrays of any other length are only
            written to the error report. """

            def quoted(value):
                """ wrap the value in double quotes """
                return '"{}"'.format(value)

            for day_arrays in self.parent.csv_output:
                for array in day_arrays:  # for each time in the array, write a line
                    if self.parent.gen_error_report:
                        self.parent.kbpc_rpt.write("{}\n".format(array))
                    if len(array) == 2:  # if the line comes from base/time data
                        add_this = [self.parent.found_days[int(array[0])], '"_0-00"', quoted(array[1])]
                    elif len(array) == 10:  # if the line comes from daily array
                        end_notes = "" if array[9] == "__/__" else "(W)Ring Deleted From PC"
                        day_index = self.parent.daily_array_days.index(array[2])
                        ring_date = self.parent.pp_days[day_index].strftime("%d-%b-%y").upper()
                        add_this = ["000-00", quoted(array[1]), quoted(ring_date), quoted(array[3].strip()),
                                    quoted(array[5]), quoted(array[6]), quoted(array[7]),
                                    '""', '""', '""', '"0"', '""', '""', '"0"', quoted(end_notes)]
                    else:
                        continue  # not a line for the csv file
                    # put the data into the csv file
                    self.parent.writer = csv.writer(self.parent.csv_doc, dialect='myDialect')
                    self.parent.writer.writerow(self.parent.prime_info + add_this)

        def csvoutput_base(self):
            """ write one line to the csv file for each job. these are the lines which contain the Base
            or Temp info, including the D/A designation, the level and the route. """

            def quoted(value):
                """ wrap the value in double quotes """
                return '"{}"'.format(value)

            for i in range(len(self.parent.jobs)):
                # let the parent handle a level or routes array which is too short for this index
                self.parent.solve_level_indexerror(i)
                self.parent.solve_routes_indexerror(i)
                base_or_temp = self.parent.base_temp[i]
                job_code = self.parent.jobs[i].replace("-", "").strip()
                base_line = [base_or_temp, quoted(job_code), '"0000"', '"7220-10"', '"Q0"',
                             quoted(self.parent.level[i]), '"N"', quoted(self.parent.routes[i]), '""',
                             '"0000000"', '"0000000"', '"0"', '"0"', '"0"', '"N"', '"N"', '"N"', '"MDT"', '"N"']
                self.parent.writer = csv.writer(self.parent.csv_doc, dialect='myDialect')
                self.parent.writer.writerow(self.parent.prime_info + base_line)

        # def __solve_level_indexerror(self, i):
        #     """ this will handle rare cases were length of the jobs array is longer than the level array by
        #     adding to the level array. """
        #     try:
        #         if self.parent.level[i]:  # if there is an index for the level array
        #             pass  # no nothing
        #     except IndexError:  # if there is not an index for the level array
        #         self.parent.levelindexerror_rpt.append(self.parent.lastname)
        #         self.parent.level.append(self.parent.level[0])  # use the first element of the level array

        def writeloop_init(self):
            """ set the collecting arrays, counters and flags of the parent to their starting values """
            parent = self.parent
            # flags and scalar values
            parent.lookfortimes = False
            parent.eid = ""
            parent.base_chg = 0
            parent.base_counter = 0
            parent.unprocessed_counter = 0
            # every array gets its own new empty list
            for name in ("found_days", "base_time", "daily_array", "daily_underscoreslash", "jobs", "level"):
                setattr(parent, name, [])

        def report_eid(self, e):
            """ add the passed element, on its own line, to the error report if the report is being generated """
            if not self.parent.gen_error_report:
                return
            self.parent.kbpc_rpt.write("{}\n".format(e))

        def find_fi(self, e):
            """ while the first initial trap is set, accept a single capital letter or two capital letters
            separated by a space as the first initial and release the trap. """
            parent = self.parent
            if not parent.lookforfi:
                return
            looks_like_initial = re.fullmatch("[A-Z] [A-Z]", e) or re.fullmatch("([A-Z])", e)
            if not looks_like_initial:
                return  # keep the trap set until an initial is found
            if parent.gen_error_report:
                parent.kbpc_rpt.write("FI: {}\n".format(e))
            parent.fi = e
            parent.lookforfi = False

        def find_name(self, e):
            """ while the name trap is set, accept one to five runs of capital letters, joined by any single
            character, as the last name. apostrophes are stored as spaces. once found, the name trap is
            released and the first initial trap is set. """
            parent = self.parent
            if not parent.lookforname:
                return
            name_patterns = (r"([A-Z]+)",
                             r"([A-Z]+.[A-Z]+)",
                             r"([A-Z]+.[A-Z]+.[A-Z]+)",
                             r"([A-Z]+.[A-Z]+.[A-Z]+.[A-Z]+)",
                             r"([A-Z]+.[A-Z]+.[A-Z]+.[A-Z]+.[A-Z]+)")
            if not any(re.fullmatch(pattern, e) for pattern in name_patterns):
                return
            parent.lastname = e.replace("'", " ")
            if parent.gen_error_report:
                parent.kbpc_rpt.write("Name: {}\n".format(e))  # the report shows the name as it was read
            parent.lookforname = False
            parent.lookforfi = True

        def find_job(self, e):
            """ collect the job or d/a code in the jobs array - there might be two. the code is a whitespace
            character followed by two digits, a dash and one digit. """
            if re.match(r"\s[0-9]{2}-[0-9]$", e) is None:
                return
            self.parent.jobs.append(e)

        def find_job_alt(self, e):
            """ same as the find_job method, except that the code has no leading whitespace character and is
            collected in the jobs_alt array. """
            if re.match(r"[0-9]{2}-[0-9]$", e) is None:
                return
            self.parent.jobs_alt.append(e)

        def find_temproute(self, e):
            """ while the second route trap is set, add a six digit element to the routes array. the trap is
            released after one element whether it was a route or not. """
            parent = self.parent
            if not parent.lookfor2route:
                return
            is_route = re.match(r"[0-9]{6}$", e)
            if is_route:
                parent.routes.append(e)  # add route to routes array
            parent.lookfor2route = False

        def find_mainroute(self, e):
            """ while the route trap is set, add a six digit element to the routes array and set the trap for
            a second route. the route trap is released after one element whether it was a route or not. """
            parent = self.parent
            if not parent.lookforroute:
                return
            is_route = re.match(r"[0-9]{6}$", e)
            if is_route:
                parent.routes.append(e)  # add route to routes array
                parent.lookfor2route = True
            parent.lookforroute = False

        def find_secondlevel(self, e):
            """ while the second level trap is set, add a two digit element to the level array. the trap is
            released after one element. extra elements can end up in the level array - these should be
            harmless, as the goal is to capture the first two. """
            parent = self.parent
            if not parent.lookfor2level:
                return
            is_level = re.match(r"[0-9]{2}$", e)
            if is_level:
                parent.level.append(e)
            parent.lookfor2level = False

        def trap_route(self, e):
            """ when the "Route #" label is read, set the trap that catches the route # on the next line """
            if e != "Route #":
                return
            self.parent.lookforroute = True

        def find_level(self, e):
            """ intercept the level.

            this only acts while the level trap (lookforlevel) is set, and the trap is released after one
            element whether a level was found or not.
            - a two digit element is added to the level array and the trap for the second level is set.
            - a four character element made up only of the digits 0, 1, 2 and 7 is treated as two levels which
              are back to back on the same line, eg '0202', and both halves are added to the level array.
            """
            parent = self.parent
            if not parent.lookforlevel:
                return
            parent.lookforlevel = False  # the trap is single use
            if re.match(r"[0-9]{2}$", e):
                parent.level.append(e)
                parent.lookfor2level = True  # set trap to catch the second level next line
            # a comma inside the character class would be matched literally, so the class is only '0-2' and '7'
            elif re.match(r"[0-27]{4}$", e):
                parent.level.append(e[:2])
                parent.level.append(e[2:4])

        def trap_lvl(self, e):
            """ when the "Lvl" label is read, set the trap that catches the level on the next line """
            if e != "Lvl":
                return
            self.parent.lookforlevel = True

        def find_underscore(self, e):
            """ count the employee id strings and the underscore dash strings which stand in for them. nothing
            is counted until an employee id has been found, or while the new page flag is set. """
            parent = self.parent
            if parent.eid == "" or parent.new_page != False:
                return
            # an employee id, either in full or masked in lower or upper case
            for id_pattern in (r"[0-9]{8}", r"xxx-xx-[0-9]{4}", r"XXX-XX-[0-9]{4}"):
                if re.match(id_pattern, e):
                    parent.eid_count += 1
            if e == "___-___-____":  # the underscore dash string
                parent.eid_count += 1
            # solve for rose problem: time object is fused to emp id object - just increment the eid counter
            fused_patterns = (r"\s[0-9]{2}\.[0-9]{10}", r"__.__[0-9]{8}", r"__._____-___-____")
            if any(re.match(fused, e) for fused in fused_patterns):
                parent.eid_count += 1
                parent.rose_rpt.append(parent.lastname)

        def find_unprocessed(self, e):
            """ solve for carroll problem/ unprocessed rings do not have underscore slash counterparts. when
            the "Un-Processed Rings" label is read, the current employee id is stored in unprocessedrings. """
            if e != "Un-Processed Rings":
                return
            self.parent.unprocessedrings = self.parent.eid

        def find_eid(self, e):
            """ find the emp id / it is the first element starting with 8 digits which is read while the emp
            id counter is zero. finding it sets the trap for the name and sets the new page flag depending on
            whether the emp id is the one stored by the unprocessed rings label. """
            parent = self.parent
            if not re.match(r"[0-9]{8}", e) or parent.eid_count != 0:
                return
            parent.eid = e
            if parent.gen_error_report:
                parent.kbpc_rpt.write("Employee ID: {}\n".format(e))
            parent.lookforname = True
            # set unprocessedrings and new_page variables
            if parent.eid == parent.unprocessedrings:
                parent.new_page = True
                # increment the emp id counter to stop a new emp id from being set
                parent.eid_count += 1
                if parent.gen_error_report:
                    parent.kbpc_rpt.write("NEW PAGE!!!\n")
            else:
                parent.unprocessedrings = ""
                parent.new_page = False

    class WriteCSV:
        """
        This class writes the error report. This is called in PageAnalysis and runs at the end of each page. It
        adds information about the page to the error report.
        """

        def __init__(self, parent):
            self.parent = parent

        def run(self):
            """ a master method which uses the collected info to make the csv and reports. the steps are run
            in a fixed order and the progress bar index is incremented at the end. """
            pipeline = (
                self.write_errorreport,  # write error report
                self.fix_jobs,  # fix potential errors with jobs array
                self.write_baseline,  # write the base line
                self.reorder_days,  # make sure the days are in the correct order
                self.handle_underscoreslash,  # handles the underscore slashes
                self.carroll_handler,
                self.problem_handling,
                self.increment_basechg,
                self.dailyarraylenght,
                self.reinitialize,
            )
            for step in pipeline:
                step()
            self.parent.pbi += 1

        def write_errorreport(self):
            """ add the station, pay period, jobs, routes and levels of the page to the error report, if the
            report is being generated. a mismatch between the number of jobs and levels is flagged. """
            parent = self.parent
            if not parent.gen_error_report:
                return
            report = parent.kbpc_rpt
            report.write("Station: {}\n".format(parent.station))
            report.write("Pay Period: {}\n".format(parent.yyppwk))  # show the pay period
            # these arrays are only shown when they hold something
            optional_lines = (("Jobs: {}\n", parent.jobs),
                              ("Jobs (alternate): {}\n", parent.jobs_alt),
                              ("Routes: {}\n", parent.routes))
            for template, values in optional_lines:
                if len(values) > 0:
                    report.write(template.format(values))
            if len(parent.level) > 0:
                report.write("Levels: {}".format(parent.level))
                if len(parent.jobs) != len(parent.level):  # detect any levelindex errors
                    report.write("   LEVEL/JOB INDEXERROR DETECTED!!!")
                report.write("\n")

        def fix_jobs(self):
            """ join the jobs and jobs_alt arrays, cut the result down so it is not longer than the level
            array and use it as the jobs array. the jobs array is left as it is unless there is at least one
            job and one level. """
            parent = self.parent
            combined = parent.jobs + parent.jobs_alt
            if not combined or not parent.level:
                return
            # dropping values from the end is the same as keeping the first len(level) values
            parent.jobs = combined[:len(parent.level)]

        def write_baseline(self):
            """ register the csv dialect, build the prime info which starts every csv line of the employee and
            write one base line to the csv file for each entry of the jobs array. """
            # define csv writer parameters
            csv.register_dialect('myDialect', delimiter=',', quotechar="'", skipinitialspace=True,
                                 lineterminator=",\r")
            parent = self.parent
            quote = '"{}"'.format  # wraps a value in double quotes
            identity = [parent.yyppwk.replace("-", ""), quote("000000"), quote(parent.station), quote("0000"),
                        quote(parent.eid), quote(parent.lastname), quote(parent.fi[:1])]
            filler = ['"_"', '"010/0000"', '"N"', '"N"', '"N"', '"0"', '"0"', '"0"', '"0"', '"0"', '"0"']
            parent.prime_info = identity + filler
            if len(parent.jobs) == 0:
                return  # no jobs, no base lines
            # if the route count is less than the jobs count, fill the route count
            parent.routes = PdfConverterFix(parent.routes).route_filler(len(parent.jobs))
            # the closing columns are the same for every base line
            fixed_tail = ['""', '"0000000"', '"0000000"', '"0"', '"0"', '"0"', '"N"', '"N"', '"N"', '"MDT"', '"N"']
            for idx in range(len(parent.jobs)):
                parent.solve_level_indexerror(idx)
                parent.solve_routes_indexerror(idx)
                job_code = parent.jobs[idx].replace("-", "").strip()
                row = [parent.base_temp[idx], quote(job_code), '"0000"', '"7220-10"', '"Q0"',
                       quote(parent.level[idx]), '"N"', quote(parent.routes[idx])] + fixed_tail
                parent.writer = csv.writer(parent.csv_doc, dialect='myDialect')
                parent.writer.writerow(parent.prime_info + row)

        def reorder_days(self):
            """ make sure the found days are in the correct order and show them, along with the emp id
            counter, in the error report. """
            parent = self.parent
            if len(parent.foundday_holder) > 0:
                # solve for nguyen problem / day of week occurs prior to "employee id" label
                parent.found_days += parent.foundday_holder
                week = ["Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
                # drop the days which were not found, so that the found days are left in week order
                absent = [day for day in parent.days if day not in parent.found_days]
                for day in absent:
                    week.remove(day)
                parent.found_days = week
                parent.nguyen_rpt.append(parent.lastname)
            if len(parent.found_days) > 0:
                # reorder the found days to ensure the correct order
                parent.found_days = parent.pdf_converter_reorder_founddays()
                if parent.gen_error_report:  # show found days
                    parent.kbpc_rpt.write("Found days: {}\n".format(parent.found_days))
            if parent.gen_error_report:
                parent.kbpc_rpt.write("proto emp id counter: {}\n".format(parent.eid_count))

        def handle_underscoreslash(self):
            """ handles the underscore slashes.

            this method does the following in order:
            1. joins all the underscore slash items of the page into one array.
            2. resolves a mcgrath carryover from an earlier call, if the mcgrath indicator is set.
            3. appends two underscore slash items to each ring in the daily array. if the second item is
               missing, the ring is held back as the mcgrath carryover.
            4. closes any gaps in the day index of the base times.
            5. sorts the base times and the rings into one array for each day of the week.
            6. writes each of those arrays to the csv file and, if opted, to the error report.
            """
            if len(self.parent.daily_underscoreslash) > 0:  # bind all underscore slash items in one array
                self.parent.underscore_slash_result = sum(self.parent.daily_underscoreslash, [])
            # NOTE(review): underscore_slash_result is only assigned above when daily_underscoreslash is not
            # empty. otherwise the value it already holds is used below - confirm this is intended.
            if self.parent.mcgrath_indicator and \
                    len(self.parent.underscore_slash_result) > 0:  # solve for mcgrath indicator
                # add underscore slash to carryover
                self.parent.mcgrath_carryover.append(self.parent.underscore_slash_result[0])
                self.parent.mcgrath_indicator = False  # reset the indicator
                if self.parent.gen_error_report:
                    datainput = "MCGRATH CARRYOVER: {}\n".format(self.parent.mcgrath_carryover)
                    self.parent.kbpc_rpt.write(datainput)  # display a notice.
                del self.parent.underscore_slash_result[0]  # delete the orphan underscore slash

            # give each ring in the daily array the next two underscore slash items
            count = 0
            for array in self.parent.daily_array:
                # this first append is outside of the try block, so an IndexError here is not handled
                array.append(self.parent.underscore_slash_result[count])
                try:
                    array.append(self.parent.underscore_slash_result[count + 1])
                except IndexError:  # solve for the mcgrath problem
                    # the second item is missing - hold this ring back as the carryover
                    self.parent.mcgrath_carryover = array
                    self.parent.mcgrath_indicator = True
                    self.parent.mcgrath_rpt.append(self.parent.lastname)
                    if self.parent.gen_error_report:
                        self.parent.kbpc_rpt.write("MCGRATH ERROR DETECTED!!!\n")
                # if self.parent.mcgrath_indicator == False:
                count += 2
            # if there is a carryover, remove the daily array item from the list
            if self.parent.mcgrath_carryover in self.parent.daily_array:
                self.parent.daily_array.remove(self.parent.mcgrath_carryover)
            # if there is a carryover to be added
            if not self.parent.mcgrath_indicator and self.parent.mcgrath_carryover != "":
                # put the carryover at the front of the daily array
                self.parent.daily_array.insert(0, self.parent.mcgrath_carryover)
                self.parent.mcgrath_carryover = ""  # reset the carryover
                self.parent.eid_count += 1  # increment the emp id counter

            # set up array for each day in the week
            self.parent.csv_sat = []
            self.parent.csv_sun = []
            self.parent.csv_mon = []
            self.parent.csv_tue = []
            self.parent.csv_wed = []
            self.parent.csv_thr = []
            self.parent.csv_fri = []
            # csv_output holds the same list objects, so filling csv_output[i] also fills the day array
            self.parent.csv_output = [self.parent.csv_sat, self.parent.csv_sun, self.parent.csv_mon,
                                      self.parent.csv_tue, self.parent.csv_wed,
                                      self.parent.csv_thr, self.parent.csv_fri]
            # reorder the found days to ensure the correct order
            self.parent.found_days = self.parent.pdf_converter_reorder_founddays()
            # fix problem with miscounted base times
            # high_array collects the first element of every base time, which is used further down as an
            # index into found_days
            high_array = []
            for bt in self.parent.base_time:
                high_array.append(bt[0])
            if len(high_array) > 0:
                high_num = max(high_array)
                comp_array = []  # every number from 0 up to and including the highest index
                for i in range(high_num + 1):
                    comp_array.append(i)
                del_array = []  # the numbers which are in use as an index
                for num in comp_array:
                    if num in high_array:
                        del_array.append(num)
                error_array = comp_array
                # error_array is left with the numbers which are not in use - the gaps
                error_array = [x for x in error_array if x not in del_array]
                error_array.reverse()  # work from the highest gap down
                if len(error_array) > 0:
                    for error_num in error_array:
                        for bt in self.parent.base_time:
                            if bt[0] > error_num:
                                bt[0] -= 1  # shift the index down to close the gap (changes base_time in place)
            # load the multi array with array for each day
            for i in range(7):
                for bt in self.parent.base_time:
                    if self.parent.found_days[bt[0]] == self.parent.days[i]:
                        self.parent.csv_output[i].append(bt)
                for da in self.parent.daily_array:
                    # the third element of a ring is compared to the pay period day formatted as month/day
                    if da[2] == self.parent.pp_days[i].strftime("%m/%d"):
                        self.parent.csv_output[i].append(da)
            for co in self.parent.csv_output:  # for each time in the array, display a line
                for array in co:
                    if self.parent.gen_error_report:
                        datainput = "{}\n".format(str(array))
                        self.parent.kbpc_rpt.write(datainput)
                    # put the data into the csv file
                    if len(array) == 2:  # if the line comes from base/time data
                        add_this = [self.parent.found_days[int(array[0])], '"_0-00"', '"{}"'.format(array[1])]
                        whole_line = self.parent.prime_info + add_this
                        self.parent.writer = csv.writer(self.parent.csv_doc, dialect='myDialect')
                        self.parent.writer.writerow(whole_line)
                    if len(array) == 10:  # if the line comes from daily array
                        # the last element is the second underscore slash item which was appended above
                        if array[9] != "__/__":
                            end_notes = "(W)Ring Deleted From PC"
                        else:
                            end_notes = ""
                        add_this = ["000-00", '"{}"'.format(array[1]),
                                    '"{}"'.format(self.parent.pp_days[self.parent.daily_array_days.index(array[2])].
                                                  strftime("%d-%b-%y").upper()),
                                    '"{}"'.format(array[3].strip()), '"{}"'.format(array[5]), '"{}"'.format(array[6]),
                                    '"{}"'.format(array[7]), '""', '""', '""', '"0"', '""', '""', '"0"',
                                    '"{}"'.format(end_notes)]
                        whole_line = self.parent.prime_info + add_this
                        self.parent.writer = csv.writer(self.parent.csv_doc, dialect='myDialect')
                        self.parent.writer.writerow(whole_line)

        def carroll_handler(self):
            """ Handle Carroll problems by adjusting the emp id counter. With the mcgrath indicator set the
            counter is lowered by one. Otherwise a count of one is reset to zero (a widow) and any other odd
            count is raised by one (a cut off). Last, two is taken off for each unprocessed ring. """
            parent = self.parent
            if parent.mcgrath_indicator:
                parent.eid_count -= 1
            elif parent.eid_count == 1:  # handle widows
                parent.eid_count = 0
                if parent.gen_error_report:
                    parent.kbpc_rpt.write(
                        "WIDOW HANDLING: Carroll Mod emp id counter: {}\n".format(parent.eid_count))
            elif parent.eid_count % 2 != 0:  # handle emp id counts where there has been a cut off
                parent.eid_count += 1
                if parent.gen_error_report:
                    parent.kbpc_rpt.write(
                        "CUT OFF CONTROL: Carroll Mod emp id counter: {}\n".format(parent.eid_count))
            parent.eid_count -= parent.unprocessed_counter * 2

        def problem_handling(self):
            """
            detect Franklin Problems: Consecutive MV Desigs
            detect Unruh Problem: Underscore dash cut off in unprocessed rings.

            the length of the daily array is compared to half of the emp id counter.
            - with unprocessed rings: a match is fine. a match with half of (emp id counter + 2) is reported
              as an Unruh problem. anything else is a Franklin error and the name is added to unresolved.
            - without unprocessed rings: any mismatch is a Franklin error and the name is added to unresolved.
            notices are only written to the error report if the report is being generated - the names are
            always collected.
            """
            parent = self.parent
            ring_count = len(parent.daily_array)
            if parent.unprocessed_counter > 0:
                if parent.gen_error_report:
                    parent.kbpc_rpt.write("Unprocessed Rings: {}\n".format(parent.unprocessed_counter))
                if ring_count == parent.eid_count / 2:
                    return  # the counts agree
                # Solve for Unruh error / when a underscore dash is missing after unprocessed rings
                if ring_count == max((parent.eid_count + 2) / 2, 0):
                    if parent.gen_error_report:
                        parent.kbpc_rpt.write("Unruh Mod emp id counter: {}\n".format(parent.eid_count + 2))
                        # end the notice with a new line so the next report entry starts on its own line
                        parent.kbpc_rpt.write("UNRUH PROBLEM DETECTED!!!\n")
                    parent.unruh_rpt.append(parent.lastname)
                    return
                if parent.gen_error_report:
                    parent.kbpc_rpt.write(
                        "FRANKLIN ERROR DETECTED!!! ALERT! (Unprocessed counter)!\n")
                parent.unresolved.append(parent.lastname)
                return
            if ring_count != max(parent.eid_count / 2, 0):
                if parent.gen_error_report:
                    parent.kbpc_rpt.write("FRANKLIN ERROR DETECTED!!! ALERT! ALERT!\n")
                parent.unresolved.append(parent.lastname)

        def increment_basechg(self):
            """ check the base change count against the found days. if there are found days and the base
            change count plus one is not equal to the number of them, a tuple of the last name, the base
            change count and the number of found days is added to the basecounter error array. """
            parent = self.parent
            days_found = len(parent.found_days)
            if days_found > 0 and parent.base_chg + 1 != days_found:
                parent.basecounter_error.append((parent.lastname, parent.base_chg, days_found))

        def dailyarraylenght(self):
            """ show the number of rings in the daily array in the error report, if the report is opted """
            if not self.parent.gen_error_report:
                return
            ring_count = len(self.parent.daily_array)
            self.parent.kbpc_rpt.write("daily array lenght: {}\n".format(ring_count))

        def reinitialize(self):
            """ empty the arrays and reset the counters and flags of the parent, show the emp id counter in
            the error report and move the progress bar. the emp id counter itself is not reset. """
            parent = self.parent
            # every array gets its own new empty list
            list_names = ("found_days", "foundday_holder", "base_time", "daily_array", "daily_underscoreslash",
                          "jobs", "jobs_alt", "routes", "level", "franklin_array")
            for name in list_names:
                setattr(parent, name, [])
            # flags and scalar values
            parent.eid = ""
            parent.eid_label = False
            parent.base_counter = 0
            parent.base_chg = 0
            parent.unprocessed_counter = 0
            if parent.gen_error_report:
                # a negative counter is shown as zero
                parent.kbpc_rpt.write("emp id counter: {}\n".format(max(parent.eid_count, 0)))
            parent.pb.move_count(parent.pbi)

    class FinishUp:
        """
        this class finishes up by generating an summary at the end of the error report and showing one or two 
        messageboxes. 
        """

        def __init__(self, parent):
            self.parent = parent

        def run(self):
            """ this is a master method for running other methods in proper order.

            the error report and the csv file are closed even if building the report or showing a messagebox
            raises an exception, so that neither file is left open. the exception is still passed on to
            the caller.
            """
            try:
                self.build_error_report()  # if the error report is opted - generate it.
                self.error_messagebox()  # prompt user with messagebox if any carriers failed to be read.
                self.completion_messagebox()  # prompt user with messagebox to show successful completion.
            finally:
                try:
                    self.close_errorreport()  # close the error report if it was being generated.
                finally:
                    self.parent.csv_doc.close()  # the csv file is closed no matter what happened above

        def build_error_report(self):
            """ write the summary at the end of the error report, if the report is being generated. the
            summary shows the runtime, then a heading and the collected names for each kind of detected
            problem and last the unresolved names and the base counter errors. """
            parent = self.parent
            if not parent.gen_error_report:
                return
            write = parent.kbpc_rpt.write
            write("Potential Problem Reports ___________"
                  "______________________________________\n")
            write("runtime: {} seconds\n".format(round(parent.endtime - parent.starttime, 4)))
            plain = "\t>>> {}\n"
            spaced = " \t>>> {}\n"  # the McGrath and Denton results are written with a leading space
            # a heading, the template for the results and the collected results for each problem
            sections = (
                ("Franklin Problems: Consecutive MV Desigs \n", plain, parent.frank_rpt),
                ("Rodriguez Problem: This is the Franklin Problem X 4. \n", plain, parent.rod_rpt),
                ("Rose Problem: The MV Desig and date are on the same line.\n", plain, parent.rose_rpt),
                ("Robertson Baseline Problem: The base count is jumping when H/L basetimes "
                 "are put into the basetime array.\n", plain, parent.robert_rpt),
                ("Stevens Problem: Basetimes begining with H/L do not show up and are "
                 "not entered into the basetime array.\n", plain, parent.stevens_rpt),
                # the heading ends with a new line and the results use the same tab as the other sections
                ("Carroll Problem: Unprocessed rings at the end of the page do not "
                 "contain __/__ or times.\n", plain, parent.carroll_rpt),
                ("Nguyen Problem: Found day appears above the Emp ID.\n", plain, parent.nguyen_rpt),
                ("Unruh Problem: Underscore dash cut off in unprecessed rings.\n", plain, parent.unruh_rpt),
                ("Salih Problem: Unprocessed rings are missing a timezone, so that unprocessed rings counter is not"
                 " incremented.\n", plain, parent.salih_rpt),
                ("McGrath Problem: \n", spaced, parent.mcgrath_rpt),
                # denton error - employee id not showing up till end of page causes error with found days.
                ("Denton Problem: \n", spaced, parent.denton_rpt),
                ("LevelIndex Error: length of level array does not match jobs array\n", plain,
                 parent.levelindexerror_rpt),
                ("RoutesIndex Error: length of routes array does not match jobs array\n", plain,
                 parent.routesindexerror_rpt),
            )
            for heading, template, names in sections:
                write(heading)
                write(template.format(names))
            write("Unresolved: {}\n".format(parent.unresolved))
            write("Base Counter Error: {}\n".format(parent.basecounter_error))

        def error_messagebox(self):
            """ show the names in the failed array in an error messagebox at the end of the conversion
            process. nothing is shown if the failed array is empty. """
            failed = self.parent.failed
            if len(failed) == 0:
                return
            # build the text string - a new line and the name for each name.
            failed_daily = "".join(" \n " + name for name in failed)
            message = "Errors have occured for the following carriers {}.".format(failed_daily)
            messagebox.showerror("Klusterbox PDF Converter", message, parent=self.parent.frame)

        def completion_messagebox(self):
            """ show a messagebox which tells the user the conversion is complete and names the new file. if
            there are names in multi_csv, they are listed after the new file, each on its own line. """
            parent = self.parent
            if parent.multi_csv:
                grammar = "names are: \n"
                # the first file followed by all the others, each tabbed and on its own line
                file_name = "\t" + parent.short_file_name + "\n"
                file_name += "".join("\t" + name + "\n" for name in parent.multi_csv)
            else:
                grammar = "name is"
                file_name = parent.short_file_name
            message = "The PDF Convertion is complete. " \
                      "The file {} {}. ".format(grammar, file_name)
            messagebox.showinfo("Klusterbox PDF Converter", message, parent=parent.frame)

        def close_errorreport(self):
            """ close the error report document. this is skipped if the report was not being generated. """
            if not self.parent.gen_error_report:
                return
            self.parent.kbpc_rpt.close()


class PdfReorder:
    """ read a list of carrier names from a text file.
    convert the names into employee id numbers
    search an employee everything report to find any occurrences of the employee id numbers
    sort the original pdf into a new pdf file """

    def __init__(self):
        self.frame = ""
        self.file_path = ""  # the input pdf with path
        self.new_file_path = ""  # the output pdf with path
        self.short_file_name = ""  # the output pdf name without the path
        self.namelist_path = ""  # the text file of carrier names with path
        self.names_list = []  # a list of carrier names
        self.no_match = []  # a list of names with no matching employee id numbers
        self.empid_pagenum = []  # a multi dimensional array for empid ids and related page numbers.

    def run(self, frame):
        """ a master method for running the methods in proper order. the process ends early as soon as
        any step reports failure or the user cancels. """
        self.frame = frame
        if not self.select_namelist():
            return
        if not self.read_namelist():
            return
        self.build_empid_pagenum_array()
        if not self.empid_pagenum_completion_msg():
            return
        if not self.select_pdf():
            return
        if not self.get_newpdfpath():  # if the file already exist and user opts to not overwrite.
            return  # end the process
        self.fill_empid_pagenum()
        if not self.copy_pages_to_new_pdf():  # no pages were copied - the user has already been told.
            return  # do not claim that a file was created.
        self.completion_messagebox()

    def select_namelist(self):
        """ ask the user to select a text file of names with a file dialog and store its path in
        self.namelist_path. returns False if the user cancels or no file is selected, otherwise True. """
        if not messagebox.askokcancel("PDF Sorter", "Select a text file containing the names.\n\n "
                                                    "Those names must appear as they are spelled in the Klusterbox "
                                                    "database and must be on their own line in the text file. ",
                                      parent=self.frame):
            return False
        try:
            self.namelist_path = filedialog. \
                askopenfilename(initialdir=dir_filedialog(), filetypes=[("Text files", "*.txt")])  # get the txt file
        except FileNotFoundError:  # end process if the user fails to select a valid file.
            return False  # end the process
        if not self.namelist_path:  # return if no file is selected.
            return False  # end the process
        return True

    def read_namelist(self):
        """ read the text file and add each distinct, non blank line to self.names_list, which is then sorted.
        returns False (after showing a message) if no names were found, otherwise True. """
        with open(self.namelist_path, "r") as namefile:  # 'with' ensures the file is closed after reading
            for line in namefile:
                name = line.strip()  # eliminate any white space
                # do not add blank lines or duplicate names
                if name and name not in self.names_list:
                    self.names_list.append(name)  # add the names to the names list
        self.names_list.sort()  # sort names alphabetically
        if not self.names_list:  # if the names list is empty
            messagebox.showinfo("Klusterbox PDF Reorder",
                                "Klusterbox can not read any names from the selected text document. \n\n"
                                "The document can not be created.\n\n",
                                parent=self.frame)
            return False
        return True

    def build_empid_pagenum_array(self):
        """ build a multi dimensional array with employee ids and page numbers, e.g.
        [[00000000, [1, 2 , 3]], [00000001, [4, 5, 6]]]
        the page number arrays start out empty. names with no match in the name index are
        collected in self.no_match. """
        for name in self.names_list:
            # the names come from a user supplied file. doubling single quotes is the standard sql escape for
            # a quote inside a string literal, so a name like O'Neil can neither break nor alter the statement.
            safe_name = name.replace("'", "''")
            sql = "SELECT * FROM name_index WHERE kb_name = '%s'" % safe_name
            result = inquire(sql)
            if result:
                to_add = [result[0][2], []]  # third column of the first row is used as the employee id
                self.empid_pagenum.append(to_add)
            else:
                self.no_match.append(name)

    def empid_pagenum_completion_msg(self):
        """ shows a message for the completion matching the names to the employee id numbers.
        returns False if no names at all could be matched (the process can not continue), otherwise True. """
        # if the length of self.names_list is one - use singular, otherwise use plural
        nameform = "name was" if len(self.names_list) == 1 else "names were"
        if not self.no_match:  # if the no match array is empty
            messagebox.showinfo("Klusterbox PDF Reorder",
                                "Klusterbox has finished matching carrier names to their employee id numbers. \n\n"
                                "All names were matched. {} {} matched".format(str(len(self.names_list)), nameform),
                                parent=self.frame)
            return True
        if not self.empid_pagenum:  # there are unmatched names and not a single employee id was found
            messagebox.showinfo("Klusterbox PDF Reorder",
                                "Klusterbox has finished matching carrier names to their employee id numbers. \n\n"
                                "No names could be matched. So the document can not be created.\n\n",
                                parent=self.frame)
            return False
        # some, but not all, names were matched - list the ones that failed and carry on.
        string = Convert(self.no_match).array_to_string()
        messagebox.showinfo("Klusterbox PDF Reorder",
                            "Klusterbox has finished matching carrier names to their employee id numbers. \n\n"
                            "The following names could not be matched to employee id numbers: \n\n"
                            "{}".format(string),
                            parent=self.frame)
        return True

    def select_pdf(self):
        """ get a pdf file path and store it in the self.file_path variable.
        returns False if the user cancels or no file is selected, otherwise True. """
        if not messagebox.askokcancel("PDF Sorter", "Select the employee everything report.",
                                      parent=self.frame):
            return False
        try:
            self.file_path = filedialog. \
                askopenfilename(initialdir=dir_filedialog(), filetypes=[("PDF files", "*.pdf")])  # get the pdf file
        except FileNotFoundError:  # end process if the user fails to select a valid file.
            return False  # end the process
        if not self.file_path:  # return if no file is selected.
            return False  # end the process
        return True

    def get_newpdfpath(self):
        """ get the new pdf path and the shortened file name. if file already exist, ask before overwriting.
        returns False if the user declines to overwrite, otherwise True. """
        self.new_file_path = self.get_path("_sorted", ".pdf")  # generate pdf file name and path
        self.short_file_name = self.get_shortname()
        if os.path.exists(self.new_file_path):  # if the file path already exist - ask for confirmation
            if not messagebox.askokcancel("Possible File Name Discrepancy",
                                          "There is already a file named {}. "
                                          "If you proceed, the file will be overwritten. "
                                          "Did you want to proceed?".format(self.short_file_name),
                                          parent=self.frame):
                return False
        return True

    def get_path(self, add_on, extension):
        """ generate the new file name and path: the input path with its extension replaced by
        the add_on suffix followed by the new extension. """
        # splitext removes the extension whatever its length, rather than assuming four characters.
        root = os.path.splitext(self.file_path)[0]
        return root + add_on + extension

    def get_shortname(self):
        """ get the last part of the new file path, i.e. the file name without the folders. """
        return os.path.basename(self.new_file_path)

    def fill_empid_pagenum(self):
        """ find the pages where the employee id number appears. record the page number in the empid_pagenum array """
        # Regular expression to find text between "Sub-Unit:" and "Employee ID" keywords
        pattern = re.compile(re.escape("Sub-Unit:") + '(.*?)' + re.escape("Employee ID"), re.S)
        pdf_document = fitz.open(self.file_path)
        try:
            for page_num in range(len(pdf_document)):  # Iterate through each page and extract text
                text = pdf_document.load_page(page_num).get_text()  # Extract text from the page
                # join all text found between the keywords on this page. empty if there is no match.
                text_between_keywords = "".join(pattern.findall(text))
                for entry in self.empid_pagenum:
                    if entry[0] in text_between_keywords:  # the employee id appears on this page
                        entry[1].append(page_num)
        finally:
            pdf_document.close()  # release the file even if a page can not be read

    def copy_pages_to_new_pdf(self):
        """ copies pages from input pdf into a new pdf, grouped in the order of the empid_pagenum array.
        returns True if the new pdf was saved. returns False (after showing a message) if there were
        no pages to copy, in which case no file is created. """
        input_pdf = fitz.open(self.file_path)  # Open the input PDF file
        output_pdf = fitz.open()  # Create a new PDF file
        try:
            page_total = len(input_pdf)
            # Add the specified pages to the new PDF file
            for entry in self.empid_pagenum:
                for page_number in entry[1]:
                    if 0 <= page_number < page_total:  # Ensure the page number is within the valid range
                        # Insert the page into the new PDF
                        output_pdf.insert_pdf(input_pdf, from_page=page_number, to_page=page_number)
            if not output_pdf.page_count:  # if there are no pages in the output pdf
                messagebox.showinfo("Klusterbox PDF Reorder",
                                    "The PDF Reorder is complete. \n\n "
                                    "No pages were input into the new PDF document, so the document was not created.",
                                    parent=self.frame)
                return False
            output_pdf.save(self.new_file_path)  # Save the new PDF file
            return True
        finally:  # close both documents whether or not the copy succeeded
            output_pdf.close()
            input_pdf.close()

    def completion_messagebox(self):
        """ create messagebox for completion """
        file_name = self.short_file_name
        messagebox.showinfo("Klusterbox PDF Reorder",
                            "The PDF Reorder is complete. "
                            "The file name is {}. ".format(file_name),
                            parent=self.frame)
