import csv
import os

# Name of the merged codebook file written next to the input CSV files.
outputFilename = 'codebook-output.csv'


def _escape_pipes(value):
    """Return *value* with every '|' escaped as '\\|'.

    '|' is the separator used when several values share one output cell, so
    literal pipes inside a value have to be escaped first.
    """
    return value.replace('|', '\\|')


def _entry_fields(output_data, category_name, entry_name):
    """Return the label -> values dict for an entry, creating it if missing.

    Creating on demand keeps data that appears before any entry name (a file
    whose first data row has an empty first cell) instead of raising KeyError;
    such data ends up under the empty entry name.
    """
    return output_data.setdefault(category_name, {}).setdefault(entry_name, {})


def _collect_rows(rows, category_name, output_data, labels):
    """Fold the rows of one category file into *output_data* and *labels*.

    Row layout, as interpreted here (empty cells are always skipped):
      * column 0          -> entry name; starts a fresh entry
      * odd columns       -> field label (stored title-cased and stripped)
      * even columns (>0) -> a value for the most recent label
      * a non-empty cell in an odd column that is also the row's last
        column        -> a free-text note stored under the label "Note"

    The current entry name and label carry over from one row to the next, so
    a row with an empty first cell continues the previous entry.

    Args:
        rows: iterable of rows, each a list of strings (e.g. a csv.reader).
        category_name: category under which the entries are stored.
        output_data: dict category -> entry -> label -> list of values;
            updated in place.
        labels: dict used as an insertion-ordered set of label names;
            updated in place.
    """
    entry_name = ''
    label_name = ''
    for row in rows:
        last_index = len(row) - 1
        for index, val in enumerate(row):
            if not val:
                continue
            if index == 0:
                # first item ==> entry name (a repeated name starts over)
                entry_name = val
                output_data.setdefault(category_name, {})[entry_name] = {}
            elif index % 2 == 1:
                if index == last_index:
                    # trailing odd column ==> note field with no label of its own
                    label_name = 'Note'
                    labels[label_name] = None
                    fields = _entry_fields(output_data, category_name, entry_name)
                    fields[label_name] = [_escape_pipes(val)]
                else:
                    # label name
                    label_name = val.title().strip()
                    labels[label_name] = None
            else:
                # field value for the most recent label
                fields = _entry_fields(output_data, category_name, entry_name)
                fields.setdefault(label_name, []).append(_escape_pipes(val))


def _output_rows(output_data, labels):
    """Yield the header row followed by one row per (category, entry).

    Each row is [category, entry, <cell per label>]; a cell holds the entry's
    values for that label joined with '|', or '' when it has none.
    """
    yield ['Category', 'Entry'] + list(labels)
    for category, entries in output_data.items():
        for entry, fields in entries.items():
            cells = ['|'.join(fields.get(label) or []) for label in labels]
            yield [category, entry] + cells


def build_codebook(directory='.', output_filename=outputFilename):
    """Merge every CSV file in *directory* into a single codebook CSV.

    Each input file becomes a category named after the file (without its
    extension). The output has the columns Category, Entry and then one
    column per field label, in the order the labels were first seen.

    Args:
        directory: directory holding the input CSV files; the output file is
            written there too. Defaults to the current working directory.
        output_filename: name of the output file inside *directory*.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: if a file cannot be removed, read or written (a missing
            output file is not an error).
    """
    output_path = os.path.join(directory, output_filename)
    # Remove any previous output first so it is not picked up as an input.
    # A missing file simply means this is the first run.
    try:
        os.remove(output_path)
    except FileNotFoundError:
        pass

    # Sorted so the row and column order do not depend on os.listdir order.
    filenames = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith('.csv')
        and os.path.isfile(os.path.join(directory, name))
    )

    output_data = {}
    labels = {}  # dict as an ordered set: stable column order between runs
    for name in filenames:
        # use the filename (without .csv) as the category name
        category_name = os.path.splitext(name)[0]
        # newline='' lets the csv module handle line endings itself
        with open(os.path.join(directory, name), 'r', newline='') as csv_file:
            _collect_rows(csv.reader(csv_file), category_name, output_data, labels)

    with open(output_path, 'w', newline='') as output_file:
        csv.writer(output_file).writerows(_output_rows(output_data, labels))
    return output_path


# Runs on execution (and on import), processing the current working directory.
build_codebook()
    



