Parse XML to CSV for DMARC Reporting

The following Google App Script takes XML files created for DMARC reporting and converts them to CSV files.  CSV is one of the file formats that Google BigQuery can upload.  After the data is uploaded, it can be queried using SQL from within BigQuery or Google Looker Studio to create dashboards.

function parseXml() {
csvrecord = "";
records = 0;
outputfile = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd-HH:mm") + ".csv"
savefolder = DriveApp.getFoldersByName("ToBeUploaded").next();
parsedfolder = DriveApp.getFoldersByName("Parsed").next();
folder = DriveApp.getFoldersByName("ToBeParsed").next();
files = folder.getFiles();
filecount = 0;
while (files.hasNext()){
xml = files.next();
filecount++;
orgname = "";
email = "";
contact = "";
reportid= "";
begindate = 0;
enddate = 0;
pubdomain = "";
adkim = "";
aspf = "";
p = "";
sp = "";
pct = "";
sourceip = "";
count= "";
disposition = "";
dkim = "";
spf = "";
type = "";
comment = "";
header = "";
envelopeto = "";
envelopefrom = "";
spfdomain = "";
spfresult = "";
spfalign = "";
dkimdomain = "";
dkimresult = "";
dkimalign = "";
dkimselector = "";
inputfile = xml.getName();
data = xml.getBlob().getDataAsString();
document = XmlService.parse(data);
root = document.getRootElement();
metadata = root.getChild('report_metadata');
if (metadata != null) {
field = metadata.getChild('org_name');
if (field != null) {orgname = field.getText()};
field = metadata.getChild('email');
if (field != null) {email = field.getText()};
field=metadata.getChild('extra_contact_info');
if (field != null) {contact=field.getText()};
field=metadata.getChild('report_id');
if (field != null) {reportid=field.getText()};
}
date_range=metadata.getChild('date_range');
if (date_range != null) {
field=date_range.getChild('begin');
if (field != null) {begindate=field.getText()};
field=date_range.getChild('end');
if (field != null) {enddate=field.getText()};
}
policy = root.getChild('policy_published');
if (policy != null) {
field = policy.getChild('domain');
if (field != null) {pubdomain = field.getText()};
field = policy.getChild('adkim');
if (field != null) {adkim = field.getText()};
if (adkim == 'r') {adkim = 'relaxed'};
if (adkim == 's') {adkim = 'strict'};
field = policy.getChild('aspf');
if (field != null) {aspf = field.getText()};
if (aspf == 'r') {aspf = 'relaxed'};
if (aspf == 's') {aspf = 'strict'};
field = policy.getChild('p');
if (field != null) {p = field.getText()};
field = policy.getChild('sp');
if (field != null) {sp = field.getText()};
field = policy.getChild('pct');
if (field != null) {pct = field.getText()};
}
record = root.getChildren('record');
if (record != null) {
for (var i = 0; i < record.length; i++) {
row = record[i].getChild('row');
if (row != null) {
field = row.getChild('source_ip');
if (field != null) {sourceip = field.getText()};
field = row.getChild('count');
if (field != null) {count = field.getText()};
policy = row.getChild('policy_evaluated');
}
if (policy != null) {
field = policy.getChild('disposition');
if (field != null) {disposition = field.getText()};
field = policy.getChild('dkim');
if (field != null) {dkim = field.getText()};
field = policy.getChild('spf');
if (field != null) {spf = field.getText()};
reason = policy.getChild('reason');
if (reason != null) {
field = reason.getChild('type');
if (field != null) {type = field.getText()};
field = reason.getChild('comment');
if (field != null) {comment = field.getText()};
}
}
identifiers = record[i].getChild('identifiers');
if (identifiers != null) {
field = identifiers.getChild('header_from');
if (field != null) {header = field.getText()};
field = identifiers.getChild('envelope_to');
if (field != null) {envelopeto = field.getText()};
field = identifiers.getChild('envelope_from');
if (field != null) {envelopefrom = field.getText()};
}
results = record[i].getChild('auth_results');
if (results != null) {
sender = results.getChild('spf');
if (sender != null) {
field = sender.getChild('domain');
if (field != null) {spfdomain = field.getText()};
field = sender.getChild('result');
if (field != null) {spfresult = field.getText()};
}
domain = results.getChild('dkim');
if (domain != null) {
field = domain.getChild('domain');
if (field != null) {dkimdomain = field.getText()};
field = domain.getChild('result');
if (field != null) {dkimresult = field.getText()};
field = domain.getChild('selector');
if (field != null) {dkimselector = field.getText()};
}
spfalign = "spf-unaligned";
if (spfdomain == pubdomain) {
spfalign = "spf-aligned";
}
dkimalign = "dkim-unaligned";
if (dkimdomain == pubdomain) {
dkimalign = "dkim-aligned";
}
var date_begin = new Date(begindate*1000);
var date_end = new Date(enddate*1000);
var fmtBeginDate = Utilities.formatDate(date_begin, "GMT", "yyyy-MM-dd");
var fmtEndDate = Utilities.formatDate(date_end, "GMT", "yyyy-MM-dd");
var fmtBeginTime = Utilities.formatDate(date_begin, "GMT", "HH:mm:ss");
var fmtEndTime = Utilities.formatDate(date_end, "GMT", "HH:mm:ss");

part1 = '"' + orgname + '","' + fmtBeginDate + '","' + fmtBeginTime + '","' + fmtEndDate + '","' + fmtEndTime;
part2 = '","' + spfdomain + '","' + spfresult + '","' + spfalign + '","' + dkimdomain + '","' + dkimresult + '","' + dkimalign + '","' + dkimselector;
part3 = '","' + sourceip + '","' + count + '","' + envelopeto + '","' + envelopefrom + '","' + disposition + '","' + dkim + '","' + spf;
part4 = '","' + header + '","' + email + '","' + contact + '","' + reportid + '","' + pubdomain + '","' + adkim + '","' + aspf + '","' + p + '","' + type;
part5 = '","' + comment + '","' + sp + '","' + pct + '","' + inputfile + '","' + outputfile + '"' + "\n";

csvrecord = csvrecord + part1 + part2 + part3 + part4 + part5;
records++;
}
}
}
xml.moveTo(parsedfolder);
}
file = savefolder.createFile(outputfile, csvrecord);
Logger.log('Number of files processed: ' + filecount);
Logger.log('Number of CSV records created: ' + records);
}

No comments:

Post a Comment

Upload CSV Files to Google BigQuery

This Google App Script gets each file in the "ToBeUploaded" folder in Google Drive and creates a Google BigQuery load job with tha...