The following Google App Script takes XML files created for DMARC reporting and converts them to CSV files. CSV is one of the file formats that Google BigQuery can upload. After the data is uploaded, it can be queried using SQL from within BigQuery or Google Looker Studio to create dashboards.
function parseXml() {
csvrecord = "";
records = 0;
outputfile = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd-HH:mm") + ".csv"
savefolder = DriveApp.getFoldersByName("ToBeUploaded").next();
parsedfolder = DriveApp.getFoldersByName("Parsed").next();
folder = DriveApp.getFoldersByName("ToBeParsed").next();
files = folder.getFiles();
filecount = 0;
while (files.hasNext()){
xml = files.next();
filecount++;
orgname = "";
email = "";
contact = "";
reportid= "";
begindate = 0;
enddate = 0;
pubdomain = "";
adkim = "";
aspf = "";
p = "";
sp = "";
pct = "";
sourceip = "";
count= "";
disposition = "";
dkim = "";
spf = "";
type = "";
comment = "";
header = "";
envelopeto = "";
envelopefrom = "";
spfdomain = "";
spfresult = "";
spfalign = "";
dkimdomain = "";
dkimresult = "";
dkimalign = "";
dkimselector = "";
inputfile = xml.getName();
data = xml.getBlob().getDataAsString();
document = XmlService.parse(data);
root = document.getRootElement();
metadata = root.getChild('report_metadata');
if (metadata != null) {
field = metadata.getChild('org_name');
if (field != null) {orgname = field.getText()};
field = metadata.getChild('email');
if (field != null) {email = field.getText()};
field=metadata.getChild('extra_contact_info');
if (field != null) {contact=field.getText()};
field=metadata.getChild('report_id');
if (field != null) {reportid=field.getText()};
}
date_range=metadata.getChild('date_range');
if (date_range != null) {
field=date_range.getChild('begin');
if (field != null) {begindate=field.getText()};
field=date_range.getChild('end');
if (field != null) {enddate=field.getText()};
}
policy = root.getChild('policy_published');
if (policy != null) {
field = policy.getChild('domain');
if (field != null) {pubdomain = field.getText()};
field = policy.getChild('adkim');
if (field != null) {adkim = field.getText()};
if (adkim == 'r') {adkim = 'relaxed'};
if (adkim == 's') {adkim = 'strict'};
field = policy.getChild('aspf');
if (field != null) {aspf = field.getText()};
if (aspf == 'r') {aspf = 'relaxed'};
if (aspf == 's') {aspf = 'strict'};
field = policy.getChild('p');
if (field != null) {p = field.getText()};
field = policy.getChild('sp');
if (field != null) {sp = field.getText()};
field = policy.getChild('pct');
if (field != null) {pct = field.getText()};
}
record = root.getChildren('record');
if (record != null) {
for (var i = 0; i < record.length; i++) {
row = record[i].getChild('row');
if (row != null) {
field = row.getChild('source_ip');
if (field != null) {sourceip = field.getText()};
field = row.getChild('count');
if (field != null) {count = field.getText()};
policy = row.getChild('policy_evaluated');
}
if (policy != null) {
field = policy.getChild('disposition');
if (field != null) {disposition = field.getText()};
field = policy.getChild('dkim');
if (field != null) {dkim = field.getText()};
field = policy.getChild('spf');
if (field != null) {spf = field.getText()};
reason = policy.getChild('reason');
if (reason != null) {
field = reason.getChild('type');
if (field != null) {type = field.getText()};
field = reason.getChild('comment');
if (field != null) {comment = field.getText()};
}
}
identifiers = record[i].getChild('identifiers');
if (identifiers != null) {
field = identifiers.getChild('header_from');
if (field != null) {header = field.getText()};
field = identifiers.getChild('envelope_to');
if (field != null) {envelopeto = field.getText()};
field = identifiers.getChild('envelope_from');
if (field != null) {envelopefrom = field.getText()};
}
results = record[i].getChild('auth_results');
if (results != null) {
sender = results.getChild('spf');
if (sender != null) {
field = sender.getChild('domain');
if (field != null) {spfdomain = field.getText()};
field = sender.getChild('result');
if (field != null) {spfresult = field.getText()};
}
domain = results.getChild('dkim');
if (domain != null) {
field = domain.getChild('domain');
if (field != null) {dkimdomain = field.getText()};
field = domain.getChild('result');
if (field != null) {dkimresult = field.getText()};
field = domain.getChild('selector');
if (field != null) {dkimselector = field.getText()};
}
spfalign = "spf-unaligned";
if (spfdomain == pubdomain) {
spfalign = "spf-aligned";
}
dkimalign = "dkim-unaligned";
if (dkimdomain == pubdomain) {
dkimalign = "dkim-aligned";
}
var date_begin = new Date(begindate*1000);
var date_end = new Date(enddate*1000);
var fmtBeginDate = Utilities.formatDate(date_begin, "GMT", "yyyy-MM-dd");
var fmtEndDate = Utilities.formatDate(date_end, "GMT", "yyyy-MM-dd");
var fmtBeginTime = Utilities.formatDate(date_begin, "GMT", "HH:mm:ss");
var fmtEndTime = Utilities.formatDate(date_end, "GMT", "HH:mm:ss");
part1 = '"' + orgname + '","' + fmtBeginDate + '","' + fmtBeginTime + '","' + fmtEndDate + '","' + fmtEndTime;
part2 = '","' + spfdomain + '","' + spfresult + '","' + spfalign + '","' + dkimdomain + '","' + dkimresult + '","' + dkimalign + '","' + dkimselector;
part3 = '","' + sourceip + '","' + count + '","' + envelopeto + '","' + envelopefrom + '","' + disposition + '","' + dkim + '","' + spf;
part4 = '","' + header + '","' + email + '","' + contact + '","' + reportid + '","' + pubdomain + '","' + adkim + '","' + aspf + '","' + p + '","' + type;
part5 = '","' + comment + '","' + sp + '","' + pct + '","' + inputfile + '","' + outputfile + '"' + "\n";
csvrecord = csvrecord + part1 + part2 + part3 + part4 + part5;
records++;
}
}
}
xml.moveTo(parsedfolder);
}
file = savefolder.createFile(outputfile, csvrecord);
Logger.log('Number of files processed: ' + filecount);
Logger.log('Number of CSV records created: ' + records);
}
No comments:
Post a Comment