Upload CSV Files to Google BigQuery

This Google App Script gets each file in the "ToBeUploaded" folder in Google Drive and creates a Google BigQuery load job with that CSV data.  The routine then moves the CSV file to the "Uploaded" folder.  To use this code, replace 'YOURPROJECTID', 'YOURDATASET', and 'YOURTABLE' in the code below with your values.  Also, you must add Google BigQuery to the Services your script can use or the insert job will fail.

function UploadCSV() {
tobeuploadedfolder = DriveApp.getFoldersByName("ToBeUploaded").next();
uploadedfolder = DriveApp.getFoldersByName("Uploaded").next();
files = tobeuploadedfolder.getFiles();
projectId = 'YOURPROJECTID';
datasetId = 'YOURDATASET';
tableId = 'YOURTABLE';
while (files.hasNext()){
csvfile = files.next();
data = csvfile.getBlob().setContentType('application/octet-stream');
job = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
skipLeadingRows: 0,
}
}
}
try {
jobResult = BigQuery.Jobs.insert(job, projectId, data);
console.log(`Load job started. Status: ${jobResult.status.state}`);
} catch (err) {
console.log('unable to insert job');
}
csvfile.moveTo(uploadedfolder);
}
}

Parse XML to CSV for DMARC Reporting

The following Google App Script takes XML files created for DMARC reporting and converts them to CSV files.  CSV is one of the file formats that Google BigQuery can upload.  After the data is uploaded, it can be queried using SQL from within BigQuery or Google Looker Studio to create dashboards.

function parseXml() {
csvrecord = "";
records = 0;
outputfile = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd-HH:mm") + ".csv"
savefolder = DriveApp.getFoldersByName("ToBeUploaded").next();
parsedfolder = DriveApp.getFoldersByName("Parsed").next();
folder = DriveApp.getFoldersByName("ToBeParsed").next();
files = folder.getFiles();
filecount = 0;
while (files.hasNext()){
xml = files.next();
filecount++;
orgname = "";
email = "";
contact = "";
reportid= "";
begindate = 0;
enddate = 0;
pubdomain = "";
adkim = "";
aspf = "";
p = "";
sp = "";
pct = "";
sourceip = "";
count= "";
disposition = "";
dkim = "";
spf = "";
type = "";
comment = "";
header = "";
envelopeto = "";
envelopefrom = "";
spfdomain = "";
spfresult = "";
spfalign = "";
dkimdomain = "";
dkimresult = "";
dkimalign = "";
dkimselector = "";
inputfile = xml.getName();
data = xml.getBlob().getDataAsString();
document = XmlService.parse(data);
root = document.getRootElement();
metadata = root.getChild('report_metadata');
if (metadata != null) {
field = metadata.getChild('org_name');
if (field != null) {orgname = field.getText()};
field = metadata.getChild('email');
if (field != null) {email = field.getText()};
field=metadata.getChild('extra_contact_info');
if (field != null) {contact=field.getText()};
field=metadata.getChild('report_id');
if (field != null) {reportid=field.getText()};
}
date_range=metadata.getChild('date_range');
if (date_range != null) {
field=date_range.getChild('begin');
if (field != null) {begindate=field.getText()};
field=date_range.getChild('end');
if (field != null) {enddate=field.getText()};
}
policy = root.getChild('policy_published');
if (policy != null) {
field = policy.getChild('domain');
if (field != null) {pubdomain = field.getText()};
field = policy.getChild('adkim');
if (field != null) {adkim = field.getText()};
if (adkim == 'r') {adkim = 'relaxed'};
if (adkim == 's') {adkim = 'strict'};
field = policy.getChild('aspf');
if (field != null) {aspf = field.getText()};
if (aspf == 'r') {aspf = 'relaxed'};
if (aspf == 's') {aspf = 'strict'};
field = policy.getChild('p');
if (field != null) {p = field.getText()};
field = policy.getChild('sp');
if (field != null) {sp = field.getText()};
field = policy.getChild('pct');
if (field != null) {pct = field.getText()};
}
record = root.getChildren('record');
if (record != null) {
for (var i = 0; i < record.length; i++) {
row = record[i].getChild('row');
if (row != null) {
field = row.getChild('source_ip');
if (field != null) {sourceip = field.getText()};
field = row.getChild('count');
if (field != null) {count = field.getText()};
policy = row.getChild('policy_evaluated');
}
if (policy != null) {
field = policy.getChild('disposition');
if (field != null) {disposition = field.getText()};
field = policy.getChild('dkim');
if (field != null) {dkim = field.getText()};
field = policy.getChild('spf');
if (field != null) {spf = field.getText()};
reason = policy.getChild('reason');
if (reason != null) {
field = reason.getChild('type');
if (field != null) {type = field.getText()};
field = reason.getChild('comment');
if (field != null) {comment = field.getText()};
}
}
identifiers = record[i].getChild('identifiers');
if (identifiers != null) {
field = identifiers.getChild('header_from');
if (field != null) {header = field.getText()};
field = identifiers.getChild('envelope_to');
if (field != null) {envelopeto = field.getText()};
field = identifiers.getChild('envelope_from');
if (field != null) {envelopefrom = field.getText()};
}
results = record[i].getChild('auth_results');
if (results != null) {
sender = results.getChild('spf');
if (sender != null) {
field = sender.getChild('domain');
if (field != null) {spfdomain = field.getText()};
field = sender.getChild('result');
if (field != null) {spfresult = field.getText()};
}
domain = results.getChild('dkim');
if (domain != null) {
field = domain.getChild('domain');
if (field != null) {dkimdomain = field.getText()};
field = domain.getChild('result');
if (field != null) {dkimresult = field.getText()};
field = domain.getChild('selector');
if (field != null) {dkimselector = field.getText()};
}
spfalign = "spf-unaligned";
if (spfdomain == pubdomain) {
spfalign = "spf-aligned";
}
dkimalign = "dkim-unaligned";
if (dkimdomain == pubdomain) {
dkimalign = "dkim-aligned";
}
var date_begin = new Date(begindate*1000);
var date_end = new Date(enddate*1000);
var fmtBeginDate = Utilities.formatDate(date_begin, "GMT", "yyyy-MM-dd");
var fmtEndDate = Utilities.formatDate(date_end, "GMT", "yyyy-MM-dd");
var fmtBeginTime = Utilities.formatDate(date_begin, "GMT", "HH:mm:ss");
var fmtEndTime = Utilities.formatDate(date_end, "GMT", "HH:mm:ss");

part1 = '"' + orgname + '","' + fmtBeginDate + '","' + fmtBeginTime + '","' + fmtEndDate + '","' + fmtEndTime;
part2 = '","' + spfdomain + '","' + spfresult + '","' + spfalign + '","' + dkimdomain + '","' + dkimresult + '","' + dkimalign + '","' + dkimselector;
part3 = '","' + sourceip + '","' + count + '","' + envelopeto + '","' + envelopefrom + '","' + disposition + '","' + dkim + '","' + spf;
part4 = '","' + header + '","' + email + '","' + contact + '","' + reportid + '","' + pubdomain + '","' + adkim + '","' + aspf + '","' + p + '","' + type;
part5 = '","' + comment + '","' + sp + '","' + pct + '","' + inputfile + '","' + outputfile + '"' + "\n";

csvrecord = csvrecord + part1 + part2 + part3 + part4 + part5;
records++;
}
}
}
xml.moveTo(parsedfolder);
}
file = savefolder.createFile(outputfile, csvrecord);
Logger.log('Number of files processed: ' + filecount);
Logger.log('Number of CSV records created: ' + records);
}

Process Attachments from DMARC Reporting Emails

Below is the Google App Script code I wrote to process emails and save attachments.  The purpose is stated in the comments at the top of the function below.  Using a unique Gmail inbox to gather data for a specific purpose and programmatically convert that data for my needs is very valuable.  Specifically, this program is the first of three that will (1) get XML files from emails, (2) convert those XML files into CSV files, and (3) upload those CSV files to Google's BigQuery service.  From there I can query the data with BigQuery and develop a dashboard using Google Looker.

//
// This function processes emails in the Inbox and saves ZIP, GZ, and XML files
// to Google Drive. This was developed to process DMARC files sent by receiving
// email servers that have implemented DMARC. DMARC is Domain-based Message
// Authentication Reporting & Conformance and it leverages the SPF and DKIM
// email authentication protocols.
//
function AttachmentsToDrive() {
savefolder = DriveApp.getFoldersByName("ToBeParsed").next();
var start = 0;
emails = 0;
attachs = 0;
// Get up to 50 emails from the inbox
var threads = GmailApp.getInboxThreads(start, 50);
// Process each email
for (var i = 0; i < threads.length; i++) {
emails++;
var message = threads[i].getMessages();
for (var m = 0; m < message.length; m++) {
message[m].star();
threads[i].markRead();
var subject = message[m].getSubject();
var att = message[m].getAttachments();
// Process each attachment int he email
for (var j=0; j<att.length; j++) {
attachs++;
var type = att[j].getContentType();
// If the attachment is a ZIP file, unzip it and add each attachment to Google Drive
if (type.indexOf("application/zip")>-1) {
att[j].setContentType('application/zip');
var files = Utilities.unzip(att[j]);
for (var k=0; k<files.length; k++) {
var attachname = files[k].getName();
Logger.log(attachname);
try {file = savefolder.createFile(files[k]);}
catch (e) {Logger.log(e.toString());}
}
}
// If the attachment is a GZ file, uncompress it and add it to Google Drive
if (type.indexOf("application/gzip")>-1) {
att[j].setContentType('application/x-gzip');
var blob = Utilities.ungzip(att[j]);
Logger.log(blob.getName());
blob.setContentType('application/xml');
try {file = savefolder.createFile(blob);}
catch (e) {Logger.log(e.toString());}
}
// If the attachment is an XML file, add it to Google Drive
if (type.indexOf("application/xml")>-1) {
try {file = savefolder.createFile(att[j]);}
catch (e) {Logger.log(e.toString());}
}
}
// Move the email to the Archive
threads[i].moveToArchive();
}
}
Logger.log('Number of emails processed: ' + emails);
Logger.log('Number of attachments processed: ' + attachs);
}

Upload CSV Files to Google BigQuery

This Google App Script gets each file in the "ToBeUploaded" folder in Google Drive and creates a Google BigQuery load job with tha...