Upload CSV Files to Google BigQuery

This Google App Script gets each file in the "ToBeUploaded" folder in Google Drive and creates a Google BigQuery load job with that CSV data.  The routine then moves the CSV file to the "Uploaded" folder.  To use this code, replace 'YOURPROJECTID', 'YOURDATASET', and 'YOURTABLE' in the code below with your values.  Also, you must add Google BigQuery to the Services your script can use or the insert job will fail.

//
// Uploads each CSV file in the "ToBeUploaded" Drive folder to a BigQuery
// table via a load job, then moves the file to the "Uploaded" folder.
// Replace 'YOURPROJECTID', 'YOURDATASET', and 'YOURTABLE' with your values,
// and add the BigQuery advanced service to the script before running.
//
function UploadCSV() {
  var tobeuploadedfolder = DriveApp.getFoldersByName("ToBeUploaded").next();
  var uploadedfolder = DriveApp.getFoldersByName("Uploaded").next();
  var files = tobeuploadedfolder.getFiles();
  var projectId = 'YOURPROJECTID';
  var datasetId = 'YOURDATASET';
  var tableId = 'YOURTABLE';
  while (files.hasNext()) {
    var csvfile = files.next();
    // BigQuery expects the load-job media payload as a raw byte stream.
    var data = csvfile.getBlob().setContentType('application/octet-stream');
    var job = {
      configuration: {
        load: {
          destinationTable: {
            projectId: projectId,
            datasetId: datasetId,
            tableId: tableId
          },
          // The CSVs produced by parseXml() have no header row.
          skipLeadingRows: 0
        }
      }
    };
    try {
      var jobResult = BigQuery.Jobs.insert(job, projectId, data);
      console.log(`Load job started. Status: ${jobResult.status.state}`);
      // Only move the file once the load job was accepted; the original
      // moved it even when the insert failed, losing track of bad files.
      csvfile.moveTo(uploadedfolder);
    } catch (err) {
      // Log the actual error instead of a generic message so failures
      // (missing service, bad table, quota) are diagnosable.
      console.log('Unable to insert job for ' + csvfile.getName() + ': ' + err);
    }
  }
}

Parse XML to CSV for DMARC Reporting

The following Google App Script takes XML files created for DMARC reporting and converts them to CSV files.  CSV is one of the file formats that Google BigQuery can upload.  After the data is uploaded, it can be queried using SQL from within BigQuery or Google Looker Studio to create dashboards.

//
// Parses DMARC aggregate-report XML files from the "ToBeParsed" Drive folder
// into CSV records, saves one timestamped CSV file to "ToBeUploaded", and
// moves each processed XML file to the "Parsed" folder.
//
function parseXml() {
  var csvrecord = "";
  var records = 0;
  // One CSV output file per run, named with the current GMT timestamp.
  var outputfile = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd-HH:mm") + ".csv";
  var savefolder = DriveApp.getFoldersByName("ToBeUploaded").next();
  var parsedfolder = DriveApp.getFoldersByName("Parsed").next();
  var folder = DriveApp.getFoldersByName("ToBeParsed").next();
  var files = folder.getFiles();
  var filecount = 0;
  while (files.hasNext()) {
    var xml = files.next();
    filecount++;
    // Per-file defaults.  Record-level fields below are NOT reset between
    // <record> elements of the same file, so a record that omits an element
    // inherits the previous record's value (unchanged from the original).
    var orgname = "";
    var email = "";
    var contact = "";
    var reportid = "";
    var begindate = 0;
    var enddate = 0;
    var pubdomain = "";
    var adkim = "";
    var aspf = "";
    var p = "";
    var sp = "";
    var pct = "";
    var sourceip = "";
    var count = "";
    var disposition = "";
    var dkim = "";
    var spf = "";
    var type = "";
    var comment = "";
    var header = "";
    var envelopeto = "";
    var envelopefrom = "";
    var spfdomain = "";
    var spfresult = "";
    var spfalign = "";
    var dkimdomain = "";
    var dkimresult = "";
    var dkimalign = "";
    var dkimselector = "";
    var field;
    var inputfile = xml.getName();
    var data = xml.getBlob().getDataAsString();
    var document = XmlService.parse(data);
    var root = document.getRootElement();
    // <report_metadata>: who sent the report and the reporting window.
    var metadata = root.getChild('report_metadata');
    if (metadata != null) {
      field = metadata.getChild('org_name');
      if (field != null) { orgname = field.getText(); }
      field = metadata.getChild('email');
      if (field != null) { email = field.getText(); }
      field = metadata.getChild('extra_contact_info');
      if (field != null) { contact = field.getText(); }
      field = metadata.getChild('report_id');
      if (field != null) { reportid = field.getText(); }
      // Fix: <date_range> is a child of <report_metadata>; the original read
      // it outside this null check and crashed when the element was missing.
      var date_range = metadata.getChild('date_range');
      if (date_range != null) {
        field = date_range.getChild('begin');
        if (field != null) { begindate = field.getText(); }
        field = date_range.getChild('end');
        if (field != null) { enddate = field.getText(); }
      }
    }
    // <policy_published>: the domain owner's published DMARC policy.
    var policy = root.getChild('policy_published');
    if (policy != null) {
      field = policy.getChild('domain');
      if (field != null) { pubdomain = field.getText(); }
      field = policy.getChild('adkim');
      if (field != null) { adkim = field.getText(); }
      if (adkim == 'r') { adkim = 'relaxed'; }
      if (adkim == 's') { adkim = 'strict'; }
      field = policy.getChild('aspf');
      if (field != null) { aspf = field.getText(); }
      if (aspf == 'r') { aspf = 'relaxed'; }
      if (aspf == 's') { aspf = 'strict'; }
      field = policy.getChild('p');
      if (field != null) { p = field.getText(); }
      field = policy.getChild('sp');
      if (field != null) { sp = field.getText(); }
      field = policy.getChild('pct');
      if (field != null) { pct = field.getText(); }
    }
    // One CSV row is emitted per <record> element (when it has auth_results).
    var record = root.getChildren('record');
    if (record != null) {
      for (var i = 0; i < record.length; i++) {
        // Fix: use a dedicated variable for <policy_evaluated> instead of
        // reusing "policy", and reset it per record so a record without a
        // <row> does not process a stale element from a previous iteration.
        var policyeval = null;
        var row = record[i].getChild('row');
        if (row != null) {
          field = row.getChild('source_ip');
          if (field != null) { sourceip = field.getText(); }
          field = row.getChild('count');
          if (field != null) { count = field.getText(); }
          policyeval = row.getChild('policy_evaluated');
        }
        if (policyeval != null) {
          field = policyeval.getChild('disposition');
          if (field != null) { disposition = field.getText(); }
          field = policyeval.getChild('dkim');
          if (field != null) { dkim = field.getText(); }
          field = policyeval.getChild('spf');
          if (field != null) { spf = field.getText(); }
          var reason = policyeval.getChild('reason');
          if (reason != null) {
            field = reason.getChild('type');
            if (field != null) { type = field.getText(); }
            field = reason.getChild('comment');
            if (field != null) { comment = field.getText(); }
          }
        }
        var identifiers = record[i].getChild('identifiers');
        if (identifiers != null) {
          field = identifiers.getChild('header_from');
          if (field != null) { header = field.getText(); }
          field = identifiers.getChild('envelope_to');
          if (field != null) { envelopeto = field.getText(); }
          field = identifiers.getChild('envelope_from');
          if (field != null) { envelopefrom = field.getText(); }
        }
        var results = record[i].getChild('auth_results');
        if (results != null) {
          var sender = results.getChild('spf');
          if (sender != null) {
            field = sender.getChild('domain');
            if (field != null) { spfdomain = field.getText(); }
            field = sender.getChild('result');
            if (field != null) { spfresult = field.getText(); }
          }
          var domain = results.getChild('dkim');
          if (domain != null) {
            field = domain.getChild('domain');
            if (field != null) { dkimdomain = field.getText(); }
            field = domain.getChild('result');
            if (field != null) { dkimresult = field.getText(); }
            field = domain.getChild('selector');
            if (field != null) { dkimselector = field.getText(); }
          }
          // Alignment: an exact match between the authenticated domain and
          // the published domain counts as aligned.
          spfalign = (spfdomain == pubdomain) ? "spf-aligned" : "spf-unaligned";
          dkimalign = (dkimdomain == pubdomain) ? "dkim-aligned" : "dkim-unaligned";
          // DMARC date_range values are epoch seconds; format as GMT.
          var date_begin = new Date(begindate * 1000);
          var date_end = new Date(enddate * 1000);
          var fmtBeginDate = Utilities.formatDate(date_begin, "GMT", "yyyy-MM-dd");
          var fmtEndDate = Utilities.formatDate(date_end, "GMT", "yyyy-MM-dd");
          var fmtBeginTime = Utilities.formatDate(date_begin, "GMT", "HH:mm:ss");
          var fmtEndTime = Utilities.formatDate(date_end, "GMT", "HH:mm:ss");

          var part1 = '"' + orgname + '","' + fmtBeginDate + '","' + fmtBeginTime + '","' + fmtEndDate + '","' + fmtEndTime;
          var part2 = '","' + spfdomain + '","' + spfresult + '","' + spfalign + '","' + dkimdomain + '","' + dkimresult + '","' + dkimalign + '","' + dkimselector;
          var part3 = '","' + sourceip + '","' + count + '","' + envelopeto + '","' + envelopefrom + '","' + disposition + '","' + dkim + '","' + spf;
          var part4 = '","' + header + '","' + email + '","' + contact + '","' + reportid + '","' + pubdomain + '","' + adkim + '","' + aspf + '","' + p + '","' + type;
          var part5 = '","' + comment + '","' + sp + '","' + pct + '","' + inputfile + '","' + outputfile + '"' + "\n";

          csvrecord = csvrecord + part1 + part2 + part3 + part4 + part5;
          records++;
        }
      }
    }
    xml.moveTo(parsedfolder);
  }
  var file = savefolder.createFile(outputfile, csvrecord);
  Logger.log('Number of files processed: ' + filecount);
  Logger.log('Number of CSV records created: ' + records);
}

Process Attachments from DMARC Reporting Emails

Below is the Google App Script code I wrote to process emails and save attachments.  The purpose is stated in the comments at the top of the function below.  Using a unique Gmail inbox to gather data for a specific purpose and programmatically convert that data for my needs is very valuable.  Specifically, this program is the first of three that will (1) get XML files from emails, (2) convert those XML files into CSV files, and (3) upload those CSV files to Google's BigQuery service.  From there I can query the data with BigQuery and develop a dashboard using Google Looker.

//
// This function processes emails in the Inbox and saves ZIP, GZ, and XML files
// to Google Drive. This was developed to process DMARC files sent by receiving
// email servers that have implemented DMARC. DMARC is Domain-based Message
// Authentication Reporting & Conformance and it leverages the SPF and DKIM
// email authentication protocols.
//
function AttachmentsToDrive() {
  var savefolder = DriveApp.getFoldersByName("ToBeParsed").next();
  var start = 0;
  var emails = 0;
  var attachs = 0;
  var file;
  // Get up to 50 emails from the inbox
  var threads = GmailApp.getInboxThreads(start, 50);
  // Process each email thread
  for (var i = 0; i < threads.length; i++) {
    emails++;
    // Mark the thread read once per thread (the original re-marked it for
    // every message in the thread).
    threads[i].markRead();
    var message = threads[i].getMessages();
    for (var m = 0; m < message.length; m++) {
      message[m].star();
      var att = message[m].getAttachments();
      // Process each attachment in the email
      for (var j = 0; j < att.length; j++) {
        attachs++;
        var type = att[j].getContentType();
        // If the attachment is a ZIP file, unzip it and add each file to Google Drive
        if (type.indexOf("application/zip") > -1) {
          att[j].setContentType('application/zip');
          var files = Utilities.unzip(att[j]);
          for (var k = 0; k < files.length; k++) {
            var attachname = files[k].getName();
            Logger.log(attachname);
            try { file = savefolder.createFile(files[k]); }
            catch (e) { Logger.log(e.toString()); }
          }
        }
        // If the attachment is gzipped, uncompress it and add it to Google Drive.
        // Fix: match both "application/gzip" and "application/x-gzip" — some
        // DMARC reporters send the latter, which the original check missed.
        if (type.indexOf("gzip") > -1) {
          att[j].setContentType('application/x-gzip');
          var blob = Utilities.ungzip(att[j]);
          Logger.log(blob.getName());
          blob.setContentType('application/xml');
          try { file = savefolder.createFile(blob); }
          catch (e) { Logger.log(e.toString()); }
        }
        // If the attachment is an XML file, add it to Google Drive
        if (type.indexOf("application/xml") > -1) {
          try { file = savefolder.createFile(att[j]); }
          catch (e) { Logger.log(e.toString()); }
        }
      }
    }
    // Move the email thread to the Archive
    threads[i].moveToArchive();
  }
  Logger.log('Number of emails processed: ' + emails);
  Logger.log('Number of attachments processed: ' + attachs);
}

Spoofing MAC Addresses

I developed this bash script for my MacBook Air to simplify the process of getting devices without a keyboard and mouse authenticated to a wireless network that requires a userid and password.  In my case these were an Amazon Echo and an Amazon Fire TV Cube that we wanted connected at our rental condo.

This was accomplished by setting the MacBook Air’s MAC address to the MAC address of each of the Amazon devices, which allowed the MacBook Air to complete the authentication process.

-------------
#!/usr/bin/env bash
#
# This script is used to temporarily change the Macbook Air's MAC address
# to the Amazon Echo 3 and the Amazon Fire TV Cube.  This will allow the
# Macbook Air to authenticate to networks that require a userid and password
# by spoofing their MAC addresses.
#

# Prompt the user to disconnect from the current Wifi network and wait.
disconnect_wifi() {
  echo "Disconnect from the current Wifi network by holding the Option Key, clicking the Wifi icon at the top and clicking Disconnect from ..."
  echo "Press enter when completed."
  read -r _
}

# spoof_device NAME MAC — walk the user through spoofing one device's MAC.
# Fix vs. original: prompts are quoted so "[y/n]" is not glob-expanded, and
# "$answer" is quoted so an empty reply no longer breaks the [ ] test.
spoof_device() {
  local name=$1
  local mac=$2
  echo "Do you want to configure the ${name}? [y/n]"
  read -r answer
  if [ "$answer" = "y" ]
  then
    echo "Make sure the ${name} is powered off, then press Enter."
    read -r _
    disconnect_wifi
    echo "Changing the MAC address to the ${name}.  Enter the sudo password if prompted."

    sudo ifconfig en0 ether "$mac"

    echo "Now spoofing the ${name} MAC address.  Connect and authorize to the wireless network, then press Enter."
    read -r _
  fi
}

spoof_device "Amazon Echo 3" "08:84:9D:17:F3:B7"
spoof_device "Amazon Fire TV Cube" "00:71:47:77:80:0F"

echo "Resetting the Macbook Air to its regular MAC address."

disconnect_wifi

echo "Changing the MAC address to the Macbook Air.  Enter the sudo password if prompted."

sudo ifconfig en0 ether 7C:D1:C3:DF:EC:1F

echo "The Macbook Air MAC address has been reset.  Connect to the wireless network."

echo "Press enter when completed."
read -r _

echo "An ifconfig will be performed.  Verify the MAC address is 7C:D1:C3:DF:EC:1F"
ifconfig

Remove PDF Restrictions

The Java code below removes any PDF restrictions if they exist on the document .  The program accepts two arguments, the input and output file names, which can be the same if desired.  The program requires a couple pieces from the PDFBox subproject library, available at https://pdfbox.apache.org/download.cgi

This code will not open a PDF that requires a master password, but it's a simple addition to add that to the PDDocument.load().


import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;

public class pdfRemove {

  public static void main(String[] args) throws InvalidPasswordException, IOException {
    File file = new File(args[0]); 
PDDocument document = PDDocument.load(file);

    if (document.isEncrypted()) {
      document.setAllSecurityToBeRemoved(true);
}

    document.save(new File(args[1]));
document.close(); 
    }
  
}

Adding Text To PDF Files

The Java code below uses a number of classes from PDFBox, an Apache open source project that allows for working with PDF documents.  In the code below, an existing PDF ,"C:\Template.pdf", is loaded, the first page (i.e. 0) is accessed and a ContentStream is built in APPEND mode.  The font is set to 20 point Times Roman and three lines are added to the PDF using the text in the first three passed arguments.  The fourth argument is used to make the output filename unique by appending it, and ".pdf", to "C:\Page-".  The file is saved with its new name and the document closed.

The purpose of this code is to add mailing addresses to a newsletter, but it could be modified to customize any type of document, making it both personal and professional.


import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.font.PDType1Font;

public class labels {

public static void main(String[] args) throws InvalidPasswordException, IOException {
  File file = new File("C:\Template.pdf"); 
  PDDocument document = PDDocument.load(file);
  PDPage page = document.getPage(0);
  PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true);
  contentStream.setFont(PDType1Font.TIMES_ROMAN, 20);

  contentStream.beginText();
  contentStream.newLineAtOffset(280, 630);
  String text1 = args[0];
  contentStream.showText(text1);
  contentStream.endText();

  contentStream.beginText();
  contentStream.newLineAtOffset(280, 600);
  String text2 = args[1];
  contentStream.showText(text2);
  contentStream.endText();

  contentStream.beginText();
  contentStream.newLineAtOffset(280, 570);
  String text3 = args[2];
  contentStream.showText(text3);
  contentStream.endText();

  contentStream.close();
  String outputfile = "C:\Page-" + args[3] + ".pdf";
  document.save(new File(outputfile));
  document.close(); 
  }
}

R - Prime Numbers

This R code will generate tables covering the first 2 million numbers, indicating whether each number is prime (divisible only by 1 and itself) or not, and recording its lowest divisor (or 0 for primes).  The final two statements display the percentage of non-prime numbers (92.55%) and prime numbers (7.45%).


# Classify the integers 1..limit as "Prime"/"Non-Prime" by trial division up
# to sqrt(x), recording each number's lowest divisor > 1 (0 for primes).
# Returns list(prime = character vector, divisor = numeric vector).
classify_primes <- function(limit) {
  prime <- rep("Prime", limit)
  divisor <- rep(0, limit)
  # 1 is not prime by definition (the original left it marked "Prime").
  prime[1] <- "Non-Prime"
  divisor[1] <- 1
  if (limit >= 2) {
    for (x in 2:limit) {
      z <- floor(sqrt(x))
      # Guard: for x = 2 or 3, sqrt(x) < 2 and the original's 2:sqrt(x)
      # counted DOWN to c(2), wrongly marking 2 as non-prime.
      if (z >= 2) {
        for (y in 2:z) {
          if (x %% y == 0) {
            prime[x] <- "Non-Prime"
            divisor[x] <- y
            break
          }
        }
      }
    }
  }
  list(prime = prime, divisor = divisor)
}

num <- 1:2000000
result <- classify_primes(2000000)
prime <- result$prime
divisor <- result$divisor
# Display the proportion of prime vs. non-prime numbers.
ptable <- table(prime)
prop.table(ptable)


Upload CSV Files to Google BigQuery

This Google App Script gets each file in the "ToBeUploaded" folder in Google Drive and creates a Google BigQuery load job with tha...