pygaR: Example for pygar_form() Function

Load Libraries

library(pygaR)
library(ggplot2)
library(stringr)
library(XML)

Setup Default ggplot2 Theme

# Default plot theme: black-and-white base with larger title/axis text and
# vertical x-axis tick labels. `d` is kept as the working copy.
d <- theme_bw() +
    theme(
        plot.title = element_text(size=30),
        axis.title = element_text(size=20),
        axis.text.x = element_text(angle=90, size=15)
    )
def_theme <- d

https://www.bloomberg.com/news/articles/2017-04-28/alphabet-loves-google-ceo-so-much-he-gets-hundreds-of-millions

https://www.wsj.com/articles/google-ceo-tops-other-alphabet-execs-with-200-million-pay-1493424255

The articles mention the day “Friday”, which translates to 2017-04-28.

Data Analysis

Search for Filings That Day

# Look up the filings index for 2017-04-28, filtered by company pattern.
filings <- pygar_master(
    company='/alphabet inc/i',
    date=20170428
)
filings
##       CIK  Company.Name Form.Type Date.Filed
## 1 1652044 Alphabet Inc.   DEF 14A   20170428
## 2 1652044 Alphabet Inc.   DEFA14A   20170428
##                                     File.Name Quarter     Date
## 1 edgar/data/1652044/0001308179-17-000170.txt       2 20170428
## 2 edgar/data/1652044/0001308179-17-000171.txt       2 20170428

Grab Form Information

# Fetch the first filing returned by the search above.
form <- pygar_form(filings[["File.Name"]][1])

Show Some Basic Form Information

# Top-level components of the fetched form.
names(form)
## [1] "Headers" "Body"
# Use the full component name: `form$Header` only resolves because `$`
# partially matches "Headers", and would silently return NULL if another
# component starting with "Header" were ever added.
names(form$Headers)
##  [1] "Acceptance.Datetime"        "Date.As.Of.Change"         
##  [3] "Conformed.Submission.Type"  "Filer"                     
##  [5] "Filed.As.Of.Date"           "Sec.Document"              
##  [7] "Accession.Number"           "Public.Document.Count"     
##  [9] "Effectiveness.Date"         "Conformed.Period.Of.Report"
## [11] "Sec.Header"
# Fields available on each document inside the form body.
names(form$Body[[1]])
## [1] "Filename"    "Text"        "Description" "Text.Type"   "Type"       
## [6] "Sequence"

Find a Relevant Document

# One case-insensitive mask per name over the Body entries, AND-ed together
# so only entries mentioning both names are TRUE.
Reduce(
    `&`,
    lapply(c('pichai', 'schmidt'), grepl, x=form$Body, ignore.case=TRUE)
)
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE

Grab Relevant Document

# The first Body entry is the only one flagged TRUE above.
doc <- form[["Body"]][[1]]

Write To File

# Pass the path itself rather than an unopened file() connection: given a
# character string, writeLines() opens the file and closes it again when it
# returns, so no connection is left for the garbage collector to clean up
# (the source of the "closing unused connection" warning).
writeLines(doc$Text, "alphabet.html")

alphabet.html

Display Summary Compensation Table

# Parse `html` with htmlParse() and return the xmlValue() of every node
# selected by `xpath`, as produced by xpathApply().
#
# Args:
#   html:  input handed unchanged to htmlParse().
#   xpath: XPath expression selecting the nodes of interest.
sct_xmlparse <- function(html, xpath){
    xpathApply(htmlParse(html), xpath, xmlValue)
}
# Extract the first HTML table that looks like a Summary Compensation Table.
#
# A table qualifies when its markup contains all of the (case-sensitive)
# words "Salary", "Bonus" and "Principal". The selected table is then
# simplified: tag attributes are dropped, whitespace between tags is removed,
# and every tag other than the table/paragraph/line-break ones is stripped.
#
# Args:
#   data: character; HTML source of one document. Only the first element is
#         used.
#
# Returns:
#   A length-one character vector holding the cleaned "<TABLE>...</TABLE>"
#   markup, or NA_character_ (with a warning) when the input is empty or no
#   table qualifies.
sct_grab <- function(data){
    if(length(data) == 0 || is.na(data[[1]])){
        warning("sct_grab: no HTML text supplied", call.=FALSE)
        return(NA_character_)
    }

    keywords <- c("Salary", "Bonus", "Principal")

    # Split on the opening tag regardless of case. The first piece is the text
    # that precedes any table, so it is never a candidate.
    chunks <- strsplit(trimws(data[[1]]), "(?i)<TABLE", perl=TRUE)[[1]][-1]

    found <- NA_character_
    for(chunk in chunks){
        # Keep only what belongs to this table (up to its closing tag).
        body <- sub("(?is)</TABLE>.*$", "", chunk, perl=TRUE)

        has_all <- all(
            vapply(keywords, grepl, logical(1), x=body, fixed=TRUE)
        )
        if(has_all){
            found <- paste0("<TABLE", body, "</TABLE>")
            break
        }
    }

    if(is.na(found)){
        warning("sct_grab: no table mentions Salary, Bonus and Principal",
                call.=FALSE)
        return(NA_character_)
    }

    # Drop attributes from opening tags whose attribute list ends in a quote.
    found <- gsub("(<[A-Z]+)[ \n][^>]+\">", "\\1>", found, ignore.case=TRUE)
    # Remove whitespace-only gaps between adjacent tags.
    found <- gsub(">[\n ]+<", "><", found)
    # Strip every tag except the structural ones listed in the look-ahead.
    found <- gsub(
        "<[\\/]?(?:(?!(?:TABLE|TR|TH|TD|P|BR))[^>])+>", "", found,
        perl=TRUE, ignore.case=TRUE
    )

    found
}
# Pull the compensation table out of the selected document and show it.
doc_table <- sct_grab(doc[["Text"]])
## Warning: closing unused connection 6 (alphabet.html)
doc_table

[1] “
Name and    Salary  Bonus  Stock
Awards
  Option
Awards
  Non-Equity
Incentive Plan
Compensation
  Non-Qualified
Deferred
Compensation
Earnings
  All Other
Compensation
  Total 
Principal Position  Year   ($)(1)  ($)(2)  ($)(3)  ($)   ($)   ($)(4)  ($)(5)  ($) 
Larry Page(6)  2016   1                     1 
CEO,  2015   1                     1 
Alphabet, and Co-Founder  2014   1                     1 
Sergey Brin(6)  2016   1                     1 
President, Alphabet,  2015   1                     1 
and Co-Founder  2014   1                     1 
Eric E. Schmidt  2016   1,250,000               2,430,685   629,106(7)  4,309,791 
Executive Chairman,  2015   1,254,808   6,000,000               783,370   8,038,178 
Alphabet  2014   1,250,000   6,000,000   100,443,838            996,934   108,690,772 
Sundar Pichai  2016   650,000      198,695,790            372,410(8)  199,718,200 
Chief Executive\n Officer, Google  2015   652,500      99,829,142            150,460   100,632,102 
Ruth M. Porat  2016   650,000      38,313,173            110,956(9)  39,074,129 
Senior Vice President\n and Chief Financial Officer, Alphabet and Google  2015   395,000   5,000,000   25,052,554            603,932   31,051,486 
David C. Drummond  2016   650,000                  14,387(10)  664,387 
Senior Vice President,  2015   652,500                  20,323   672,823 
Corporate Development,\n Chief Legal Officer, and Secretary, Alphabet  2014   650,000   3,500,000   40,092,200            16,688   44,258,888 

Single Parse Out

# Placeholder for converting the cleaned table markup into a data frame.
# Not implemented yet: the argument is ignored and NULL is returned.
#
# Args:
#   doc_table: cleaned table markup (currently unused).
sct_table_df <- function(doc_table) NULL
# Run the two parsing stages on a document's text: sct_grab() to isolate the
# table markup, then sct_table_df() to convert it. The result is returned
# invisibly, so a bare top-level call prints nothing.
#
# Args:
#   doc_text: HTML text of one document, passed to sct_grab().
sct_parse <- function(doc_text){
    invisible(sct_table_df(sct_grab(doc_text)))
}
# Exercise the parsing pipeline on the selected document.
sct_parse(doc[["Text"]])
# DEF 14A filings for CIK 1652044 between quarters 201001 and 201604.
alphabet_data <- pygar_master(
    cik=1652044, form="DEF 14A",
    startqtr=201001, endqtr=201604
)
alphabet_data
##       CIK  Company.Name Form.Type Date.Filed
## 1 1652044 Alphabet Inc.   DEF 14A 2016-04-29
##                                      Filename Quarter
## 1 edgar/data/1652044/0001308179-16-000384.txt  201602

Get Google CIK

# Find one DEF 14A filing matching the company pattern in quarter 201001 and
# take its CIK.
google_one <- pygar_master(
    qtr=201001, form="DEF 14A", company='/google inc/i'
)
google_cik <- google_one[["CIK"]][1]
google_cik
## [1] 1288776

Grab More Google Data

# All DEF 14A filings for the CIK found above, quarters 200201 to 201504.
google_data <- pygar_master(
    cik=google_cik, form="DEF 14A",
    startqtr=200201, endqtr=201504
)
google_data
##        CIK Company.Name Form.Type Date.Filed
## 1  1288776  Google Inc.   DEF 14A 2005-04-08
## 2  1288776  Google Inc.   DEF 14A 2006-03-31
## 3  1288776  Google Inc.   DEF 14A 2007-04-04
## 4  1288776  Google Inc.   DEF 14A 2008-03-25
## 5  1288776  Google Inc.   DEF 14A 2009-03-24
## 6  1288776  Google Inc.   DEF 14A 2010-03-29
## 7  1288776  Google Inc.   DEF 14A 2011-04-20
## 8  1288776  Google Inc.   DEF 14A 2012-05-09
## 9  1288776  Google Inc.   DEF 14A 2013-04-24
## 10 1288776  Google Inc.   DEF 14A 2014-03-28
## 11 1288776  Google Inc.   DEF 14A 2015-04-23
##                                       Filename Quarter
## 1  edgar/data/1288776/0001193125-05-072803.txt  200502
## 2  edgar/data/1288776/0001193125-06-070406.txt  200601
## 3  edgar/data/1288776/0001193125-07-073756.txt  200702
## 4  edgar/data/1288776/0001193125-08-064574.txt  200801
## 5  edgar/data/1288776/0001193125-09-061999.txt  200901
## 6  edgar/data/1288776/0001193125-10-070028.txt  201001
## 7  edgar/data/1288776/0001193125-11-103802.txt  201102
## 8  edgar/data/1288776/0001193125-12-222158.txt  201202
## 9  edgar/data/1288776/0001308179-13-000248.txt  201302
## 10 edgar/data/1288776/0001308179-14-000114.txt  201401
## 11 edgar/data/1288776/0001308179-15-000157.txt  201502

Grab Google Forms

# Collect the text of the first document of every listed filing.
# Built with lapply() + unlist() instead of growing a vector with c() inside
# a loop (which re-copies the accumulated texts on every iteration); this
# also avoids a loop variable named `file` that masks base::file().
# The result is NULL when there are no filings, as before.
google_docs <- unlist(
    lapply(
        as.character(google_data$Filename),
        function(filename) pygar_form(filename)$Body[[1]]$Text
    ),
    use.names=FALSE
)
# Index 10 is the 10th filing listed in google_data.
sct_grab(google_docs[10])

[1] “
Name\n and
Principal
Position
  Year Salary(1)
($)
  Bonus(2)
($)
  Stock\n
Awards(3)
($)
  Option\n
Awards(4)
($)
 Non-Equity
Incentive Plan
Compensation
($)
 Non-Qualified\n
Deferred Compensation
Earnings(5)
($)
 All\n Other
Compensation(6)
($)
 Total\n
($)
Larry\n Page(7) 2013 1       1
Chief\n Executive 2012 1       1
Officer\n and 2011 1       1
Co-Founder                  
Sergey\n Brin(7) 2013 1       1
Co-Founder 2012 1       1
  2011 1       1
Eric\n E. Schmidt(8) 2013 1,250,000 6,000,000 11,365,184(9)    708,196(10) 19,323,380
Executive 2012 1,250,000 6,000,000    35,320 343,304 7,628,624
Chairman\n of the Board of Directors 2011 937,500  55,643,040 38,136,040 6,000,000  263,682 100,980,262
Patrick\n Pichette 2013 650,000 3,000,000 1,489,917(11)    13,159 5,153,076
Senior\n Vice 2012 650,000 2,800,000 21,964,757 13,314,569   11,780 38,741,106
President\n and Chief Financial Officer 2011 650,000  8,408,292 6,238,440 3,000,000  10,238 18,306,970
Nikesh\n Arora 2013 650,000 3,500,000 1,548,117(11)    11,486 5,709,603
Senior\n Vice 2012 650,000 10,800,000 24,709,875(12)14,978,818(13)   7,175 51,145,868
President\n and Chief Business Officer 2011 650,000  11,210,865 8,317,778 3,000,000  8,910 23,187,553
David\n C. 2013 650,000 3,000,000 1,134,369(11)    13,289 4,797,658
Drummond 2012 650,000 3,300,000 17,022,655 10,318,728   10,475 31,301,858
Senior\n Vice President, Corporate Development, Chief Legal Officer, and Secretary 2011 650,000  8,408,292 6,238,440 3,000,000  9,240 18,305,972

Still figuring out a good way to gather this info for a graph…