library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rio)
#devtools::install_github("smach/rmiscutils")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(scales)
## Warning: package 'scales' was built under R version 3.6.2
library(DT)

Getting the Data

This is downloaded from the County Auditor and Recorder's Data. This can be any spreadsheet of county data as long as it has the parcel number, mortgage amount, tax district, effective rate, owner name, date of mortgage, and (Auditor's) market value.

This is just for commerical properties with land use codes >199 and <500.Exempt properties are not included.

#importing data
Commercial_Properties <- rio::import("Franklin County Combined Data.csv") #OR each County

Calculating Baseline

What is the amount owed to each county?

note: in theory we would just add all the taxes_paid and there's always going to be a certain percentage of people behind on their taxes, and we're not interested in that. So here we are calculating the total amount to taxes collected in theory

Commercial_Properties <- mutate(Commercial_Properties, Amount_Owed_to_County=market_value*`Effective Rate`*.35*.001)

How much does each district raise?

## `summarise()` ungrouping output (override with `.groups` argument)
tax_district Total_Per_District
10 4.678788e+08
20 3.816094e+06
25 2.383993e+07
26 7.288317e+05
27 1.936113e+05
30 1.485952e+07
35 7.533877e+04
40 3.909998e+07
50 3.355547e+07
52 1.433581e+04
53 3.224211e+05
60 1.021794e+07
67 4.224967e+06
70 1.557877e+07
72 2.563967e+05
75 1.260238e+06
76 9.569694e+05
80 1.753620e+07
90 1.142380e+07
100 1.456180e+07
101 1.010282e+05
110 3.260511e+06
112 1.142223e+04
113 3.916823e+05
115 1.400092e+05
120 3.435345e+04
130 5.333846e+06
140 7.402933e+06
141 6.177436e+04
143 1.184292e+06
144 3.578033e+04
145 4.482054e+03
146 3.040743e+04
147 4.306046e+06
150 5.656615e+05
151 1.991723e+03
152 9.575325e+06
153 6.092809e+05
154 1.962117e+03
160 5.942533e+05
161 3.144031e+06
170 1.406509e+06
171 9.915634e+05
180 1.310555e+06
181 8.724973e+04
184 6.111243e+06
185 2.729423e+07
186 7.604809e+06
188 6.724510e+03
190 8.336594e+05
191 2.666551e+05
195 3.035784e+04
200 1.928881e+04
203 7.878329e+04
210 1.836187e+05
212 2.014288e+05
213 6.066596e+05
216 1.688912e+05
219 1.030502e+04
220 6.392833e+05
221 1.834264e+03
222 2.190994e+07
223 2.176442e+04
230 5.677002e+05
234 2.899422e+04
240 4.014537e+06
241 1.098844e+05
245 4.217797e+04
246 3.343981e+04
250 4.829477e+05
252 1.613625e+04
253 5.137028e+04
254 2.431737e+04
255 6.159112e+04
260 2.031459e+05
263 7.122019e+03
264 8.597881e+04
270 8.651862e+04
271 2.058622e+04
272 6.017215e+05
273 7.205606e+07
274 2.425657e+06
275 1.729893e+06
410 1.213146e+04
420 1.833540e+06
425 2.610045e+05
430 1.239658e+06
435 1.256162e+05
440 4.732887e+05
445 1.163825e+05
455 2.591292e+05
470 1.136766e+06
480 6.951828e+03
490 9.206254e+05
495 1.403526e+07
510 1.267988e+06
512 7.253279e+05
515 2.769358e+06
520 1.386468e+07
525 2.135945e+04
530 9.692373e+06
535 3.247751e+05
540 8.536614e+05
545 5.760513e+06
550 1.295398e+06
560 3.559169e+07
570 2.581267e+07
580 9.376575e+04
590 3.394341e+07
600 1.510285e+07
610 3.388596e+07

How much is that altogether?

  sum(na.omit(Amount_Due_Per_Tax_District$Total_Per_District))
## [1] 1051053753

That same number, but written as currency with commas so you can read:

scales::dollar(.Last.value)
## character(0)

Putting single purchases together

First, our data is in the wrong format. When the date, owner, and amount are the same, the sale amount is actually for ALL the properties together. But there are different parcel numbers because large properties are usually composed of several parcels next to each other.

So for example, parcels 123, 456, and 789 are are listed as sold to Company LLC on June 22, 2019 for 6 million dollars. But each is not worth 6 million. They are worth 1, million, 2 million, and 3 million dollars respectively. All the properties together are worth valued at 6 million dollars, BUT the mortgage amount field on each reads 6 million dollars. And we don't want to accidentally log it as 18 million dollars.

So here we will group by owner, date of mortgage, and the mortgage amount. But then we run into another problem: Some of these have multiple tax districts:

Looking_For_Sort_of_dupulicates <- Commercial_Properties %>% 
  group_by(owner,date_of_mortgage,mortgage_amount) %>% 
  count(tax_district)
datatable(Looking_For_Sort_of_dupulicates)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html