knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rio)
library(rmiscutils)
## 
## Attaching package: 'rmiscutils'
## The following object is masked from 'package:dplyr':
## 
##     add_row
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(scales)
library(DT)

Getting the Data

This is downloaded from the County Auditor and Recorder’s Data. This can be any spreadsheet of county data as long as it has the parcel number, mortgage amount, tax district, effective rate, owner name, sale date, and (Auditor’s) market value.

Note: the Warren County Auditor’s website lists 4,606 properties with a land use code > 199 and < 500 whereas our data pulled up 4860.

This is just for commerical properties with land use codes >199 and <500.

#importing data
Commercial_Properties <- rio::import("Warren County Properties Last Verified 7-21-19.csv") #OR each County
#R doesn't like commas in the data, and won't read that as a number so this changes it. 
Commercial_Properties$Amount_Owed_to_County <- rmiscutils::number_with_commas(Commercial_Properties$Amount_Owed_to_County)
Commercial_Properties$mortgage_amount <- rmiscutils::number_with_commas(Commercial_Properties$mortgage_amount)
Commercial_Properties$True_Value <- rmiscutils::number_with_commas(Commercial_Properties$True_Value)
Commercial_Properties$Mortgage_Share <- rmiscutils::number_with_commas(Commercial_Properties$Mortgage_Share)
Commercial_Properties$New_Taxable_Value <- rmiscutils::number_with_commas(Commercial_Properties$New_Taxable_Value)
Commercial_Properties$Auditors_Total_Value_of_Parcels <- rmiscutils::number_with_commas(Commercial_Properties$Auditors_Total_Value_of_Parcels)
#As per the auditor checkint our math on this, we have to take out the exempt properties
Commercial_Properties <- filter(Commercial_Properties, taxable_value>0)

Calculating Baseline

What is the amount owed to each county?

note: in theory we would just add all the taxes_paid and there’s always going to be a certain percentage of people behind on their taxes, and we’re not interested in that. So here we are calculating the total amount to taxes collected in theory

Commercial_Properties <- mutate(Commercial_Properties, Amount_Owed_to_County=market_value*`Effective Rate`*.35*.001)

How much does each district raise?

tax_district Total_Per_District
01 - CLEARCREEK TWP-SPRINGBORO CSD 276407.0498
02 - CLEARCREEK TWP-LEBANON CSD 44785.4039
03 - CLEARCREEK TWP-WAYNE LSD 18493.4568
04 - CLEARCREEK TWP LCSD - FLD 33938.7748
05 - CLEARCREEK TWP-SPRINGBORO CORP 3959707.3200
06 - CLEARCREEK TWP-SPRBORO CSD FLD 159238.0339
07 - CLEARCREEK TWP-SPRINGBORO CORP 2139102.9752
08 - DEERFIELD TWP-MASON CSD 9045914.4227
09 - DEERFIELD TWP-KINGS LSD 7143797.0826
11 - MASON CORP - MASON CSD 9866944.1058
12 - MASON CORP - KINGS LSD 3959231.7295
14 - FRANKLIN TWP-FRANKLIN CSD 139402.9400
15 - FRANKLIN TWP-MIDDLETOWN CSD 14397.5473
16 - FRANKLIN TWP-CARLISLE LSD 89167.2141
18 - FRANKLIN TWP-CARLISLE CORP 650142.8949
19 - FRANKLIN TWP-CARLISLE CORP-FSD 51110.6924
21 - FRANKLIN CITY FCSD 4736595.7417
23 - MIDDLETOWN CORP-MIDDLETOWN CSD 2901139.5129
24 - FRANKLIN TWP-MIDDLETOWN CO-FSD 667895.7600
26 - HAMILTON TWP-LITTLE MIAMI LSD 1898174.8362
27 - HAMILTON TWP-LOVELAND CSD 2265.1534
28 - HAMILTON TWP-KINGS LSD 53151.8932
30 - LOVELAND CORP-LOVELAND CSD 34574.4000
31 - HAMILTON TWP-SO. LEBANON CORP 173150.7301
32 - HAMILTON TWP-MAINEVILLE CORP 167734.4306
33 - LOVELAND CORP-LITTLE MIAMI LSD 33791.1165
35 - HARLAN TWP-LITTLE MIAMI LSD 19712.9285
36 - HARLAN TWP-BLANCHESTER LSD 5014.6633
37 - HARLAN TWP-GOSHEN LSD 107115.2722
39 - HARLAN TWP-BUTLERVILLE CORP 2319.9960
40 - HARLAN TWP-PLEASANT PLAIN CORP 9614.4231
41 - HARLAN TWP-BLANCHESTER CORP. 3373.3819
43 - MASSIE TWP-CLINTON MASSIE LSD 1472.3685
45 - MASSIE TWP-HARVEYSBURG CORP 59377.6666
48 - SALEM TWP-LITTLE MIAMI LSD 110467.8748
50 - SALEM TWP-MORROW CORP 337565.7531
51 - TURTLECRK-MIDDLETOWN CORP-MCSD 173880.3189
52 - TURTLECRK-MIDDLETOWN CORP-LCSD 80608.0845
53 - TURTLECREEK TWP-LEBANON CSD 1029819.3867
55 - TURTLECREEK TWP-MIDDLETOWN CSD 85484.4984
56 - TURTLECREEK TWP-LITTLE MIAMI 657.2912
58 - TRTLCRK TP-MNROE CP-MONROE LSD 1283883.5724
59 - TRTLCRK TP-MNROE CP-MONROE LSD 253106.4641
61 - LEBANON CORP-LEBANON C.S.D. 5454171.4840
62 - LEBANON CORP.-KINGS LSD 626218.0218
63 - UNION TWP-LEBANON CSD 6016.0728
64 - UNION TWP-LITTLE MIAMI LSD 34580.5948
65 - UNION TWP-MASON CSD 297.3953
66 - UNION TWP-KINGS LSD 130340.3836
67 - UNION TWP-LEB CORP-KINGS LSD 1899.0048
68 - UNION TWP-SOUTH LEBANON CORP 313147.2500
70 - WASHINGTON TWP-LMLSD- CWJ FIRE 10677.9250
71 - WASHINGTON TWP-LITTLE MIAMI LS 7545.5789
72 - WASHINGTON TWP-CLINTON MASSIE 8490.9766
73 - WASHINGTON TWP-LEBANON CSD 11952.9158
74 - WASHINGTON TWP-CMLSD- CWJ FIRE 830.8550
76 - WAYNE TWP-WAYNE LSD 198740.2914
80 - WAYNE TWP-CORWIN CORP-WAYNE LS 77237.1464
81 - WAYNE TWP-WAYNESVILLE CORP 551075.1352
85 - WASH TWP CMLSD CWJFD MLCLD 18240.8543
92 - WASH TWP LMLSD MLCLD 35854.4113
94 - DEERFIELD TWP- MASON CORP-KLSD 1494596.3995
95 - TURTLCRK TWP LEB CORP LEB CSD 23477.0782

How much is that altogether?

  sum(na.omit(Amount_Due_Per_Tax_District$Total_Per_District))
## [1] 60829119

That same number, but written as currency with commas so you can read:

scales::dollar(.Last.value)
## character(0)

Putting single purchases together

First, our data is in the wrong format. When the date, owner, and amount are the same, the sale amount is actually for ALL the properties together. But there are different parcel numbers because large properties are usually composed of several parcels next to each other.

So for example, parcels 123, 456, and 789 are are listed as sold to Company LLC on June 22, 2019 for 6 million dollars. But each is not worth 6 million. They are worth 1, million, 2 million, and 3 million dollars respectively. All the properties together are worth valued at 6 million dollars, BUT the mortgage amount field on each reads 6 million dollars. And we don’t want to accidentally log it as 18 million dollars.

So here we will group by owner, date of mortgage, and the mortgage amount. But then we run into another problem: Some of these have multiple tax districts:

Looking_For_Sort_of_dupulicates <- Commercial_Properties %>% 
  group_by(owner,date_of_mortgage,mortgage_amount) %>% 
  count(tax_district)
datatable(Looking_For_Sort_of_dupulicates)

If you sort by n (which stands for the number of tax districts,) you can see that each purchase goes from 1 to 106 properties, though thankfully most are just 1. This really complicates our analysis, though, because we need the tax district to get the effective rate which is what we will multiply our new value by.

So first, we need to create a proportion for our tax district. A purchase that is in just one tax district will have a multiplier of 1. The rest will be a proportion of the current value.

For example, let’s say we purchased an LLC business for one million dollars (the mortgage) on two parcels of land in two tax districts and it is on the books as being 500,000 dollars total, (the Auditor’s market value.) Parcel A is listed as being worth 100,000 dollars in tax district 1 and Parcel B is listed being worth 400,000 dollars in tax district 2. Then we would have a parcel A multiplier of .2 (or 20 percent of the value) and parcel B having a multiplier of .8 (eighty percent of the value.)

Commercial_Properties <- Commercial_Properties %>% 
  group_by(owner,date_of_mortgage,mortgage_amount) %>% 
  mutate(Auditors_Total_Value_of_Parcels =sum(market_value)) %>%  #This creates a total for each transaction
  ungroup() %>% 
  mutate(Share_of_Market_Value = market_value/Auditors_Total_Value_of_Parcels) #This creates the share of the value
#Some parcels are valued at Zero, and therefore, dividing by 0 gives you NA. Which screws up our calculations further on. So for the 173 NAs here we will just set them to zero
Commercial_Properties$Share_of_Market_Value[is.na(Commercial_Properties$Share_of_Market_Value)] <- 0

Okay now we are looking for outliers.

In the Athens apartment case that is our example, (https://realestate.cleveland.com/realestate-news/2018/08/big_big_money_legal_loopholes.html) the mortgage was for so much more than the assessed value because it hadn’t be sold in like ~20 years. Every year the state takes the property’s past value, and estimates what someone would want to buy it for, given how many bedrooms and square footage and the neighborhood and whatnot. But really, the value of something is how much someone else wants to pay for it. When the missing tax value is because it was an LLC transfer, that’s what we are looking for.

Assessors generally try to assess a property at about 80 to 90 percent of its “true value” to allow for property prices to ebb and flow a bit. So in our case, we are going to see if a mortgage is 20 percent more than its listed value.

Looking for Outliers

#First we need to set the NAs in mortgage amount to zero or we get an error further down when we are calulating things from it
Commercial_Properties$mortgage_amount[is.na(Commercial_Properties$mortgage_amount)] <- 0
Commercial_Properties$mortgage_amount <- replace(Commercial_Properties$mortgage_amount, Commercial_Properties$mortgage_amount == -999.99, 0)


#Creating a column that shows what twenty percent more is
Commercial_Properties <- Commercial_Properties %>% 
  group_by(owner,date_of_mortgage,mortgage_amount) %>%
  mutate(Twenty_Percent_More = (Auditors_Total_Value_of_Parcels+.2*Auditors_Total_Value_of_Parcels)) %>% 
#Creating a column that shows if the mortgage value is twenty percent more than the property value. If a bank is willing to bet a significant amount more thanthe property value, then that is tax dollars lost. 
  mutate(Is_This_A_Potential_Outlier = ifelse(mortgage_amount > Twenty_Percent_More, "TRUE", "FALSE" )) %>% 
  ungroup()

Verification

Now this is the part where we want to certify that all our numbers are correct.

For example, in Warren County parcel numbers 15045020040, 16365020010, 16365020011, 12205020010, 12355020010, 11065020010, and 12145020020 have a mortgage of 2.55 million but their auditor’s “market value” is only ~125K. Why would a bank give a railroad 2.55 million dollars for property only worth $125K? They wouldn’t.

If you look at the recorder’s info, this is a mortgage for not just Warren county but the whole railroad up from Cincy to Columbus And fixtures, too. Probably trains in this case.

So here we export the data, create a “verified” column, and re-import it.

#so Uncomment this when you want to export
#rio::export(Commercial_Properties, "Warren County Properties to Verify 7-3-19.csv")

Data Verification Part II

This gets exported, then a human looks at the biggest outliers to see if they are true, or if they are like the example above. Then the file is re-imported. You can skip the data verification part if this doesn’t apply.

#Uncomment this ro run it when you import
#Commercial_Properties <- rio::import("Warren County Properties Last Verified 7-21-19.csv")

Calculating the new values

Now we want to take the mortgage amount as the market amount, assuming we have verified it.

So this says, if this is a potential outlier, and we have verifed that the outlier is true, then take the mortgage amount as the value. If not take the market value.

Commercial_Properties <- Commercial_Properties %>% 
  mutate(Mortgage_Share=mortgage_amount*Share_of_Market_Value) %>% 
  mutate(True_Value=ifelse(Is_This_A_Potential_Outlier==TRUE & Verified!="RED", Mortgage_Share, market_value))

Recalculating potential taxes

How much COULD the county raise if we knew the value of LLC businesses?

What was the value of taxable assets before and after finding more value?

scales::dollar(sum(Commercial_Properties$market_value))
## [1] "$2,742,038,190"
Commercial_Properties$True_Value[is.na(Commercial_Properties$True_Value)] <- 0
scales::dollar(sum(Commercial_Properties$True_Value))
## [1] "$48,274,085,806"

Calculating each district

What would the new tax rate be compared to the old tax rate?

First, we can’t average the rates, as shown by this very high-tech post-it: Can’t average the rates Sorry this is badly labeled, which I realized later. But essentially, on the top is the original calculation. There are two parcels worth 100 and 200 dollars. Both are in a district where the tax rate is .5. If we later determine that those parcel are actually worth 200 and 300, then the district is still going to collect 50 and 100 respectively. But the tax rate for each goes down.

We calculated the proportion that each parcel of land is, because sometimes businesses bought parcels that were in multiple tax districts. But to figure out the new tax rate, we will have to determine the amount per district. And then determine the new tax rate per district.

What’s the median property value in each tax district? What do they pay now? how much would they pay if we knew the true value of LLC transfers?

Tax_Rate_Comparison <- Commercial_Properties %>% 
  group_by(tax_district) %>% 
  summarize(
    Total_Per_District =sum(Amount_Owed_to_County),
    New_Taxable_Value=sum(True_Value*.35),
    Old_Rate=mean(`Effective Rate`)*.001,
    New_Rate=(Total_Per_District/New_Taxable_Value),
    Median_Property_Value=median(market_value),
    Current_Median_Tax_Bill=(Median_Property_Value*Old_Rate),
    Possible_Median_Tax_Bill=Median_Property_Value*New_Rate,
    Amount_Overpaid_By_Median_Property=Current_Median_Tax_Bill-Possible_Median_Tax_Bill)
datatable(Tax_Rate_Comparison)

This looks at the summary of what the median overpaid. It ranges from nothing to several thousand dollars. And again that’s just the median. We could enter find any property and figure out what they would pay.

summary(Tax_Rate_Comparison$Amount_Overpaid_By_Median_Property)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0    483.3   3385.8   9490.1   7964.3 146732.6