geohash

Geometry Object

Always collects your location

Geo blacklist

Geohash explorer

https://geohash.softeng.co/

Index Precision Bounding Box
1 ≤ 5,000km X 5,000 Km
2 ≤ 1,250km X 625km
3 ≤ 156km X 156km
4 ≤ 39.1km X 19.5km
5 ≤ 4.89km X 4.89km
6 ≤ 1.22km X 0.61km
7 ≤ 153m X 153m
8 ≤ 38.2m X 19.1m
9 ≤ 4.77m X 4.77m
10 ≤ 1.19m X 0.596m
11 ≤ 149mm X 149mm
12 ≤ 37.2mm X 18.6mm

(23.321, 42.697)

(23.321, 42.697)

sx8dfsymrrcc

Spatial Index

Locations without coordinates

H3

S2

Shared Characters

Shared Characters

  • sx8dfsymrrcc (reference point)
  • sx8dfsymrrc9 (0.13 meters away)
  • sx8dfsymrr11 (0.62 meters away)
  • sx8dfsymr1qn (5.19 meters away)
  • sx8dfsy2j3dm (141.8 meters away)

Strings are cheap

Distance matrices are expensive

Search for a shared string

SELECT 
  substring(geohash, 1, 6) AS target_gh
FROM locations
WHERE target_gh = 'sx8dfsy'

library(dplyr)
library(geohashTools)
library(populartimes)

# store park street coordinates
park_street_loc <- c(42.35675660592967, -71.06224330327713)

# encode the geohash
park_street <- gh_encode(park_street_loc[1], park_street_loc[2], 12)
park_street
[1] "drt2yyy1cxwy"

res <- text_search(
  location = park_street_loc,
  query = "espresso",
  type = "cafe"
)

res[c("name", "rating", "price_level")]
# A tibble: 60 × 3
   name                           rating price_level
   <chr>                           <dbl>       <int>
 1 Espresso Love                     4.4           1
 2 Caffè Nero                        4.2           2
 3 Ogawa Coffee                      4.6           2
 4 George Howell Coffee              4.6           2
 5 Render Coffee                     4.4           1
 6 Jaho Coffee Roaster & Wine Bar    4.3           2
 7 The Well Coffee House             4.7           1
 8 Kohi Coffee Company               4.7           1
 9 Caffè Nero                        4.1           2
10 Caffè Nero                        4.4           2
# ℹ 50 more rows

fn count_seq_chars_to_ref(x: Strings, y: &str) -> Integers {
  x.iter()
        .map(|xi|
            xi.chars().zip(y.chars())
            .take_while(|a| a.0 == a.1)
            .count()
        ).map(|x|(x as i32).into())
        .collect()
}

res |> 
  mutate(
      geohash = gh_encode(lat, long, 12),
      shared_chars = count_seq_chars_to_ref(geohash, park_street)
  ) |> 
  filter(shared_chars >= 6) |>
  select(name, geohash, shared_chars)
# A tibble: 7 × 3
  name                 geohash      shared_chars
  <chr>                <chr>               <int>
1 Caffè Nero           drt2yyx9mj6t            6
2 George Howell Coffee drt2yyqwe8r7            6
3 Caffè Nero           drt2yynp0vqs            6
4 Thinking Cup         drt2yykfq0r5            6
5 Starbucks            drt2yywg71qc            6
6 Starbucks            drt2yyrmzf78            6
7 Starbucks            drt2yy8zuexj            6

Understand
Know

Understand
Know

Ready?

Z-order curves

2 Dimensions -> 1 Dimension

I M P O R T A N T

Z-coordinate

(x, y) -> Z

x = 23
y = 42
z = ??

23

00010111

42

00101010

Interleaving

interleave manim video

x & y are 8 bits

z is 16 bits

that’s pretty much it.

Back to bits

double precision
floating point

64 bits

01000000 00110011 00000111 10101110
00010100 01111010 11100001 01000111

128 bits

01000000 00110011 00000111 10101110
00010100 01111010 11100001 01000111
01000000 00110011 00000111 10101110
00010100 01111010 11100001 01000111

Truncate to 32 bits

01000000 00110011 00000111 10101110

Long & lat to binary

Encoding longitude

  • x = 19
  • interval: [-180, 180)
  • partition: [-180, 0), [0, 180)
  • Left = 0
  • Right = 1

Encoding longitude

  • x = 19
  • new interval [0, 180)
  • partition: [0, 90), [90, 180)
  • Bit = 0

Encoding latitude

  • y = 47
  • interval: [-90, 90)
  • partition: [-90, 0), [0, 90)
  • Bit = 1

Binary != GeoHash

Base32 encoding

Base 10 (decimal)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9

Base 2 (binary)

0, 1

Base n

  • the number of characters in the alphabet

Base 32

A-Z
2-7
RFC 4648

Geohash Alphabet

0-9
a-z (except a, i, l, o)

const BASE32: [char; 32] = [
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'j', 'k', 'm', 'n', 'p', 'q', 'r',
    's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];

Base 32 == 5 bits

00000 == 0

11111 == 31

2^5 possible values

Decoding 32 bit

110100001010011011010100100000

11010 00010 10011 01101 01001 00000

11010

11010 == 26

00010 == 2

10011 == 20

01101 == 14

01001 == 9

00000 == 0

26 2 20 14 9 0

const BASE32: [char; 32] = [
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'j', 'k', 'm', 'n', 'p', 'q', 'r',
    's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];

BASE32[26]

BASE32[26] == 'u'

BASE32[2] == '2'

u 2 n f 9 0

RECAP

Z-coord

1 dim mapping from (x, y)

Encode X & Y as binary

Partition number line

[-180, 0) [0, 180)

1

[-180, -90) [-90, 0)

1 0

Interleave bits

1   0   0   1   1
  0   1   1   0   0
1 0 0 1 0 1 1 0 1 0

Break up the bits

11001 00100 00001 11111

Translate into integers

25 4 1 31

Base 32 encode

A note on bits

Coding Geohash

Define x and y

x <- 19
y <- 47

Specify precision

precision <- 12

Define search vectors

# instantiate search vectors
xvals <- c(-180, 0, 180)
yvals <- c(-90, 0, 90)

Allocate binary vectors

# 32 * 2 = 64 bits after interleaving
xbin <- integer(32)
ybin <- integer(32)

Encode X

Encode X

for (i in 1:32) {
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) { # upper interval
  }
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) {
    xbin[i] <- 1L
  }
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) {
    xbin[i] <- 1L
    xvals[1] <- xvals[2] # new lower bound
  }
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) {
    xbin[i] <- 1L
    xvals[1] <- xvals[2] # new lower bound
  } else {
    xbin[i] <- 0L
    xvals[3] <- xvals[2] # new upper bound
  }
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) {
    xbin[i] <- 1L
    xvals[1] <- xvals[2] # new lower bound
  } else {
    xbin[i] <- 0L
    xvals[3] <- xvals[2] # new upper bound
  }
  xvals[2] <- (xvals[1] + xvals[3]) / 2 # new mid point
}

Encode X

for (i in 1:32) {
  if (x >= xvals[2]) {
    xbin[i] <- 1L
    xvals[1] <- xvals[2] # new lower bound
  } else {
    xbin[i] <- 0L
    xvals[3] <- xvals[2] # new upper bound
  }
  xvals[2] <- (xvals[1] + xvals[3]) / 2 # new mid point
}

xbin
 [1] 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 1

Repeat for Y

Encode Y

for (i in 1:32) {
  if (y >= yvals[2]) {
    ybin[i] <- 1L
    yvals[1] <- yvals[2]
  } else {
    ybin[i] <- 0L
    yvals[3] <- yvals[2]
  }
  yvals[2] <- (yvals[1] + yvals[3]) / 2
}

ybin
 [1] 1 1 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 0 1 0

Interleave

# interleave results into a 64 bit binary
z_coord <- vctrs::vec_interleave(xbin, ybin)
z_coord
 [1] 1 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1
[39] 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0

Split into 5 bits

num_id <- ceiling(1:64 / 5)
bits <- split(z_coord, num_id)

12’s the limit!

tail(bits, 3)
$`11`
[1] 0 0 1 0 0

$`12`
[1] 0 1 0 1 0

$`13`
[1] 0 1 1 0

Drop unused bits

bits <- split(z_coord, num_id)[1:precision]

Base 32 alphabet

bit_lu <- c(
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "b", "c", "d", "e", "f", "g",
  "h", "j", "k", "m", "n", "p", "q", "r",
  "s", "t", "u", "v", "w", "x", "y", "z"
)

Bits to int

Bits to int

five_bits <- bits[[1]]

Bits to int

five_bits <- bits[[1]]
position <- paste(five_bits, collapse = "")
position
[1] "11010"

Bits to int

five_bits <- bits[[1]]
position <- paste(five_bits, collapse = "")
b32_pos <- strtoi(position, base = 2)
b32_pos
[1] 26

Bits to int

five_bits <- bits[[1]]
position <- paste(five_bits, collapse = "")
b32_pos <- strtoi(position, base = 2)

bit_lu[b32_pos + 1]
[1] "u"

Make it into a function

Base 32 conversion

base32_conversion <- function(x) {
  bit_lu <- c('0', '1', '2', '3', '4', '5', '6', '7',
              '8', '9', 'b', 'c', 'd', 'e', 'f', 'g',
              'h', 'j', 'k', 'm', 'n', 'p', 'q', 'r',
              's', 't', 'u', 'v', 'w', 'x', 'y', 'z')

  # add 1 b/c R is 1 base indexed
  position <- strtoi(paste(x, collapse = ""), 2) + 1
  bit_lu[position]
}

Encode all

Encode all

indexes <- purrr::map_chr(bits, base32_conversion)
 [1] "u" "2" "m" "e" "2" "k" "5" "6" "u" "5" "4" "b"

Encode all

indexes <- purrr::map_chr(bits, base32_conversion)
geohash <- paste(indexes, collapse = "")
[1] "u2me2k56u54b"