{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory Analysis of Spatial Data: Spatial Autocorrelation \n", "\n", "\n", "In this notebook we introduce methods of _exploratory spatial data analysis_\n", "that are intended to complement geovisualization through formal univariate and\n", "multivariate statistical tests for spatial clustering.\n", "\n", "\n", "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import esda\n", "import pandas as pd\n", "import geopandas as gpd\n", "from geopandas import GeoDataFrame\n", "import libpysal as lps\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from shapely.geometry import Point\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Our data set comes from the Berlin Airbnb scrape taken in April 2018. This dataframe was constructed as part of the [GeoPython 2018 workshop](https://github.com/ljwolf/geopython) by [Levi Wolf](https://ljwolf.org) and [Serge Rey](https://sergerey.org). As part of the workshop, a GeoPandas data frame was constructed with one of the columns reporting the median listing price of units in each neighborhood in Berlin. Note that the code below computes the *mean* price per `neighbourhood_group`, even though its variable is named `median_price_gb`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "gdf = gpd.read_file('data/berlin-neighbourhoods.geojson')\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "bl_df = pd.read_csv('data/berlin-listings.csv')\n", "geometry = [Point(xy) for xy in zip(bl_df.longitude, bl_df.latitude)]\n", "crs = {'init': 'epsg:4326'} \n", "bl_gdf = GeoDataFrame(bl_df, crs=crs, geometry=geometry)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "neighbourhood_group\n", "Charlottenburg-Wilm. 
58.556408\n", "Friedrichshain-Kreuzberg 55.492809\n", "Lichtenberg 44.584270\n", "Marzahn - Hellersdorf 54.246754\n", "Mitte 60.387890\n", "Neukölln 45.135948\n", "Pankow 60.282516\n", "Reinickendorf 43.682465\n", "Spandau 48.236561\n", "Steglitz - Zehlendorf 54.445683\n", "Tempelhof - Schöneberg 53.704407\n", "Treptow - Köpenick 51.222004\n", "Name: price, dtype: float32" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bl_gdf['price'] = bl_gdf['price'].astype('float32')\n", "sj_gdf = gpd.sjoin(gdf, bl_gdf, how='inner', op='intersects', lsuffix='left', rsuffix='right')\n", "median_price_gb = sj_gdf['price'].groupby([sj_gdf['neighbourhood_group']]).mean()\n", "median_price_gb" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | neighbourhood | \n", "neighbourhood_group | \n", "geometry | \n", "median_pri | \n", "
---|---|---|---|---|
0 | \n", "Blankenfelde/Niederschönhausen | \n", "Pankow | \n", "(POLYGON ((13.411909 52.614871, 13.411826 52.6... | \n", "60.282516 | \n", "
1 | \n", "Helmholtzplatz | \n", "Pankow | \n", "(POLYGON ((13.414053 52.549288, 13.414222 52.5... | \n", "60.282516 | \n", "
2 | \n", "Wiesbadener Straße | \n", "Charlottenburg-Wilm. | \n", "(POLYGON ((13.307476 52.467882, 13.307434 52.4... | \n", "58.556408 | \n", "
3 | \n", "Schmöckwitz/Karolinenhof/Rauchfangswerder | \n", "Treptow - Köpenick | \n", "(POLYGON ((13.709727 52.396299, 13.709263 52.3... | \n", "51.222004 | \n", "
4 | \n", "Müggelheim | \n", "Treptow - Köpenick | \n", "(POLYGON ((13.737622 52.408498, 13.737734 52.4... | \n", "51.222004 | \n", "
5 | \n", "Biesdorf | \n", "Marzahn - Hellersdorf | \n", "(POLYGON ((13.566433 52.535103, 13.566974 52.5... | \n", "54.246754 | \n", "
6 | \n", "Nord 1 | \n", "Reinickendorf | \n", "(POLYGON ((13.336686 52.622651, 13.336632 52.6... | \n", "43.682465 | \n", "
7 | \n", "West 5 | \n", "Reinickendorf | \n", "(POLYGON ((13.281381 52.59958, 13.281575 52.59... | \n", "43.682465 | \n", "
8 | \n", "Frankfurter Allee Nord | \n", "Friedrichshain-Kreuzberg | \n", "(POLYGON ((13.453201 52.51682, 13.453212 52.51... | \n", "55.492809 | \n", "
9 | \n", "Buch | \n", "Pankow | \n", "(POLYGON ((13.464495 52.650553, 13.464566 52.6... | \n", "60.282516 | \n", "
10 | \n", "Kaulsdorf | \n", "Marzahn - Hellersdorf | \n", "(POLYGON ((13.621353 52.527041, 13.621956 52.5... | \n", "54.246754 | \n", "
11 | \n", "None | \n", "None | \n", "(POLYGON ((13.616591 52.58154, 13.614579 52.58... | \n", "NaN | \n", "
12 | \n", "None | \n", "None | \n", "(POLYGON ((13.616681 52.57868, 13.607031 52.57... | \n", "NaN | \n", "
13 | \n", "nördliche Luisenstadt | \n", "Friedrichshain-Kreuzberg | \n", "(POLYGON ((13.444305 52.500656, 13.442658 52.5... | \n", "55.492809 | \n", "
14 | \n", "Nord 2 | \n", "Reinickendorf | \n", "(POLYGON ((13.306802 52.586062, 13.30667 52.58... | \n", "43.682465 | \n", "