Data cleaning using SQL: London AirBnb listings March 2023

Soliver Mazo
June 2023

Objective

This project aims to prepare the data for analysis by filtering out data outside Uk.

Tools & Technologies

SQL, Data Analysis, BigQuery

Share

SQL

Data source: https://www.kaggle.com/datasets/minemartin/london-airbnb-listings-march-2023

/*
Cleaning Data in SQL Queries
Dataset:https://www.kaggle.com/datasets/minemartin/london-airbnb-listings-march-2023
*/

--SELECT * FROM `coursera-385403.london_airbnb_listings_march_2023.listings` LIMIT 1000;

--------------------------------------------------------------------------------------------------------------------------

-- Checking highly significant attributes for null values

WITH null_host_location AS
  (SELECT 
    COUNT(*) null_host_location_count
  FROM 
    `coursera-385403.london_airbnb_listings_march_2023.listings`
  WHERE 
    host_location IS NULL OR TRIM(host_location) = '' ) ,
  null_property_type AS
  (SELECT 
    COUNT(*) null_property_type_count
  FROM 
    `coursera-385403.london_airbnb_listings_march_2023.listings`
  WHERE 
    property_type IS NULL OR TRIM(property_type) = '' ),
  null_room_type AS
  (SELECT 
    COUNT(*) null_room_type_count
  FROM 
    `coursera-385403.london_airbnb_listings_march_2023.listings`
  WHERE 
    room_type IS NULL OR TRIM(room_type) = '' ),
  null_price_or_zero AS
  (SELECT 
    COUNT(*) null_price_or_zero_count
  FROM 
    `coursera-385403.london_airbnb_listings_march_2023.listings`
  WHERE 
    price IS NULL OR price = 0 )
SELECT 
  null_host_location_count, null_property_type_count, null_room_type_count,null_price_or_zero_count 
FROM null_host_location,null_property_type,null_room_type,null_price_or_zero;

-- Filtering null host_location and null or zero price
SELECT 
  *
FROM `coursera-385403.london_airbnb_listings_march_2023.listings`
WHERE host_location IS NOT NULL OR TRIM(host_location) != ''
  AND price > 0;


-- Classifying property_type to `entire_unit`, `private_room`, `others`
-- Classifying room_type_type to `entire_home_apt`, `private_room`, `others`
-- Classifying prices in to ranges 1-50 = `cheap` , 51-99 = `average` , 99+ = `expensive`
-- Filter out listing outside UK
SELECT 
  *,
  CASE
    WHEN LOWER(TRIM(property_type)) = 'entire rental unit' THEN 'entire_unit'
    WHEN LOWER(TRIM(property_type)) = 'private room in rental unit' THEN 'private_room'
    ELSE 'others'
  END AS property_type_classification,
  CASE
    WHEN LOWER(TRIM(room_type)) = 'entire home/apt' THEN 'entire_home_apt'
    WHEN LOWER(TRIM(room_type)) = 'private room' THEN 'private_room'
    ELSE 'others'
  END AS room_type_classification,
  CASE 
    WHEN price BETWEEN 1 AND 49 THEN 'cheap'
    WHEN price BETWEEN 50 AND 99 THEN 'average'
    ELSE 'expensive'
  END AS price_classification
FROM `coursera-385403.london_airbnb_listings_march_2023.listings`
WHERE 
  (host_location IS NOT NULL OR TRIM(host_location) != '')
  AND price > 0
  AND LOWER(TRIM(host_location)) LIKE '%united kingdom%';

--Calculate the sum Airbnb listings in each location

SELECT 
  host_location,
  COUNT(host_location) as listing_count
FROM `coursera-385403.london_airbnb_listings_march_2023.listings`
WHERE 
  (host_location IS NOT NULL OR TRIM(host_location) != '')
  AND price > 0
  AND LOWER(TRIM(host_location)) LIKE '%united kingdom%'
GROUP BY host_location;

Follow me

© 2023  -  Soliver Mazo.