Supabase Airbnb ETL – Notebook end-to-end

  1. Conexión | Leyendo credenciales desde .env
from supabase import create_client, Client
from dotenv import load_dotenv
import pandas as pd
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.supabase_utils import get_client
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
supabase = get_client()
  1. Carga de tablas
def fetch_full_table(table_name: str, chunk_size: int = 1_000) -> pd.DataFrame:
    """
    Descarga una tabla de Supabase completa, en bloques de `chunk_size`,
    evitando el límite de 1 000 filas por petición.
    """
    supabase = get_client()
    start = 0
    frames = []

    while True:
        resp = (
            supabase.table(table_name)
            .select("*")
            .range(start, start + chunk_size - 1)
            .execute()
        )
        rows = resp.data
        if not rows:                 # ⇐ cuando ya no vienen filas, cortamos
            break

        frames.append(pd.DataFrame(rows))
        start += chunk_size          # siguiente bloque

    return pd.concat(frames, ignore_index=True)

# Uso directo:
df_reviews_summary_airbnb = fetch_full_table("reviews_summary_airbnb")
df_listings_airbnb = fetch_full_table("listings_airbnb")
neighbourhoods_airbnb = supabase.table('neighbourhoods_airbnb').select('*').execute()
df_neighbourhoods_airbnb = pd.DataFrame(neighbourhoods_airbnb.data)
neighbourhoods_gejson_airbnb = supabase.table('neighbourhoods_gejson_airbnb').select('*').execute()
df_neighbourhoods_gejson_airbnb = pd.DataFrame(neighbourhoods_gejson_airbnb.data)
df_online_retail_ii = fetch_full_table("online_retail_ii")
df_global_superstore2 = fetch_full_table("global_superstore2")
  1. Exploración rápida
df_reviews_summary_airbnb.shape
(26964, 3)
df_reviews_summary_airbnb.head(3)
listing_id num_reviews last_date
0 11508 44 2025-01-26
1 14222 123 2025-01-18
2 16695 45 2019-11-30
df_listings_airbnb.shape
(26964, 15)
df_listings_airbnb.head(3)
id neigh_id latitude longitude property_type accommodates bedrooms beds price minimum_nights maximum_nights availability_365 number_of_reviews review_score_rating reviews_per_month
0 11508 22 -34.58184 -58.42415 Entire condo 2 1.0 1.0 67518.0 3 1125 300 44 4.82 0.29
1 14222 22 -34.58617 -58.41036 Entire rental unit 2 1.0 1.0 22375.0 7 40 44 123 4.77 0.80
2 16695 18 -34.61439 -58.37611 Entire loft 4 1.0 1.0 52511.0 2 1125 365 45 4.28 0.27
df_neighbourhoods_airbnb.shape
(49, 2)
df_neighbourhoods_airbnb.head(3)
neigh_id neighbourhood
0 1 Agronomia
1 2 Almagro
2 3 Balvanera
df_neighbourhoods_gejson_airbnb.shape
(49, 2)
df_neighbourhoods_gejson_airbnb.head(3)
geom neigh_id
0 {'type': 'MultiPolygon', 'crs': {'type': 'name... 1
1 {'type': 'MultiPolygon', 'crs': {'type': 'name... 2
2 {'type': 'MultiPolygon', 'crs': {'type': 'name... 3
df_online_retail_ii.shape
(787133, 10)
df_online_retail_ii.head(3)
invoice stock_code description quantity invoice_date unit_price customer_id country month year
0 560712 22781 GUMBALL MAGAZINE RACK 1 2011-07-20T13:06:00 7.65 14456 United Kingdom 7 2011
1 560712 84732D CUTE RABBIT CEATURE SCREEN 2 2011-07-20T13:06:00 0.39 14456 United Kingdom 7 2011
2 560712 84732B CUTE BIRD CEATURE SCREEN 2 2011-07-20T13:06:00 0.39 14456 United Kingdom 7 2011
df_global_superstore2.shape
(25753, 24)
df_global_superstore2.head(3)
row_id order_id order_date ship_date ship_mode customer_id customer_name segment city state ... category sub_category product_name sales quantity discount profit shipping_cost order_priority profit_margin
0 32298 CA-2012-124891 2012-07-31 2012-07-31 Same Day RH-19495 Rick Hansen Consumer New York City New York ... Technology Accessories Plantronics CS510 - Over-the-Head monaural Wir... 2309.65 7 0.0 762.18 933.57 Critical 33.00
1 26341 IN-2013-77878 2013-02-05 2013-02-07 Second Class JR-16210 Justin Ritter Corporate Wollongong New South Wales ... Furniture Chairs Novimex Executive Leather Armchair, Black 3709.40 9 0.1 -288.77 923.63 Critical -7.78
2 25330 IN-2013-71249 2013-10-17 2013-10-18 First Class CR-12730 Craig Reiter Consumer Brisbane Queensland ... Technology Phones Nokia Smart Phone, with Caller ID 5175.17 9 0.1 919.97 915.49 Medium 17.78

3 rows × 24 columns

Última actualización: 2025-07-30

  1. Mapa interactivo que muestra los reitings de los airbnb según la zona
import plotly.express as px
fig = px.scatter_map(df_listings_airbnb, lat='latitude', lon='longitude', hover_name='id', color='review_score_rating',
    color_continuous_scale='Cividis_r', zoom=10, height=600)
fig
  1. Tendencia temporal de reviews
df_reviews_summary_airbnb['last_date'] = pd.to_datetime(df_reviews_summary_airbnb['last_date'])
df_reviews_summary_airbnb['month'] = df_reviews_summary_airbnb['last_date'].dt.month
df_reviews_summary_airbnb['year'] = df_reviews_summary_airbnb['last_date'].dt.year
reviews_por_mes = df_reviews_summary_airbnb.groupby(['year', 'month'])['num_reviews'].sum().sort_index().reset_index()
reviews_por_mes_2024 = reviews_por_mes[reviews_por_mes['year'] >= 2024].sort_values('year')
fig = px.bar(
    x=[f"{row.year}-{row.month:02d}" for row in reviews_por_mes_2024.itertuples()],
    y=reviews_por_mes_2024["num_reviews"],
    labels={"x": "Período", "y": "num_reviews"},
    title="Reviews mensuales",
    height=600,
)

fig.update_layout(xaxis_tickangle=-45)  # opcional, inclina etiquetas
fig.show()
  1. Ganancias totales por mes
df_online_retail_ii['sales'] = df_online_retail_ii['quantity'] * df_online_retail_ii['unit_price']
df_online_retail_ii['invoice_date'] = pd.to_datetime(df_online_retail_ii['invoice_date'])
df_online_retail_ii['monthly_invoice'] = df_online_retail_ii['invoice_date'].dt.to_period('M')
df_online_retail_ii.groupby("monthly_invoice")
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001D886E92710>
df_o_r_ii_group = df_online_retail_ii.groupby('monthly_invoice', as_index=False).sum(numeric_only=True)
df_o_r_ii_group["monthly_invoice"] = df_o_r_ii_group["monthly_invoice"].astype(str)
fig2 = px.bar(
    df_o_r_ii_group,
    x='monthly_invoice',
    y='sales'
)

fig2.show()
  1. Tarjetas KPI
import plotly.graph_objects as go
from plotly.subplots import make_subplots

ventas_totales = df_online_retail_ii["sales"].sum()

reviews_totales_2024 = (
    df_reviews_summary_airbnb.query("year >= 2024")["num_reviews"].sum()
)

listings_activos = df_listings_airbnb.shape[0]

rating_medio = df_listings_airbnb["review_score_rating"].mean()

fig_ventas = go.Figure(
    go.Indicator(
        mode="number",
        value=ventas_totales,
        title={"text": "Ventas totales"},
        number={"prefix": "USD "}
    )
)
fig_ventas.show()

fig_reviews = go.Figure(
    go.Indicator(
        mode="number",
        value=reviews_totales_2024,
        title={"text": "Reviews totales (2024+)"},
    )
)
fig_reviews.show()

fig_listings = go.Figure(
    go.Indicator(
        mode="number",
        value=listings_activos,
        title={"text": "Listings activos"},
    )
)
fig_listings.show()

fig_rating = go.Figure(
    go.Indicator(
        mode="number",
        value=rating_medio,
        title={"text": "Rating medio"},
        number={"valueformat": ".2f"}
    )
)
fig_rating.show()