Supabase Airbnb ETL – Notebook end-to-end
- Conexión | Leyendo credenciales desde .env
from supabase import create_client, Client
from dotenv import load_dotenv
import pandas as pd
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.supabase_utils import get_client
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
- Carga de tablas
- reviews_summary_airbnb
- listings_airbnb
- neighbourhoods_airbnb
- neighbourhoods_gejson_airbnb
- online_retail_ii
- global_superstore2
def fetch_full_table(table_name: str, chunk_size: int = 1_000) -> pd.DataFrame:
"""
Descarga una tabla de Supabase completa, en bloques de `chunk_size`,
evitando el límite de 1 000 filas por petición.
"""
supabase = get_client()
start = 0
frames = []
while True:
resp = (
supabase.table(table_name)
.select("*")
.range(start, start + chunk_size - 1)
.execute()
)
rows = resp.data
if not rows: # ⇐ cuando ya no vienen filas, cortamos
break
frames.append(pd.DataFrame(rows))
start += chunk_size # siguiente bloque
return pd.concat(frames, ignore_index=True)
# Uso directo:
df_reviews_summary_airbnb = fetch_full_table("reviews_summary_airbnb")
df_listings_airbnb = fetch_full_table("listings_airbnb")
neighbourhoods_airbnb = supabase.table('neighbourhoods_airbnb').select('*').execute()
df_neighbourhoods_airbnb = pd.DataFrame(neighbourhoods_airbnb.data)
neighbourhoods_gejson_airbnb = supabase.table('neighbourhoods_gejson_airbnb').select('*').execute()
df_neighbourhoods_gejson_airbnb = pd.DataFrame(neighbourhoods_gejson_airbnb.data)
df_online_retail_ii = fetch_full_table("online_retail_ii")
df_global_superstore2 = fetch_full_table("global_superstore2")
- Exploración rápida
df_reviews_summary_airbnb.shape
df_reviews_summary_airbnb.head(3)
0 |
11508 |
44 |
2025-01-26 |
1 |
14222 |
123 |
2025-01-18 |
2 |
16695 |
45 |
2019-11-30 |
df_listings_airbnb.head(3)
0 |
11508 |
22 |
-34.58184 |
-58.42415 |
Entire condo |
2 |
1.0 |
1.0 |
67518.0 |
3 |
1125 |
300 |
44 |
4.82 |
0.29 |
1 |
14222 |
22 |
-34.58617 |
-58.41036 |
Entire rental unit |
2 |
1.0 |
1.0 |
22375.0 |
7 |
40 |
44 |
123 |
4.77 |
0.80 |
2 |
16695 |
18 |
-34.61439 |
-58.37611 |
Entire loft |
4 |
1.0 |
1.0 |
52511.0 |
2 |
1125 |
365 |
45 |
4.28 |
0.27 |
df_neighbourhoods_airbnb.shape
df_neighbourhoods_airbnb.head(3)
0 |
1 |
Agronomia |
1 |
2 |
Almagro |
2 |
3 |
Balvanera |
- df_neighbourhoods_gejson_airbnb
df_neighbourhoods_gejson_airbnb.shape
df_neighbourhoods_gejson_airbnb.head(3)
0 |
{'type': 'MultiPolygon', 'crs': {'type': 'name... |
1 |
1 |
{'type': 'MultiPolygon', 'crs': {'type': 'name... |
2 |
2 |
{'type': 'MultiPolygon', 'crs': {'type': 'name... |
3 |
df_online_retail_ii.shape
df_online_retail_ii.head(3)
0 |
560712 |
22781 |
GUMBALL MAGAZINE RACK |
1 |
2011-07-20T13:06:00 |
7.65 |
14456 |
United Kingdom |
7 |
2011 |
1 |
560712 |
84732D |
CUTE RABBIT CEATURE SCREEN |
2 |
2011-07-20T13:06:00 |
0.39 |
14456 |
United Kingdom |
7 |
2011 |
2 |
560712 |
84732B |
CUTE BIRD CEATURE SCREEN |
2 |
2011-07-20T13:06:00 |
0.39 |
14456 |
United Kingdom |
7 |
2011 |
df_global_superstore2.shape
df_global_superstore2.head(3)
0 |
32298 |
CA-2012-124891 |
2012-07-31 |
2012-07-31 |
Same Day |
RH-19495 |
Rick Hansen |
Consumer |
New York City |
New York |
... |
Technology |
Accessories |
Plantronics CS510 - Over-the-Head monaural Wir... |
2309.65 |
7 |
0.0 |
762.18 |
933.57 |
Critical |
33.00 |
1 |
26341 |
IN-2013-77878 |
2013-02-05 |
2013-02-07 |
Second Class |
JR-16210 |
Justin Ritter |
Corporate |
Wollongong |
New South Wales |
... |
Furniture |
Chairs |
Novimex Executive Leather Armchair, Black |
3709.40 |
9 |
0.1 |
-288.77 |
923.63 |
Critical |
-7.78 |
2 |
25330 |
IN-2013-71249 |
2013-10-17 |
2013-10-18 |
First Class |
CR-12730 |
Craig Reiter |
Consumer |
Brisbane |
Queensland |
... |
Technology |
Phones |
Nokia Smart Phone, with Caller ID |
5175.17 |
9 |
0.1 |
919.97 |
915.49 |
Medium |
17.78 |
3 rows × 24 columns
Última actualización: 2025-07-30
- Mapa interactivo que muestra los reitings de los airbnb según la zona
import plotly.express as px
fig = px.scatter_map(df_listings_airbnb, lat='latitude', lon='longitude', hover_name='id', color='review_score_rating',
color_continuous_scale='Cividis_r', zoom=10, height=600)
fig
- Tendencia temporal de reviews
- Cantidad de nuevos reviews por mes de 2024 a 2025
df_reviews_summary_airbnb['last_date'] = pd.to_datetime(df_reviews_summary_airbnb['last_date'])
df_reviews_summary_airbnb['month'] = df_reviews_summary_airbnb['last_date'].dt.month
df_reviews_summary_airbnb['year'] = df_reviews_summary_airbnb['last_date'].dt.year
reviews_por_mes = df_reviews_summary_airbnb.groupby(['year', 'month'])['num_reviews'].sum().sort_index().reset_index()
reviews_por_mes_2024 = reviews_por_mes[reviews_por_mes['year'] >= 2024].sort_values('year')
fig = px.bar(
x=[f"{row.year}-{row.month:02d}" for row in reviews_por_mes_2024.itertuples()],
y=reviews_por_mes_2024["num_reviews"],
labels={"x": "Período", "y": "num_reviews"},
title="Reviews mensuales",
height=600,
)
fig.update_layout(xaxis_tickangle=-45) # opcional, inclina etiquetas
fig.show()
- Ganancias totales por mes
df_online_retail_ii['sales'] = df_online_retail_ii['quantity'] * df_online_retail_ii['unit_price']
df_online_retail_ii['invoice_date'] = pd.to_datetime(df_online_retail_ii['invoice_date'])
df_online_retail_ii['monthly_invoice'] = df_online_retail_ii['invoice_date'].dt.to_period('M')
df_online_retail_ii.groupby("monthly_invoice")
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001D886E92710>
df_o_r_ii_group = df_online_retail_ii.groupby('monthly_invoice', as_index=False).sum(numeric_only=True)
df_o_r_ii_group["monthly_invoice"] = df_o_r_ii_group["monthly_invoice"].astype(str)
fig2 = px.bar(
df_o_r_ii_group,
x='monthly_invoice',
y='sales'
)
fig2.show()
- Tarjetas KPI
import plotly.graph_objects as go
from plotly.subplots import make_subplots
ventas_totales = df_online_retail_ii["sales"].sum()
reviews_totales_2024 = (
df_reviews_summary_airbnb.query("year >= 2024")["num_reviews"].sum()
)
listings_activos = df_listings_airbnb.shape[0]
rating_medio = df_listings_airbnb["review_score_rating"].mean()
fig_ventas = go.Figure(
go.Indicator(
mode="number",
value=ventas_totales,
title={"text": "Ventas totales"},
number={"prefix": "USD "}
)
)
fig_ventas.show()
fig_reviews = go.Figure(
go.Indicator(
mode="number",
value=reviews_totales_2024,
title={"text": "Reviews totales (2024+)"},
)
)
fig_reviews.show()
fig_listings = go.Figure(
go.Indicator(
mode="number",
value=listings_activos,
title={"text": "Listings activos"},
)
)
fig_listings.show()
fig_rating = go.Figure(
go.Indicator(
mode="number",
value=rating_medio,
title={"text": "Rating medio"},
number={"valueformat": ".2f"}
)
)
fig_rating.show()