import plotly.io as pio
pio.renderers.default = "notebook"

import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Load the iris sample data bundled with Plotly Express and preview it.
iris = px.data.iris()
iris.head()

# Plotly Express: a single call maps DataFrame columns to the axes and to
# the marker colour (one colour per species).
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    title="Iris Dataset Scatter Plot: Plotly Express",
)
fig.show()

# Graph Objects version of the same scatter: start from an empty figure and
# add one marker trace per species by hand.
fig = go.Figure()

# sort=False walks the species in order of first appearance in the frame.
for species_name, species_data in iris.groupby("species", sort=False):
    species_trace = go.Scatter(
        name=species_name,
        mode="markers",
        x=species_data["sepal_width"],
        y=species_data["sepal_length"],
        # `text` feeds the %{text} placeholder in the hover template below.
        text=species_data["species"],
        hovertemplate=(
            "Species: %{text}<br>"
            "Sepal Width: %{x}<br>"
            "Sepal Length: %{y}<extra></extra>"
        ),
    )
    fig.add_trace(species_trace)

# Titles are set explicitly here; nothing is derived from column names.
fig.update_layout(
    title="Iris Dataset Scatter Plot: Graph Objects",
    xaxis_title="Sepal Width",
    yaxis_title="Sepal Length",
)
fig.show()

# Illustrative template of the general Plotly Express call shape.
# NOTE(review): `px.function_name` and `data_frame` look like placeholders;
# neither is defined in the visible code, so this cell is presumably meant
# to be read rather than executed.
fig = px.function_name(
    data_frame,
    x = "column_name",
    y = "column_name",
    color = "grouping_variable",
    title = "plot title"
)

# Display the figure built above.
fig.show()

# Syntax sketch for px.scatter listing the arguments this tutorial uses.
# NOTE(review): `data_frame`, `x` and `y` are bare placeholder names that are
# not assigned in the visible code; this reads as documentation, not a
# runnable call.
px.scatter(
    data_frame,
    x,
    y,
    color = None,
    size = None,
    hover_name = None,
    hover_data = None,
    title = None
)

# Reload the iris sample data and preview the first rows.
iris = px.data.iris()
iris.head()

# Scatter with the species shown as the bold hover heading.
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    hover_name="species",
    title="Sepal Width vs. Sepal Length",
)
fig.show()

# Same scatter, with the two petal measurements added to the hover box.
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    hover_name="species",
    hover_data=["petal_width", "petal_length"],
    title="Sepal Width vs. Sepal Length with Extra Hover Information",
)
fig.show()

# Syntax sketch for px.line listing the arguments this tutorial uses.
# NOTE(review): `data_frame`, `x` and `y` are bare placeholder names that are
# not assigned in the visible code; this reads as documentation, not a
# runnable call.
px.line(
    data_frame,
    x,
    y,
    color = None,
    markers = False,
    title = None
)

# Gapminder sample data: one row per country and year.
gapminder = px.data.gapminder()

# Single-country slice for the first line chart.
usa = gapminder.loc[gapminder["country"].eq("United States")]
usa.head()

fig = px.line(
    data_frame=usa,
    x="year",
    y="lifeExp",
    markers=True,
    title="Life Expectancy in the United States Over Time",
)
fig.show()

# Three-country slice; `color` then draws one line per country.
gapminder_small = gapminder.loc[
    gapminder["country"].isin(["United States", "Canada", "Mexico"])
]

fig = px.line(
    data_frame=gapminder_small,
    x="year",
    y="lifeExp",
    color="country",
    markers=True,
    title="Life Expectancy Over Time for Three Countries",
)
fig.show()

# Syntax sketch for px.bar listing the arguments this tutorial uses.
# NOTE(review): `data_frame`, `x` and `y` are bare placeholder names that are
# not assigned in the visible code; this reads as documentation, not a
# runnable call.
px.bar(
    data_frame,
    x,
    y,
    color = None,
    barmode = "relative",
    title = None
)

# Restaurant tips sample data.
tips = px.data.tips()
tips.head()

# Mean total bill per day, returned as a two-column frame (day, total_bill).
avg_bill = tips.groupby("day", as_index=False)["total_bill"].mean()
avg_bill

# One bar per day; bar height is the mean bill computed above.
fig = px.bar(
    data_frame=avg_bill,
    x="day",
    y="total_bill",
    title="Average Restaurant Bill by Day",
)
fig.show()

# Mean total bill for every (day, sex) combination.
bill_by_day_sex = tips.groupby(["day", "sex"], as_index=False)["total_bill"].mean()
bill_by_day_sex

# Grouped layout: the bars for each sex sit side by side within a day.
fig = px.bar(
    data_frame=bill_by_day_sex,
    x="day",
    y="total_bill",
    color="sex",
    barmode="group",
    title="Average Restaurant Bill by Day and Sex",
)
fig.show()

# Stacked layout: the bars for each sex are piled on top of each other.
fig = px.bar(
    data_frame=bill_by_day_sex,
    x="day",
    y="total_bill",
    color="sex",
    barmode="stack",
    title="Stacked Bar Chart Example",
)
fig.show()

# Syntax sketch for px.histogram listing the arguments this tutorial uses.
# NOTE(review): `data_frame` and `x` are bare placeholder names that are not
# assigned in the visible code; this reads as documentation, not a runnable
# call.
px.histogram(
    data_frame,
    x,
    color = None,
    nbins = None,
    marginal = None,
    title = None
)

# Histogram of the total bill, asking for 20 bins.
fig = px.histogram(
    data_frame=tips,
    x="total_bill",
    nbins=20,
    title="Distribution of Restaurant Bills",
)
fig.show()

# Same histogram with a box plot drawn in the margin.
fig = px.histogram(
    data_frame=tips,
    x="total_bill",
    nbins=20,
    marginal="box",
    title="Distribution of Restaurant Bills with a Marginal Boxplot",
)
fig.show()

# Workflow sketch: create a figure, adjust its layout, adjust its traces,
# then display it.
# NOTE(review): `px.some_plot` and the `...` arguments look like placeholders;
# this is presumably illustrative and not meant to be executed as written.
fig = px.some_plot(...)
fig.update_layout(...)
fig.update_traces(...)
fig.show()

# Start from a plain Plotly Express scatter ...
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    title="Basic Plotly Express Figure",
)

# ... then restyle the returned figure. The update_* methods return the
# figure itself, so the two adjustments can be chained.
fig.update_layout(
    template="plotly_white",
    title_x=0.5,  # centre the title horizontally
    xaxis_title="Sepal Width",
    yaxis_title="Sepal Length",
).update_traces(
    marker={"size": 9, "opacity": 0.75},
)

fig.show()

import pandas as pd
import plotly.express as px

# Load the film-permit export from the working directory and preview it.
df = pd.read_csv("Film_Permits_20260410.csv")
df.head()

# Missing-value count per column, taken before the cleaning steps below.
print(df.isna().sum())

# Convert StartDateTime to datetime; unparseable values become NaT.
df["StartDateTime"] = pd.to_datetime(df["StartDateTime"], errors="coerce")

# Clean text fields: strip surrounding whitespace and treat blank entries as
# missing. Values that were already missing must stay missing: a bare
# astype(str) would turn them into the literal string "nan", which the later
# dropna(subset=[...]) calls would no longer remove.
for text_col in ("Borough", "Category"):
    stripped = df[text_col].astype(str).str.strip()
    keep = df[text_col].notna() & (stripped != "")
    df[text_col] = stripped.where(keep, pd.NA)

EventID                 0
EventType               0
StartDateTime         326
EndDateTime           151
EnteredOn            1806
EventAgency             0
ParkingHeld             0
Borough                 0
CommunityBoard(s)      10
PolicePrecinct(s)      10
Category                0
SubCategoryName         0
Country                 0
ZipCode(s)             10
dtype: int64

# Data cleaning
# Keep only the permits whose start date is present.
df_time = df[df["StartDateTime"].notna()].copy()

# Bucket every permit into the first day of its calendar month.
df_time["year_month"] = (
    df_time["StartDateTime"].dt.to_period("M").dt.to_timestamp()
)

# Permits per month, ordered chronologically.
monthly_counts = df_time.groupby("year_month").size()
monthly_counts = monthly_counts.reset_index(name="permit_count")
monthly_counts = monthly_counts.sort_values("year_month")

# Drop the most recent month when its latest start date falls before the
# 25th, i.e. when that month looks only partially covered by the data.
last_month = monthly_counts["year_month"].max()
df_last_month = df_time[df_time["year_month"] == last_month]

if df_last_month["StartDateTime"].dt.day.max() < 25:
    monthly_counts = monthly_counts.loc[
        monthly_counts["year_month"].ne(last_month)
    ]

fig = px.line(
    data_frame=monthly_counts,
    x="year_month",
    y="permit_count",
    markers=True,
    title="Monthly Film Permit Counts in New York City",
    labels={"year_month": "Month", "permit_count": "Number of Permits"},
    hover_data={"year_month": False, "permit_count": True},
)

# The update_* methods return the figure, so the tweaks are chained:
# custom hover text, overall layout, then a fixed x-axis window.
fig.update_traces(
    hovertemplate="<b>%{x|%B %Y}</b><br>Permits: %{y}<extra></extra>",
).update_layout(
    template="plotly_white",
    hovermode="x unified",
    title_x=0.5,
    xaxis_title="Month",
    yaxis_title="Number of Permits",
    width=1100,
    height=500,
    font={"size": 13},
).update_xaxes(
    range=["2023-01-01", "2025-12-31"],
)

fig.show()

# Permits per borough, largest first.
df_borough = df.dropna(subset=["Borough"]).copy()

borough_counts = df_borough.groupby("Borough").size()
borough_counts = borough_counts.reset_index(name="permit_count")
borough_counts = borough_counts.sort_values("permit_count", ascending=False)

# Each borough's share of all counted permits, in percent with one decimal.
borough_counts["share_percent"] = (
    borough_counts["permit_count"]
    .div(borough_counts["permit_count"].sum())
    .mul(100)
    .round(1)
)

fig = px.bar(
    data_frame=borough_counts,
    x="Borough",
    y="permit_count",
    text="permit_count",
    title="Film Permit Counts by Borough",
    labels={"Borough": "Borough", "permit_count": "Permit Count"},
)

# No `color` is used, so the figure has a single trace whose bars follow the
# row order of borough_counts; the share column can be attached row for row.
fig.update_traces(
    customdata=borough_counts[["share_percent"]].values,
    hovertemplate=(
        "<b>%{x}</b><br>"
        "Permits: %{y}<br>"
        "Share of total: %{customdata[0]}%<extra></extra>"
    ),
    textposition="outside",
)

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    xaxis_title="Borough",
    yaxis_title="Permit Count",
    height=550,
    font={"size": 13},
)

fig.show()

# Keep only permits where both the borough and the category are known.
df_cat = df.dropna(subset=["Borough", "Category"]).copy()
df_cat = df_cat[(df_cat["Borough"] != "") & (df_cat["Category"] != "")]

# The five most frequent categories, most common first.
top_categories = (
    df_cat["Category"]
    .value_counts()
    .head(5)
    .index
    .tolist()
)

df_cat_top = df_cat[df_cat["Category"].isin(top_categories)].copy()

# Fixed display order for the boroughs; values outside this list become
# missing in the categorical and are left out by the groupby below.
borough_order = ["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"]
df_cat_top["Borough"] = pd.Categorical(
    df_cat_top["Borough"],
    categories=borough_order,
    ordered=True
)

# Permit count per (borough, category). `observed` is pinned explicitly so
# the result does not depend on the pandas version's default for
# categorical group keys.
category_counts = (
    df_cat_top.groupby(["Borough", "Category"], observed=False)
    .size()
    .reset_index(name="permit_count")
)

# Share of each category within its borough (0-1) plus a rounded percentage
# for display in the hover box.
borough_totals = (
    category_counts.groupby("Borough", observed=False)["permit_count"]
    .transform("sum")
)
category_counts["share"] = category_counts["permit_count"] / borough_totals
category_counts["share_percent"] = (category_counts["share"] * 100).round(1)

# top_categories is already ordered by overall frequency; reuse that order
# for the legend and the stacking.
category_order = list(top_categories)

category_counts["Category"] = pd.Categorical(
    category_counts["Category"],
    categories=category_order,
    ordered=True
)

category_counts = category_counts.sort_values(["Borough", "Category"])

# `color` splits the figure into one trace per category, so the extra hover
# columns must travel through `custom_data`: Plotly Express then attaches the
# matching rows to each trace. Assigning the whole table via
# update_traces(customdata=...) would give every trace the same leading rows
# and show the wrong share/count on hover.
fig = px.bar(
    category_counts,
    x="Borough",
    y="share",
    color="Category",
    custom_data=["share_percent", "permit_count"],
    title="Composition of Top Film Permit Categories by Borough",
    labels={
        "Borough": "Borough",
        "share": "Share of Permits",
        "Category": "Category"
    },
    barmode="stack"
)

fig.update_traces(
    hovertemplate=(
        "<b>%{x}</b><br>"
        "Category: %{fullData.name}<br>"
        "Share within borough: %{customdata[0]}%<br>"
        "Permits: %{customdata[1]}<extra></extra>"
    )
)

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    xaxis_title="Borough",
    yaxis_title="Share of Permits",
    height=600,
    font=dict(size=13),
    legend_title_text="Category"
)

# Show the 0-1 shares on the y-axis as whole percentages.
fig.update_yaxes(tickformat=".0%")

fig.show()

Library	Main Strength	Typical Output	When We Might Use It
matplotlib	Very flexible and highly customizable	Mostly static plots	When we need detailed control over every part of a figure, especially for publication-quality visualizations
seaborn	Statistical visualizations with simple syntax and attractive defaults	Mostly static plots	When we want quick statistical plots such as box plots, histograms, heatmaps, or regression plots
plotly	Interactive visualizations for exploration and dashboards	Interactive plots	When we want users to hover, zoom, filter, and explore the data visually, especially in web-based projects

Feature	Plotly Express (`px`)	Graph Objects (`go`)
Difficulty Level	Easier	More advanced
Amount of Code	Short and concise	More verbose
Best For	Quick analysis and exploration	Full customization
Works Well With	pandas DataFrames	Manual figure building
Learning Curve	Beginner-friendly	Steeper
Typical Use Case	Exploratory data analysis	Complex dashboards and custom visualizations

	sepal_length	sepal_width	petal_length	petal_width	species	species_id
0	5.1	3.5	1.4	0.2	setosa	1
1	4.9	3.0	1.4	0.2	setosa	1
2	4.7	3.2	1.3	0.2	setosa	1
3	4.6	3.1	1.5	0.2	setosa	1
4	5.0	3.6	1.4	0.2	setosa	1

	sepal_length	sepal_width	petal_length	petal_width	species	species_id
0	5.1	3.5	1.4	0.2	setosa	1
1	4.9	3.0	1.4	0.2	setosa	1
2	4.7	3.2	1.3	0.2	setosa	1
3	4.6	3.1	1.5	0.2	setosa	1
4	5.0	3.6	1.4	0.2	setosa	1

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
1608	United States	Americas	1952	68.44	157553000	13990.48208	USA	840
1609	United States	Americas	1957	69.49	171984000	14847.12712	USA	840
1610	United States	Americas	1962	70.21	186538000	16173.14586	USA	840
1611	United States	Americas	1967	70.76	198712000	19530.36557	USA	840
1612	United States	Americas	1972	71.34	209896000	21806.03594	USA	840

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	day	sex	total_bill
0	Fri	Female	14.145556
1	Fri	Male	19.857000
2	Sat	Female	19.680357
3	Sat	Male	20.802542
4	Sun	Female	19.872222
5	Sun	Male	21.887241
6	Thur	Female	16.715312
7	Thur	Male	18.714667

	EventID	EventType	StartDateTime	EndDateTime	EnteredOn	EventAgency	ParkingHeld	Borough	CommunityBoard(s)	PolicePrecinct(s)	Category	SubCategoryName	Country	ZipCode(s)
0	911404	Theater Load in and Load Outs	01/09/2026 05:00:00 AM	01/11/2026 01:00:00 PM	01/06/2026 11:36:04 PM	Mayor's Office of Media & Entertainment	AMSTERDAM AVENUE between WEST 73 STREET and ...	Manhattan	7,	20,	Theater	Theater	United States of America	10023,
1	911038	Shooting Permit	01/08/2026 06:00:00 AM	01/08/2026 11:00:00 PM	01/06/2026 10:15:51 AM	Mayor's Office of Media & Entertainment	EAST 37 STREET between PARK AVENUE and MADIS...	Manhattan	5, 6,	14, 17,	Television	Cable-episodic	United States of America	10016, 10018,
2	911009	Shooting Permit	01/08/2026 07:30:00 AM	01/08/2026 11:00:00 PM	01/06/2026 09:11:12 AM	Mayor's Office of Media & Entertainment	MALCOLM X BOULEVARD between WEST 118 STREET a...	Manhattan	10,	28,	Television	Episodic series	United States of America	10026, 10027,
3	910739	Shooting Permit	01/08/2026 09:00:00 AM	01/09/2026 01:00:00 AM	01/05/2026 01:47:01 PM	Mayor's Office of Media & Entertainment	JOHNSON AVENUE between WHITE STREET and BOGART...	Brooklyn	1,	90,	Television	Episodic series	United States of America	11206, 11237,
4	910636	Shooting Permit	01/08/2026 07:00:00 AM	01/08/2026 08:00:00 PM	01/05/2026 11:35:43 AM	Mayor's Office of Media & Entertainment	EAST 66 STREET between MADISON AVENUE and PA...	Manhattan	8,	19,	Commercial	Commercial	United States of America	10065,

Plotly Tutorial for SDS 271 Students¶

Part 1: Introduction to Plotly and Basic Functions¶

1. What is Plotly and When to Use It?¶

When to use Plotly:¶

How it compares to tools you've already learned in class:¶

2. Installation and Imports¶

3. Two Core Interfaces in Plotly¶

Plotly Express (px)¶

Graph Objects (go)¶

Plotly Express vs. Graph Objects¶

Creating a Scatter Plot with Plotly Express¶

Creating the Same Scatter Plot with Graph Objects¶

Why This Tutorial Focuses on Plotly Express¶

Connection to SDS 271¶

4. Basic Plotly Express Functions¶

4.1 Scatter Plots with px.scatter()¶

Basic Syntax¶

When would we use px.scatter()?¶

Example¶

Adding More Information with hover_data¶

4.2 Line Charts with px.line()¶

Basic Syntax¶

When would we use px.line()?¶

Example¶

Comparing Multiple Groups with color¶

4.3 Bar Charts with px.bar()¶

Basic Syntax¶

When would we use px.bar()?¶

Example¶

Grouped and Stacked Bar Charts¶

4.4 Histograms with px.histogram()¶

Basic Syntax¶

When would we use px.histogram()?¶

Example¶

Adding a Marginal Plot¶

5. What Does a Plotly Function Return?¶

Example¶

6. Summary¶

Part 2: Dataset Introduction and Preparation¶

Why Plotly Works Well for This Dataset¶

Data Cleaning¶

Part 3: Interactive Visualizations With NYC Film Permit Dataset¶

Visualizing Permit Activity Over Time¶

Comparing Filming Activity Across Boroughs¶

Exploring Production Categories Interactively¶

Conclusion¶

Plotly Express (`px`)¶

Graph Objects (`go`)¶

4.1 Scatter Plots with `px.scatter()`¶

When would we use `px.scatter()`?¶

Adding More Information with `hover_data`¶

4.2 Line Charts with `px.line()`¶

When would we use `px.line()`?¶

Comparing Multiple Groups with `color`¶

4.3 Bar Charts with `px.bar()`¶

When would we use `px.bar()`?¶

4.4 Histograms with `px.histogram()`¶

When would we use `px.histogram()`?¶