One of the other benefits of tracking the index for filtering is we do not need to filter out data with the dataframe boolean bitwise filtering with .loc[] and &. At the point of filtering, we can just use the list of index values.
If we can create just the container of index values, the code at the point of filtering would look much simpler. We would only need to track the row index labels when subsetting the final dataframe, and the logic for how the index values were generated can be abstracted away. This potentially opens us up for the ability to do any more complex filtering later on since only need to provide a list of index labels.
Create List of Indices
Now, how do we create this list of indices? If we were doing the filtering one column at a time, we would be using the & operator across all the filters (see the code example above). We now need the equivalent of this on a list. We will separately get a list of indices that match the filter, and then filter down each list for values that match. In set theory, this is an intersect.
We will create a new @reactive.calc called filter_idx() that will perform the same if statement checks as we did before. The difference now is at the point where the data is filtered, we only need to know that we’re filtering based on all the index labels that matched the filter.
#| standalone: true
#| components: [editor, viewer]
#| viewerHeight: 650
import pandas as pd
from shiny import App, render, reactive, ui
app_ui = ui.page_fixed(
ui.output_ui("table_day_filter"),
ui.output_ui("table_time_filter"),
ui.output_data_frame("render_df"),
)
def server(input, output, session):
@reactive.calc
def filter_idx():
df = df_tips()
idx = set(df.index)
if input.filter_day():
current_idx = df.loc[df["day"].isin(input.filter_day())].index
idx = idx.intersection(set(current_idx))
if input.filter_time():
current_idx = df.loc[df["time"].isin(input.filter_time())].index
idx = idx.intersection(set(current_idx))
# convert to list because you get this error
# when passing into .loc[]
# TypeError: Passing a set as an indexer is not supported. Use a list instead.
return list(idx)
# filtered dataframe from filters
@reactive.calc
def data_filtered():
df = df_tips().loc[filter_idx()]
return df
# dataframe to view in app
@render.data_frame
def render_df():
return render.DataGrid(data_filtered())
# table day filter
@render.ui
def table_day_filter():
return ui.input_selectize(
"filter_day",
"table day filter:",
df_tips()["day"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# table time filter
@render.ui
def table_time_filter():
return ui.input_selectize(
"filter_time",
"table time filter:",
df_tips()["time"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# placeholder for joined dataframe
@reactive.calc
def df_tips():
# fmt: off
data = {
'total_bill': [
16.99, 10.34, 21.01, 23.68, 24.59,
25.29, 8.77, 26.88, 15.04, 14.78
],
'tip': [
1.01, 1.66, 3.50, 3.31, 3.61,
4.71, 2.00, 3.12, 3.52, 3.00
],
'sex': [
'Female', 'Male', 'Male', 'Male', 'Female',
'Male', 'Male', 'Male', 'Male', 'Female'
],
'smoker': [
'No', 'No', 'No', 'No', 'Yes',
'No', 'No', 'Yes', 'No', 'Yes'
],
'day': [
'Sun', 'Fri', 'Sun', 'Thu', 'Sun',
'Sun', 'Sat', 'Sat', 'Sat', 'Sat'
],
'time': [
'Dinner', 'Dinner', 'Lunch', 'Dinner', 'Lunch',
'Dinner', 'Lunch', 'Dinner', 'Lunch', 'Dinner'
],
'size': [2, 3, 3, 2, 4, 4, 2, 4, 2, 2]
}
# fmt: on
df = pd.DataFrame(data)
return df
app = App(app_ui, server)
Iterative Improvement
Now that we’ve modified the current code to roughly the parts we want, let’s make a few code improvements.
First, our new filter_idx() @reactive.calc still relies on a series of if statements. We want to eventually perform filters across all or an arbitrary number of columns and filter components, so we’d want to do the same if logic in a for loop. We will need to track the column name as well as the filter id used by Shiny.
Second, the way filter_idx() was implemented has a lot of data type changes and isn’t obvious what what type the idx variable is because the initial type is changed in the return statement, and used as if its an .index value. This makes the code extremely hard to reason with and maintain. We are expecting an index value, the initial idx variable is stored as a set so we can run .intersection(), but the return type is a list.
The way filter_idx() was implemented wasn’t written this way for pedagogical purposes. I really did write the code in this manner in the initial implementation.
The first issue is fixed by using a for loop and creating a few accompanying variables. The second issue can be fixed by taking another look through the pandas API reference and noticing that we did not actually need to use .intersection() on a set, the index object also has an .intersection() method! This means we can implement filter_idx() using only index objects.
Do not change the data type of your variables during an implementation.
#| standalone: true
#| components: [editor, viewer]
#| viewerHeight: 650
import pandas as pd
from shiny import App, render, reactive, ui
app_ui = ui.page_fixed(
ui.output_ui("table_day_filter"),
ui.output_ui("table_time_filter"),
ui.output_data_frame("render_df"),
)
def server(input, output, session):
@reactive.calc
def filter_idx():
df = df_tips()
idx = df.index
filters = ["filter_day", "filter_time"]
cols = ["day", "time"]
for fltr, col in zip(filters, cols):
if input[fltr]():
current_idx = df.loc[df[col].isin(input[fltr]())].index
idx = idx.intersection(current_idx)
return idx
# filtered dataframe from filters
@reactive.calc
def data_filtered():
df = df_tips().loc[filter_idx()]
return df
# dataframe to view in app
@render.data_frame
def render_df():
return render.DataGrid(data_filtered())
# table day filter
@render.ui
def table_day_filter():
return ui.input_selectize(
"filter_day",
"table day filter:",
df_tips()["day"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# table time filter
@render.ui
def table_time_filter():
return ui.input_selectize(
"filter_time",
"table time filter:",
df_tips()["time"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# placeholder for joined dataframe
@reactive.calc
def df_tips():
# fmt: off
data = {
'total_bill': [
16.99, 10.34, 21.01, 23.68, 24.59,
25.29, 8.77, 26.88, 15.04, 14.78
],
'tip': [
1.01, 1.66, 3.50, 3.31, 3.61,
4.71, 2.00, 3.12, 3.52, 3.00
],
'sex': [
'Female', 'Male', 'Male', 'Male', 'Female',
'Male', 'Male', 'Male', 'Male', 'Female'
],
'smoker': [
'No', 'No', 'No', 'No', 'Yes',
'No', 'No', 'Yes', 'No', 'Yes'
],
'day': [
'Sun', 'Fri', 'Sun', 'Thu', 'Sun',
'Sun', 'Sat', 'Sat', 'Sat', 'Sat'
],
'time': [
'Dinner', 'Dinner', 'Lunch', 'Dinner', 'Lunch',
'Dinner', 'Lunch', 'Dinner', 'Lunch', 'Dinner'
],
'size': [2, 3, 3, 2, 4, 4, 2, 4, 2, 2]
}
# fmt: on
df = pd.DataFrame(data)
return df
app = App(app_ui, server)
Since we made the mistake of mixing and converting data types, we can use Python type hints to make it clear to others what is happening with the data types, and also as a cue to ourself if we end up using types that we do not expect.
#| standalone: true
#| components: [editor, viewer]
#| viewerHeight: 650
import pandas as pd
from shiny import App, render, reactive, ui
app_ui = ui.page_fixed(
ui.output_ui("table_day_filter"),
ui.output_ui("table_time_filter"),
ui.output_data_frame("render_df"),
)
def server(input, output, session):
@reactive.calc
def filter_idx() -> pd.Index:
df = df_tips()
idx = df.index
filters = ["filter_day", "filter_time"]
cols = ["day", "time"]
for fltr, col in zip(filters, cols):
if input[fltr]():
current_idx = df.loc[df[col].isin(input[fltr]())].index
idx = idx.intersection(current_idx)
return idx
# filtered dataframe from filters
@reactive.calc
def data_filtered() -> pd.DataFrame:
df = df_tips().loc[filter_idx()]
return df
# dataframe to view in app
@render.data_frame
def render_df():
return render.DataGrid(data_filtered())
# table day filter
@render.ui
def table_day_filter():
return ui.input_selectize(
"filter_day",
"table day filter:",
df_tips()["day"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# table time filter
@render.ui
def table_time_filter():
return ui.input_selectize(
"filter_time",
"table time filter:",
df_tips()["time"].unique().tolist(),
multiple=True,
remove_button=True,
options={"plugins": ["clear_button"]},
)
# placeholder for joined dataframe
@reactive.calc
def df_tips():
# fmt: off
data = {
'total_bill': [
16.99, 10.34, 21.01, 23.68, 24.59,
25.29, 8.77, 26.88, 15.04, 14.78
],
'tip': [
1.01, 1.66, 3.50, 3.31, 3.61,
4.71, 2.00, 3.12, 3.52, 3.00
],
'sex': [
'Female', 'Male', 'Male', 'Male', 'Female',
'Male', 'Male', 'Male', 'Male', 'Female'
],
'smoker': [
'No', 'No', 'No', 'No', 'Yes',
'No', 'No', 'Yes', 'No', 'Yes'
],
'day': [
'Sun', 'Fri', 'Sun', 'Thu', 'Sun',
'Sun', 'Sat', 'Sat', 'Sat', 'Sat'
],
'time': [
'Dinner', 'Dinner', 'Lunch', 'Dinner', 'Lunch',
'Dinner', 'Lunch', 'Dinner', 'Lunch', 'Dinner'
],
'size': [2, 3, 3, 2, 4, 4, 2, 4, 2, 2]
}
# fmt: on
df = pd.DataFrame(data)
return df
app = App(app_ui, server)