2.6 - Luis' Notes - Our First Predictive Model¶
This document details the development of our first artificial intelligence software project, focused on predicting mask requirements. The process is structured in the following steps:
- Importing necessary libraries.
- Data preparation and preprocessing.
- Definition of the model components.
- Implementation of the training loop.
- Visualization and analysis of results.
- Application of the model to make predictions.
Step 1: Importing Necessary Libraries¶
To start, we will import the essential Python libraries for this project. Two main libraries have been selected for their suitability and simplicity.
- numpy: Fundamental library for numerical computing in Python. It will be used for manipulating data arrays and performing mathematical operations.
- matplotlib: Library for creating static, interactive, and animated visualizations in Python. It will be used to visualize the data and the model results.
# Code Block 1: Importing our libraries
import numpy as np
import matplotlib.pyplot as plt
# A quick print to confirm everything is loaded and ready. Good practice!
print("Toolkit ready. NumPy and Matplotlib are loaded and ready.")
Toolkit ready. NumPy and Matplotlib are loaded and ready.
Step 2: Data Preparation and Preprocessing¶
Our model needs data to learn. While Sophia and Ethan work with the hospital's real database, I am creating a "synthetic" dataset. It mimics the relationship between patients (x) and required masks (y), with some random noise added so that it looks realistic.
# Code Block 2: Creating a Synthetic and Realistic Dataset
# Setting a 'random seed'. This is crucial in machine learning software
# to ensure that 'random' numbers are the same each time the code runs,
# making results reproducible.
np.random.seed(42)
# Simulate data for 50 days.
m = 50 # 'm' is the standard variable name for the number of training examples.
# X = Number of Patients. Generate 50 random values between 50 and 150.
# The shape (m, 1) creates a column vector.
X = 50 + 100 * np.random.rand(m, 1)
# y = Number of Masks Used.
# Define a relationship: y = 25 (base) + 0.8 * X + some random noise.
# This simulates a real-world scenario where the relationship isn't perfectly linear.
y = 25 + 0.8 * X + np.random.randn(m, 1) * 10
# First step with any dataset is to visualize it.
# Plot to confirm it looks as expected.
plt.figure(figsize=(12, 7))
plt.scatter(X, y, label='Simulated Daily Data')
plt.title("Minermont Hospital - Synthetic Data")
plt.xlabel("Number of Patients (Feature X)")
plt.ylabel("Masks Used (Target y)")
plt.grid(True)
plt.legend()
plt.show()
Step 3: Defining the Model Components¶
Marta gave us the blueprint, now let's define the core pieces in our code:
- The Hypothesis ($h(x)$): This is our prediction function. For linear regression, it's just the equation of a line. We'll call it prediction(). $$prediction(x) = w * x + b$$ $w$ (weight) and $b$ (bias) are the parameters our model will learn. These are the "controls" we adjust.
- The Cost Function ($J(w, b)$): This tells us how "wrong" our model's predictions are. The lower, the better. We'll use Mean Squared Error (MSE), a fairly standard choice. $$cost = averageOf( (prediction - actualValue)^2 )$$
- Gradient Descent: This is the actual learning process. It automatically adjusts $w$ and $b$ to minimize the cost. It finds the "slope" of the cost function and takes steps downhill.
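To make these three pieces concrete, here is a minimal sketch of them as standalone functions. The helper names (prediction, cost, gradient_step) and the tiny demo dataset are mine; the training loop further down inlines the same logic rather than calling functions.

```python
import numpy as np

def prediction(x, w, b):
    # Hypothesis: a straight line with slope w and intercept b.
    return w * x + b

def cost(y_pred, y_true):
    # Mean Squared Error: average of the squared prediction errors.
    return np.mean((y_pred - y_true) ** 2)

def gradient_step(X, y, w, b, learning_rate):
    # One step of batch gradient descent over all m examples.
    m = len(X)
    y_pred = prediction(X, w, b)
    dw = (1 / m) * np.sum((y_pred - y) * X)  # slope of the cost w.r.t. w
    db = (1 / m) * np.sum(y_pred - y)        # slope of the cost w.r.t. b
    return w - learning_rate * dw, b - learning_rate * db

# Tiny demo on the exact line y = 2x + 1:
# at the true parameters, the cost is exactly zero.
X_demo = np.array([[1.0], [2.0], [3.0]])
y_demo = 2 * X_demo + 1
print(cost(prediction(X_demo, 2.0, 1.0), y_demo))  # 0.0
```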
Step 4: Implementing the Training Loop¶
This is the heart of our program. We will initialize our parameters and then run a loop that repeatedly refines them using Gradient Descent.
Since the dataset is very small (only 50 examples) and we want to keep the code simple, we will use batch gradient descent, processing all 50 examples in every iteration.
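Written out, the batch update that each iteration performs is:

$$\frac{\partial J}{\partial w} = \frac{1}{m}\sum_{i=1}^{m}\left(prediction(x^{(i)}) - y^{(i)}\right)x^{(i)} \qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(prediction(x^{(i)}) - y^{(i)}\right)$$

$$w := w - \eta\,\frac{\partial J}{\partial w} \qquad b := b - \eta\,\frac{\partial J}{\partial b}$$

where $\eta$ is the learning rate. (Strictly speaking, differentiating the squared error produces a factor of 2; as is common, we fold it into the learning rate.)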
# Code Block 3: Building and Training the Model (with Early Stopping)

# --- 1. Parameter Initialization ---
# Give the model a starting point. Initialize w and b to 0.
# A simple, neutral starting position.
w = 0.0
b = 0.0

# --- 2. Hyperparameter Setup ---
# These are settings for the learning algorithm, not learned from the data.
# We need to set them manually.
learning_rate = 0.0001  # How large each update step is. Smaller values are safer.
num_iterations = 10000  # Maximum number of iterations.
tolerance = 0.001       # Minimum change in cost to continue (early-stopping threshold).

# --- 3. Training Loop ---
# We'll store the cost at each step to observe the model's learning progress.
cost_log = []

print("Starting training sequence...")

for i in range(num_iterations):
    # a) Make predictions for ALL data points with current w and b.
    y_pred = w * X + b

    # b) Calculate the cost (MSE) for these predictions.
    # This tells us how well we're doing in this iteration.
    cost = np.mean((y_pred - y)**2)
    cost_log.append(cost)

    # --- Early Stopping Condition ---
    # If the cost hasn't changed much compared to the previous step,
    # we've reached a flat region and aren't learning much more.
    if i > 0 and abs(cost_log[-2] - cost_log[-1]) < tolerance:
        print(f"Early stopping at iteration {i} due to minimal change in cost.")
        break

    # c) Compute gradients. This is the core of the learning process.
    # These formulas come from the derivative of the cost function
    # (the factor of 2 from differentiating the square is absorbed
    # into the learning rate, a common simplification).
    # They indicate the slope of our cost "hill" for w and b.
    dw = (1/m) * np.sum((y_pred - y) * X)  # Gradient for weight
    db = (1/m) * np.sum(y_pred - y)        # Gradient for bias

    # d) Update parameters.
    # Move w and b in the *opposite* direction of the gradient.
    # This is the "downhill" step.
    w = w - learning_rate * dw
    b = b - learning_rate * db

    # Simple logging to monitor progress in real time:
    # print the cost every 100 iterations, starting at iteration 1,
    # since the cost at iteration 0 (with w = b = 0) is huge and not informative.
    if ((i - 1) % 100) == 0:
        print(f"Iteration {i}: Cost = {cost:.6f}")

# Report the number of iterations and the final error.
print("\n--- Training Complete ---")
print(f"Number of iterations: {i}")
print(f"Final error: {cost:.6f}")
print(f"Final learned weight (w): {w:.4f}")
print(f"Final learned bias (b): {b:.4f}")
Starting training sequence...
Iteration 1: Cost = 149.453144
Iteration 101: Cost = 143.590561
Iteration 201: Cost = 143.488009
Iteration 301: Cost = 143.385628
Iteration 401: Cost = 143.283419
Iteration 501: Cost = 143.181381
Iteration 601: Cost = 143.079514
Iteration 701: Cost = 142.977817
Iteration 801: Cost = 142.876291
Iteration 901: Cost = 142.774934
Iteration 1001: Cost = 142.673747
Iteration 1101: Cost = 142.572729
Iteration 1201: Cost = 142.471881
Iteration 1301: Cost = 142.371201
Iteration 1401: Cost = 142.270689
Iteration 1501: Cost = 142.170346
Iteration 1601: Cost = 142.070171
Early stopping at iteration 1657 due to minimal change in cost.

--- Training Complete ---
Number of iterations: 1657
Final error: 142.014146
Final learned weight (w): 1.0363
Final learned bias (b): 0.3832
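As a sanity check (my own addition, not part of the pipeline), we can compare these learned parameters against the closed-form least-squares fit, which NumPy computes directly with np.polyfit. Regenerating the same synthetic data with seed 42:

```python
import numpy as np

# Recreate the synthetic dataset exactly as in Code Block 2.
np.random.seed(42)
m = 50
X = 50 + 100 * np.random.rand(m, 1)
y = 25 + 0.8 * X + np.random.randn(m, 1) * 10

# Closed-form least-squares fit: a degree-1 polynomial (a straight line).
w_exact, b_exact = np.polyfit(X.ravel(), y.ravel(), 1)
print(f"Closed-form fit: w = {w_exact:.4f}, b = {b_exact:.4f}")
```

The closed-form slope and intercept should land near the true values (0.8 and 25). Our gradient-descent bias (0.3832) is still far from the true intercept, most likely because with X in the 50-150 range the bias gradient is tiny compared to the weight gradient, so b moves very slowly and early stopping kicks in first; more iterations or feature scaling would close the gap.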
Step 5: Visualization and Analysis of Results¶
Okay, we’ve built the engine. But is it really working? In machine learning, we need to visualize.
First, let's plot the learning curve. This shows how the cost decreases over time. If this plot doesn't go down, something went wrong.
Then, we'll plot the final line our model learned over the original data. This is the ultimate visual check to see how well it fits.
# Code Block 4: Visualizing Model Performance
# Create a figure with two side-by-side plots for a nice report.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 7))
# Remove the first cost log value since w and b were 0 initially
# and this cost is not representative
cost_log.pop(0)
# --- Plot 1: Learning Curve ---
ax1.plot(cost_log)
ax1.set_ylim(130, 160) # Set y-axis based on previous error values
ax1.set_title("Model Learning Curve")
ax1.set_xlabel("Iteration")
ax1.set_ylabel("Cost (Mean Squared Error)")
ax1.grid(True)
# --- Plot 2: Final Regression Line ---
# First, add the original data points to the plot
ax2.scatter(X, y, label='Original Data')
# Then, add the line representing the learned model
# Generate y values for the line using our final w and b
ax2.plot(X, w * X + b, color='red', linewidth=3, label='Learned Model')
ax2.set_title("Final Model Fit")
ax2.set_xlabel("Number of Patients")
ax2.set_ylabel("Masks Used")
ax2.grid(True)
ax2.legend()
plt.show()
Step 6: Applying the Model to Make Predictions¶
Alright, the goal was to build something useful! Now that our model is trained, let's use it to predict mask requirements for new scenarios.
This is the reward: taking our trained w and b and plugging them into our line equation.
# Code Block 5: Making a Prediction
# Suppose we want to know how many masks to budget if we expect 100 patients tomorrow.
patients_tomorrow = 100
# Use our trained final w and b to compute the prediction.
predicted_masks = w * patients_tomorrow + b
print("\n--- Making a New Prediction ---")
print(f"For a day with {patients_tomorrow} patients...")
print(f"Our model predicts we will need approximately {int(predicted_masks)} masks.")
# Let's try another for a quieter day.
patients_quiet_day = 65
predicted_masks_quiet = w * patients_quiet_day + b
print(f"\nFor a day with {patients_quiet_day} patients...")
print(f"Our model predicts we will need approximately {int(predicted_masks_quiet)} masks.")
--- Making a New Prediction ---
For a day with 100 patients...
Our model predicts we will need approximately 104 masks.

For a day with 65 patients...
Our model predicts we will need approximately 67 masks.
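One small practical detail (my own suggestion, not part of the code above): int() truncates toward zero, which systematically under-budgets. For supply planning it is arguably safer to round up with math.ceil. A hypothetical helper, using the final parameters reported above:

```python
import math

def predict_masks(patients, w, b):
    # Hypothetical helper: apply the learned line and round *up*,
    # since running out of masks is worse than over-ordering a few.
    return math.ceil(w * patients + b)

# Using the final learned parameters from the training run above.
w_final, b_final = 1.0363, 0.3832
print(predict_masks(100, w_final, b_final))  # 105 (int() truncates this to 104)
```

Note that this rounds the 100-patient prediction up to 105 rather than truncating it to 104 as the output above does.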
Conclusion and Next Steps¶
We have successfully implemented an initial predictive model, built from scratch, based on linear regression to estimate mask requirements.
This first prototype is conceptually simple, but because we implemented every piece ourselves, we understand exactly how it works. It serves as the starting point for more complex and accurate models.