Factor Exposure Modeling in Emerging Markets

Project Summary

This project explores how macroeconomic variables influence equity factor exposures in emerging markets. Starting from price histories for emerging-market equity ETFs and a set of macro series (the US dollar index, interest rates, commodities, and volatility), we build a basic factor model with PCA and linear regression.


Data Acquisition

📥 01 – Data Acquisition

This notebook pulls EM equity and macroeconomic time series data from Bloomberg using the BQL API.


import pandas as pd
import bql
import os

def fetch_price_panel(service, assets, dates):
    """Download the PX_LAST history of every ticker in ``assets``.

    Parameters
    ----------
    service : bql.Service
        Open BQL session used to build and execute the requests.
    assets : dict[str, str]
        Mapping of output column label -> Bloomberg ticker.
    dates : BQL range item
        Date range forwarded to ``px_last``.

    Returns
    -------
    dict[str, pandas.DataFrame]
        One single-column frame per label, indexed by ``date``.

    Raises
    ------
    ValueError
        If a response contains no column whose name includes ``PX_LAST``.
    """
    frames = {}
    for label, ticker in assets.items():
        data_item = service.data.px_last(dates=dates, fill='prev')
        response = service.execute(bql.Request(ticker, data_item))
        raw = response[0].df()

        # The price column is located by substring rather than by an exact name.
        px_cols = [c for c in raw.columns if 'PX_LAST' in c.upper()]
        if not px_cols:
            raise ValueError(
                f"No PX_LAST column returned for {ticker!r}; got {list(raw.columns)}"
            )

        # Work on an explicit copy so renaming never touches the response frame.
        frame = raw[['DATE', px_cols[0]]].copy()
        frame.columns = ['date', label]
        frames[label] = frame.set_index('date')
    return frames


bq = bql.Service()
date_range = bq.func.range('-3Y', '0D')

# Emerging-market equity proxies: column label -> Bloomberg ticker
em_assets = {
    'Brazil_EWZ': 'EWZ US Equity',
    'India_INDA': 'INDA US Equity',
    'China_FXI': 'FXI US Equity',
    'SouthAfrica_EZA': 'EZA US Equity',
    'Mexico_EWW': 'EWW US Equity',
    'Indonesia_EIDO': 'EIDO US Equity'
}

em_data = fetch_price_panel(bq, em_assets, date_range)
em_df = pd.concat(em_data.values(), axis=1)

# Macro series: column label -> Bloomberg ticker
macro_assets = {
    'USD_Index': 'DXY Curncy',
    'Oil_Brent': 'CO1 Comdty',
    'US_10Y_Yield': 'USGG10YR Index',
    'Fed_Funds': 'FDTR Index',
    'VIX': 'VIX Index',
    'Copper': 'LMCADY Comdty'
}

macro_data = fetch_price_panel(bq, macro_assets, date_range)
macro_df = pd.concat(macro_data.values(), axis=1)

# Keep only dates present in both panels, then drop rows with any missing value.
combined_df = pd.merge(em_df, macro_df, left_index=True, right_index=True)
combined_df = combined_df.sort_index().dropna()

# The modeling notebooks read this file back, so the name must stay in sync with them.
os.makedirs('../data', exist_ok=True)
combined_df.to_csv('../data/combined_em_macro_data.csv')
combined_df.head()



Factor Modeling

📊 02 – Factor Modeling with PCA and Regression


import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the price panel written by the data-acquisition notebook (it is saved as .csv).
df = pd.read_csv('../data/combined_em_macro_data.csv', parse_dates=['date'], index_col='date')
# Row-over-row log returns; the first row has no predecessor and is dropped.
log_returns = np.log(df / df.shift(1)).dropna()

# EM equity columns are recognised by their country prefix; every other column is macro.
em_columns = [col for col in df.columns if col.startswith(('Brazil', 'India', 'China', 'SouthAfrica', 'Mexico', 'Indonesia'))]
macro_columns = [col for col in df.columns if col not in em_columns]

Y = log_returns[em_columns]
X = log_returns[macro_columns]

# Standardise the macro returns before extracting principal components.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)

explained_var = pca.explained_variance_ratio_
# Labels follow the fitted model, so changing n_components needs no other edits.
pc_labels = [f'PC{i + 1}' for i in range(len(explained_var))]

plt.figure(figsize=(6, 4))
plt.plot(range(1, len(explained_var) + 1), explained_var, marker='o')
plt.title('PCA Explained Variance')
plt.xlabel('Principal Component')
plt.ylabel('Variance Ratio')
plt.grid(True)
plt.show()

# One OLS fit per EM column on the shared principal-component scores.
betas = {}
r2_scores = {}
for col in Y.columns:
    model = LinearRegression().fit(X_pca, Y[col])
    betas[col] = model.coef_
    r2_scores[col] = model.score(X_pca, Y[col])

beta_df = pd.DataFrame(betas, index=pc_labels).T

plt.figure(figsize=(8, 5))
sns.heatmap(beta_df, annot=True, cmap='coolwarm', center=0)
plt.title('Sensitivity of EM Equities to Macro Principal Components')
plt.xlabel('Principal Component')
plt.ylabel('EM Equity Index')
plt.tight_layout()
plt.show()

# In-sample fit for a single index, plotted against the realised returns.
sample_col = 'Brazil_EWZ'
model = LinearRegression().fit(X_pca, Y[sample_col])
Y_pred = model.predict(X_pca)

plt.figure(figsize=(10, 4))
plt.plot(Y.index, Y[sample_col], label='Actual', linewidth=1.5)
plt.plot(Y.index, Y_pred, label='Predicted (PCA Model)', linestyle='--')
plt.title(f"{sample_col} Return: Actual vs Predicted")
plt.legend()
plt.tight_layout()
plt.show()

png

png

png

import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load and process data (the acquisition notebook saves this panel as .csv)
df = pd.read_csv('../data/combined_em_macro_data.csv', parse_dates=['date'], index_col='date')
log_returns = np.log(df / df.shift(1)).dropna()

# Separate EM indices and macro variables
em_columns = [col for col in df.columns if col.startswith(('Brazil', 'India', 'China', 'SouthAfrica', 'Mexico', 'Indonesia'))]
macro_columns = [col for col in df.columns if col not in em_columns]

Y = log_returns[em_columns]
X = log_returns[macro_columns]

# Standardize and apply PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)
explained_var = pca.explained_variance_ratio_
# Component labels are derived from the fit so they always match n_components
pc_labels = [f'PC{i + 1}' for i in range(len(explained_var))]

# Plot explained variance
plt.figure(figsize=(6, 4))
plt.plot(range(1, len(explained_var) + 1), explained_var, marker='o')
plt.title('PCA Explained Variance')
plt.xlabel('Principal Component')
plt.ylabel('Variance Ratio')
plt.grid(True)
plt.tight_layout()
plt.show()

# Fit linear model for each EM index
betas = {}
r2_scores = {}
for col in Y.columns:
    model = LinearRegression().fit(X_pca, Y[col])
    betas[col] = model.coef_
    r2_scores[col] = model.score(X_pca, Y[col])

beta_df = pd.DataFrame(betas, index=pc_labels).T

# Heatmap of betas
plt.figure(figsize=(8, 5))
sns.heatmap(beta_df, annot=True, cmap='coolwarm', center=0)
plt.title('Sensitivity of EM Equities to Macro Principal Components')
plt.xlabel('Principal Component')
plt.ylabel('EM Equity Index')
plt.tight_layout()
plt.show()

# Create plots output directory
plot_dir = '../output/plots'
os.makedirs(plot_dir, exist_ok=True)

# Plot and save actual vs. predicted charts for all EM indices
for col in Y.columns:
    model = LinearRegression().fit(X_pca, Y[col])
    Y_pred = model.predict(X_pca)
    r2 = model.score(X_pca, Y[col])

    fig = plt.figure(figsize=(10, 4))
    plt.plot(Y.index, Y[col], label='Actual', linewidth=1.5)
    plt.plot(Y.index, Y_pred, label='Predicted', linestyle='--')
    plt.title(f'{col} — Actual vs Predicted (R² = {r2:.2f})')
    plt.legend()
    plt.tight_layout()

    # Save BEFORE showing: once the figure has been shown, a later
    # plt.savefig() writes a new, empty figure instead of this chart.
    filename = col.replace(" ", "_").replace("/", "_") + '.png'
    fig.savefig(os.path.join(plot_dir, filename))

    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

# Destination of the summary chart
local_output_path = '../output/plots'
os.makedirs(local_output_path, exist_ok=True)
plot_filename = os.path.join(local_output_path, "r2_scores_by_em_index.png")

# One row per EM index, best fit first
r2_df = pd.DataFrame.from_dict(r2_scores, orient='index', columns=['R² Score'])
r2_df = r2_df.rename_axis('EM Equity Index').sort_values("R² Score", ascending=False)

# Draw the bar chart and write it to disk
ax = r2_df.plot.bar(legend=False, color='skyblue', edgecolor='black')
ax.set_ylabel("R² Score")
ax.set_title("Model Fit (R²) by EM Equity Index")
plt.xticks(rotation=45, ha='right')
ax.figure.tight_layout()
ax.grid(axis='y')
ax.figure.savefig(plot_filename)

plot_filename

png

png

png

<Figure size 640x480 with 0 Axes>

png

<Figure size 640x480 with 0 Axes>

png

<Figure size 640x480 with 0 Axes>

png

<Figure size 640x480 with 0 Axes>

png

<Figure size 640x480 with 0 Axes>

png

'../output/plots/r2_scores_by_em_index.png'




<Figure size 640x480 with 0 Axes>

png



Visualization and Analysis

📈 03 – Visualizations, Rolling Regression & Reusable Functions


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the price panel written by the data-acquisition notebook (it is saved as .csv).
df = pd.read_csv('../data/combined_em_macro_data.csv', parse_dates=['date'], index_col='date')
# Row-over-row log returns; the first row has no predecessor and is dropped.
log_returns = np.log(df / df.shift(1)).dropna()

# EM equity columns are recognised by their country prefix; every other column is macro.
em_cols = [c for c in df.columns if c.startswith(('Brazil', 'India', 'China', 'SouthAfrica', 'Mexico', 'Indonesia'))]
macro_cols = [c for c in df.columns if c not in em_cols]

Y_all = log_returns[em_cols]
X_all = log_returns[macro_cols]

def rolling_r2_scores(X, Y, window=60, n_components=3):
    """Rolling in-sample R² of each Y column regressed on PCA factors of X.

    For every trailing block of ``window`` rows, X is standardised and reduced
    to ``n_components`` principal components; each column of Y is then fitted
    on those components by ordinary least squares. The R² of each fit is
    recorded against the last date of the block.

    Parameters
    ----------
    X : pandas.DataFrame
        Explanatory series. Rows are matched to ``Y`` by position.
    Y : pandas.DataFrame
        Target series, one column per asset.
    window : int, default 60
        Number of rows in each estimation block.
    n_components : int, default 3
        Number of principal components kept from X.

    Returns
    -------
    pandas.DataFrame
        Float frame with the columns of ``Y``, indexed by block end dates
        (``Y.index[window - 1:]``). Empty when ``Y`` has fewer than
        ``window`` rows.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")

    n_windows = max(len(Y) - window + 1, 0)
    n_targets = Y.shape[1]
    scores = np.full((n_windows, n_targets), np.nan)

    for k in range(n_windows):
        rows = slice(k, k + window)

        # The factors depend only on X, so build them once per block and
        # reuse them for every target column.
        X_scaled = StandardScaler().fit_transform(X.iloc[rows])
        X_pca = PCA(n_components=n_components).fit_transform(X_scaled)

        for j in range(n_targets):
            target = Y.iloc[rows, j]
            model = LinearRegression().fit(X_pca, target)
            scores[k, j] = model.score(X_pca, target)

    # Block k ends at row k + window - 1, so the labels start at
    # Y.index[window - 1] and the final date receives a score too.
    return pd.DataFrame(scores, index=Y.index[window - 1:], columns=Y.columns)

# Rolling fit quality for every EM column over 60-row windows
rolling_r2 = rolling_r2_scores(X_all, Y_all, window=60)

# Chart a single index as an example
sample_col = 'Brazil_EWZ'
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(rolling_r2.index, rolling_r2[sample_col])
ax.set_title(f'Rolling R²: {sample_col} vs Macro Factors (60-day PCA model)')
ax.set_ylabel('R²')
ax.grid(True)
fig.tight_layout()
plt.show()

png

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Simulated example to create a rolling_r2 DataFrame.
# NOTE: the values are synthetic, and this rebinds `rolling_r2`, replacing any
# scores computed earlier in the session.
dates = pd.date_range(start="2022-01-01", periods=100, freq="B")
# Labels match the EM columns produced by the data-acquisition step.
countries = ['Brazil_EWZ', 'India_INDA', 'China_FXI', 'SouthAfrica_EZA', 'Mexico_EWW', 'Indonesia_EIDO']
# Fixed seed so the exported charts are identical on every run.
rng = np.random.default_rng(0)
rolling_r2 = pd.DataFrame(
    rng.random((len(dates), len(countries))),
    index=dates,
    columns=countries
)

# Output directory
output_dir = "../output/plots"
os.makedirs(output_dir, exist_ok=True)

# Generate and save one chart per country
for col in rolling_r2.columns:
    fig = plt.figure(figsize=(10, 4))
    plt.plot(rolling_r2.index, rolling_r2[col])
    plt.title(f'Rolling R²: {col} vs Macro Factors (60-day PCA model)')
    plt.ylabel('R²')
    plt.grid(True)
    plt.tight_layout()

    # Save plot, then close it so figures do not pile up across iterations
    filename = f"rolling_r2_{col.replace('/', '_').replace(' ', '_')}.png"
    fig.savefig(os.path.join(output_dir, filename))
    plt.close(fig)

output_dir
'../output/plots'


Summary and Insights

📊 Project Summary Report

Project: Macro Factor Modeling for Emerging Markets
Author: Your Name
Date: YYYY-MM-DD

🔍 Project Objective

Brief summary of the problem you’re solving and why it matters.

🧮 Methods Used

  • PCA on standardized macroeconomic time series
  • Rolling linear regression to model EM index returns
  • Visual R² tracking to evaluate fit quality

📈 Key Visuals

Include your best plots below.

from IPython.display import Image, display

# Embed the saved R² summary chart in the report
r2_chart = Image('../output/plots/r2_scores_by_em_index.png')
display(r2_chart)

png

🧠 Interpretation & Insights

Write a few bullets interpreting your results:

  • Brazil shows highest macro sensitivity (R² ≈ 0.72)…
  • China returns were poorly explained…

🛠️ Tools

  • Python, pandas, NumPy, scikit-learn, matplotlib, seaborn
  • Bloomberg BQuant for original data

✅ Next Steps or Improvements

  • Add Lasso regression for feature selection
  • Expand macro variables to include trade balances or PMIs
  • Test model stability during crisis periods
# Here are some figure ideas to enhance your project:
# 1. Scree plot of PCA explained variance
# 2. Time series plot of principal component scores
# 3. Rolling window R² for each EM index (line plot)
# 4. Heatmap of factor loadings (PCA components vs. macro variables)
# 5. Residuals plot for regression diagnostics
# 6. Correlation matrix heatmap of macro variables

# Example: Scree plot for PCA explained variance (assuming you have a fitted PCA object `pca`)
# import matplotlib.pyplot as plt
# plt.figure(figsize=(6,4))
# plt.plot(range(1, len(pca.explained_variance_ratio_)+1), pca.explained_variance_ratio_, marker='o')
# plt.title('Scree Plot')
# plt.xlabel('Principal Component')
# plt.ylabel('Explained Variance Ratio')
# plt.show()
import matplotlib.pyplot as plt

# 1. Scree plot: share of variance explained by each principal component.
# Requires a fitted PCA object named `pca` in the session.
variance_ratios = pca.explained_variance_ratio_
component_numbers = range(1, len(variance_ratios) + 1)

plt.figure(figsize=(6, 4))
plt.plot(component_numbers, variance_ratios, marker='o')
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.show()
# Imported here so the cell also runs on its own; without matplotlib it fails
# immediately with "NameError: name 'plt' is not defined".
import matplotlib.pyplot as plt
import seaborn as sns

# Each snippet below expects its input to exist in the session already:
# `pc_scores`, `rolling_r2`, `pca`, `macro_var_names`, `residuals`, `macro_df`.

# 2. Time series plot of principal component scores (assuming you have `pc_scores` DataFrame)
plt.figure(figsize=(10, 4))
for col in pc_scores.columns:
    plt.plot(pc_scores.index, pc_scores[col], label=col)
plt.title('Principal Component Scores Over Time')
plt.xlabel('Date')
plt.ylabel('Score')
plt.legend()
plt.tight_layout()
plt.show()

# 3. Rolling window R² for each EM index (assuming you have `rolling_r2` DataFrame)
plt.figure(figsize=(10, 4))
for col in rolling_r2.columns:
    plt.plot(rolling_r2.index, rolling_r2[col], label=col)
plt.title('Rolling Window R² for Each EM Index')
plt.xlabel('Date')
plt.ylabel('R²')
plt.legend()
plt.tight_layout()
plt.show()

# 4. Heatmap of factor loadings (assuming you have `pca.components_` and `macro_var_names`)
plt.figure(figsize=(8, 6))
sns.heatmap(pca.components_, annot=True, cmap='coolwarm',
            yticklabels=[f'PC{i+1}' for i in range(pca.components_.shape[0])],
            xticklabels=macro_var_names)
plt.title('PCA Factor Loadings')
plt.xlabel('Macro Variable')
plt.ylabel('Principal Component')
plt.tight_layout()
plt.show()

# 5. Residuals plot for regression diagnostics (assuming you have `residuals` Series)
plt.figure(figsize=(8, 4))
plt.plot(residuals.index, residuals.values)
plt.title('Regression Residuals Over Time')
plt.xlabel('Date')
plt.ylabel('Residual')
plt.tight_layout()
plt.show()

# 6. Correlation matrix heatmap of macro variables (assuming you have `macro_df`)
plt.figure(figsize=(8, 6))
sns.heatmap(macro_df.corr(), annot=True, cmap='vlag')
plt.title('Correlation Matrix of Macro Variables')
plt.tight_layout()
plt.show()
---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

Cell In[2], line 4
      1 import seaborn as sns
      3 # 2. Time series plot of principal component scores (assuming you have `pc_scores` DataFrame)
----> 4 plt.figure(figsize=(10, 4))
      5 for col in pc_scores.columns:
      6     plt.plot(pc_scores.index, pc_scores[col], label=col)


NameError: name 'plt' is not defined



📈 Full Notebooks


Conclusion

This analysis demonstrates how simple data science techniques can be used to decompose macro risk exposures. Future work might incorporate more dynamic modeling or high-frequency indicators.