from statsmodels.tsa.stattools import grangercausalitytests

granger = training_sample.loc[training_sample["stock_id"]==1, # X variable = stock nb 1
                              ["R1M_Usd",                     # Y variable = stock nb 1
                               "Mkt_Cap_6M_Usd"]]             # & Market cap
fit_granger = grangercausalitytests(granger,maxlag=[6],verbose=True) # Maximmum lag

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=4.1110  , p=0.0008  , df_denom=149, df_num=6
ssr based chi2 test:   chi2=26.8179 , p=0.0002  , df=6
likelihood ratio test: chi2=24.8162 , p=0.0004  , df=6
parameter F test:         F=4.1110  , p=0.0008  , df_denom=149, df_num=6


import icpy as icpy

B=training_sample[['Mkt_Cap_12M_Usd','Vol1Y_Usd']].values          # Node B1 and B2
C=training_sample['R1M_Usd'].values                                # Node C
ExpInd=np.round(np.random.uniform(size=training_sample.shape[0]))  # "Environment"
icpy.invariant_causal_prediction(X=B,y=C,z=ExpInd,alpha=0.1)       # test if A or B are parents of C

ICP(S_hat=array([0, 1], dtype=int64), q_values=array([1.34146064e-215, 1.34146064e-215]), p_value=1.341460638715696e-215)


import cdt
import networkx as nx
import matplotlib.pyplot as plt

data_caus = training_sample[features_short+["R1M_Usd"]]              
dm = np.array(data_caus)
cm = np.corrcoef(dm.T)                          # Compute correlations
df=pd.DataFrame(cm)

glasso = cdt.independence.graph.Glasso()        # intialize graph lasso
skeleton = glasso.predict(df)                   # apply graph lasso to dataset
model_pc = cdt.causality.graph.PC()             # PC algo. from pcalg R library
graph_pc = model_pc.predict(df, skeleton)       # Estimate model
fig=plt.figure(figsize=[10,6])
nx.draw_networkx(model_pc)                      # Plot model


from causalimpact import CausalImpact

stock1_data = data_ml.loc[data_ml["stock_id"]==1, :]                # Data of first stock
struct_data = stock1_data[["Advt_3M_Usd"]+features_short]           # Combine label and features
struct_data.index = pd.RangeIndex(start=0, stop=228, step=1)        # Setting index as int 
pre_period = [0, 99]                                                # Pre-break period (pre-2008)
post_period = [100, 199]                                            # Post-break period

impact = CausalImpact(struct_data, pre_period, post_period)         # Causal model created
impact.run()                                                        # run!
print(impact.summary())                                             # Summary analysis
impact.plot()                                                       # Plot!

                                Average       Cumulative
Actual                                0               86
Predicted                             0               76
95% CI                           [0, 1]        [45, 107]
                                                        
Absolute Effect                       0               10
95% CI                           [0, 0]        [40, -20]
                                                        
Relative Effect                   13.4%            13.4%
95% CI                  [53.5%, -26.7%]  [53.5%, -26.7%]
                                                        
P-value                            0.0%                 
Prob. of Causal Effect           100.0%                 
None


data_ml["year"] = pd.to_datetime(data_ml['date']).dt.year           # Adding a year column for later groupby
data_ml.groupby("year")["R1M_Usd"].mean().plot.bar(figsize=[16,6])  # Agregatting and plotting

<AxesSubplot:xlabel='year'>


import statsmodels.api as sm

def regress(df):                                                                    # To avoid loop and keep...
    model = sm.OLS(df['R6M_Usd'], exog=sm.add_constant(df[['Mkt_Cap_6M_Usd']]))     # ... the groupby structure we use...
    return model.fit().params[1]                                                    # ... a function with statsmodel

beta_cap = data_ml.groupby('year').apply(regress)                                   # Perform regression
beta_cap=pd.DataFrame(beta_cap,columns=['beta_cap']).reset_index()                  # Format into df
beta_cap.groupby("year").mean().plot.bar(figsize=[16,6])                            # Plot

<AxesSubplot:xlabel='year'>

Chapter 14 Two key concepts: causality and non-stationarity¶

14.1 Causality¶

14.1.1 Granger causality¶

14.1.2 Causal additive models¶

14.1.3 Structural time series models¶

14.2 Dealing with changing environments¶

14.2.1 Non-stationarity: yet another illustration¶

14.2.2 Online learning¶

14.2.3 Homogeneous transfer learning¶

References¶