Data Science & Analytics
Na de financiële crisis van 2008 werden stress testen verplicht voor financiële instellingen,
echter voor niet financiële instellingen geldt deze plicht nog immer niet en is het ook (enkele uitzondering daar gelaten)
nog steeds geen gebruikelijke procedure.
Dit komt o.a. doordat de methodologiën welke voor de financiële instellingen gebruikelijk zijn niet integraal gekopieerd kunnen
worden naar een stress-test voor niet financiële instellingen; tevens dienen methodologiën overeenkomstig te zijn met de
strategische keuzes van de organisatie.
PPBF heeft een gestandaardiseerd 6-stappenplan ontwikkeld welke organisaties kunnen implementeren, waarmee men zich inzicht kan
verstrekken van de blinde vlekken in de organisatie en hiermee een grotere 'corporate intelligence' creëren.
Figuur 1: Geïntegreerd Stress Test Framework voor niet financiële bedrijven/instellingen
Het 6-stappenplan kent de volgende onderdelen:
Stap 1 - Data collectie & preparatie
Het process start met het uitgebreid verzamelen van data en informatie (intern). Middels gestandaardiseerde enquetes en interviews zijn we in staat om snel de benodigde data te genereren, om dit vervolgens in de juiste data-structuur te vormen.
Stap 2 - Bedrijfsmodel
Het ontwikkelen van een conceptueel model van uw huidige bedrijfsmodel (en verdienmodel). Dit conceptuele model is gebaseerd op zgn. 'business drivers', zodat de differentiaties en correlaties tussen de 'drivers' duidelijk worden
Stap 3 - Scenario's
De integratie van risico-scenario's in het conceptuele bedrijfsmodel gecombineerd met wiskundige en statistische analyses (Normaal-verdeling, long-tails, etc.)
Stap 4 - Geïntegreerde Bedrijf / Scenario Analyse
De diverse scenario's worden middels een 12-kwartalen analyse (V&W, Balans, Cash-Flow, Ratio-Analyse) vormgegeven en uitgewerkt.
Stap 5 - Stress Test Model
De effecten van de (extreme) scenarios geanalyseerd bij uitkomst, tesamen met een zgn 'reverse-testing' analyse. De uitkomsten met adviezen worden gerapporteerd.
Stap 6 - Implementatie
De laatste stap betreft een implementatieplan. Dit kan gedaan binnen de huidige rapportage-structuur (toegevoegd aan bijv. kwartaalrapportage) of een specifiek dashboard voor Risico/Scenario analyse kan ontwikkeld en geintegreerd worden (maatwerk).
# Step 1: Start with loading the needed modules
import fix_yahoo_finance as yf
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
# Step 2: Create class for DB
class Database():
def __init__(self, ticker, days):
self.ticker = ticker
data = yf.download(ticker, start="2019-01-01", end="2020-07-25")
self.df = pd.DataFrame(data)
pd.set_option('display.max_columns', None)
self.df = self.df[-days:]
def quote(self):
return self.df
db = Database('ADS.DE', 252) # days is amount of days taken from df
df = db.quote()
print(df.tail())
# Step 4: Adding trend resistance & support lines
pivot_high_1=df['High'][-21:-1].max()
pivot_high_2=df['High'][-55:-22].max()
pivot_low_1=df['Low'][-21:-1].min()
pivot_low_2=df['Low'][-55:-22].min()
A=[df['High'][-21:-1].idxmax(), pivot_high_1]
B=[df['High'][-55:-22].idxmax(), pivot_high_2]
A1=[df['Low'][-21:-1].idxmin(), pivot_low_1]
B1=[df['Low'][-55:-22].idxmin(), pivot_low_2]
x1_high_values = [A[0], B[0]]
y1_high_values = [A[1], B[1]]
x1_low_values = [A1[0], B1[0]]
y1_low_values = [A1[1], B1[1]]
# Step 3: Visualisation with Matplotlib
plt.rcParams.update({'font.size': 10})
fig, ax1 = plt.subplots(figsize=(14,7))
ax1.set_ylabel('Price in €')
ax1.set_xlabel('Date')
ax1.set_title('ADIDAS AG')
ax1.plot('Adj Close',data=df, label='Close Price', linewidth=0.5, color='blue')
ax1.plot(x1_high_values, y1_high_values, color='g', linestyle='--', linewidth=0.5, label='Trend resistance')
ax1.plot(x1_low_values, y1_low_values, color='r', linestyle='--', linewidth=0.5, label='Trend support')
ax1.axhline(y=pivot_high_1, color='g', linewidth=6, label='First resistance line', alpha=0.2)
ax1.axhline(y=pivot_low_1, color='r', linewidth=6, label='First support line', alpha=0.2)
trans = transforms.blended_transform_factory(ax1.get_yticklabels()[0].get_transform(), ax1.transData)
ax1.text(0,pivot_high_1, "{:.2f}".format(pivot_high_1), color="g", transform=trans,ha="right", va="center")
ax1.text(0,pivot_low_1, "{:.2f}".format(pivot_low_1), color="r", transform=trans,ha="right", va="center")
ax1.legend()
ax1.grid()
plt.show()
Een data-wetenschappelijk proces bevat een bepaald aantal stappen, welke afgeleid zijn van de wetenschappelijke processtappen.
Het totale proces van data-wetenschap bevat 8 stappen.
In het 'Whitepaper: a scientific approach to Data Science' leggen we elke stap van het proces uit.
import fix_yahoo_finance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
class Database():
def __init__(self, ticker, days):
self.ticker = ticker
data = yf.download(ticker, start="2019-01-01", end="2020-08-24")
self.df = pd.DataFrame(data)
self.df['Date'] = pd.to_datetime(self.df.index)
pd.set_option('display.max_columns', None)
self.df = self.df[-days:]
def quote(self):
return self.df
def MACD(self, per1, per2, per3):
short = self.df['Adj Close'].ewm(span=per1, adjust=False).mean()
long = self.df['Adj Close'].ewm(span=per2, adjust=False).mean()
MACD = short - long
signal = MACD.ewm(span=per3, adjust=False).mean()
return [MACD, signal]
def MACD_bar(self, df):
MACD_bar = []
for i in range(0, len(df)):
value = df['MACD'][i] - df['signal'][i]
MACD_bar.append(value)
return MACD_bar
def MACD_color(self, df):
MACD_color = []
for i in range(0, len(df)):
if df['MACD_bar'][i] > df['MACD_bar'][i - 1]:
MACD_color.append(1)
else:
MACD_color.append(-1)
return MACD_color
def Data(ticker, days):
db=Database(ticker, days)
df=db.quote()
df['MACD']=db.MACD(12, 26, 9)[0] # (12, 26, 9) standaard instelling
df['signal']=db.MACD(12, 26, 9)[1]
df['MACD_bar'] = db.MACD_bar(df)
df['MACD_color'] = db.MACD_color(df)
df['positive'] = df['MACD_color'] > 0
return df
def buy_sell(df, risk):
Buy=[]
Sell=[]
flag=False
for i in range(0, len(df)):
if df['MACD'][i] > df['signal'][i]:
Sell.append(np.nan)
if flag ==False:
Buy.append(df['Adj Close'][i])
flag=True
else:
Buy.append(np.nan)
elif df['MACD'][i] < df['signal'][i]:
Buy.append(np.nan)
if flag ==True:
Sell.append(df['Adj Close'][i])
flag=False
else:
Sell.append(np.nan)
elif flag == True and df['Adj Close'][i] < Buy[-1] * (1 - risk):
Sell.append(df["Adj Close"][i])
Buy.append(np.nan)
flag = False
elif flag == True and df['Adj Close'][i] < df['Adj Close'][i - 1] * (1 - risk):
Sell.append(df["Adj Close"][i])
Buy.append(np.nan)
flag = False
else:
Buy.append(np.nan)
Sell.append(np.nan)
return (Buy, Sell)
def Plot(df, name):
plt.rcParams.update({'font.size': 10})
fig, ax1 = plt.subplots(figsize=(14,8))
fig.suptitle(name, fontsize=10, backgroundcolor='blue', color='white')
ax1 = plt.subplot2grid((14, 8), (0, 0), rowspan=8, colspan=14)
ax2 = plt.subplot2grid((14, 8), (8, 0), rowspan=6, colspan=14)
ax1.set_ylabel('Price in €')
ax1.plot('Adj Close',data=df, label='Close Price', linewidth=0.5, color='blue')
ax1.scatter(df.index, df['Buy_signal'], color='green', marker='^', alpha=1)
ax1.scatter(df.index, df['Sell_signal'], color='red', marker='v', alpha=1)
ax1.legend()
ax1.grid()
ax1.set_xlabel('Date', fontsize=8)
fig.tight_layout()
ax2.set_ylabel('MACD', fontsize=8)
ax2.plot('MACD', data=df, label='MACD', linewidth=0.5, color='blue')
ax2.plot('signal', data=df, label='signal', linewidth=0.5, color='red')
ax2.bar('Date', 'MACD_bar', data=df, label='Volume', color=df.positive.map({True: 'g', False: 'r'}), width=1,alpha=0.8)
ax2.axhline(0, color='black', linewidth=0.5, alpha=0.5)
ax2.legend()
ax2.grid()
plt.show()
def Run(name, ticker):
days = 252
df = Data(ticker, days)
buy_sell_markers = buy_sell(df, 0.025) # df en riskpercentage
df['Buy_signal'] = buy_sell_markers[0]
df['Sell_signal'] = buy_sell_markers[1]
Plot(df, name)
Run('Deutsche Bank AG', 'DBK.DE')
import fix_yahoo_finance as yf
import pandas as pd
import csv
class Database():
def __init__(self, ticker):
data = yf.download(ticker, start="2019-01-01", end="2020-10-23")
self.df = pd.DataFrame(data)
pd.set_option('display.max_columns', None)
def quote(self):
return self.df
def Data(ticker):
db=Database(ticker)
df=db.quote()
return df
def Performance(name, ticker, days, Save):
df = Data(ticker)
df = df[-days:]
year = (df['Close'][-1] / df['Close'][-days]) - 1
year = "{:.2%}".format(year)
print("Year-%: ", year)
start = float(df['Close'][-days])
end = float(df['Close'][-1])
num_periods = len(df['Close'])
cagr = (end / start) ** (1 / (num_periods - 1)) - 1
cagr = "{:.4%}".format(cagr)
print('Daily-CAGR : ', cagr)
df['Dayperc'] = df['Close'].pct_change()
stdev = "{:.2%}".format(df[-days + 1:]['Dayperc'].values.std())
print('StDev.: ', stdev)
high_up = df.loc[df['Dayperc'].idxmax()]
low_down = df.loc[df['Dayperc'].idxmin()]
high = "{:.2%}".format(high_up[6])
low = "{:.2%}".format(low_down[6])
print('Highest up%: ', high)
print('Lowest down%: ', low)
df1 = Data('DJI')
df1 = df1[-days:]
df1['Dayperc'] = df1['Close'].pct_change()
corr = round(df.corrwith(df1).values[6], 2)
print('Correlation:', corr)
neg_Days = []
pos_Days = []
neut_Days = []
for i in range(0, len(df)):
if df['Dayperc'][i] < 0:
neg_Days.append(1)
elif df['Dayperc'][i] > 0:
pos_Days.append(1)
else:
neut_Days.append(1)
print('Positive days: ', len(pos_Days))
print('Negative days: ', len(neg_Days))
print('Neutral days: ', len(neut_Days))
if Save == 'Yes':
with open('Analysis.csv', mode='a') as Data_file:
Data_writer = csv.writer(Data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
Data_writer.writerow( [name, ticker, year, cagr, stdev, high, low, corr, len(pos_Days), len(neg_Days), len(neut_Days)])
tickDict = {'3M Company': 'MMM',
'American Express Company': 'AXP',
'Apple Inc.': 'AAPL',
'The Boeing Company': 'BA',
'Caterpillar Inc.': 'CAT',
'Chevron Corporation': 'CVX',
'Cisco Systems Inc.': 'CSCO',
'The Coca-Cola Company': 'KO',
'Dow Inc.': 'DOW',
'Exxon Mobil Corporation': 'XOM',
'Goldman Sachs Group Inc.': 'GS',
'The Home Depot Inc.': 'HD',
'International Business Machines Corporation': 'IBM',
'Intel Corporation': 'INTC',
'Johnson & Johnson Inc.': 'JNJ',
'JPMorgan Chase & Co.': 'JPM',
'McDonalds Corporation': 'MCD',
'Merck & Co Inc.': 'MRK',
'Microsoft Corporation': 'MSFT',
'Nike Inc.': 'NKE',
'Pfizer Inc.': 'PFE',
'Procter & Gamble Corporation': 'PG',
'Raytheon Technologies Corporation': 'RTX',
'Travelers Companies Inc.': 'TRV',
'UnitedHealth Group Inc.': 'UNH',
'Verizon Communications Inc.': 'VZ',
'Visa Inc.': 'V',
'Walgreens Boots Alliance Inc.': 'WBA',
'Walmart Inc.': 'WMT',
'The Walt Disney Company': 'DIS',
'Dow Jones Ind. Ave.': 'DJI'}
def writeCSV():
with open('Analysis.csv', mode='w') as Data_file:
Data_writer = csv.writer(Data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
Data_writer.writerow( ['Company', 'Ticker', 'Year%', 'CAGR', 'STDEV', '52W High', '52W Low', 'Correlation', 'Positive days',
'Negative days', 'Neutral days'])
for name in tickDict:
ticker = tickDict[name]
Performance(name, ticker, 253, 'Yes')
#writeCSV()
def readCSV():
df2 = pd.read_csv('Analysis.csv')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print(df2)
readCSV()
import yfinance as yf
from finta import TA
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
class Database():
def __init__(self, ticker):
data = yf.download(ticker, start="2019-01-01", end="2020-12-31")
self.df = pd.DataFrame(data)
self.df['Date'] = pd.to_datetime(self.df.index)
pd.set_option('display.max_columns', None)
def quote(self):
return self.df
db = Database('RDSA.AS') # RDSA.AS - Royal Dutch Shell plc
df = db.quote()
df['SMA']=TA.SMA(df, 50)
print(df)
plt.figure(figsize=(12,7))
plt.plot('Close', data=df, label='Close Price Royal Dutch Shell', color='blue', linewidth=1)
plt.plot('SMA', data=df, label='Simple Moving Average', color='red', linewidth=0.5)
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
from yahoo_fin import stock_info as si
import pandas as pd
import csv
tickDict = {'3M Company': 'MMM',
'American Express Company': 'AXP',
'Apple Inc.': 'AAPL',
'The Boeing Company': 'BA',
'Caterpillar Inc.': 'CAT',
'Chevron Corporation': 'CVX',
'Cisco Systems Inc.': 'CSCO',
'The Coca-Cola Company': 'KO',
'Dow Inc.': 'DOW',
'Exxon Mobil Corporation': 'XOM',
'Goldman Sachs Group Inc.': 'GS',
'The Home Depot Inc.': 'HD',
'International Business Machines Corporation': 'IBM',
'Intel Corporation': 'INTC',
'Johnson & Johnson Inc.': 'JNJ',
'JPMorgan Chase & Co.': 'JPM',
'McDonalds Corporation': 'MCD',
'Merck & Co Inc.': 'MRK',
'Microsoft Corporation': 'MSFT',
'Nike Inc.': 'NKE',
'Pfizer Inc.': 'PFE',
'Procter & Gamble Corporation': 'PG',
'Raytheon Technologies Corporation': 'RTX',
'Travelers Companies Inc.': 'TRV',
'UnitedHealth Group Inc.': 'UNH',
'Verizon Communications Inc.': 'VZ',
'Visa Inc.': 'V',
'Walgreens Boots Alliance Inc.': 'WBA',
'Walmart Inc.': 'WMT',
'The Walt Disney Company': 'DIS',
'Dow Jones Ind. Ave.': 'DJI'}
def Indicator(name, ticker):
indicator = si.get_stats(ticker)
MarCap=indicator.values[0][1]
PE=indicator.values[3][1]
PEG=indicator.values[4][1]
EPS = indicator.values[49][1]
Dividend=indicator.values[28][1]
with open('Indicator.csv', mode='a') as Data_file:
Data_writer = csv.writer(Data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
Data_writer.writerow([name, ticker, MarCap, PE, PEG, EPS, Dividend])
def writeCSV():
with open('Indicator.csv', mode='w') as Data_file:
Data_writer = csv.writer(Data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
Data_writer.writerow(['Company', 'Ticker', 'MarCap', 'PE', 'PEG', 'EPS', 'Dividend'])
for name in tickDict:
ticker=tickDict[name]
Indicator(name, ticker)
#writeCSV()
def readCSV():
df = pd.read_csv('Indicator.csv')
pd.set_option('display.max_columns', None)
print(df)
readCSV()
Watch tutorial on youtube: Indicator Screener