## Copyright (C) 2015 Daniel Kottke, Georg Krempl
##
## Author: Daniel Kottke
##
## If using this method, please cite the paper:
## Optimised Probabilistic Active Learning
## by Georg Krempl, Daniel Kottke, Vincent Lemaire
##
## usage: val = OPALgain(n, pObs, tau, m)
##
## Calculates the OPAL gain for given parameters:
## n    - number of labelled instances
## pObs - share of positives among labelled instances
## tau  - cost-ratio = FP/(FP+FN)
## m    - labelling budget size
##
## Example:
## from OPALgain import *
## import pylab as plt
## pObsValues = np.linspace(0, 1, 101)
## val = np.zeros(pObsValues.shape)
## for i, pObs in enumerate(pObsValues):
##     val[i] = OPALgain(1, pObs, .4, 2)
## plt.plot(pObsValues, val)
## plt.show()
##

from math import exp, gamma, lgamma

# numpy is not used by the functions below, but the usage example in the
# header relies on ``np`` being re-exported through ``from OPALgain import *``.
import numpy as np
# ``comb`` lives in scipy.special; the old scipy.misc alias no longer exists
# in current SciPy releases.
from scipy.special import comb


def _gamma_ratio(a, b, c):
    """Return gamma(a) * gamma(b) / gamma(c), tolerating large arguments.

    The direct product is used whenever it can be evaluated, so results for
    small arguments are unchanged.  ``math.gamma`` raises ``OverflowError``
    for arguments above roughly 171; in that case the ratio is evaluated in
    log space instead.
    """
    try:
        return gamma(a) * gamma(b) / gamma(c)
    except OverflowError:
        # lgamma() returns log|gamma(x)| and therefore drops the sign for
        # negative arguments; only take the log-space route when every
        # argument is positive, otherwise keep the original error.
        if min(a, b, c) <= 0:
            raise
        return exp(lgamma(a) + lgamma(b) - lgamma(c))


def OPALgain(n, pObs, tau, m):
    """Calculate the OPAL gain.

    Parameters
    ----------
    n : number of labelled instances
    pObs : share of positives among the labelled instances
    tau : cost-ratio, FP / (FP + FN)
    m : labelling budget size; it is used as a ``range`` bound and as a
        divisor, so it has to be a positive integer

    Returns
    -------
    The gain value ``comb(n, k) * (n + 1) / m * (iml(n, k, tau, 0, 0)
    - sum(iml(n, k, tau, m, y) for y in 0..m))`` with ``k = n * pObs``.
    """
    # Number of positives among the labelled instances (may be fractional).
    k = n * pObs

    # Term without additional labels minus the terms for every possible
    # number y of positives among the m additional labels.
    val = iml(n, k, tau, 0, 0)
    for y in range(m + 1):
        val = val - iml(n, k, tau, m, y)

    val = comb(n, k) * (n + 1) / m * val
    return val


def iml(n, k, tau, m, y):
    """Return a single term of the OPAL gain computation.

    The share ``(k + y) / (n + m)`` is compared with ``tau`` and one of three
    closed-form expressions is selected accordingly.  When ``n + m == 0`` the
    share is undefined and is taken to be ``tau`` itself.

    Parameters
    ----------
    n : number of labelled instances
    k : number of positives among the labelled instances
    tau : cost-ratio, FP / (FP + FN)
    m : number of additional labels
    y : number of positives among the additional labels
    """
    if (n + m) == 0:
        conditions = tau
    else:
        conditions = 1. * (k + y) / (n + m)

    # Shares within 1e-15 of tau are treated as equal to tau.
    if abs(conditions - tau) < 1e-15:
        return (comb(m, y) * (tau - tau**2)
                * _gamma_ratio(1 - y + m + n - k, 1 + y + k, 2 + m + n))
    elif conditions < tau:
        return (comb(m, y) * (1 - tau)
                * _gamma_ratio(1 - y + m + n - k, 2 + y + k, 3 + m + n))
    else:
        # conditions > tau (NaN input also ends up here and propagates as
        # NaN instead of falling off the end of the function).
        return (comb(m, y) * (tau)
                * _gamma_ratio(2 - y + m + n - k, 1 + y + k, 3 + m + n))