## Copyright (C) 2015 Daniel Kottke, Georg Krempl
##
## Author: Daniel Kottke <daniel.kottke@ovgu.de>
## 
## If using this method, please cite the paper:
##   Optimised Probabilistic Active Learning
##   by Georg Krempl, Daniel Kottke, Vincent Lemaire
##


## usage: val = OPALgain(n, pObs, tau, m)
##
## Calculates the OPAL gain for the given parameters:
##   n    - number of labelled instances
##   pObs - share of positives among labelled instances
##   tau  - cost-ratio = FP/(FP+FN)
##   m    - labelling budget size
## 
## Example:
##  from OPALgain import *
##  import numpy as np
##  import pylab as plt
##  pObsValues = np.linspace(0, 1, 101)
##  val = np.zeros(pObsValues.shape)
##  for i, pObs in enumerate(pObsValues):
##    val[i] = OPALgain(1, pObs, .4, 2)
##  plt.plot(pObsValues, val)
##  plt.show()
##  

from math import exp, gamma, lgamma

import numpy as np

try:
    from scipy.special import comb
except ImportError:  # SciPy older than 0.14 only ships comb in scipy.misc
    from scipy.misc import comb

def OPALgain(n, pObs, tau, m):
    """Return the OPAL gain for the given label statistics.

    Parameters
    ----------
    n : number of labelled instances
    pObs : share of positives among the labelled instances
    tau : cost ratio, FP / (FP + FN)
    m : labelling budget size; it is used as a divisor, so it must be
        non-zero

    Returns
    -------
    The baseline term ``iml(n, k, tau, 0, 0)`` minus ``iml(n, k, tau, m, y)``
    for every ``y`` in ``0..m``, multiplied by ``comb(n, k) * (n + 1) / m``,
    where ``k = n * pObs``.
    """
    positives = n * pObs

    # Baseline term (no additional labels), then subtract one term for each
    # possible count of positives among the m additional labels.
    gain = iml(n, positives, tau, 0, 0)
    for extra_positives in range(m + 1):
        gain -= iml(n, positives, tau, m, extra_positives)

    # Scaling factor is evaluated after the sum, as in the original ordering.
    scale = comb(n, positives) * (n + 1) / m
    return scale * gain

def iml(n, k, tau, m, y):
    """Evaluate one summand used by ``OPALgain``.

    The ratio ``(k + y) / (n + m)`` is compared with ``tau`` (when
    ``n + m == 0`` the ratio is taken to be ``tau`` itself) and selects the
    weight and the gamma-function arguments of the result::

        comb(m, y) * weight * gamma(a) * gamma(b) / gamma(c)

        ratio == tau (within 1e-15): weight = tau - tau**2,
            a = 1 - y + m + n - k,  b = 1 + y + k,  c = 2 + m + n
        ratio <  tau:                weight = 1 - tau,
            a = 1 - y + m + n - k,  b = 2 + y + k,  c = 3 + m + n
        ratio >  tau:                weight = tau,
            a = 2 - y + m + n - k,  b = 1 + y + k,  c = 3 + m + n

    Parameters
    ----------
    n : number of labelled instances
    k : number of positives among them (may be fractional)
    tau : threshold the ratio is compared against
    m : number of additional labels
    y : number of positives among the additional labels

    Returns
    -------
    The value of the expression above.  NaN is returned when the ratio
    cannot be ordered against ``tau`` (i.e. an input is NaN).

    Notes
    -----
    ``math.gamma`` raises ``OverflowError`` for arguments above roughly
    171.6.  In that case the gamma ratio is evaluated in log space with
    ``lgamma`` instead; inputs that did not overflow are computed exactly as
    before.
    """
    total = n + m
    ratio = tau if total == 0 else 1.0 * (k + y) / total

    # Pick the weight and the constant offsets of the three gamma arguments.
    if abs(ratio - tau) < 1e-15:
        weight, neg_off, pos_off, den_off = tau - tau**2, 1, 1, 2
    elif ratio < tau:
        weight, neg_off, pos_off, den_off = 1 - tau, 1, 2, 3
    elif ratio > tau:
        weight, neg_off, pos_off, den_off = tau, 2, 1, 3
    else:
        # Only reachable when the comparison involves NaN.
        return float("nan")

    neg_arg = neg_off - y + m + n - k
    pos_arg = pos_off + y + k
    den_arg = den_off + m + n

    try:
        g_neg, g_pos, g_den = gamma(neg_arg), gamma(pos_arg), gamma(den_arg)
    except OverflowError:
        # For 0 <= k <= n and 0 <= y <= m all three arguments are >= 1, so
        # lgamma is the plain log of gamma and the ratio is safe to
        # exponentiate.
        log_ratio = lgamma(neg_arg) + lgamma(pos_arg) - lgamma(den_arg)
        return comb(m, y) * weight * exp(log_ratio)

    return comb(m, y) * weight * g_neg * g_pos / g_den