#OK to post homework
#Yukun Yao, Apr. 6, Assignment 19

###Problem 1
#By modifying procedure MakeDS(P,DS), write a procedure
#MakeDSGr(P,DS)
#that does the same as MakeDS(P,DS) but uses the "Information Gain ratio" GR(d,D)
#(Eq. (4.8) of the book, from hw18.txt; a sketch of GR is included after Problem 3 below)
#instead of Information Gain to decide on the descriptive feature to pick as the root.
#Compare the outputs of MakeDS([[2,2,2],2],Dodam(20,3)) and MakeDSGr([[2,2,2],2],Dodam(20,3)).

#MakeDSGr1(P,DS,AF,DSprev): implements the ID3 algorithm, with the Information Gain ratio
#replacing the Information Gain; inputs P=[L,N] and a data set
#DS=[ ..., [Name, v,t], ...]
MakeDSGr1:=proc(P,DS,AF,DSprev) local i,TarS,champ,rec,hope,SDS:
if nops(DS)=0 then
 RETURN([Majority(P,DSprev),[]]):
fi:
TarS:={seq(DS[i][3],i=1..nops(DS))}:
if nops(TarS)=1 then
 RETURN([TarS[1],[]]):
fi:
if AF={} then
 RETURN([Majority(P,DS),[]]):
fi:
champ:=AF[1]: rec:=GR(P,DS,champ):
for i from 2 to nops(AF) do
 hope:=GR(P,DS,AF[i]):
 if hope>rec then
  champ:=AF[i]: rec:=hope:
 fi:
od:
SDS:=Breakup(P,DS,champ):
[champ,[seq(MakeDSGr1(P,SDS[i],AF minus {champ},DS),i=1..nops(SDS))]]:
end:

#MakeDSGr(P,DS): implements the ID3 algorithm with the Information Gain ratio (added after class)
MakeDSGr:=proc(P,DS) local i:
MakeDSGr1(P,DS,{seq(i,i=1..nops(P[1]))},DS):
end:

###Problem 2
#By modifying procedure MakeDS(P,DS), write a procedure
#MakeDSGini(P,DS)
#that does the same as MakeDS(P,DS) but uses the Gini index
#(Eq. (4.9) of the book, from hw18.txt; a sketch of Gini is included after Problem 3 below)
#instead of Information Gain to decide on the descriptive feature to pick as the root.
#Compare the outputs of MakeDS([[2,2,2],2],Dodam(20,3)) and MakeDSGini([[2,2,2],2],Dodam(20,3)).

#MakeDSGini1(P,DS,AF,DSprev): implements the ID3 algorithm, with the Gini index
#replacing the Information Gain; inputs P=[L,N] and a data set
#DS=[ ..., [Name, v,t], ...]
MakeDSGini1:=proc(P,DS,AF,DSprev) local i,TarS,champ,rec,hope,SDS:
if nops(DS)=0 then
 RETURN([Majority(P,DSprev),[]]):
fi:
TarS:={seq(DS[i][3],i=1..nops(DS))}:
if nops(TarS)=1 then
 RETURN([TarS[1],[]]):
fi:
if AF={} then
 RETURN([Majority(P,DS),[]]):
fi:
champ:=AF[1]: rec:=Gini(P,DS,champ):
for i from 2 to nops(AF) do
 hope:=Gini(P,DS,AF[i]):
 if hope>rec then
  champ:=AF[i]: rec:=hope:
 fi:
od:
SDS:=Breakup(P,DS,champ):
[champ,[seq(MakeDSGini1(P,SDS[i],AF minus {champ},DS),i=1..nops(SDS))]]:
end:

#MakeDSGini(P,DS): implements the ID3 algorithm with the Gini index (added after class)
MakeDSGini:=proc(P,DS) local i:
MakeDSGini1(P,DS,{seq(i,i=1..nops(P[1]))},DS):
end:

###Problem 3
#Write a procedure MakeDSshallow(P,DS,d) that always outputs a decision tree of depth <=d.
#Once AF gets down to size nops(P[1])-d, it creates a leaf with the majority of the remaining data set.

#MakeDSshallow1(P,DS,AF,DSprev,d): like MakeDS1(P,DS,AF,DSprev), but once
#nops(AF)<=nops(P[1])-d (i.e. depth d has been reached) it creates a leaf with the
#majority of the remaining data set
MakeDSshallow1:=proc(P,DS,AF,DSprev,d) local i,TarS,champ,rec,hope,SDS:
if nops(DS)=0 then
 RETURN([Majority(P,DSprev),[]]):
fi:
TarS:={seq(DS[i][3],i=1..nops(DS))}:
if nops(TarS)=1 then
 RETURN([TarS[1],[]]):
fi:
if nops(AF)<=nops(P[1])-d then
 RETURN([Majority(P,DS),[]]):
fi:
champ:=AF[1]: rec:=IG(P,DS,champ):
for i from 2 to nops(AF) do
 hope:=IG(P,DS,AF[i]):
 if hope>rec then
  champ:=AF[i]: rec:=hope:
 fi:
od:
SDS:=Breakup(P,DS,champ):
[champ,[seq(MakeDSshallow1(P,SDS[i],AF minus {champ},DS,d),i=1..nops(SDS))]]:
end:

#MakeDSshallow(P,DS,d): outputs a decision tree for DS of depth <=d
MakeDSshallow:=proc(P,DS,d) local i:
MakeDSshallow1(P,DS,{seq(i,i=1..nops(P[1]))},DS,d):
end:
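#The three problems above call GR(P,DS,f) and Gini(P,DS,f), which are defined in hw18.txt
#and not reproduced here. What follows is only a minimal sketch of plausible implementations
#(the actual hw18.txt procedures may differ), written in terms of IG, Breakup, SEnt, and TBU
#from the code below; GiniT is a helper name made up here. Both are written so that bigger
#is better, matching the "hope>rec" comparisons above.

#GR(P,DS,f): [sketch] the Information Gain ratio of Eq. (4.8): IG(P,DS,f) divided by the
#entropy of the partition of DS induced by the values of feature f
GR:=proc(P,DS,f) local DSlist,se,i:
DSlist:=Breakup(P,DS,f):
se:=SEnt([seq(nops(DSlist[i])/nops(DS),i=1..nops(DSlist))]):
if se=0 then
 #all the data points land in one part, so splitting on f gains nothing
 RETURN(0):
fi:
IG(P,DS,f)/se:
end:

#GiniT(P,DS): [sketch] the Gini index of Eq. (4.9) of the target feature of DS:
#1 minus the sum of the squared class proportions
GiniT:=proc(P,DS) local co,i:
co:=TBU(P,DS):
co:=co/convert(co,`+`):
1-add(co[i]^2,i=1..nops(co)):
end:

#Gini(P,DS,f): [sketch] the reduction in the Gini index obtained by splitting DS on
#feature f (empty parts contribute 0)
Gini:=proc(P,DS,f) local DSlist,i:
DSlist:=Breakup(P,DS,f):
GiniT(P,DS)-add(`if`(DSlist[i]=[],0,nops(DSlist[i])/nops(DS)*GiniT(P,DSlist[i])),i=1..nops(DSlist)):
end: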
###Following is from C19.txt
#C19.txt, 4/4/19, Finishing Decision Trees
Help:=proc(): print(` TBU(P,DS) , Majority(P,DS), MakeDS1(P,DS,AF,DSprev), MakeDS(P,DS) `): end:

#old stuff
#C18.txt, April Fool's Day, 2019
#Decision Trees
Help18:=proc(): print(` H(P,DS) , Dodam(N,k), IG(P,DS,f) `): print(`MakeDS(P,DS) `): end:

#start C16.txt
#C16.txt, Dr. Z. ExpMath, March 25, 2019
Help16:=proc(): print(` GuessWhoS(x,N), ManyGWS(N,K,t), GuessWhoC(x,N) , SEnt(P) `): print(`Profile(n,k), ABTP(N,k) `): end:

#GuessWhoS(x,N): simulating a stupid Guess Who game with a number from 0 to N;
#returns the number of questions asked
GuessWhoS:=proc(x,N) local co,i:
co:=0:
for i from 0 to N do
 #print(`Is the number equal to`, i):
 co:=co+1:
 if x=i then
  RETURN(co):
 fi:
od:
FAIL:
end:

#ManyGWS(N,K,t): simulates K stupid Guess Who games; outputs the average and s.d.
#of the number of questions, followed by the empirical prob. generating function
#in the variable t
ManyGWS:=proc(N,K,t) local ra,i,f,mu,sig,f1:
ra:=rand(0..N):
f:=add(t^GuessWhoS(ra(),N),i=1..K)/K:
mu:=subs(t=1,diff(f,t)):
f1:=f/t^mu:
sig:=sqrt(subs(t=1,diff(t*diff(f1,t),t))):
[evalf([mu,sig]),sort(evalf(f))]:
end:

#GuessWhoC(x,N): simulating a clever Guess Who game (binary search) with a number
#from 0 to N; returns the number of questions of the form "is the number <=m?" asked
GuessWhoC:=proc(x,N) local co,L,U,m:
L:=0: U:=N: co:=0:
while L<U do
 m:=trunc((L+U)/2):
 co:=co+1:
 if x<=m then
  U:=m:
 else
  L:=m+1:
 fi:
od:
co:
end:

#SEnt(P): the (Shannon) amount of information, in bits, of the discrete prob.
#distribution P (zero entries are discarded and the rest is normalized first)
SEnt:=proc(P) local i,P1:
P1:=remove(x->x=0,P):
P1:=P1/convert(P1,`+`):
if not (type(P1,list) and min(op(P1))>0 and convert(P1,`+`)=1) then
 print(`bad input`):
 RETURN(FAIL):
fi:
-add(P1[i]*log[2](P1[i]),i=1..nops(P1)):
end:
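#The clever strategy is just binary search, so on average it should use about
#log[2](N+1) questions, which is exactly the Shannon entropy of the uniform
#distribution on {0,...,N}. The following sanity check is not part of the original
#class code, and CheckGW is a name made up here.
CheckGW:=proc(N) local x:
evalf([add(GuessWhoC(x,N),x=0..N)/(N+1), SEnt([seq(1/(N+1),x=0..N)])]):
end:
#For example, CheckGW(15) should return [4., 4.]: with 16 possibilities, every game
#takes exactly 4 questions, and the entropy is exactly 4 bits.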
#Aside: for data of the form [[input,output],...], find F with F(input)=output by
#minimizing Sum((F(input)-output)^2) over the data, where the inputs are n-dim vectors
#and F is a polynomial in (x1, ..., xn) of degree d.
#The decision-tree data format: [[Name,[DescriptiveFeature1, ..., DescriptiveFeaturek],TargetFeature], ...]

#Profile(n,k): inputs a positive integer n and a positive integer k; outputs
#a list of 0s and 1s of length k whose i-th item answers "is n divisible by ithprime(i)?",
#for i from 1 to k
#For example, Profile(10,3) should output [1,0,1]
Profile:=proc(n,k) local i:
subs({true=1, false=0},[seq(evalb(n mod ithprime(i)=0),i=1..k)]):
end:

#ABTP(N,k): inputs a positive integer N and a positive integer k; outputs an ABT
#(analytics base table) for the data points n=1..N, using the k descriptive features
#"is n divisible by the i-th prime?" and the target feature "is n prime?"
ABTP:=proc(N,k) local n:
[seq([n,Profile(n,k),isprime(n)],n=1..N)]:
end:
#end C16.txt

#From C17.txt
#C17.txt, Decision trees, March 28, 2019
Help17:=proc(): print(`RandDT(P,d), RandDT1(P,d,AF) , Predict(P,T,v)`): print(`Breakup(P,S,f)`): end:

#If there are k descriptive features, we name them 1, ..., k;
#descriptive feature i has a[i] possible values (named 1, ..., a[i]),
#and the target feature has N possible values; the profile is
#[[a[1], ..., a[k]],N]
#For example, with the features "divisible by 2", "by 3", and "by 5", and the target "is prime":
#[[2,2,2],2]

#RandDT1(P,d,AF): inputs the type of the data P=[L,N] and an integer d; outputs
#a random decision tree of depth <=d, with vertex-labels from AF
RandDT1:=proc(P,d,AF) local L,N,T1,T,RootLabel,AF1,k,i:
L:=P[1]: N:=P[2]:
if d=0 then
 RETURN([rand(1..N)(),[]]):
fi:
RootLabel:=AF[rand(1..nops(AF))()]:
AF1:=AF minus {RootLabel}:
#k:=number of subtrees of descriptive feature RootLabel
k:=L[RootLabel]:
T:=[]:
for i from 1 to k do
 T1:=RandDT1(P,d-1,AF1):
 T:=[op(T),T1]:
od:
[RootLabel,T]:
end:

#RandDT(P,d): inputs the type of the data P=[L,N] and an integer d; outputs
#a random decision tree of depth <=d
RandDT:=proc(P,d) local i:
if d>nops(P[1]) then
 print(`The depth can be at most`, nops(P[1])):
 RETURN(FAIL):
fi:
RandDT1(P,d,{seq(i,i=1..nops(P[1]))}):
end:

#Predict(P,T,v): inputs a profile P, a decision tree T with that profile, and a
#data vector v; outputs the prediction made by the tree regarding the target value of v
Predict:=proc(P,T,v) local N,L,i,RootLabel,T1:
L:=P[1]: N:=P[2]:
#1<=v[i]<=L[i] and nops(v)=nops(L)
if nops(v)<>nops(L) then
 RETURN(FAIL):
fi:
for i from 1 to nops(v) do
 if not (1<=v[i] and v[i]<=L[i]) then
  RETURN(FAIL):
 fi:
od:
if T[2]=[] then
 if not (T[1]>=1 and T[1]<=N) then
  RETURN(FAIL):
 else
  RETURN(T[1]):
 fi:
fi:
RootLabel:=T[1]:
T1:=T[2][v[RootLabel]]:
Predict(P,T1,v):
end:

#Breakup(P,S,f): inputs a profile P of a data-set, a data-set S whose members
#[Name,v,Target] have descriptive vectors v of length nops(P[1]), and a descriptive
#feature f (between 1 and nops(P[1])); outputs the list of subsets of S according
#to the value of v[f] (there are P[1][f] of them)
#Modified after class
Breakup:=proc(P,S,f) local L,N,i,SS,v:
L:=P[1]: N:=P[2]:
if not (1<=f and f<=nops(P[1])) then
 RETURN(FAIL):
fi:
for i from 1 to L[f] do
 SS[i]:=[]:
od:
for i from 1 to nops(S) do
 v:=S[i][2]:
 SS[v[f]]:=[op(SS[v[f]]),S[i]]:
od:
[seq(SS[i],i=1..L[f])]:
end:
#end from C17.txt

#Dodam(N,k): converts ABTP(N,k) into the [Name, v, t] format: the 0/1 feature values
#are shifted to 1/2, and the false/true target becomes 1/2
Dodam:=proc(N,k) local G,i:
G:=subs({false=1,true=2},ABTP(N,k)):
[seq([G[i][1], G[i][2]+[1$k], G[i][3]],i=1..nops(G))]:
end:

#H(P,DS): the entropy according to the target feature of the data set DS, whose
#members are [Name,v,t] (1<=t<=N), N=P[2]
H:=proc(P,DS) local co,i,t,N,s:
N:=P[2]:
co:=[0$N]:
for i from 1 to nops(DS) do
 t:=DS[i][3]:
 co:=co+[0$(t-1),1,0$(N-t)]:
od:
s:=convert(co,`+`):
co:=co/s:
evalf(SEnt(co)):
end:

#IG(P,DS,f): the information gain in picking f as the root of the decision tree
#(empty parts of the break-up contribute 0)
#Try: IG([[2,2,2],2],Dodam(20,3),1);
IG:=proc(P,DS,f) local i,k,DSlist,N:
N:=P[2]:
k:=nops(P[1]):
if not (1<=f and f<=k) then
 RETURN(FAIL):
fi:
DSlist:=Breakup(P,DS,f):
H(P,DS)-add(`if`(DSlist[i]=[],0,nops(DSlist[i])/nops(DS)*H(P,DSlist[i])),i=1..P[1][f]):
end:
##end old stuff
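#A tiny worked example, not part of the original file (DemoIG is a name made up here):
#in the four-point data set below, feature 1 determines the target completely, while
#feature 2 carries no information, so IG should return 1 (bit) for f=1 and 0 for f=2.
DemoIG:=proc() local P4,DS4:
P4:=[[2,2],2]:
DS4:=[[1,[1,1],1],[2,[1,2],1],[3,[2,1],2],[4,[2,2],2]]:
[IG(P4,DS4,1),IG(P4,DS4,2)]:
end:
#DemoIG() should return [1., 0.]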
#TBU(P,DS): inputs P=[L,N] and a data set DS; outputs a list of length
#N whose i-th entry is the number of data points whose target feature equals i
TBU:=proc(P,DS) local i,co,N:
N:=P[2]:
for i from 1 to N do
 co[i]:=0:
od:
for i from 1 to nops(DS) do
 co[DS[i][3]]:=co[DS[i][3]]+1:
od:
[seq(co[i],i=1..N)]:
end:

#Majority(P,DS): inputs P, a profile of our data, [L,N], where
#L is a list of length k (k being the number of descriptive features) and
#L[i] is the number of different values {1,2, ..., L[i]} that
#descriptive feature i can assume, and a
#data set DS=[ [Name, DescriptiveVector, TargetValue], ...];
#outputs the most common target value
Majority:=proc(P,DS):
max[index](TBU(P,DS)):
end:

#MakeDS1(P,DS,AF,DSprev): implements the ID3 algorithm
#inputs P=[L,N], a data set
#DS=[ ..., [Name, v,t], ...], the set AF of available features, and the parent data set DSprev
MakeDS1:=proc(P,DS,AF,DSprev) local i,TarS,champ,rec,hope,SDS:
if nops(DS)=0 then
 RETURN([Majority(P,DSprev),[]]):
fi:
TarS:={seq(DS[i][3],i=1..nops(DS))}:
if nops(TarS)=1 then
 RETURN([TarS[1],[]]):
fi:
if AF={} then
 RETURN([Majority(P,DS),[]]):
fi:
champ:=AF[1]: rec:=IG(P,DS,champ):
for i from 2 to nops(AF) do
 hope:=IG(P,DS,AF[i]):
 if hope>rec then
  champ:=AF[i]: rec:=hope:
 fi:
od:
SDS:=Breakup(P,DS,champ):
[champ,[seq(MakeDS1(P,SDS[i],AF minus {champ},DS),i=1..nops(SDS))]]:
end:

#MakeDS(P,DS): implements the ID3 algorithm (added after class)
MakeDS:=proc(P,DS) local i:
MakeDS1(P,DS,{seq(i,i=1..nops(P[1]))},DS):
end:
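#A small usage sketch, not part of the original file (CheckTree is a name made up here):
#grow the ID3 tree on the divisibility data Dodam(N,k) and report the fraction of the
#N data points whose target value the tree reproduces via Predict.
CheckTree:=proc(N,k) local P,DS,T,i,co:
P:=[[2$k],2]:
DS:=Dodam(N,k):
T:=MakeDS(P,DS):
co:=0:
for i from 1 to nops(DS) do
 if Predict(P,T,DS[i][2])=DS[i][3] then
  co:=co+1:
 fi:
od:
co/nops(DS):
end:
#For example, CheckTree(20,3) scores the tree MakeDS([[2,2,2],2],Dodam(20,3)) on its
#own training data; it cannot be 1 in general, since distinct n can share the same
#divisibility profile but differ in primality.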