:- use_module(library(lists)).
:- arithmetic_function(log/2).

% prob(+Model, +Words, -P)
%
% P is the probability of the word list Words under Model, which is one of
% unigram, bigram or trigram.  Words is expected to end with the e_o_s
% marker (test_sentence/2 appends it); e_o_s also acts as the context that
% precedes the first word in the bigram and trigram models.
prob(unigram, Words, P) :-
    prob(unigram, Words, 1, P).

prob(bigram, Words, P) :-
    % Prefix e_o_s so the first word is conditioned on a sentence boundary.
    prob(bigram, [e_o_s|Words], 1, P).

% An empty word list is the empty product.  The unigram and bigram models
% already yield 1 for []; without this clause the trigram model failed.
prob(trigram, [], 1).
prob(trigram, [Word1|Words], P) :-
    % The first word has no two-word history, so it is scored with the
    % bigram estimate P(Word1 | e_o_s), which seeds the accumulator.
    pw_w(Word1, e_o_s, P1),
    prob(trigram, [e_o_s,Word1|Words], P1, P).

% prob(+Model, +Words, +Acc0, -Result)
%
% Accumulator-passing worker behind prob/3.  Acc0 is the product collected
% so far; Result is Acc0 multiplied by the conditional probability of every
% remaining word in Words.

% Unigram: each word contributes its own relative frequency pw/2.
prob(unigram, [], Product, Product).
prob(unigram, [Word|Rest], Acc0, Result) :-
    pw(Word, WordP),
    Acc is Acc0 * WordP,
    prob(unigram, Rest, Acc, Result).

% Bigram: slide a two-word window; stop when only the last word is left.
prob(bigram, [_], Product, Product) :- !.
prob(bigram, [Prev,Word|Rest], Acc0, Result) :-
    pw_w(Word, Prev, WordP),
    Acc is Acc0 * WordP,
    prob(bigram, [Word|Rest], Acc, Result).

% Trigram: slide a three-word window; stop when two words are left.
prob(trigram, [_,_], Product, Product).
prob(trigram, [Prev2,Prev1,Word|Rest], Acc0, Result) :-
    pw_ww(Word, Prev2, Prev1, WordP),
    Acc is Acc0 * WordP,
    prob(trigram, [Prev1,Word|Rest], Acc, Result).

% pw(+Word, -P)
%
% Unigram estimate: P is the count stored in fw/2 for Word divided by the
% value of tokens/1 (presumably the corpus size; both are defined outside
% this section).
pw(Word, P) :-
    fw(Word, Count),
    tokens(Total),
    P is Count / Total.

% pw_w(+Word, +Prev, -P)
%
% Bigram estimate P(Word | Prev): the fww/3 count for the pair (Prev, Word)
% divided by the fw/2 count for Prev.  Note the argument order: the
% predicted word comes first, its predecessor second.
pw_w(Word, Prev, P) :-
    fww(Prev, Word, PairCount),
    fw(Prev, PrevCount),
    P is PairCount / PrevCount.

% pw_ww(+Word, +Prev2, +Prev1, -P)
%
% Trigram estimate P(Word | Prev2 Prev1): the fwww/4 count for the triple
% (Prev2, Prev1, Word) divided by the fww/3 count for the pair
% (Prev2, Prev1).  The predicted word is the first argument.
pw_ww(Word, Prev2, Prev1, P) :-
    fwww(Prev2, Prev1, Word, TripleCount),
    fww(Prev2, Prev1, PairCount),
    P is TripleCount / PairCount.

% test
%
% Convenience entry point: runs test sentence number 1 with the unigram
% model (the same defaults test/1 would pick).
test :-
    test(1, unigram).

% test(+Index)
%
% Runs the test sentence selected by Index using the default unigram model.
test(Index) :-
    test(Index, unigram).

% test(+Index, +Model)
%
% Looks up the word list stored as test_data(Index, Words) (defined outside
% this section) and prints it with its probability under Model.
test(Index, Model) :-
    test_data(Index, Sentence),
    print_probability(Model, Sentence).

% test_sentence(+Words)
%
% Prints the probability of Words under the default unigram model.
test_sentence(Sentence) :-
    test_sentence(unigram, Sentence).

% test_sentence(+Model, +Words)
%
% Appends the end-of-sentence marker e_o_s to Words and prints the
% resulting sentence together with its probability under Model.
test_sentence(Model, Sentence) :-
    append(Sentence, [e_o_s], Terminated),
    print_probability(Model, Terminated).

% print_probability(+Model, +Words)
%
% Writes Words (via print_words/1) and their probability under Model to the
% current output.  Model must be unigram, bigram or trigram.  If prob/3
% fails for the sentence, the probability is reported as 0.0.
print_probability(Model, Words) :-
    % member/2 (not memberchk/2) is kept so an unbound Model still
    % enumerates the three models on backtracking, as before.
    member(Model, [unigram,bigram,trigram]),
    (   prob(Model, Words, Probability)
    ->  true
    ;   Probability = 0.0
    ),
    % write/1 is used for output: print/1 emits quoted terms on current
    % SWI-Prolog, which rendered the labels as 'Sentence:' with quotes.
    write('Sentence:'),
    print_words(Words),
    nl,
    write('Probability: '),
    write(Probability),
    nl.

% print_words(+Words)
%
% Writes every word of Words to the current output, each preceded by a
% single space.  A trailing e_o_s marker is not written.

% A list without the trailing marker (including []) now ends cleanly;
% previously the words were printed and the call then failed.
print_words([]) :- !.
print_words([e_o_s]) :- !.
print_words([W|Ws]) :-
    % write/1 instead of print/1: print/1 quotes atoms on current
    % SWI-Prolog, so the separator was shown as ' ' including the quotes.
    write(' '),
    write(W),
    print_words(Ws).

% test_corpus(+Model)
%
% Computes the probability P of the whole test corpus under Model and
% prints it, followed by |log2 P| divided by the number of words and by the
% number of sentences.  The sentences come from test_per_sentence/1 and the
% word list from test_per_word/1 (both defined outside this section).
%
% NOTE(review): P is a plain product of probabilities (see corpus_prob/4),
% so on a large corpus it may underflow to 0.0, for which the logarithm is
% undefined - confirm corpus sizes or move to summed logs.
test_corpus(Model) :-
    test_per_sentence(Sentences),
    corpus_prob(Model, Sentences, P),
    % Call log/3 directly and once, rather than evaluating log(2,P) twice
    % through the user-defined arithmetic function.
    log(2, P, Log2P),
    Bits is abs(Log2P),
    length(Sentences, N),
    LPS is Bits/N,
    test_per_word(Words),
    length(Words, M),
    LPW is Bits/M,
    % write/1 instead of print/1: print/1 quotes atoms on current
    % SWI-Prolog, so the labels appeared wrapped in single quotes.
    write('Probability: '),
    write(P),
    nl,
    write('Entropy per word: '),
    write(LPW),
    nl,
    write('Entropy per sentence: '),
    write(LPS),
    nl.
    

% corpus_prob(+Model, +Sentences, -P)
%
% P is the product of the probabilities that Model assigns to each
% sentence in Sentences; the accumulator of corpus_prob/4 starts at 1.
corpus_prob(Model, Corpus, P) :-
    corpus_prob(Model, Corpus, 1, P).

% corpus_prob(+Model, +Sentences, +Acc0, -P)
%
% Accumulator-passing worker: multiplies Acc0 by prob/3 of every sentence
% in Sentences.  Fails if prob/3 fails for any sentence.
corpus_prob(_, [], Product, Product).
corpus_prob(Model, [Sentence|Rest], Acc0, Product) :-
    prob(Model, Sentence, SentenceP),
    Acc is Acc0 * SentenceP,
    corpus_prob(Model, Rest, Acc, Product).

% log(+Base, +X, -Result)
%
% Result is the logarithm of X to the given Base, obtained from natural
% logarithms by change of base: ln(X) / ln(Base).
log(Base, X, Result) :-
    LnX is log(X),
    LnBase is log(Base),
    Result is LnX / LnBase.

