#!/usr/bin/python

## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
##      http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.

"""Replicates Rao et al's "proof" that the Indus symbols
represent a writing system. With a pure Zipfian model with alpha=1.5,
and complete conditional independence, you observe a very similar
growth of conditional entropy to what they observe for the Indus
corpus.

Outputs code that can be fed into R to produce the plot.
"""

__author__ = """
rws@xoba.com (Richard Sproat)
"""


from math import log


def MakePerfectZipf(n, alpha=1.0):
  alpha = float(alpha)
  unigrams = []
  total = 0
  for i in range(1, n + 1):
    p = 1.0/(i ** alpha)
    unigrams.append(p)
    total += p
  for i in range(0, n):
    unigrams[i] = unigrams[i]/total
  return unigrams


def MakeIndependentJointMatrix(unigrams):
  joints = {}
  for i in range(len(unigrams)):
    for j in range(len(unigrams)):
      joints[i, j] = unigrams[i] * unigrams[j]
  return joints


def ComputeEntropy(joints, unigrams, n):
  ent = 0
  for x in range(n):
    for y in range(n):
      ent -= joints[x, y] * log(joints[x, y] / unigrams[x])
  return ent


def SimpleIndependent(n, alpha):
  unigrams = MakePerfectZipf(n, alpha)
  joints = MakeIndependentJointMatrix(unigrams)
  ents = []
  for i in range(20, n, 20):
    ents.append(ComputeEntropy(joints, unigrams, i))
  return ents


def MakeRPlot(ents, alpha):
  steps = []
  for i in range(len(ents)):
    steps.append(i*20 + 20)
  print 'postscript("plot.ps")'
  print 'steps = c(%s)' % ','.join(map(lambda x: str(x), steps))
  print 'ents = c(%s)' % ','.join(map(lambda x: str(x), ents))
  print 'plot(steps, ents, xlab="Number of Tokens",\
 ylim=c(0,4),main="Perfect Zipf model, alpha=%4.2f, Complete Independence",\
 ylab="Conditional Entropy", type="b", pch=5)' % alpha
  print 'q()'


if __name__ == '__main__':
  alpha = 1.4
  ents = SimpleIndependent(400, alpha)
  MakeRPlot(ents, alpha)


