# -*- Mode: Python -*- import random import math # based on code written in fortran-90 from: # http://users.bigpond.net.au/amiller/random.html # # which in turn references: # #! Algorithm from page 551 of: #! Devroye, Luc (1986) `Non-uniform random variate generation', #! Springer-Verlag: Berlin. ISBN 3-540-96305-7 (also 0-387-96305-7) class zipf_generator: def __init__ (self, a): if a <= 1.0: raise ValueError, " must be > 1.0" else: self.a = a self.b = 2.0 ** (a - 1.0) self.const = -1.0 / (a - 1.0) def next (self): while 1: u = random.random() v = random.random() r = math.floor (u ** self.const) t = (1.0 + (1.0/r)) ** (self.a - 1.0) if (v * r * (t - 1.0) / (self.b - 1.0) <= t/self.b): return r # to see this in gnuplot: # 1) generate: "python zipf.py 2.0 > zipf.txt" # 2) sort: "sort -r -n > zipf.sorted.txt" # 3) plot: # gnuplot> set logscale x # gnuplot> set logscale y # gnuplot> plot "zipf.sorted.txt" # # you should see something like this: # # 1000 ++-----+---+--+-+-+-++++------+---+--+--++-++-++------+---+--+-+-++-+++ # + + "/tmp/zipf2.txt" A + # + + # + + # | | # + + # A A A | # 100 ++ A A ++ # + A AAAAAA + # + AAAA + # + AAAAAA + # + AAA + # | AAAAA | # 10 ++ AAAA ++ # + AAAA + # + AAAAA + # + AAA + # + AAAA + # + AAAAAA + # + + + + # 1 ++-----+---+--+-+-+-++++------+---+--+--++-++-++------+---+--AAAAAAAAAA # 1 10 100 1000 if __name__ == '__main__': import sys zg = zipf_generator (float (sys.argv[1])) for x in range (1000): print zg.next()