Distributed Sampling on Dask Bags

import randomseq = [1, 2, 3, 4, 5, 6]s = random.choices(seq, k=2)print(s)>> [2, 6]
import dask.bag as db
from dask.bag import random
seq = db.from_sequence([1, 2, 3, 4, 5, 6], npartitions=3)s = random.choices(seq, k=2)print(list(s.compute()))>> [2, 6]
import dask.bag as db
numbers = range(6)
buckets = {i: 0 for i in numbers}
a = db.from_sequence(numbers, partition_size=4)
for i in range(150):
s = a.sample(k=1).compute()
for e in s:
buckets[e] += 1
print(buckets)
install.packages('XNomial')
require(XNomial)
obs = c(25,26,33,22,17,27)
n = length(obs)
expr = rep(1/n, n)
xmulti(obs, expr, statName = "LLR", histobins = T, histobounds = c(0, 0), showCurve = T)
P value  (LLR)  =  0.3335
P value (Prob) = 0.3339
P value (Chisq) = 0.3428

Conclusions

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store