Need your help on running clojure library via leiningen - clojure

I found a solution for minimum hitting set on github: https://github.com/bdesham/hitting-set and then tried to use it. The solution is clojure library so I downloaded leiningen to try to run it.
I read the readme file from github link but I still didn't know how to run the clj code to get result of minimal hitting set. I saw that there was a function called minimal-hitting-sets in hitting_set.clj file but I don't know how to call it with argument.
Eg: Get minimal hitting set of:
{"Australia" #{:white :red :blue},
"Tanzania" #{:black :blue :green :yellow},
"Norway" #{:white :red :blue},
"Uruguay" #{:white :blue :yellow},
"Saint Vincent and the Grenadines" #{:blue :green :yellow},
"Ivory Coast" #{:white :orange :green},
"Sierra Leone" #{:white :blue :green},
"United States" #{:white :red :blue}}
Project.clj code:
(defproject hitting-set "0.9.0"
  :description "Find minimal hitting sets"
  :url "https://github.com/bdesham/hitting-set"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"
            :distribution :repo
            :comments "Same as Clojure"}
  :min-lein-version "2.0.0"
  ;; No :main entry: the hitting-set namespace is a library and defines no
  ;; -main function, so there is nothing for `lein run` to invoke. Call the
  ;; functions from `lein repl` instead.
  ;; The project must not list itself ([hitting-set "0.9.0"]) as a
  ;; dependency; only external libraries belong here.
  :dependencies [[org.clojure/clojure "1.4.0"]])
hitting_set.clj code:
;; The functions below call union, difference and intersection unqualified,
;; so they are pulled in from clojure.set. A namespace cannot :use itself.
(ns hitting-set
  (:use [clojure.set :only [difference intersection union]]))
; Utility functions
(defn- dissoc-elements-containing
  "Given a map m whose values are sets, returns m without the entries whose
  set contains the element el. Adapted from
  http://stackoverflow.com/a/2753997/371228"
  [el m]
  (let [doomed (keep (fn [[k vs]]
                       ;; each value is called as a predicate on el
                       (when (vs el) k))
                     m)]
    (apply dissoc m doomed)))
(defn- map-old-new
  "Returns a lazy sequence of two-element vectors [x (f x)], one for each
  element x of coll."
  [f coll]
  (for [x coll]
    [x (f x)]))
(defn- count-vertices
  "Returns the number of distinct vertices appearing in the edges (the map
  values) of the hypergraph h."
  [h]
  (->> (vals h)
       (reduce union #{})
       count))
(defn- sorted-hypergraph
  "Returns h as a sorted map whose keys are ordered by the size of their edge
  (smallest first), with ties broken by comparing the keys themselves."
  [h]
  (let [rank    (fn [k] [(count (get h k)) k])
        by-size (fn [a b] (compare (rank a) (rank b)))]
    (into (sorted-map-by by-size) h)))
(defn- remove-dupes
  "Given a map m, returns a sorted map that keeps, for each distinct value,
  only the first key (in m's iteration order) that maps to it."
  [m]
  (let [step (fn [[kept seen :as state] [k v]]
               (if (contains? seen v)
                 state
                 [(assoc kept k v) (conj seen v)]))]
    (first (reduce step [(sorted-map) #{}] m))))
(defn- efficient-hypergraph
  "Given a hypergraph h, returns an equivalent hypergraph that the hitting
  set algorithm can process more quickly: edges with duplicate vertex sets
  are dropped first, then the result is sorted smallest edge first."
  [h]
  (sorted-hypergraph (remove-dupes h)))
(defn- largest-edge
  "Returns the name (key) of the edge of h with the most vertices, or nil
  when h has no edges."
  [h]
  (let [biggest-entry (last (sorted-hypergraph h))]
    (first biggest-entry)))
(defn- remove-vertices
  "Given a hypergraph h and a set vv of vertices, returns a map in which every
  edge of h has had the vertices of vv removed. Edges that would become empty
  are left out of the result entirely."
  [h vv]
  (reduce (fn [kept [edge-name vertices]]
            (let [remaining (difference vertices vv)]
              (if (pos? (count remaining))
                (assoc kept edge-name remaining)
                kept)))
          {}
          h))
; Auxiliary functions
;
; These functions might be useful if you're working with hitting sets, although
; they're not actually invoked anywhere else in this project.
(defn reverse-map
  "Takes a map from keys to sets of values. Produces a map in which each value
  is mapped to the set of keys in whose sets it originally appeared.

  Values are never flattened, so sequential values such as vectors are kept
  intact as keys of the result. An empty input yields an empty map."
  [m]
  (reduce (fn [acc [k vs]]
            (reduce (fn [inner v]
                      ;; start a fresh set the first time v is seen
                      (update-in inner [v] (fnil conj #{}) k))
                    acc
                    vs))
          {}
          m))
(defn drop-elements
  "Given a set s of N elements, returns a set of N sets, each obtained by
  removing one different element from s."
  [s]
  (set (map (fn [e] (disj s e)) s)))
; The main functions
;
; These are the functions that users are probably going to be interested in.
; Hitting set
(defn hitting-set?
  "Returns true if t is a hitting set of h, i.e. t shares at least one vertex
  with every edge of h. Does not check whether t is minimal."
  [h t]
  (let [hits? (fn [edge] (not (empty? (intersection edge t))))]
    (every? hits? (vals h))))
(defn hitting-set-exists?
  "Returns true if a hitting set of size k exists for the hypergraph h. See the
  caveat in README.md for odd behavior of this function."
  [h k]
  (cond
    (> k (count-vertices h)) false
    (empty? h)               true
    (zero? k)                false
    :else
    ;; Branch on each vertex of the first edge: picking it hits every edge
    ;; that contains it, leaving a smaller problem with budget k - 1.
    (let [first-edge (first (vals h))
          solvable?  (fn [v]
                       (hitting-set-exists? (dissoc-elements-containing v h)
                                            (dec k)))]
      (boolean (some solvable? first-edge)))))
(defn- enumerate-algorithm
  "Recursive worker for enumerate-hitting-sets. h is the part of the
  hypergraph not yet hit, k the number of vertices that may still be picked,
  and x the set of vertices picked so far. Returns a set of hitting sets."
  [h k x]
  (cond
    (empty? h) #{x}
    (zero? k)  #{}
    :else
    ;; Try every vertex of the first remaining edge and merge the results.
    (apply union
           (for [v (first (vals h))]
             (enumerate-algorithm (dissoc-elements-containing v h)
                                  (dec k)
                                  (union x #{v}))))))
(defn enumerate-hitting-sets
  "Return a set containing the hitting sets of h. See the caveat in README.md
  for odd behavior of this function. If the parameter k is passed then the
  function will return all hitting sets of size less than or equal to k;
  without k, the number of vertices in h is used as the limit."
  ([h]
   (enumerate-hitting-sets h (count-vertices h)))
  ([h k]
   (let [prepared (efficient-hypergraph h)]
     (enumerate-algorithm prepared k #{}))))
(defn minimal-hitting-sets
  "Returns a set containing the minimal hitting sets of the hypergraph h, or
  nil when none is found. If you just want one hitting set and don't care
  whether there are multiple minimal hitting sets, use
  (first (minimal-hitting-sets h))."
  [h]
  ;; `some` walks the sizes one at a time and stops at the first size that
  ;; yields a result. Mapping over the chunked seq returned by `range` would
  ;; run the enumeration for a whole chunk of sizes before `first` sees any.
  (some (fn [k]
          (let [found (enumerate-hitting-sets h k)]
            (when (seq found)
              found)))
        (range 1 (inc (count-vertices h)))))
; Set cover
(defn cover?
  "Returns true if the edges of h named by the elements of s together contain
  every vertex of the hypergraph h."
  [h s]
  (let [all-vertices     (apply union (vals h))
        covered-vertices (apply union (for [edge-name s]
                                        (get h edge-name)))]
    (= all-vertices covered-vertices)))
(defn greedy-cover
  "Returns a set cover of h using the 'greedy' algorithm: repeatedly pick the
  largest remaining edge, then remove its vertices from the remaining edges,
  until the picked edges cover h."
  [h]
  (loop [remaining h
         chosen    #{}]
    (if-not (cover? h chosen)
      (let [biggest (largest-edge remaining)]
        (recur (remove-vertices remaining (get remaining biggest))
               (conj chosen biggest)))
      chosen)))
(defn approx-hitting-set
  "Returns a hitting set of h, computed as a greedy set cover of the reversed
  map. The set is guaranteed to be a hitting set, but may not be minimal."
  [h]
  (-> h
      reverse-map
      greedy-cover))
Since I am a newbie to Leiningen and Clojure, I really need your help with this.
Thanks,
Hung

In general to use a clojure library from clojure:
make a new project with lein new app project-name
include the library in project.clj's dependency section
require and refer to that library in at least one .clj file (core.clj is an example)
load that file in your editor of choice and switch the REPL namespace to the namespace in the ns form at the top of the file.
...
profit!!
There are a lot more details though I hope this is enough to give you an overview of one way to go about this, and if you solve step 5 please share your solution ;-)

Related

Clojure - Sliding Window Minimum in Log Time

Given vector size n and window size k, how can I efficiently calculate the sliding window minimum in n log k time? ie, for vector [1 4 3 2 5 4 2] and window size 2, the output would be [1 3 2 2 4 2].
Obviously I can do it using partition and map, but that's n * k time.
I think I need to keep track of the minimum in a sorted map, and update the map when it's outside the window. But although I can get the min of a sorted map in log time, searching through the map to find any indexes that are expired is not log time.
Thanks.
You can solve this with a priority queue based on Clojure's priority map data structure. We index the values in the window with their position in the vector.
The value of its first entry is the window minimum.
We add the new entry and get rid of the oldest one by key/vector-position.
A possible implementation is
;; `use` is a function, so the libspec vector must be quoted; unquoted, the
;; symbols inside it would be evaluated and fail to resolve.
(use '[clojure.data.priority-map :only [priority-map]])
(defn windowed-min
  "Returns a lazy sequence of the minimum of each window of k consecutive
  elements of coll. The current window is held in a priority map keyed by
  element position."
  [k coll]
  (let [indexed (map-indexed list coll)
        initial (into (priority-map) (take k indexed))
        ;; slide one step: forget position (idx - k), add position idx
        slide   (fn [window [idx x]]
                  (assoc (dissoc window (- idx k)) idx x))]
    (->> (drop k indexed)
         (reductions slide initial)
         (map (fn [window] (val (first window)))))))
For example,
(windowed-min 2 [1 4 3 2 5 4 2])
=> (1 3 2 2 4 2)
The solution is developed lazily, so can be applied to an endless sequence.
After the initialisation, which is O(k), the function computes each element in the sequence in O(log k) time, as noted here.
You can solve this in linear time, O(n) rather than O(n log k), as described in 1. http://articles.leetcode.com/sliding-window-maximum/ (easily changed from finding the max to finding the min) and 2. https://people.cs.uct.ac.za/~ksmith/articles/sliding_window_minimum.html
The approach needs a double-ended queue to manage previous values, which takes O(1) time for most queue operations (i.e. push/pop/peek, etc.) rather than the O(log k) of a priority queue (i.e. a priority map). I used a double-ended queue from https://github.com/pjstadig/deque-clojure
Main Code to implement code in 1st reference above (for min rather than max):
(defn windowed-min-queue
  "Returns a lazy sequence of the minimum of each window of w consecutive
  elements of the vector a. The deque holds indices into a; the index at the
  front is read as the position of the current window's minimum."
  [w a]
  (let [;; deque for the first window, built from indices 0 .. w-1
        seed   (reduce (fn [dq i]
                         (dq-push-back i (prune-back a i dq)))
                       empty-deque
                       (range w))
        ;; advance the window so that it ends at index i
        step   (fn [q i]
                 (dq-push-back i (prune-front i w (prune-back a i q))))
        states (reductions step seed (range w (count a)))]
    (map (fn [dq] (nth a (dq-front dq))) states)))
Comparing the speed of this method to the other solution, which uses a priority map, we have the following (note: I like the other solution as well, since it's simpler).
; Test using random arrays of data: two independent vectors of N random
; integers below 50, and a window size of w.
(def N 1000000)
(def a (into [] (take N (repeatedly #(rand-int 50)))))
(def b (into [] (take N (repeatedly #(rand-int 50)))))
(def w 1024)
; Solution based upon the double-ended queue (windowed-min-queue)
(time (doall (windowed-min-queue w a)))
;=> "Elapsed time: 1820.526521 msecs"
; Solution based upon Priority Map (windowed-min; see the other solution, which is also great since it's simpler)
(time (doall (windowed-min w b)))
;=> "Elapsed time: 8290.671121 msecs"
The double-ended queue version is over 4x faster, which is great considering the PriorityMap is written in Java while the double-ended queue code is pure Clojure (see https://github.com/pjstadig/deque-clojure)
Including the other wrappers/utilities used on the double-ended queue for reference.
;; Thin wrappers around the deque library so that the element comes first
;; and the deque last, which suits ->> threading.

(defn dq-push-front
  "Returns dq with e added via conj."
  [e dq]
  (conj dq e))

(defn dq-push-back
  "Returns dq with e added via proto/inject."
  [e dq]
  (proto/inject dq e))

(defn dq-front
  "Returns the first element of dq."
  [dq]
  (first dq))

(defn dq-pop-front
  "Returns dq after pop."
  [dq]
  (pop dq))

(defn dq-pop-back
  "Returns dq after proto/eject."
  [dq]
  (proto/eject dq))

(defn deque-empty?
  "Returns true when dq is the very same object as empty-deque."
  [dq]
  ;; NOTE(review): relies on the library handing back the empty-deque
  ;; instance whenever a deque becomes empty -- confirm.
  (identical? empty-deque dq))

(defn dq-back
  "Returns the result of proto/last on dq."
  [dq]
  (proto/last dq))
(defn prune-back
  "Pops indices off the back of dq for as long as the value of a at index i
  is strictly smaller than the value of a at the back index. Returns the
  pruned deque."
  [a i dq]
  (if (and (not (deque-empty? dq))
           (< (nth a i) (nth a (dq-back dq))))
    (recur a i (dq-pop-back dq))
    dq))
(defn prune-front
  "Pops indices off the front of dq for as long as the front index is at most
  i - w, i.e. no longer inside the window of width w that ends at index i.
  Returns the pruned deque."
  [i w dq]
  (if (and (not (deque-empty? dq))
           (<= (dq-front dq) (- i w)))
    (recur i w (dq-pop-front dq))
    dq))
My solution uses two auxiliary maps to achieve fast performance. I map the keys (indices) to their values, and also map each value to its number of occurrences in a sorted map. Upon each move of the window, I update the maps and get the minimum from the sorted map, all in log time.
The downside is the code is a lot uglier, not lazy, and not idiomatic. The upside is that it outperforms the priority-map solution by about 2x. I think a lot of that though, can be blamed on the laziness of the solution above.
(defn- init-aux-maps
  "Builds the two auxiliary maps for the first window (the first w elements
  of the vector v). Returns [km vm], where km is a sorted map of index to
  value and vm a sorted map of value to its number of occurrences."
  [w v]
  (let [window     (subvec v 0 w)
        by-index   (into (sorted-map) (map-indexed vector window))
        value-freq (into (sorted-map) (frequencies window))]
    [by-index value-freq]))
(defn- update-aux-maps
  "Slides the window one step. The entry with the smallest index is dropped
  from km and its value's count in vm is decremented (the value is removed
  when its count reaches zero); then index j with value x is added to km and
  x's count in vm is incremented. Returns the new [km vm]."
  [[km vm] j x]
  (let [[old-idx old-val] (first km)
        shifted     (assoc (dissoc km old-idx) j x)
        without-old (if (= (vm old-val) 1)
                      (dissoc vm old-val)
                      (update vm old-val dec))
        with-new    (update without-old x (fnil inc 0))]
    [shifted with-new]))
(defn- get-minimum
  "Returns the smallest key of vm, the second element of the aux-map pair;
  nil when vm is empty."
  [[_ vm]]
  (-> vm first first))
(defn sliding-minimum
  "Returns a vector with the minimum of each window of w consecutive elements
  of the vector v. Eager (not lazy)."
  [w v]
  ;; j is the index of the next element to enter the window. The previous
  ;; version also carried a counter i that was never read.
  (loop [j   w
         am  (init-aux-maps w v)
         acc []]
    (let [acc (conj acc (get-minimum am))]
      (if (< j (count v))
        (recur (inc j) (update-aux-maps am j (v j)) acc)
        acc))))

Clojure: Find even numbers in a vector

I am coming from a Java background trying to learn Clojure. As the best way of learning is by actually writing some code, I took a very simple example of finding even numbers in a vector. Below is the piece of code I wrote:
`
;; Collects the even numbers of input into a vector.
;; Note: `def` always creates or rebinds a namespace-level var, even when
;; used inside a function, so `output` here is global state that every call
;; resets to [] and then rebinds once per even element.
(defn even-vector-2 [input]
(def output [])
;; walk the input one element at a time; the loop itself returns nil
(loop [x input]
(if (not= (count x) 0)
(do
(if (= (mod (first x) 2) 0)
(do
;; rebind the global var to a vector extended with the even element
(def output (conj output (first x)))))
(recur (rest x)))))
;; the result is read back from the global var
output)
`
This code works, but it is lame that I had to use a global symbol to make it work. The reason I had to use the global symbol is because I wanted to change the state of the symbol every time I find an even number in the vector. let doesn't allow me to change the value of the symbol. Is there a way this can be achieved without using global symbols / atoms.
The idiomatic solution is straightforward:
(filter even? [1 2 3])
; -> (2)
For your educational purposes an implementation with loop/recur
(defn filter-even
  "Returns a vector of the even numbers in v, in their original order,
  using an explicit loop/recur."
  [v]
  (loop [evens     []
         remaining (seq v)]
    (if remaining
      (let [x (first remaining)]
        (recur (if (even? x) (conj evens x) evens) ; grow the result only for even x
               (next remaining)))                  ; nil once the input is exhausted
      evens)))                                     ; no recur: the loop ends here
The main problem with your code is you're polluting the namespace by using def. You should never really use def inside a function. If you absolutely need mutability, use an atom or similar object.
Now, for your question. If you want to do this the "hard way", just make output a part of the loop:
(defn even-vector-3
  "Returns a vector of the even numbers in input. The accumulator travels
  through the loop together with the remaining input."
  [input]
  (loop [remaining input
         output    []]
    (let [[n & more] remaining]    ; n is nil once the input is exhausted
      (if n
        (recur more
               (if (= 0 (mod n 2))
                 (conj output n)
                 output))          ; odd number: carry output unchanged
        output))))
Rarely is explicit looping necessary though. This is a typical case for a fold: you want to accumulate a list that's a variable-length version of another list. This is a quick version:
(defn even-vector-4
  "Returns a vector of the even numbers in input, built by folding the input
  with reduce."
  [input]
  (letfn [(keep-if-even [evens n]
            (if (= 0 (rem n 2))
              (conj evens n)
              evens))]
    ;; [] is the initial accumulator
    (reduce keep-if-even [] input)))
Really though, you're just filtering a list. Just use the core's filter:
(filter #(= (rem % 2) 0) [1 2 3 4])
Note, filter is lazy.
Try
#(filterv even? %)
if you want to return a vector or
#(filter even? %)
if you want a lazy sequence.
If you want to combine this with more transformations, you might want to go for a transducer:
(filter even?)
If you wanted to write it using loop/recur, I'd do it like this:
(defn keep-even
  "Accepts a vector of numbers, returning a vector of the even ones."
  [input]
  (loop [result []
         unused input]
    (if-let [pending (seq unused)]
      (let [curr-value (first pending)]
        (recur (if (is-even? curr-value)
                 (conj result curr-value)
                 result)
               (rest pending)))
      result)))
This gets the same result as the built-in filter function.
Take a look at filter, even? and vec
check out http://cljs.info/cheatsheet/
(defn even-vector-2
  "Returns a vector of the even numbers in input."
  [input]
  (filterv even? input))
If you want a lazy solution, filter is your friend.
Here is a non-lazy simple solution (loop/recur can be avoided if you apply always the same function without precise work) :
(defn keep-even-numbers
  "Eagerly reduces coll into a vector holding only its even numbers."
  [coll]
  (let [step (fn [evens n]
               (if (-> n (rem 2) zero?)
                 (conj evens n)
                 evens))]
    (reduce step [] coll)))
If you like mutability for "fun", here is a solution with temporary mutable collection :
(defn mkeep-even-numbers
  "Returns a vector of the even numbers in coll. The result is accumulated in
  a transient vector and made persistent before it is returned."
  [coll]
  (loop [acc (transient [])
         xs  (seq coll)]
    (if xs
      (let [n (first xs)]
        (recur (if (zero? (rem n 2)) (conj! acc n) acc)
               (next xs)))
      (persistent! acc))))
...which is slightly faster !
mod would be better than rem if you extend the odd/even definition to negative integers
You can also replace [] by the collection you want, here a vector !
In Clojure, you generally don't need to write a low-level loop with loop/recur. Here is a quick demo.
(ns tst.clj.core
(:require
[tupelo.core :as t] ))
(t/refer-tupelo)
(defn is-even?
  "Returns true when x leaves no remainder modulo 2, otherwise false."
  [x]
  (-> x
      (mod 2)
      zero?))
; quick sanity checks
(spyx (is-even? 2))
(spyx (is-even? 3))
(defn keep-even
  "Accepts a collection of numbers, returning a vector of the even ones."
  [input]
  ;; filterv builds the vector eagerly
  (filterv is-even? input))
; demonstrate on [0 1 2...9]
(spyx (keep-even (range 10)))
with result:
(is-even? 2) => true
(is-even? 3) => false
(keep-even (range 10)) => [0 2 4 6 8]
Your project.clj needs the following for spyx to work:
:dependencies [
[tupelo "0.9.11"]]

Simple "R-like" melt : better way to do?

Today I tried to implement a "R-like" melt function. I use it for Big Data coming from Big Query.
I do not have big constraints about time to compute and this function takes less than 5-10 seconds to work on millions of rows.
I start with this kind of data :
(def sample
'({:list "123,250" :group "a"} {:list "234,260" :group "b"}))
Then I defined a function to put the list into a vector :
(defn split-data-rank
  "For every row (map) of datatab, replaces the comma-separated string stored
  under the key value with a map from 1-based position to item."
  [datatab value]
  (map (fn [row]
         (let [items (str/split (row value) #",")
               ranks (range 1 (inc (count items)))]
           (assoc row value (zipmap ranks items))))
       datatab))
Launch :
(split-data-rank sample :list)
As you can see, it returns the same sequence but it replaces :list by a map giving the position in the list of each item in quoted list.
Then, I want to melt the "dataframe" by creating for each item in a group its own row with its rank in the group.
So that I created this function :
(defn split-melt
  "Melts datatab: each item of the comma-separated list stored under the key
  value becomes its own row, carrying the item under :item and its position
  under :Rank. The original value key is removed from every row."
  [datatab value]
  (for [row         (split-data-rank datatab value)
        [rank item] (row value)]
    (-> row
        (assoc :item item :Rank rank)
        (dissoc value))))
Launch :
(split-melt sample :list)
The problem is that it is heavily indented and uses a lot of map calls. I apply dissoc to drop :list (which is useless now), and I also have to use concat because without it I get a sequence of sequences.
Do you think there is a more efficient/shorter way to design this function ?
I am heavily confused with reduce, does not know whether it can be applied here since there are two arguments in a way.
Thanks a lot !
If you don't need the split-data-rank function, I will go for:
(defn melt
  "Melts datatab: for every row, the comma-separated string under the key
  value is split and each item yields a row with the item under :item, its
  1-based position under :Rank, and the value key removed."
  [datatab value]
  (for [row        datatab
        [idx item] (map-indexed vector (str/split (get row value) #","))]
    (-> row
        (assoc :Rank (inc idx) :item item)
        (dissoc value))))

Checking odd parity in clojure

I have the following functions that check for odd parity in sequence
(defn countOf
  "Counts how many elements of a-seq are equal to elem."
  [a-seq elem]
  (loop [tally     0
         remaining a-seq]
    (if (empty? remaining)
      tally
      (recur (if (= (first remaining) elem) (inc tally) tally)
             (rest remaining)))))
(defn filteredSeq
  "Returns a lazy sequence of the elements of a-seq that are not equal to
  elemToRemove."
  [a-seq elemToRemove]
  ;; Compare with = instead of using a set as the predicate: a set lookup
  ;; returns the element itself, so a nil or false element would never be
  ;; removed. The argument is now honoured rather than always removing the
  ;; first element of a-seq.
  (remove #(= elemToRemove %) a-seq))
(defn parity
  "Returns the set of elements that occur an odd number of times in a-seq."
  [a-seq]
  (loop [resultset  []
         currentSeq a-seq]
    (if (empty? currentSeq)
      (set resultset)
      (let [head      (first currentSeq)
            ;; every occurrence of head is dropped before the next round
            remaining (filteredSeq currentSeq head)]
        (recur (if (odd? (countOf currentSeq head))
                 (concat resultset (vector head))
                 resultset)
               remaining)))))
For example, (parity [1 1 1 2 2 3]) returns #{1 3}; that is, it picks the elements that occur an odd number of times in the sequence.
Is there a better way to achieve this?
How can this be done with reduce function of clojure
First, I decided to make more idiomatic versions of your code, so I could really see what it was doing:
;; idiomatic naming
;; no need to rewrite count and filter for this code
;; putting item and collection in idiomatic argument order
(defn count-of
  "Returns how many elements of a-seq are equal to elem."
  [elem a-seq]
  (->> a-seq
       (filter (fn [x] (= elem x)))
       count))
;; idiomatic naming
;; putting item and collection in idiomatic argument order
;; actually used the elem-to-remove argument
(defn filtered-seq
  "Returns a lazy sequence of the elements of a-seq that differ from
  elem-to-remove."
  [elem-to-remove a-seq]
  (filter (fn [x] (not= elem-to-remove x)) a-seq))
;; idiomatic naming
;; if you want a set, use a set from the beginning
;; destructuring rather than repeated usage of first
;; use rest to recur when the first item is guaranteed to be dropped
(defn idiomatic-parity
  "Returns the set of elements that occur an odd number of times in a-seq."
  [a-seq]
  (loop [result-set  #{}
         current-seq a-seq]
    (if (empty? current-seq)
      result-set
      (let [[elem & others] current-seq]
        (recur (if (odd? (count-of elem current-seq))
                 (conj result-set elem)
                 result-set)
               ;; the head is already gone, so only the rest needs filtering
               (filtered-seq elem others))))))
Next, as requested, a version that uses reduce to accumulate the result:
;; mapcat allows us to return 0 or more results for each input
(defn reducing-parity
  "Returns the set of elements that occur an odd number of times in a-seq.
  Occurrences are tallied with reduce; mapcat then emits zero or one result
  per tallied element."
  [a-seq]
  (let [tally (reduce (fn [counts item]
                        (assoc counts item (inc (get counts item 0))))
                      {}
                      a-seq)]
    (set (mapcat (fn [[item n]]
                   (when (odd? n) [item]))
                 tally))))
But, reading over this, I notice that the reduce is just frequencies, a built in clojure function. And my mapcat was really just a hand-rolled keep, another built in.
(defn most-idiomatic-parity
  "Returns the set of elements that occur an odd number of times in a-seq,
  using the built-in frequencies and keep."
  [a-seq]
  (->> (frequencies a-seq)
       (keep (fn [[item n]]
               (when (odd? n) item)))
       set))
In Clojure we can refine our code, and as we recognize places where our logic replicates the built in functionality, we can simplify the code and make it more clear. Also, there is a good chance the built in is better optimized than our own work-alikes.
Is there a better way to achieve this?
(defn parity
  "Returns the set of elements that occur an odd number of times in coll."
  [coll]
  (set (for [[x n] (frequencies coll)
             :when (odd? n)]
         x)))
For example,
(parity [1 1 1 2 1 2 1 3])
;#{1 3}
How can this be done with reduce function of clojure.
We can use reduce to rewrite frequencies:
(defn frequencies
  "Returns a map from each distinct element of coll to the number of times it
  appears, built with reduce."
  [coll]
  (reduce (fn [counts x]
            (update-in counts [x] (fnil inc 0)))
          {}
          coll))
... and again to implement parity in terms of it:
(defn parity
  "Returns the set of elements that occur an odd number of times in coll,
  folding the frequency map with reduce."
  [coll]
  (reduce (fn [odd-ones [x n]]
            (if (even? n)
              odd-ones
              (conj odd-ones x)))
          #{}
          (frequencies coll)))

Grouping words and more

I'm working on a project to learn Clojure in practice. I'm doing well, but sometimes I get stuck. This time I need to transform sequence of the form:
[":keyword0" "word0" "word1" ":keyword1" "word2" "word3"]
into:
[[:keyword0 "word0" "word1"] [:keyword1 "word2" "word3"]]
I'm trying for at least two hours, but I know not so many Clojure functions to compose something useful to solve the problem in functional manner.
I think that this transformation should include some partition, here is my attempt:
(partition-by (fn [x] (.startsWith x ":")) *1)
But the result looks like this:
((":keyword0") ("word1" "word2") (":keyword1") ("word3" "word4"))
Now I should group it again... I doubt that I'm doing right things here... Also, I need to convert strings (only those that begin with :) into keywords. I think this combination should work:
(keyword (subs ":keyword0" 1))
How to write a function which performs the transformation in most idiomatic way?
Here is a high performance version, using reduce
(reduce (fn [acc next]
(if (.startsWith next ":")
(conj acc [(-> next (subs 1) keyword)])
(conj (pop acc) (conj (peek acc)
next))))
[] data)
Alternatively, you could extend your code like this
(->> data
(partition-by #(.startsWith % ":"))
(partition 2)
(map (fn [[[kw-str] strs]]
(cons (-> kw-str
(subs 1)
keyword)
strs))))
what about that:
(defn group-that
  "Splits the sequence of strings arg into a vector of vectors, starting a new
  group at every string that begins with \":\". The strings themselves are
  left unchanged. Returns nil for empty input."
  [arg]
  (when (not-empty arg)
    (let [step (fn [[groups current] s]
                 (if (.startsWith s ":")
                   ;; close the running group (if any) and open a new one
                   [(if (not-empty current) (conj groups current) groups)
                    (vector s)]
                   [groups (conj current s)]))
          [groups current] (reduce step [[] []] arg)]
      (conj groups current))))
Just 1x iteration over the Seq and without any need of macros.
Since the question is already here... This is my best effort:
(def data [":keyword0" "word0" "word1" ":keyword1" "word2" "word3"])
(->> data
(partition-by (fn [x] (.startsWith x ":")))
(partition 2)
(map (fn [[[k] w]] (apply conj [(keyword (subs k 1))] w))))
I'm still looking for a better solution or criticism of this one.
First, let's construct a function that breaks vector v into sub-vectors, the breaks occurring everywhere property pred holds.
(defn breakv-by
  "Breaks the vector v into a vector of sub-vectors, starting a new sub-vector
  at every element for which pred holds. The first sub-vector holds whatever
  precedes the first such element and may be empty."
  [pred v]
  (let [cuts   (keep-indexed (fn [idx x] (when (pred x) idx)) v)
        ;; consecutive pairs of bounds delimit the pieces
        bounds (concat [0] cuts [(count v)])]
    (mapv (fn [[from to]] (subvec v from to))
          (partition 2 1 bounds))))
For our case, given
(def data [":keyword0" "word0" "word1" ":keyword1" "word2" "word3"])
then
(breakv-by #(= (first %) \:) data)
produces
[[] [":keyword0" "word0" "word1"] [":keyword1" "word2" "word3"]]
Notice that the initial sub-vector is different:
It has no element for which the predicate holds.
It can be of length zero.
All the others
start with their only element for which the predicate holds and
are at least of length 1.
So breakv-by behaves properly with data that
doesn't start with a breaking element or
has a succession of breaking elements.
For the purposes of the question, we need to muck about with what breakv-by produces somewhat:
(let [pieces (breakv-by #(= (first %) \:) data)]
(mapv
#(update-in % [0] (fn [s] (keyword (subs s 1))))
(rest pieces)))
;[[:keyword0 "word0" "word1"] [:keyword1 "word2" "word3"]]