Empty children and Clojure zippers - clojure

Why does the last expression return
{:a :foo, :args [{:id :XX}], :id :XX}
instead of:
{:a :foo, :args [], :id :XX}
(require '[clojure.zip :as zip])
;; Builds a clojure.zip zipper over a tree of maps whose children live under :args.
(defn my-zipper [tree]
(zip/zipper
;; branch?: a node counts as a branch when its :args value is truthy.
;; An empty vector is truthy, so {:args []} is still treated as a branch.
(fn branch? [node]
(:args node))
;; children: hands back the :args value as-is, so a branch without
;; children yields [] rather than nil.
(fn children [node]
(:args node))
;; make-node: rebuilds a node by storing the supplied children as a vector.
(fn make-node [node children]
(assoc node :args (vec children)))
tree))
;; Zipper over a single node whose :args vector is empty.
(def z (my-zipper {:a :foo :args []}))
;; Walk the zipper with zip/next, tagging every visited node with :id :XX,
;; and return the node found at the location where zip/end? is true.
(loop [loc z]
(if (zip/end? loc)
(zip/node loc)
(recur
(zip/next
(zip/edit loc #(assoc % :id :XX))))))
It looks like the problem is associated with the fact that traversing with zip/next reveals there are 2 nodes :
(zip/node (zip/next z)) ; => nil
(zip/node (zip/next (zip/next z))) ; => {:a :foo :args []}
Why is that? There is a single node with empty children so there should be only one node, correct?

After looking at the code of clojure.zip/vector-zip I conclude that lack of node's children should be communicated with nil. The empty sequence doesn't work.
So the children function should really be:
(fn children [node]
(seq (:args node)))

Related

Deleting any node inside BST - Clojure

I'm studying algorithms and at the class we were asked to create a BST with structures, I'm trying really hard to create a delete function but the one I created isn't efficient and doesn't work. I searched in google for something similar, but most of the questions are about vectors and not record/structures. If you have any recommendations, I would really appreciate it.
This is the basic creating of the root and node:
(let [bst (make-bst)]
(bst-empty? bst)
(make-bst-node 10))
(defrecord BST [root])
(defn bst?
  "Returns true when `bst` is a BST record, false otherwise."
  [bst]
  (instance? BST bst))
(defn make-bst []
(BST. (ref nil)))
(defn bst-empty?
  "Returns true when the ref stored under :root of `bst` currently holds nil."
  [bst]
  ;; `@` dereferences the root ref. The previous `#(...)` form read as an
  ;; anonymous-function literal, which is never nil.
  (nil? @(:root bst)))
(defrecord BSTnode [data left right])
(defn make-bst-node [val]
(BSTnode. val (ref nil) (ref nil)))
(defn bst-insert!
  "Inserts `val` into `bst`.

  Walks down from the root ref, going left for smaller values and right for
  larger ones, until it reaches a ref holding nil, then stores a new node
  there. Returns the new node on insertion, or nil when `val` is already
  present (duplicates are ignored)."
  [bst val]
  (loop [node (:root bst)]
    (if (nil? @node)
      (dosync
        (ref-set node (make-bst-node val)))
      ;; Compare against the data of the node being visited, not the BST
      ;; wrapper record (which has no :data field).
      (let [data (:data @node)]
        (cond
          (< val data) (recur (:left @node))
          (> val data) (recur (:right @node)))))))
This is the delete function:
;; Asker's attempt at deleting `val` when it sits at the root of `bst`.
;; Only the garbled deref reader macro (`@`) has been restored here; the
;; control flow is kept exactly as posted because the answer below discusses it.
;; NOTE(review): (:left bst) / (:right bst) read fields off the BST wrapper
;; rather than off the root node, and the final branch is unreachable because
;; the earlier branches already cover every non-nil combination.
(defn bst-del [bst val]
  (if (nil? @(:root bst))
    false
    (do
      (if (= (:data @(:root bst)) val)
        (if (nil? (and (:right bst) (:left bst)))
          (dosync
            (ref-set (:root bst) nil))
          (if (not (nil? (:right bst)))
            (dosync
              (ref-set (:root bst) @(:right bst)))
            (if (not (nil? (:left bst)))
              (dosync
                (ref-set (:root bst) @(:left bst)))
              (if (not (nil? (and (:right bst) (:left bst))))
                (dosync
                  (ref-set (:root bst) @(:left bst))
                  (ref-set (:root bst) (:right bst))) false))))))))
;; Asker's attempt at descending the tree looking for `val`.
;; Only the garbled deref reader macro (`@`) has been restored; the logic is
;; kept as posted.
;; NOTE(review): (true? bst-del) tests the function object itself instead of
;; calling it, so that branch can never be taken.
(defn node-del [bst val]
  (loop [node @(:root bst)]
    (if (nil? node)
      false
      (if (true? bst-del)
        (println "somthing got deleted")
        (if (< val (:data node))
          (recur @(:left node))
          (recur @(:right node)))))))
I tried to search in google but all the function or examples were for maps and vectors, not my case, as well as, reading theoretical material about the subject and references from different languages.
This code of yours seems overly complicated and hard to debug (or even to understand as an algorithm).
I would propose implementing this recursive algorithm for deletion, which works quite nice with that mutable structure of yours:
Node delete(root : Node, z : T):
if root == null
return root
if z < root.key
root.left = delete(root.left, z)
else if z > root.key
root.right = delete(root.right, z)
else if root.left != null and root.right != null
root.key = minimum(root.right).key
root.right = delete(root.right, root.key)
else
if root.left != null
root = root.left
else if root.right != null
root = root.right
else
root = null
return root
so, i would start with the following type defs:
(defrecord Tree [root])
(defn make-tree [root-node]
(->Tree (ref root-node)))
(defrecord Node [data left right])
(defn make-node [data & {:keys [left right]}]
(->Node (ref data)
(ref left)
(ref right)))
first thing we want is insertion + traversal functions for tree creating/debug. let's implement them for Node (and employ in Tree later):
(defn insert-node
  "Inserts `item` below `node` and returns the subtree root.

  A nil `node` produces a fresh node holding `item`. Otherwise the item goes
  into the left child ref when it is smaller than the node's data and into
  the right child ref in every other case (duplicates go right); the same
  `node` is returned after the child ref has been altered in place."
  [{:keys [data left right] :as node} item]
  (if (nil? node)
    (make-node item)
    (dosync
      ;; `@data` dereferences the data ref; alter recurses into the chosen child.
      (alter (if (< item @data) left right) insert-node item)
      node)))
(defn traverse-node
  "Returns the values of the subtree rooted at `node` as an in-order sequence
  (left subtree, node data, right subtree), or nil when `node` is nil."
  [node]
  (when-some [{:keys [data left right]} node]
    ;; Each field is a ref, so dereference before recursing or emitting.
    (concat (traverse-node @left)
            [@data]
            (traverse-node @right))))
user> (reduce insert-node nil [3 1 2])
;; {:data #<Ref@1a28bd3f: 3>,
;; :left
;; #<Ref@514133ef:
;; {:data #<Ref@5617f393: 1>,
;; :left #<Ref@637de49b: nil>,
;; :right
;; #<Ref@6efa5317:
;; {:data #<Ref@14ef556b: 2>,
;; :left #<Ref@7fe0e031: nil>,
;; :right #<Ref@5a16bba5: nil>}>}>,
;; :right #<Ref@4eec6b9f: nil>}
user> (traverse-node (reduce insert-node nil [3 1 2]))
;; (1 2 3)
so, this seems to work ok.
Next, we will implement the deletion algorithm.
there's an utility function minimum in the aforementioned algorithm, so we start with that one:
(defn minimum-node
  "Returns the smallest value in the subtree rooted at `node` by following
  left child refs until one holds nil. Returns nil for a nil `node`."
  [{:keys [data left right] :as node}]
  (cond
    (nil? node)  nil
    (nil? @left) @data          ; no left child: this node is the minimum
    :else        (recur @left)))
user> (minimum-node (reduce insert-node nil (shuffle (range 10 20))))
;; 10
after that the deletion implementation looks trivial:
(defn del-node
  "Deletes `item` from the subtree rooted at `node` and returns the new
  subtree root (nil when the subtree becomes empty or `node` is nil).

  Smaller/larger items are removed from the left/right child ref in place.
  When the matching node has two children, its data is overwritten with the
  minimum of the right subtree and that minimum is then deleted from the
  right subtree. With a single child the child replaces the node; with no
  children the result is nil."
  [node item]
  (when-some [{:keys [data left right]} node]
    (cond
      (< item @data) (dosync (alter left del-node item)
                             node)
      (> item @data) (dosync (alter right del-node item)
                             node)
      (and (some? @left) (some? @right))
      (let [m (-> right deref minimum-node)]
        (dosync
          (ref-set data m)
          (alter right del-node m))
        node)
      (some? @left)  @left
      (some? @right) @right)))
user> (traverse-node (del-node
(reduce insert-node nil (shuffle (range 10 20)))
13))
;;=> (10 11 12 14 15 16 17 18 19)
this seems to be working mutable algorithm.
Let's then go back to the Tree structure. I would start with the BST protocol, to be used by both Node and Tree:
;; Tree operations implemented by both Node and Tree (see the
;; extend-protocol forms that follow).
(defprotocol BST
;; in-order traversal of the values
(traverse [self])
;; add item
(insert [self item])
;; smallest value
(minimum [self])
;; remove item
(del [self item]))
(extend-protocol BST
Node
(traverse [self]
(traverse-node self))
(insert [self item]
(insert-node self item))
(minimum [self]
(minimum-node self))
(del [self item]
(del-node self item)))
;; Tree delegates every operation to the node held in its :root ref.
(extend-protocol BST
  Tree
  (traverse [self]
    (traverse @(:root self)))
  (insert [self item]
    (let [root (:root self)]
      (dosync
        ;; An empty tree gets a brand-new root node; otherwise insert below it.
        (if (some? @root)
          (alter root insert item)
          (ref-set root (make-node item))))
      self))
  (minimum [self]
    (minimum @(:root self)))
  (del [self item]
    (let [root (:root self)]
      (dosync
        (when (some? @root)
          (alter root del item)))
      self)))
and that's it. Now just use it:
user> (reduce del
(reduce insert (make-tree nil) [1 2 3 4])
[2 4])
;; {:root
;; #<Ref@273f7854:
;; {:data #<Ref@67d4f4d0: 1>,
;; :left #<Ref@26329ec3: nil>,
;; :right
;; #<Ref@5636f9f8:
;; {:data #<Ref@3cd02119: 3>,
;; :left #<Ref@7d62fb13: nil>,
;; :right #<Ref#1f25eeb7: nil>}>}>}
After the class, I understood how to write the delete function, and I implemented it in a similar way inside a dictionary binary tree. It is really similar; the only difference is the "key" value, but the logic is the same.
(defn dict-find-leftmost-node
  "Follows :left child refs starting at the node ref `start-node` and returns
  the first node ref whose :left ref holds nil."
  [start-node]
  (loop [node start-node]
    ;; `node` is a ref to a node; its :left field is itself a ref.
    (if (nil? @(:left @node))
      node
      (recur (:left @node)))))
(defn dict-remove!
  "Removes the entry for `key` from `dict`, doing nothing (and returning nil)
  when dict-get-node finds no node for it.

  Cases handled on the node ref returned by dict-get-node:
  - leaf: the ref is set to nil;
  - only a right child: the right subtree is pulled up;
  - only a left child: the left subtree is pulled up;
  - two children: the left subtree is attached under the leftmost node of the
    right subtree, then the right subtree is pulled up."
  [dict key]
  (let [node-to-remove (dict-get-node dict key)]
    (when (some? node-to-remove)
      (cond
        (dict-node-leaf? node-to-remove)
        (dosync
          (ref-set node-to-remove nil))

        (nil? @(:left @node-to-remove))
        (dosync
          (ref-set node-to-remove @(:right @node-to-remove)))

        (nil? @(:right @node-to-remove))
        (dosync
          (ref-set node-to-remove @(:left @node-to-remove)))

        :else
        (let [leftmost-node (dict-find-leftmost-node (:right @node-to-remove))]
          (dosync
            ;; Hang the left subtree off the smallest node on the right
            ;; before the right subtree replaces the removed node.
            (ref-set (:left @leftmost-node) @(:left @node-to-remove))
            (ref-set node-to-remove @(:right @node-to-remove))))))))
(defn dict-node-leaf?
  "Returns true when the node held by the ref `node` has nil in both its
  :left and :right child refs."
  [node]
  (and (nil? @(:left @node))
       (nil? @(:right @node))))

In a tree, how do I find paths to tree nodes that have children with leaves?

Basically, I am trying to implement this algorithm, though maybe there's a better way to go about it.
starting at the root
check each child of current node for children with leafs (child of child)
if any child-of-child nodes of the current node have leafs, record path to current node (not to child) and do not continue down that path any farther.
else continue DFS
non-functional pseudo code:
def find_paths(node):
for child in node.children:
if child.children.len() == 0
child_with_leaf = true
if child_with_leaf
record path to node
else
for child in node.children
find_paths(child)
For example:
:root
|- :a
| +- :x
| |- :y
| | +- :t
| | +- :l2
| +- :z
| +- :l3
+- :b
+- :c
|- :d
| +- :l4
+- :e
+- :l5
The result would be:
[[:root :a]
[:root :b :c]]
Here is my crack at it in clojure:
(defn atleast-one?
  "Returns true when `(some pred coll)` produces a non-nil result, false otherwise."
  [pred coll]
  (some? (some pred coll)))
; updated with erdos's answer
(defn children-have-leaves?
[loc]
(some->> loc
(iterate z/children)
(take-while z/branch?)
(atleast-one? (comp not empty? z/children))))
(defn find-paths
[tree]
(loop [loc (z/vector-zip tree)
ans nil]
(if (z/end? loc)
ans
(recur (z/next loc)
(cond->> ans
(children-have-leaves? loc)
(cons (->> loc z/down z/path (map z/node)))))))
)
(def test-data2
[:root [:a [:x [:y [:t [:l2]]] [:z [:l3]]]] [:b [:c [:d [:l4]] [:e [:l5]]]]]
)
Update: fixed the crash with erdos' answer below, but I think there's still a problem with my code since this prints every path and not the desired ones.
I assume you have referenced my previous answer related to zipper. But please note that my previous answer uses vector-zip as is and hence you have to navigate it like a vector-zip - which you may have to wrap your head around how the two cursors work. To simplify the navigation, I suggest you create your own zipper for your tree structure. I.e.
(defn my-zipper
  "Zipper over hiccup-style trees of the form [tag child1 child2 ...].

  A node is a branch when it is a vector with a truthy tag and at least one
  child; its children are the elements after the tag."
  [root]
  (z/zipper
    ;; branch?
    (fn [x]
      (when (vector? x)
        (let [[n & xs] x] (and n (-> xs count zero? not)))))
    ;; children
    (fn [[n & xs]] xs)
    ;; make-node: splice the children back in after the tag so rebuilt nodes
    ;; keep the [tag & children] shape (a plain [n xs] would nest them one
    ;; level deeper).
    (fn [[n & _] xs] (into [n] xs))
    root))
then the solution will be similar to my other answer:
(def test-data2
[:root
[:a
[:x
[:y
[:t [:l2]]]
[:z [:l3]]]]
[:b
[:c
[:d [:l4]]
[:e [:l5]]]]])
(->> test-data2
my-zipper
(iterate z/next)
(take-while (complement z/end?))
(filter (comp children-with-leaves? z/node))
(map #(->> % z/path (map z/node)))
set)
;; => #{(:root :a :x) (:root :a :x :y) (:root :b :c)}
where the main logic is simplified to:
(defn children-with-leaves?
  "Given a node shaped [tag & children], returns true when some child has
  nothing after its own first element (i.e. is a leaf), otherwise nil."
  [[_ & children]]
  (some #(nil? (next %)) children))
The exception comes from your children-have-leaves? function.
The (not (empty? z/children)) expression fails, because z/children is a function, however, empty? must be invoked on a collection.
What you need is a predicate that returns true if a node has children, like: (fn [x] (not (empty? (z/children x)))) or shorter: (comp not empty? z/children)
The correct implementation:
(defn children-have-leaves?
[loc]
(some->> loc
(iterate z/children)
(take-while z/branch?)
(atleast-one? (comp not empty? z/children))))
If you want to process tree-like data structures, I would highly recommend the tupelo.forest library.
I don't understand your goal, though. Nodes :a and :c in your example are not equally distant from the closest leaf.
Actually, I just noticed that the tree in your example is different than the tree in your code attempt. Could you please update the question to make them consistent?
Here is an example of how you could do it:
(dotest ; find the grandparent of each leaf
(hid-count-reset)
(with-forest (new-forest)
(let [data [:root
[:a
[:x
[:y [:t [:l2]]]
[:z [:l3]]]]
[:b [:c
[:d [:l4]]
[:e [:l5]]]]]
root-hid (add-tree-hiccup data)
leaf-paths (find-paths-with root-hid [:** :*] leaf-path?)
grandparent-paths (mapv #(drop-last 2 %) leaf-paths)
grandparent-tags (set
(forv [path grandparent-paths]
(let [path-tags (it-> path
(mapv #(hid->node %) it)
(mapv #(grab :tag %) it))]
path-tags)))]
(is= (format-paths leaf-paths)
[[{:tag :root} [{:tag :a} [{:tag :x} [{:tag :y} [{:tag :t} [{:tag :l2}]]]]]]
[{:tag :root} [{:tag :a} [{:tag :x} [{:tag :z} [{:tag :l3}]]]]]
[{:tag :root} [{:tag :b} [{:tag :c} [{:tag :d} [{:tag :l4}]]]]]
[{:tag :root} [{:tag :b} [{:tag :c} [{:tag :e} [{:tag :l5}]]]]]])
(is= grandparent-tags
#{[:root :a :x]
[:root :a :x :y]
[:root :b :c]} ))))

How to parse an heterogeneous tree in clojure

I'm working on some Clojure code, in which I have a tree of entities represented as a nested vector like this:
;; Sample tree: strings are leaves; symbols and single-entry maps mark nodes.
;; (The outer vector's closing bracket was missing.)
(def tree '[SYMB1 "a" [SYMB2 {:k1 [SYMB1 "b" "c"]} "x"] {:k2 ["b" "c"]}])
here, leaves are strings and nodes can be either symbols or maps. Each map having a key associated to a subtree or to a collection of leaves.
How can I render the tree above to get:
[SYMB1 "a" [SYMB2 [SYMB1 "b" "c"] "x"] "b" "c"]
It looks like you just want to throw away :k1 and :k2 whenever you encounter a map (and assume each map has only 1 key). You can do this easily using postwalk:
(ns ...
(:require
[clojure.walk :as walk]
))
(def tree
'[SYMB1 "a" [SYMB2 {k1 [SYMB1 "b" "c"]} "x"] {k2 ["b" "c"]} ])
(def desired
'[SYMB1 "a" [SYMB2 [SYMB1 "b" "c"] "x"] ["b" "c"]])
(let [result (walk/postwalk
(fn [item]
(cond
(map? item) (do
(when-not (= 1 (count item))
(throw (ex-info "Must be only 1 item" {:item item})))
(val (first item)))
:else item ))
tree) ]
(is= desired result))
result => [SYMB1 "a" [SYMB2 [SYMB1 "b" "c"] "x"] ["b" "c"]]
Note that the results for :k2 are still wrapped in a vector, unlike your original question. I'm not sure if that is what you meant or not.
Using clojure.spec:
(ns tree
(:require [clojure.spec.alpha :as s]))
(def tree '[SYMB1 "a" [SYMB2 {:k1 [SYMB1 "b" "c"]} "x"] {:k2 ["b" "c"]}])
(s/def ::leaf string?)
(s/def ::leafs (s/coll-of ::leaf))
(s/def ::map
(s/and
map?
(s/conformer
(fn [m]
(let [[_ v] (first m)]
(s/conform (s/or
:node ::node
:leafs ::leafs) v))))))
(s/def ::node (s/and
(s/or :symbol ::symbol
:leaf ::leaf
:map ::map)
(s/conformer second)))
(s/def ::symbol
(s/and
(s/cat :name
symbol?
:children
(s/* ::node))
(s/conformer (fn [parsed]
(let [{:keys [name children]} parsed]
(reduce
(fn [acc v]
(case (first v)
:leafs (into acc (second v))
:node (conj acc (second v))
(conj acc v)))
[name]
children))))))
(s/conform ::node tree) ;; [SYMB1 "a" [SYMB2 [SYMB1 "b" "c"] "x"] "b" "c"]
I found a solution using postwalk and some helper functions:
(defn clause-coll?
  "True when `item` is a vector whose first element is a symbol; false otherwise."
  [item]
  (if (vector? item)
    (symbol? (first item))
    false))
(defn render-map[amap]
(let [[[_ v]] (vec amap)]
(if (clause-coll? v)
[v]
v)))
(defn render-item[item]
(if (map? item)
(render-map item)
[item]))
(defn render-level [[op & etc]]
(->> (mapcat render-item etc)
(cons op)))
(defn parse-tree[form]
(clojure.walk/postwalk #(if (clause-coll? %)
(render-level %)
%)
form))
Michiel's clojure.spec solution was clever and Alan's clojure.walk solution was concise.
Without using any libraries and walking the tree directly:
(def tree
'[SYMB1 "a"
[SYMB2 {:k1 [SYMB1 "b" "c"]}
"x"]
{:k2 ["b" "c"]}])
(defn get-new-keys
  "Computes the key path to visit after `current-keys` in `source-tree`.

  Descends (appends index 0) when `current-node` is a vector headed by a
  symbol. Otherwise moves to the next sibling if `source-tree` has a truthy
  value there; failing that, backtracks one level and tries again. At the
  top level with no further sibling, `current-keys` is returned unchanged."
  [source-tree current-keys current-node]
  (if (and (vector? current-node) (symbol? (first current-node)))
    (conj current-keys 0)
    (let [last-index   (dec (count current-keys))
          forward-keys (update-in current-keys [last-index] inc)]
      (cond
        (get-in source-tree forward-keys) forward-keys
        (= 1 (count current-keys))        current-keys
        :else (recur source-tree
                     (subvec current-keys 0 last-index)
                     current-node)))))
(defn convert-tree
  "Rebuilds `source-tree` (a nested vector) into the target shape, one key
  path at a time.

  At each path a map is replaced by its first value, a vector becomes an
  empty vector to be filled by later steps, and anything else is copied.
  The walk ends when get-new-keys returns the path it was given."
  ([source-tree] (convert-tree source-tree [0] []))
  ([source-tree keys target-tree]
   (let [init-node       (get-in source-tree keys)
         node            (cond
                           (map? init-node)    (first (vals init-node))
                           (vector? init-node) []
                           :else               init-node)
         new-target-tree (assoc-in target-tree keys node)
         new-keys        (get-new-keys source-tree keys init-node)]
     (if (= new-keys keys)
       new-target-tree
       (recur source-tree new-keys new-target-tree)))))
user=> (convert-tree tree)
[SYMB1 "a" [SYMB2 [SYMB1 "b" "c"] "x"] ["b" "c"]]

clojure find arbitrarily nested key

Is there an easy way in Clojure (maybe using specter) to filter collections depending on whether an arbitrarily nested key with a known name contains an element?
Ex. :
(def coll [{:res [{:a [{:thekey [
"the value I am looking for"
...
]
}
]}
{:res ...}
{:res ...}
]}])
Knowing that :a could have a different name, and that :thekey could be nested somewhere else.
Let's say I would like to do :
#(find-nested :thekey #{"the value I am looking for"} coll) ;; returns a vector containing the first element in coll (and maybe others)
use zippers.
in repl:
user> coll
[{:res [{:a [{:thekey ["the value I am looking for"]}]} {:res 1} {:res 1}]}]
user> (require '[clojure.zip :as z])
nil
user> (def cc (z/zipper coll? seq nil coll))
#'user/cc
user> (loop [x cc]
(if (= (z/node x) :thekey)
(z/node (z/next x))
(recur (z/next x))))
["the value I am looking for"]
update:
this version is flawed, since it doesn't care about :thekey being the key in a map, or just keyword in a vector, so it would give unneeded result for coll [[:thekey [1 2 3]]]. Here is an updated version:
(defn lookup-key
  "Depth-first search of the nested collection `coll` for the first map in
  which key `k` has a truthy value; returns that value, or nil if none is found.

  Only map nodes are queried, so `k` appearing as a plain element of a
  vector or set is not reported as a hit."
  [k coll]
  (let [coll-zip (z/zipper coll?
                           ;; Map entries are skipped in favour of their
                           ;; values; `seq` turns an empty collection into
                           ;; nil so the zipper sees no children for it.
                           #(seq (if (map? %) (vals %) %))
                           nil
                           coll)]
    (loop [x coll-zip]
      (when-not (z/end? x)
        (let [node (z/node x)]
          (if-let [v (and (map? node) (k node))]
            v
            (recur (z/next x))))))))
in repl:
user> (lookup-key :thekey coll)
["the value I am looking for"]
user> (lookup-key :absent coll)
nil
lets say we have the same keyword somewhere in a vector in a coll:
(def coll [{:res [:thekey
{:a [{:thekey ["the value I am looking for"]}]}
{:res 1} {:res 1}]}])
#'user/coll
user> (lookup-key :thekey coll)
["the value I am looking for"]
which is what we need.

How to parse xml and get an vector for some attributes on an element

I know how to extract one attribute using zip-xml/attr, but how to extract multiple attributes?
e.g I have the following
<table>
<column name="col1" type="varchar" length="8"/>
<column name="col2" type="varchar" length="16"/>
<column name="col3" type="int" length="16"/>
</table>
And the expected result is. A silly way is to call zip-xml/attr for each attribute, but is there any elegant way to do that?
[["co11" "varchar" 8] [["co12" "varchar" 16] [["co13" "int" 16]
My advice is to use a tree-walking function to extract the interesting data from the XML tree. clojure.walk has several of these, but here I use tree-seq from core clojure to just produce a seq of nodes and work on that. This function takes two functions - a branch? predicate which checks if a node can have children and a children function which gets them. I use :content for both, as tags with no nested tags produce nil, which is a falsey value and so it works also as a predicate.
(->> (clojure.xml/parse "res/doc.xml") ;;source file for your xml
(tree-seq :content :content) ;; Produce a seq by walking the tree
(filter #(= :column (:tag %))) ;;Take only :column tags
(mapv (comp vec vals :attrs)))
;;Collect the values of the :attrs maps into vectors
;;and collect those into a vector with mapv
Your desired output had unmatched square brackets, but I assume it should be like
[["col1" "varchar" "8"] ["col2" "varchar" "16"] ["col3" "int" "16"]]
which was my return value. However, this is potentially brittle - you're relying on the maps returned by clojure.xml/parse preserving the ordering of the attributes in the XML in order to know what the data means. That's not really part of the contract of maps. As an implementation detail it creates clojure.lang.PersistentStructMaps which apparently do have this feature, but it might not always be so.
Alternatively you could use just (mapv :attrs) to keep the whole of the map in there.
The right solution depends on how large and complex the XML is and to some extent, what you know about its structure. If it needs to be very generic, then you need to have quite a lot of logic to navigate the nodes etc. However, if it is a known format and you know what nodes you are interested in, its pretty straight-forward.
I used clojure.zip to create a zipper from the XML file and then use clojure.data.zip.xml to extract the nodes/paths I was interested in. I then defined simple helper functions to process specific nodes. This was pretty much my first bit of clojure and I've not yet gone back to it to re-factor it and refine/clarify some of my very rough clojure idioms based on what I've learnt since, but in the spirit of an example being worth 1000 words, here it is -
(ns arcis.models.nessus
(:use [taoensso.timbre :only [trace debug info warn error fatal]])
(:require [arcis.util :as util]
[arcis.models.db :as db]
[clojure.java.io :as io]
[clojure.xml :as xml]
[clojure.zip :as zip]
[clojure.data.zip.xml :as zx]))
(def nessus-host-keys [:hostname :host_fqdn
:system_type :operating_system
:operating_system_unsupported])
(def used-nessus-host-keys (conj nessus-host-keys
:host_start :host_end
:items :traceroute_hop_0 :traceroute_hop_1
:traceroute_hop_2 :traceroute_hop_3
:traceroute_hop_4 :traceroute_hop_5
:traceroute_hop_6 :traceroute_hop_7
:traceroute_hop_8 :traceroute_hop_9
:traceroute_hop_10 :traceroute_hop_11
:traceroute_hop_12 :traceroute_hop_13
:traceroute_hop_14 :traceroute_hop_15
:traceroute_hop_16 :traceroute_hop_17
:host_ip :patch_summary_total_cves
:cpe_0 :cpe_1 :cpe_2 :cpe_3 :cpe_4 :cpe_5
:cpe_6 :cpe_7 :cpe_8 :cpe_9))
(def nessus-item-keys [:port :svc_name :protocol :severity :plugin_id
:plugin_output])
(def used-nessus-item-keys (conj nessus-item-keys
:plugin_details
:plugin_name
:plugin_family))
(def nessus-plugin-keys [:plugin_id :plugin_name :plugin_family :fname
:script_version :plugin_type :exploitability_ease
:vuln_publication_date :cvss_temporal_data
:solution :cvss_temporal_score :risk_factor
:description :cvss_vector :synopsis
:patch_publication_date :exploit_available
:plugin_publication_date :plugin_modification_date
:cve :bid :exploit_framework_canvas :edb_id
:exploit_framework_metasploit :exploit_framework_core
:metasploit_name :canvas_package :osvdb :cwe
:cvss_temporal_vector :cvss_base_score :cpe
:exploited_by_malware])
(def used-nessus-plugin-keys (conj nessus-plugin-keys
:xref :see_also :cert
:attachment :iava :stig_severity :hp
:secunia :iawb :msft))
;; Switch controlling whether leftover keys are reported by log-unprocessed.
(def show-unprocessed true)

(defn log-unprocessed
  "Prints \"Unprocessed <title>: <vls>\" when show-unprocessed is truthy and
  `vls` is non-empty; prints nothing otherwise. Always returns nil."
  [title vls]
  (when (and show-unprocessed (seq vls))
    (println (str "Unprocessed " title ": " vls))))
;;; parse nessus report
(defn parse-xref
  "Wraps the first :content item of the element `xref` in a map under :xref."
  [xref]
  {:xref (-> xref :content first)})
(defn parse-see-also
  "Wraps the first :content item of the element `see-also` in a map under :see_also."
  [see-also]
  {:see_also (-> see-also :content first)})
(defn parse-plugin [plugin]
{(util/db-keyword (name (:tag plugin))) (first (:content plugin))})
(defn parse-contents [cont]
(let [xref (mapv parse-xref (filter #(= (:tag %) :xref) cont))
see-also (mapv parse-see-also (filter #(= (:tag %) :see-also) cont))
details (reduce merge {}
(map parse-plugin
(remove #(or (= (:tag %) :xref)
(= (:tag %) :see-also)) cont)))]
(assoc details
:see_also see-also
:xref xref)))
(defn fix-item-keywords [item]
(let [ks (keys item)]
(into {}
(for [k ks]
[(util/db-keyword (name k))
(k item)]))))
(defn parse-item [item]
(let [attrs (fix-item-keywords (:attrs item))
contents (parse-contents (:content item))]
(assoc attrs
:plugin_output (:plugin_output contents)
:plugin_details (assoc (dissoc contents :plugin_output)
:plugin_id (:plugin_id attrs)
:plugin_family (:plugin_family attrs)))))
(defn parse-properties [props]
(into {}
(for [p props]
[(util/db-keyword (:name (:attrs p)))
(first (:content p))])))
(defn parse-host [h]
(let [items (map first (zx/xml-> h :ReportItem))
properties (:content (first (zx/xml1-> h :HostProperties)))]
(assoc (parse-properties properties)
:hostname (zx/attr h :name)
:items (mapv parse-item items))))
(defn parse-hosts [hosts]
(mapv parse-host hosts))
(defn parse-file [f]
(let [root (zip/xml-zip (xml/parse (io/file f)))
report-xml (zx/xml1-> root :Report)
hosts (zx/xml-> report-xml :ReportHost)]
{:report_name (zx/attr report-xml :name)
:policy (zx/text (zx/xml1-> root :Policy :policyName))
:hosts (parse-hosts hosts)}))
;;; insert nessus records into db
(defn mk-host-rec
  "Builds the host record for `scan-id` from the parsed `host` map.

  Asks db/get-sequence-nextval for the next host_seq value and uses it as
  :id. Returns nil when that call reports an error."
  [scan-id host]
  (let [[id err] (db/get-sequence-nextval "host_seq")]
    (when (nil? err)
      (-> (util/build-map host nessus-host-keys)
          (assoc :ipv4 (:host_ip host)
                 :scan_start (util/from-nessus-date (:scan_start host))
                 :scan_end (util/from-nessus-date (:scan_end host))
                 :total_cves (:patch_summary_total_cves host)
                 :id id
                 :scan_id scan-id)))))
(defn insert-patches
  "Passes each record in `p`, in order, to db/insert-nessus-host-patch.
  Returns nil."
  [p]
  (doseq [patch p]
    (db/insert-nessus-host-patch patch)))
(defn insert-host-patch [id host]
(let [p-keys (filter #(re-find #"patch_summary_*" %) (map name (keys host)))
recs (map (fn [s]
{:id (first (db/get-sequence-nextval "patch_seq"))
:host_id id
:summary ((keyword (str "patch_summary_txt_" s)) host)
:cve_num ((keyword (str "patch_summary_cve_num_" s)) host)
:cves ((keyword (str "patch_summary_cves_" s)) host)})
(filter seq
(map #(second (re-find #"patch_summary_txt_(.*)" %))
p-keys)))]
(insert-patches recs)
(util/remove-keys host (map keyword p-keys))))
(defn mk-item-rec [host-id item]
(let [[id err] (db/get-sequence-nextval "item_seq")]
(assoc (util/build-map item nessus-item-keys)
:host_id host-id
:id id)))
(defn insert-item [host-id item]
(let [rec (mk-item-rec host-id item)
not-done (keys (util/remove-keys item used-nessus-item-keys))]
(log-unprocessed "Item Keys" not-done)
(db/insert-nessus-report-item rec)
(:plugin_id item)))
(defn mk-plugin-rec
  "Builds the plugin record from the :plugin_details of `item`.

  Keeps the keys listed in nessus-plugin-keys, reports any detail keys not in
  used-nessus-plugin-keys through log-unprocessed, and passes the four date
  fields through util/from-nessus-date."
  [item]
  (let [details  (:plugin_details item)
        rec      (util/build-map details nessus-plugin-keys)
        not-used (keys (util/remove-keys details used-nessus-plugin-keys))]
    (log-unprocessed "Plugin Keys" not-used)
    (assoc rec
           :vuln_publication_date (util/from-nessus-date
                                    (:vuln_publication_date rec))
           :patch_publication_date (util/from-nessus-date
                                     (:patch_publication_date rec))
           :plugin_publication_date (util/from-nessus-date
                                      (:plugin_publication_date rec))
           ;; Reads :plugin_modification_date; the earlier misspelled key
           ;; never matched anything in nessus-plugin-keys.
           :plugin_modification_date (util/from-nessus-date
                                       (:plugin_modification_date rec)))))
(defn insert-xref [plugin-id xrefs]
(when (seq xrefs)
(let [xref {:id (first (db/get-sequence-nextval "xref_seq"))
:plugin_id plugin-id
:xref (:xref (first xrefs))}]
(db/insert-nessus-xref xref)
(recur plugin-id (rest xrefs)))))
(defn insert-see-also [plugin-id see-also]
(when (seq see-also)
(let [sa {:id (first (db/get-sequence-nextval "ref_seq"))
:plugin_id plugin-id
:reference (:see_also (first see-also))}]
(db/insert-nessus-ref sa)
(recur plugin-id (rest see-also)))))
(defn insert-plugin [item]
(let [rec (mk-plugin-rec item)
xref (:xref (:plugin_details item))
see-also (:see_also (:plugin_details item))]
(if (seq xref)
(insert-xref (:plugin_id rec) xref))
(if (seq see-also)
(insert-see-also (:plugin_id rec) see-also))
(db/upsert-nessus-plugin rec)))
(defn insert-items [host-id items plugin-set]
(if (empty? items)
plugin-set
(let [p (insert-item host-id (first items))]
(if-not (contains? plugin-set p)
(insert-plugin (first items)))
(recur host-id (rest items) (conj plugin-set p)))))
(defn insert-host [scan-id host plugin-set]
(if-let [h-rec (mk-host-rec scan-id host)]
(let [[v err] (db/insert-nessus-host h-rec)
items (:items host)]
(if (nil? err)
(let [host2 (insert-host-patch (:id h-rec) host)]
(log-unprocessed "Host Keys" (keys (util/remove-keys
host2 used-nessus-host-keys)))
(insert-items (:id h-rec) items plugin-set))
plugin-set))
plugin-set))
(defn insert-hosts
  "Inserts every host in `hosts` for scan `id`, threading the set of known
  plugin ids through insert-host, and returns the final set. The two-argument
  arity starts from an empty set."
  ([id hosts]
   (insert-hosts id hosts #{}))
  ([id hosts plugins]
   (reduce (fn [seen host]
             (insert-host id host seen))
           plugins
           hosts)))
(defn mk-scan-record [id report]
{:id id
:name (:report_name report)
:scan_dt (util/to-sql-date)
:policy (:policy report)
:entered_dt (util/to-sql-date)})
(defn store-report [update-plugins report]
(let [[id err] (db/get-sequence-nextval "nscan_seq")
scan-rec (mk-scan-record id report)]
(if (nil? err)
(let [[v e] (db/insert-nessus-scan scan-rec)]
(if (nil? e)
(if update-plugins
(let [plugin-list (set (first (db/select-nessus-plugin-ids)))]
[(insert-hosts id (:hosts report) plugin-list) nil])
[(insert-hosts id (:hosts report)) nil])
[v e]))
[id err])))
(defn process-nessus-report [update-plugins filename]
(let [report (parse-file filename)]
(println (str "Report: " (:report_name report)
"\nPolicy: " (:policy report)
"\nHost Records: " (count (:hosts report))))
(store-report update-plugins report)))
Magos's answer using tree-seq is perfectly fine, but there's no reason to abandon zippers; filtering using zippers is more succinct and arguably the "clojure" way. (Note this example uses data.xml ([org.clojure/data.xml "0.0.8"]) instead of clojure.xml.)
(require '[clojure.data.zip.xml :as zf])
(require '[clojure.zip :as z])
(def ex
"<table>
<column name=\"col1\" type=\"varchar\" length=\"8\"/>
<column name=\"col2\" type=\"varchar\" length=\"16\"/>
<column name=\"col3\" type=\"int\" length=\"16\"/>
</table>")
(let [x (z/xml-zip (clojure.data.xml/parse-str ex))]
(->> (zf/xml-> x :column) ;;equivalent to (->> treeseq ... filter)
flatten
(keep :attrs)
(map vals)))
;>>> (("col1" "varchar" "8") ("col2" "varchar" "16") ("col3" "int" "16"))
But the xml-> macro simply applies functions in order, so you can do the following:
(let [x (z/xml-zip (clojure.data.xml/parse-str ex))]
(->> (zf/xml-> x :column #(keep :attrs %))
(map vals)))
;>>> (("col1" "varchar" "8") ("col2" "varchar" "16") ("col3" "int" "16"))