Closeable lazy-seq in Clojure - clojure

I'm trying to create a lazy-seq which is also closeable. What would be the cleanest way to do that in Clojure?
Intended usage (but it's just one example, I can think of more usages for a closeable lazy sequance):
(with-open [lines (file-lines-seq file)]
(consume (map do-stuff-to-line lines)))
Which in this case would be equivalent to:
(with-open [reader io/reader file]
(consume (map do-stuff-to-line (line-seq file))))

Managed to get the intended usage with this wonderful piece of code:
(defn file-lines-seq [file]
(let [reader (clojure.java.io/reader file)
lines-seq (line-seq reader)]
(reify
Closeable
(close [this] (.close reader))
ISeq
(first [this] (.first lines-seq))
(next [this] (.next lines-seq))
(more [this] (.more lines-seq))
(cons [this var1] (.cons lines-seq var1))
(count [this] (.count lines-seq))
(empty [this] (.empty lines-seq))
(equiv [this var1] (.equiv lines-seq var1))
(seq [this] (.seq lines-seq))
)))
If there's a less ugly way to do this, please let me know.

Related

clojure cannot define same function in different namespace? not possible

i have one file "map_reduce2.clj" and another "map_reduce3.clj", the both defin function "map-reduce" themself.
now i want to use namespace of "map_reduce2.clj" in "map_reduce3.clj", but when i press "C-c C-k" in emacs to compile the "map_reduce3.clj",
error happens: "parse-line already refers to: #'chapter12.map-reduce2/parse-line in
namespace: chapter12.map-reduce3" , but this doesn't make any sense.
; map_reduce3.cli
(ns chapter12.map-reduce3
(:use clojure.java.io)
(:require [chapter12.map-reduce2 :as c12]))
(def IGNORE "_")
(defn parse-line [line]
(let [tokens (.split (.toLowerCase line) " ")]
[[IGNORE (count tokens)]]))
(defn average [numbers]
(/ (apply + numbers)
(count numbers)))
(defn reducer [combined]
(average (val (first combined))))
(defn average-line-length [filename]
(c12/map-reduce parse-line reducer (line-seq (reader filename))))
; map_reduce2.clj
(ns chapter12.map-reduce2
(:use clojure.java.io))
(defn combine [mapped]
(->> (apply concat mapped)
(group-by first)
(map (fn [[k v]]
{k (map second v)}))
(apply merge-with conj)))
(defn map-reduce [mapper reducer args-seq]
(->> (map mapper args-seq)
(combine)
(reducer)))
(defn parse-line [line]
(let [tokens (.split (.toLowerCase line) " ")]
(map #(vector % 1) tokens)))
(defn sum [[k v]]
{k (apply + v)})
(defn reduce-parsed-lines [collected-values]
(apply merge (map sum collected-values)))
(defn word-frequency [filename]
(map-reduce parse-line reduce-parsed-lines (line-seq (reader filename))))
images of the the error
This probably means you have dirty REPL state. Maybe you moved the function parse-line from one namespace to the other. I suggest restarting the REPL, or unload parse-line from map-reduce3: How to unload a function from another namespace?.

Where is my Null Pointer Exception - Clojure edition

I am getting the following stacktrace when running the command: lein run "this is the other different thing" "this,different,other"
Stacktrace
Exception in thread "main" java.lang.NullPointerException, compiling:(/private/var/folders/y8/6lt_81xn47d4n2k641z52rg00000gn/T/form-init8328218573408236617.clj:1:125)
at clojure.lang.Compiler.load(Compiler.java:7391)
at clojure.lang.Compiler.loadFile(Compiler.java:7317)
at clojure.main$load_script.invokeStatic(main.clj:275)
at clojure.main$init_opt.invokeStatic(main.clj:277)
at clojure.main$init_opt.invoke(main.clj:277)
at clojure.main$initialize.invokeStatic(main.clj:308)
at clojure.main$null_opt.invokeStatic(main.clj:342)
at clojure.main$null_opt.invoke(main.clj:339)
at clojure.main$main.invokeStatic(main.clj:421)
at clojure.main$main.doInvoke(main.clj:384)
at clojure.lang.RestFn.invoke(RestFn.java:421)
at clojure.lang.Var.invoke(Var.java:383)
at clojure.lang.AFn.applyToHelper(AFn.java:156)
at clojure.lang.Var.applyTo(Var.java:700)
at clojure.main.main(main.java:37)
Caused by: java.lang.NullPointerException
at clojure.string$replace.invokeStatic(string.clj:101)
at clojure.string$replace.invoke(string.clj:75)
at redact.core$redact_doc.invokeStatic(core.clj:12)
at redact.core$redact_doc.invoke(core.clj:7)
at redact.core$_main.invokeStatic(core.clj:54)
at redact.core$_main.doInvoke(core.clj:50)
at clojure.lang.RestFn.invoke(RestFn.java:421)
at clojure.lang.Var.invoke(Var.java:383)
at user$eval5.invokeStatic(form-init8328218573408236617.clj:1)
at user$eval5.invoke(form-init8328218573408236617.clj:1)
at clojure.lang.Compiler.eval(Compiler.java:6927)
at clojure.lang.Compiler.eval(Compiler.java:6917)
at clojure.lang.Compiler.load(Compiler.java:7379)
... 14 more
And here is my code:
(ns redact.core
(:gen-class)
(:require [clojure.java.io :as io]
[clojure.string :as str]
))
(defn redact-doc
;; Reads the file line by line and redacts all the matched words
([target stoplist]
(if (empty? stoplist)
(str/trim target)
(redact-doc (str/replace target (re-pattern (str "\\s(" (first stoplist) ")(\\s|$)")) " REDACTED ") (rest stoplist))))
)
(defn get-target-text
;; Takes a vector of args and returns a String of a text file or and sentances
([args] (get-target-text args ""))
([args result]
(if (empty? args)
result
(get-target-text (rest args) (if (boolean (re-find #"(.+\.[^csv\s])" (first args)))
(str result (slurp (first args)))
(if (not (boolean (re-find #"(.+\.csv|.+,.+)" (first args))))
(if (boolean (re-find #"\s" (str/trim (first args))))
(str result (first args) " ")))))))
)
(defn read-csv
;; Takes in a filename and returns a vector of the csv values
[file-name]
(str/split (with-open [rdr (io/reader file-name)]
(doall (reduce str (line-seq rdr)))) #","))
(defn gen-stoplist
;; Generates the stoplist for words to be redacted
([args] (gen-stoplist args []))
([args stoplist]
(if (empty? args)
stoplist
(gen-stoplist (rest args) (if (boolean (re-find #"(.+\.csv)" (first args)))
(into [] (concat stoplist (read-csv (first args))))
(if (boolean (re-find #"(.+\..[^csv\s])" (first args)))
stoplist
(if (boolean (re-find #"(.*,.*)" (first args)))
(into [] (concat stoplist (str/split (first args) #",")))
(if (boolean (re-find #"(\s)" (str/trim (first args))))
stoplist
(into [] (concat stoplist [(first args)] ))))))))))
(defn -main
([& args]
(def stoplist (gen-stoplist args))
(def target-text (get-target-text args))
(println (redact-doc target-text stoplist)))
)
I have been staring at this trying to figure out what is causing the issue. I have tested all of the methods independently on the REPL and they all seem to work but the (-main) method is throwing a null pointer exception on the str/replace call....just not sure why. Any help you can give is much appreciated. Thanks!
There is a fair bit about your code which is not really correct. My guess is
that our getting that call because your calling a function which is expecting a
value and is getting a nil passed in - my guess would be one of the string
functions.
your function definitions are not quite right. If your function only has a
single 'signature' then you don't need the additional brackets. You should also
use let bindings inside rather than def. e.g.
(defn -main
[& args]
(let [stoplist (gen-stoplist args)
target-text (get-target-text args))]
(println (redact-doc target-text stoplist)))
Your code is not passing what you think to gen-stoplist or get-target-text. I
suspect the null pointer is because of the call to str/trim being passed a nil
rather than a string.
My suggestion would be to open a repl and interact with it using some println in
your functions to look at what is getting parsed in.

How to parse xml and get an vector for some attributes on an element

I know how to extract one attribute using zip-xml/attr, but how to extract multiple attributes?
e.g I have the following
<table>
<column name="col1" type="varchar" length="8"/>
<column name="col2" type="varchar" length="16"/>
<column name="col3" type="int" length="16"/>
<table>
And the expected result is. A silly way is to call zip-xml/attr for each attribute, but is there any elegant way to do that?
[["co11" "varchar" 8] [["co12" "varchar" 16] [["co13" "int" 16]
My advice is to use a tree-walking function to extract the interesting data from the XML tree. clojure.walk has several of these, but here I use tree-seq from core clojure to just produce a seq of nodes and work on that. This function takes two functions - a branch? predicate which checks if a node can have children and a children function which gets them. I use :content for both, as tags with no nested tags produce nil, which is a falsey value and so it works also as a predicate.
(->> (clojure.xml/parse "res/doc.xml") ;;source file for your xml
(tree-seq :content :content) ;; Produce a seq by walking the tree
(filter #(= :column (:tag %))) ;;Take only :column tags
(mapv (comp vec vals :attrs)))
;;Collect the values of the :attrs maps into vectors
;;and collect those into a vector with mapv
Your desired output had unmatched square brackets, but I assume it should be like
[["col1" "varchar" "8"] ["col2" "varchar" "16"] ["col3" "int" "16"]]
which was my return value. However, this is potentially brittle - you're relying on the maps returned by clojure.xml/parse preserving the ordering of the attributes in the XML in order to know what the data means. That's not really part of the contract of maps. As an implementation detail it creates clojure.lang.PersistentStructMaps which apparently do have this feature, but it might not always be so.
Alternatively you could use just (mapv :attrs) to keep the whole of the map in there.
The right solution depends on how large and complex the XML is and to some extent, what you know about its structure. If it needs to be very generic, then you need to have quite a lot of logic to navigate the nodes etc. However, if it is a known format and you know what nodes you are interested in, its pretty straight-forward.
I used clojure.zip to create a zipper from the XML file and then use clojure.data.zip.xml to extract the nodes/paths I was interested in. I then defined simple helper functions to process specific nodes. This was pretty much my first bit of clojure and I've not yet gone back to it to re-factor it and refine/clarify some of my very rough clojure idioms based on what I've learnt since, but in the spirit of an example being worth 1000 words, here it is -
(ns arcis.models.nessus
(:use [taoensso.timbre :only [trace debug info warn error fatal]])
(:require [arcis.util :as util]
[arcis.models.db :as db]
[clojure.java.io :as io]
[clojure.xml :as xml]
[clojure.zip :as zip]
[clojure.data.zip.xml :as zx]))
(def nessus-host-keys [:hostname :host_fqdn
:system_type :operating_system
:operating_system_unsupported])
(def used-nessus-host-keys (conj nessus-host-keys
:host_start :host_end
:items :traceroute_hop_0 :traceroute_hop_1
:traceroute_hop_2 :traceroute_hop_3
:traceroute_hop_4 :traceroute_hop_5
:traceroute_hop_6 :traceroute_hop_7
:traceroute_hop_8 :traceroute_hop_9
:traceroute_hop_10 :traceroute_hop_11
:traceroute_hop_12 :traceroute_hop_13
:traceroute_hop_14 :traceroute_hop_15
:traceroute_hop_16 :traceroute_hop_17
:host_ip :patch_summary_total_cves
:cpe_0 :cpe_1 :cpe_2 :cpe_3 :cpe_4 :cpe_5
:cpe_6 :cpe_7 :cpe_8 :cpe_9))
(def nessus-item-keys [:port :svc_name :protocol :severity :plugin_id
:plugin_output])
(def used-nessus-item-keys (conj nessus-item-keys
:plugin_details
:plugin_name
:plugin_family))
(def nessus-plugin-keys [:plugin_id :plugin_name :plugin_family :fname
:script_version :plugin_type :exploitability_ease
:vuln_publication_date :cvss_temporal_data
:solution :cvss_temporal_score :risk_factor
:description :cvss_vector :synopsis
:patch_publication_date :exploit_available
:plugin_publication_date :plugin_modification_date
:cve :bid :exploit_framework_canvas :edb_id
:exploit_framework_metasploit :exploit_framework_core
:metasploit_name :canvas_package :osvdb :cwe
:cvss_temporal_vector :cvss_base_score :cpe
:exploited_by_malware])
(def used-nessus-plugin-keys (conj nessus-plugin-keys
:xref :see_also :cert
:attachment :iava :stig_severity :hp
:secunia :iawb :msft))
(def show-unprocessed true)
(defn log-unprocessed [title vls]
(if (and show-unprocessed
(seq vls))
(println (str "Unprocessed " title ": " vls))))
;;; parse nessus report
(defn parse-xref [xref]
{:xref (first (:content xref))})
(defn parse-see-also [see-also]
{:see_also (first (:content see-also))})
(defn parse-plugin [plugin]
{(util/db-keyword (name (:tag plugin))) (first (:content plugin))})
(defn parse-contents [cont]
(let [xref (mapv parse-xref (filter #(= (:tag %) :xref) cont))
see-also (mapv parse-see-also (filter #(= (:tag %) :see-also) cont))
details (reduce merge {}
(map parse-plugin
(remove #(or (= (:tag %) :xref)
(= (:tag %) :see-also)) cont)))]
(assoc details
:see_also see-also
:xref xref)))
(defn fix-item-keywords [item]
(let [ks (keys item)]
(into {}
(for [k ks]
[(util/db-keyword (name k))
(k item)]))))
(defn parse-item [item]
(let [attrs (fix-item-keywords (:attrs item))
contents (parse-contents (:content item))]
(assoc attrs
:plugin_output (:plugin_output contents)
:plugin_details (assoc (dissoc contents :plugin_output)
:plugin_id (:plugin_id attrs)
:plugin_family (:plugin_family attrs)))))
(defn parse-properties [props]
(into {}
(for [p props]
[(util/db-keyword (:name (:attrs p)))
(first (:content p))])))
(defn parse-host [h]
(let [items (map first (zx/xml-> h :ReportItem))
properties (:content (first (zx/xml1-> h :HostProperties)))]
(assoc (parse-properties properties)
:hostname (zx/attr h :name)
:items (mapv parse-item items))))
(defn parse-hosts [hosts]
(mapv parse-host hosts))
(defn parse-file [f]
(let [root (zip/xml-zip (xml/parse (io/file f)))
report-xml (zx/xml1-> root :Report)
hosts (zx/xml-> report-xml :ReportHost)]
{:report_name (zx/attr report-xml :name)
:policy (zx/text (zx/xml1-> root :Policy :policyName))
:hosts (parse-hosts hosts)}))
;;; insert nessus records into db
(defn mk-host-rec [scan-id host]
(let [[id err] (db/get-sequence-nextval "host_seq")]
(if (nil? err)
(assoc (util/build-map host nessus-host-keys)
:ipv4 (:host_ip host)
:scan_start (util/from-nessus-date (:scan_start host))
:scan_end (util/from-nessus-date (:scan_end host))
:total_cves (:patch_summary_total_cves host)
:id id
:scan_id scan-id)
nil)))
(defn insert-patches [p]
(when (seq p)
(db/insert-nessus-host-patch (first p))
(recur (rest p))))
(defn insert-host-patch [id host]
(let [p-keys (filter #(re-find #"patch_summary_*" %) (map name (keys host)))
recs (map (fn [s]
{:id (first (db/get-sequence-nextval "patch_seq"))
:host_id id
:summary ((keyword (str "patch_summary_txt_" s)) host)
:cve_num ((keyword (str "patch_summary_cve_num_" s)) host)
:cves ((keyword (str "patch_summary_cves_" s)) host)})
(filter seq
(map #(second (re-find #"patch_summary_txt_(.*)" %))
p-keys)))]
(insert-patches recs)
(util/remove-keys host (map keyword p-keys))))
(defn mk-item-rec [host-id item]
(let [[id err] (db/get-sequence-nextval "item_seq")]
(assoc (util/build-map item nessus-item-keys)
:host_id host-id
:id id)))
(defn insert-item [host-id item]
(let [rec (mk-item-rec host-id item)
not-done (keys (util/remove-keys item used-nessus-item-keys))]
(log-unprocessed "Item Keys" not-done)
(db/insert-nessus-report-item rec)
(:plugin_id item)))
(defn mk-plugin-rec [item]
(let [rec (util/build-map (:plugin_details item) nessus-plugin-keys)
not-used (keys (util/remove-keys (:plugin_details item)
used-nessus-plugin-keys))]
(log-unprocessed "Plugin Keys" not-used)
(assoc rec
:vuln_publication_date (util/from-nessus-date
(:vuln_publication_date rec))
:patch_publication_date (util/from-nessus-date
(:patch_publication_date rec))
:plugin_publication_date (util/from-nessus-date
(:plugin_publication_date rec))
:plugin_modification_date (util/from-nessus-date
(:plugin_modificaiton_date rec)))))
(defn insert-xref [plugin-id xrefs]
(when (seq xrefs)
(let [xref {:id (first (db/get-sequence-nextval "xref_seq"))
:plugin_id plugin-id
:xref (:xref (first xrefs))}]
(db/insert-nessus-xref xref)
(recur plugin-id (rest xrefs)))))
(defn insert-see-also [plugin-id see-also]
(when (seq see-also)
(let [sa {:id (first (db/get-sequence-nextval "ref_seq"))
:plugin_id plugin-id
:reference (:see_also (first see-also))}]
(db/insert-nessus-ref sa)
(recur plugin-id (rest see-also)))))
(defn insert-plugin [item]
(let [rec (mk-plugin-rec item)
xref (:xref (:plugin_details item))
see-also (:see_also (:plugin_details item))]
(if (seq xref)
(insert-xref (:plugin_id rec) xref))
(if (seq see-also)
(insert-see-also (:plugin_id rec) see-also))
(db/upsert-nessus-plugin rec)))
(defn insert-items [host-id items plugin-set]
(if (empty? items)
plugin-set
(let [p (insert-item host-id (first items))]
(if-not (contains? plugin-set p)
(insert-plugin (first items)))
(recur host-id (rest items) (conj plugin-set p)))))
(defn insert-host [scan-id host plugin-set]
(if-let [h-rec (mk-host-rec scan-id host)]
(let [[v err] (db/insert-nessus-host h-rec)
items (:items host)]
(if (nil? err)
(let [host2 (insert-host-patch (:id h-rec) host)]
(log-unprocessed "Host Keys" (keys (util/remove-keys
host2 used-nessus-host-keys)))
(insert-items (:id h-rec) items plugin-set))
plugin-set))
plugin-set))
(defn insert-hosts
([id hosts]
(insert-hosts id hosts #{}))
([id hosts plugins]
(if (empty? hosts)
plugins
(let [plugin-set (insert-host id (first hosts) plugins)]
(recur id (rest hosts) plugin-set)))))
(defn mk-scan-record [id report]
{:id id
:name (:report_name report)
:scan_dt (util/to-sql-date)
:policy (:policy report)
:entered_dt (util/to-sql-date)})
(defn store-report [update-plugins report]
(let [[id err] (db/get-sequence-nextval "nscan_seq")
scan-rec (mk-scan-record id report)]
(if (nil? err)
(let [[v e] (db/insert-nessus-scan scan-rec)]
(if (nil? e)
(if update-plugins
(let [plugin-list (set (first (db/select-nessus-plugin-ids)))]
[(insert-hosts id (:hosts report) plugin-list) nil])
[(insert-hosts id (:hosts report)) nil])
[v e]))
[id err])))
(defn process-nessus-report [update-plugins filename]
(let [report (parse-file filename)]
(println (str "Report: " (:report_name report)
"\nPolicy: " (:policy report)
"\nHost Records: " (count (:hosts report))))
(store-report update-plugins report)))
Magos's answer using tree-seq is perfectly fine, but there's no reason to abandon zippers; filtering using zippers is more succinct and the arguably the "clojure" way. (note this example uses data.xml ([org.clojure/data.xml "0.0.8"]) instead of clojure.xml).
(require '[clojure.data.zip.xml :as zf])
(require '[clojure.zip :as z])
(def ex
"<table>
<column name=\"col1\" type=\"varchar\" length=\"8\"/>
<column name=\"col2\" type=\"varchar\" length=\"16\"/>
<column name=\"col3\" type=\"int\" length=\"16\"/>
</table>")
(let [x (z/xml-zip (clojure.data.xml/parse-str ex))]
(->> (zf/xml-> x :column) ;;equivalent to (->> treeseq ... filter)
flatten
(keep :attrs)
(map vals)))
;>>> (("col1" "varchar" "8") ("col2" "varchar" "16") ("col3" "int" "16"))
But the xml-> macro simply applies functions in order, so you can do the following:
(let [x (z/xml-zip (clojure.data.xml/parse-str ex))]
(->> (zf/xml-> x :column #(keep :attrs %))
(map vals)))
;>>> (("col1" "varchar" "8") ("col2" "varchar" "16") ("col3" "int" "16"))

macro always throw "UnmatchedDelimiter" if given anon function

I wrote a macro to handle http response
(defmacro defhandler
[name & args]
(let [[docstring args] (if (string? (first args))
[(first args) (next args)]
[nil args])
args (apply hash-map :execute-if true (vec args))]
`(do
(def ~name
(with-meta (fn [scope# promise#]
(let [e# (:execute-if ~args)
ei# (if (fn? e#)
(e# scope#)
(boolean e#))]
(when ei#
(.then promise# (fn [result#]
(let [{:strs [http-status# value#]} result#
the-func# ((keyword http-status#) ~args)]
(the-func# scope# value#))))))) {:structure ~args}))
(alter-meta! (var ~name) assoc :doc ~docstring))))
So I can do
(defhandler my-handler
:200 (fn [$scope value] (set! (.-content $scope) value)))
But that throws "UnmatchedDelimiter" at line 1, but if I try with a named function:
(defn my-func [$scope value] (set! (.-content $scope) value))
(defhandler my-handler
:200 my-func)
It works ok. I'm just curious, is that a normal behaviour?
That is not the behavior I see when I try your example, nor does it seem very likely. I suggest checking that the forms you pasted here are exactly the ones that produce an error; I suspect your actual anonymous function included one too many )s.

Threadsafe pop in clojure?

I've found this code on http://www.learningclojure.com/2010/11/yet-another-way-to-write-factorial.html, but I don't understand if/how the pop-task is supposed to be threadsafe. Doesn't it allow to return twice the same head ?
(def to-do-list (atom '()))
(defn add-task![t] (swap! to-do-list #(cons t %)))
(defn pop-task![] (let [h (first #to-do-list)] (swap! to-do-list rest) h))
If so, is it possible to keep using atom and write the peek and swap! atomically, or is this a job for the ref mechanism ?
Or you drop to a lower level.
(def to-do-list (atom nil))
(defn add-task!
[t]
(swap! to-do-list conj t))
(defn pop-task!
[]
(let [[h & r :as l] #to-do-list]
(if (compare-and-set! to-do-list l r)
h
(recur))))
Yeah, that code isn't thread safe. You can make it thread-safe by taking advantage of the fact that swap! returns the new value of the atom, which implies you need to combine the queue with the "popped" value.
(def to-do-list
(atom {}))
(defn add-task!
[t]
(swap! to-do-list
(fn [tl]
{:queue (cons t (:queue tl))})))
(defn pop-task!
[]
(let [tl (swap! to-do-list
(fn [old]
{:val (first (:queue old))
:queue (rest (:queue old))}))]
(:val tl)))