Get object of case class from regex match - regex

i'm working on scraping data from a webpage with scala regex-es, but i encountered problem with parsing result to object of some case class-es.
In following snippet i managed to scrape all the data, but i have no clue how to parse 3 elements from an iterator. I thought about something like:
val a :: b :: c :: _ = result.group(0).iDontKnowWha
Any ideas what can i do?
import model.FuneralSchedule
import play.api.libs.json.Json
import scala.io.Source
var date = "2015-05-05"
val source = Source.fromURL("http://zck.krakow.pl/?pageId=16&date=" + date).mkString
val regex = "(?s)<table>.+?(Cmentarz.+?)<.+?</table>".r
var thing: List[FuneralSchedule] = List()
var jsonFeed: List[Funeral] = List()
val regMatcher = "("
case class Funeral(hour: String, who: String, age: String) {
override def toString: String = {
"Cos"
}
}
//implicit val format = Json.format[Funeral]
val out = regex.findAllIn(source).matchData foreach { table =>
thing ::= FuneralSchedule(table.group(1), clearStrings(table.group(0)))
"""<tr\s?>.+?</\s?tr>""".r.findAllIn(clearStrings(table.group(0))).matchData foreach { tr =>
//TODO: Naprawic bo szlak trafia wydajnosc
val temp = """<td\s?>.+?</\s?td>""".r.findAllIn(tr.group(0)).matchData.foreach {
elem => println(elem)
}
//println(Json.toJson(thingy))
}
println("Koniec tabeli")
}
thing
//Json.toJson(jsonFeed)
println(removeMarkers("<td > <td> Marian Debil </ td>"))
def removeMarkers(s: String) = {
s.replaceAll( """(</?\s?td\s?>)""", "")
}
def clearStrings(s: String) = {
val regex = "((class=\".+?\")|(id=\".+?\")|(style=\".+?\")|(\\n))"
s.replaceAll(regex, "")
}

One way of doing it would be converting it to a Stream and matching it using stream's operators like this:
val a #:: b #:: c #:: _ = """([a-z]){1}""".r.findAllIn("a b c").toStream
then a, b and c is what you're looking for

Related

Why Int attributes are not being saved in DynamoDB when using AWS SDK 2.x?

I added an Int attribute in my class named isAlsoActor but it doesn't get saved in DynamoDB. Here is the class I'm trying to save in Dynamo:
package me.brunosantana.dto
import com.fasterxml.jackson.annotation.JsonInclude
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbAttribute
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbBean
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbIgnore
import software.amazon.awssdk.services.dynamodb.model.AttributeValue
private const val TYPE = "ARTIST"
#DynamoDbBean
//#JsonInclude(JsonInclude.Include.NON_EMPTY)
data class Artist(
#get:DynamoDbAttribute("ArtistName")
var name: String,
#get:DynamoDbAttribute("Nationality")
var nationality: String,
#get:DynamoDbAttribute(value = "IsAwardWinner")
var isAwardWinner: Boolean,
#get:DynamoDbAttribute(value = "IsAlsoActor")
var isAlsoActor: Int,
#get:DynamoDbIgnore
val songs: MutableList<Song> = mutableListOf()
): DynamoBaseModel(
pkType = "artist",
pkId = name.lowercase().replace(" ", "_"),
sk = "artist#${name.lowercase().replace(" ", "_")}",
gsi1pk = "type#artist",
gsi1sk = "type#artist",
) {
constructor(
name: String,
nationality: String,
isAwardWinner: Boolean,
isAlsoActor: Int
) :
this(
name = name,
nationality = nationality,
isAwardWinner = isAwardWinner,
isAlsoActor = isAlsoActor,
songs = mutableListOf()
)
#Deprecated(message = "Intended to be used only by AWS SDK")
constructor() :
this(
name = "",
nationality = "",
isAwardWinner = false,
isAlsoActor = 0,
songs = mutableListOf()
)
#DynamoDbAttribute("Type")
fun getType(): String {
return TYPE
}
fun setType(type: String) {
// Do nothing, this setter is just to make the AWS SDK 2.x happy
}
fun addAllSongs(songs: MutableList<Song>){
this.songs.addAll(songs)
}
companion object {
fun attributeMapToArtist(attributeMap: Map<String, AttributeValue>): Artist {
val name = attributeMap["ArtistName"]!!.s()
val nationality = attributeMap["Nationality"]!!.s()
val isAwardWinner = attributeMap["IsAwardWinner"]?.bool() ?: false
val isAlsoActor = attributeMap["IsAlsoActor"]?.n()?.toInt() ?: 0
val versionTimestamp = attributeMap["VersionTimestamp"]?.s()
val artist = Artist(
name = name,
nationality = nationality,
isAwardWinner = isAwardWinner,
isAlsoActor = isAlsoActor)
artist.versionTimestamp = versionTimestamp
return artist
}
}
}
Here is the code responsible for saving it in Dynamo (it's using putItem):
fun saveModel(model: DynamoBaseModel){
val pk = "${model.pkType}#${model.pkId}"
val existingModel = findByPkAndSk(pk, model.sk)
val existingVersion = existingModel?.versionTimestamp
val incomingVersion = model.versionTimestamp!!
try {
when(model){
is Artist -> {
save(model, incomingVersion, existingVersion, true)
}
is Song -> {
save(model, incomingVersion, existingVersion, true)
}
}
}catch (e: DynamoDbException){
e.printStackTrace()
}
}
fun findByPkAndSk(pk: String, sk: String): DynamoBaseModel? {
val tableName = "music"
val pkAttribute = AttributeValue.builder().s(pk).build()
val skAttribute = AttributeValue.builder().s(sk).build()
val queryReq = QueryRequest.builder()
.tableName(tableName)
.consistentRead(false)
.keyConditionExpression("pk = :pk and sk = :sk")
.expressionAttributeValues(mapOf(":pk" to pkAttribute, ":sk" to skAttribute))
.build()
try {
val queryResponse: QueryResponse = client.query(queryReq)
queryResponse.items().firstOrNull {
return when(it["Type"]!!.s()) {
"ARTIST" -> Artist.attributeMapToArtist(it)
"SONG" -> Song.attributeMapToSong(it)
else -> throw Exception("Not found")
}
}
} catch (e: DynamoDbException) {
System.err.println(e.message)
}
return null
}
private final inline fun <reified T> save(model: T, incomingVersion: String, existingVersion: String?, versioningCheck: Boolean){
println("incoming: $incomingVersion existing: $existingVersion")
val musicTable: DynamoDbTable<T> =
enhancedClient.table("music", TableSchema.fromBean(T::class.java))
if(versioningCheck){
if(existingVersion == null){
println("no existing version")
musicTable.putItem(model)
}else{
val incomingDate = DateUtils.convertStringToZonedDateTime(incomingVersion)
val existingDate = DateUtils.convertStringToZonedDateTime(existingVersion)
if(DateUtils.isIncomingDateNewer(incomingDate, existingDate)){
println("override")
musicTable.putItem(model) //check how to override properly
}else{
println("Skip. $incomingVersion is older than $existingVersion")
}
}
}else{
println("check disabled")
musicTable.putItem(model)
}
}
Does anyone have any idea why it's not saving the Int attribute? It saves the item in the table, but the Int attribute does not appear there.
Thank you.

Return list outside ForEach loop return only last item in Kotlin

I need to return all list items, in forEach it works fine, outside the loop it only returns the last item.
fun scanAndConvertFile(): String {
val scanner = Scanner(System.`in`)
print("Enter path to file to convert: ")
val fileName: String = scanner.nextLine()
val bufferedReader: BufferedReader = File(fileName).bufferedReader()
var result = bufferedReader.use { it.readText() }
val header = result.substring(0, result.indexOf(":61:"))
val body = result.substring(result.indexOf(":61:"), result.lastIndexOf(":61:220131C6"))
val footer = result.substring(result.lastIndexOf(":61:220131C6"), result.length)
var list = body.split(":61:")
list = list.filter { it.isNotEmpty() }
list = list.map {
":61:$it"
}
list.forEach() {
val part1 = it.substring(0, it.indexOf("?20"))
var part2ToBePasted = it.substring(it.indexOf("?20"), it.indexOf("?00"))
part2ToBePasted = part2ToBePasted.drop(3)
val part3 = it.substring(it.indexOf("?00"), it.indexOf("?27"))
var part4ToPast = it.substring(it.indexOf("?27"), it.indexOf("?28"))
part4ToPast = part4ToPast.drop(3)
val part5 = it.substring(it.indexOf("?28"), it.length)
list = if(part4ToPast.equals("")) {
listOf(part1.plus("?20").plus(part2ToBePasted).plus(part3).plus("?27").plus(part4ToPast).plus(part5))
} else {
listOf(part1.plus("?20").plus(part4ToPast).plus(part3).plus("?27").plus(part4ToPast).plus(part5))
}
// println(list) - works good
}
val converted = header.plus(list).plus(footer)
// println(converted) - print only last element of list
return converted
}
I tried to clean up your code a little (with no guarantee of course since I do not have any test data):
fun scanAndConvertFile(): String {
print("Enter path to file to convert: ")
val fileName: String = Scanner(System.`in`).nextLine()
val bufferedReader: BufferedReader = File(fileName).bufferedReader()
val result = bufferedReader.use { it.readText() }
val header = result.substring(0, result.indexOf(":61:"))
val footer = result.substring(result.lastIndexOf(":61:220131C6"), result.length)
val list = result
.substring(result.indexOf(":61:"), result.lastIndexOf(":61:220131C6"))
.split(":61:")
.filter { it.isNotEmpty() }
.map { ":61:$it" }
.map {
val indexOf00 = it.indexOf("?00")
val indexOf20 = it.indexOf("?20")
val indexOf27 = it.indexOf("?27")
val indexOf28 = it.indexOf("?28")
val substring27to28 = it.substring(indexOf27, indexOf28).drop(3)
it.substring(0, indexOf20)
.plus("?20")
.plus(if (substring27to28 == "") it.substring(indexOf20, indexOf00).drop(3) else substring27to28)
.plus(it.substring(indexOf00, indexOf27))
.plus("?27")
.plus(substring27to28)
.plus(it.substring(indexOf28, it.length))
}
return header.plus(list).plus(footer)
}
Basically you need to use map instead of forEach to return a list. map is used to transform each element of a list, while with forEach you do something to or with each element, but no list is returned.

scala list addall method

This is my java code,
public void test( List<Map> courses)
{
....
List<Map> data = (List<Map>) response.getBody();
courses.addAll(data);
pageNo = pagination(response.getHeaders());
if(pageNo!=null)
{
params.put("pageNo", pageNo);
pAccountCourses(params, courses);
}
}
How to convert it into scala List[AccountCourses] , so that i can add courseList into accountCourseslist
def test(courseList: java.util.ArrayList[AccountCourses]) {
......
//getting json data
var pageNo: String = null
val body = response.body
val json = parse(body)
var accountCourseslist = json.extract[java.util.ArrayList[AccountCourses]]
accountCourseslist.addAll(courseList)
if (pageNo != null) {
params.put("pageNo", pageNo);
test(accountCourseslist);
}
}
case class AccountCourses(id: Int) //case class
how to perform addAll operation of list in scala?
How to convert java.util.ArrayList[AccountCourses] to scala list?
use scala.collection.JavaConverters._ :
import scala.collection.JavaConverters._
val javaList = new java.util.ArrayList[Int]()
val scalaList = javaList.asScala
val scalaImmutableList = scalaList.toList // will return immutable copy
How to perform addAll operation of list in scala?
Use ++= if it is mutable collection or ++ on immutable list:
scalaList ++= List(1,2,3,4,5) // will also update javaList
val result = scalaImmutableList ++ List(1,2,3,4,5) // will return new copy

Regex to match all '(def.+?})'

Pattern (def.+?}) matches the first Scala method :
object defvaltest {
println("Welcome to the Scala worksheet") //> Welcome to the Scala worksheet
val str = "object t extends App { def one = { } def two = { } //Examples one two }"
//> str : String = object t extends App { def one = { } def two = { } //Example
//| s one two }
val Pattern = "(def.+?})".r //> Pattern : scala.util.matching.Regex = (def.+?})
Pattern.findFirstIn(str).get //> res0: String = def one = { }
}
How to match all Scala methods as List[String]
So instead of
res0: String = def one = { }
return
res0: List[String] = List("def one = { }" , "def two = { }")
You are looking for just one match with findFirstIn. To look for multiple matches, you need findAllIn:
val str = "object t extends App { def one = { } def two = { } //Examples one two }"
val Pattern = "(def.+?})".r
val res = Pattern.findAllIn(str)
res.foreach {m =>
println(m)
}
Output of the demo:
def one = { }
def two = { }

Scala - replaceAllIn

First off, I'm new to Scala.
I'm trying to make a template parser in Scala (similar to Smarty (PHP)). It needs to search through the document, replacing anything inside "{{ }}" tags, with anything provided in the HashMap.
I'm currently stuck here:
import scala.collection.mutable.HashMap
import scala.io.Source
class Template(filename: String, vars: HashMap[Symbol, Any]) {
def parse() = {
var contents = Source.fromFile(filename, "ASCII").mkString
var rule = """\{\{(.*)\}\}""".r
//for(rule(v) <- rule findAllIn contents) {
// yield v
//}
//rule.replaceAllIn(contents, )
}
}
var t = new Template("FILENAME", new HashMap[Symbol, Any])
println(t.parse)
The part's that I've commented are things that I've thought about doing.
Thanks
I've come a little further...
import scala.collection.mutable.HashMap
import scala.io.Source
import java.util.regex.Pattern
import java.util.regex.Matcher
class Template(filename: String, vars: HashMap[Symbol, Any]) {
def findAndReplace(m: Matcher)(callback: String => String):String = {
val sb = new StringBuffer
while (m.find) {
m.appendReplacement(sb, callback(m.group(1)))
}
m.appendTail(sb)
sb.toString
}
def parse() = {
var contents = Source.fromFile(filename, "ASCII").mkString
val m = Pattern.compile("""\{\{(.*)\}\}""").matcher(contents)
findAndReplace(m){ x => x }
}
}
var t = new Template("FILENAME.html", new HashMap[Symbol, Any])
println(t.parse)
At the moment it just currently adds whatever was inside of the tag, back into the document. I'm wondering if there is an easier way of doing a find-and-replace style regexp in Scala?
I'd do it like this (String as key instead of Symbol):
var s : String = input // line, whatever
val regexp = """pattern""".r
while(regexp findFirstIn s != None) {
s = regexp replaceFirstIn (s, vars(regexp.findFirstIn(s).get))
}
If you prefer not using var, go recursive instead of using while. And, of course, a stringbuilder would be more efficient. In that case, I might do the following:
val regexp = """^(.*?)(?:{{(pattern)}})?""".r
for(subs <- regexp findAllIn s)
subs match {
case regexp(prefix, var) => sb.append(prefix); if (var != null) sb.append("{{"+vars(var)+"}}")
case _ => error("Shouldn't happen")
}
That way you keep appending the non-changing part, followed by the next part to be replaced.
There is a flavor of replaceAllIn in util.matching.Regex that accepts a replacer callback. A short example:
import util.matching.Regex
def replaceVars(r: Regex)(getVar: String => String) = {
def replacement(m: Regex.Match) = {
import java.util.regex.Matcher
require(m.groupCount == 1)
Matcher.quoteReplacement( getVar(m group 1) )
}
(s: String) => r.replaceAllIn(s, replacement _)
}
This is how we would use it:
val r = """\{\{([^{}]+)\}\}""".r
val m = Map("FILENAME" -> "aaa.txt",
"ENCODING" -> "UTF-8")
val template = replaceVars(r)( m.withDefaultValue("UNKNOWN") )
println( template("""whatever input contains {{FILENAME}} and
unknown key {{NOVAL}} and {{FILENAME}} again,
and {{ENCODING}}""") )
Note Matcher.quoteReplacement escapes $ characters in the replacement string. Otherwise you may get java.lang.IllegalArgumentException: Illegal group reference, replaceAll and dollar signs. See the blog post on why this may happen.
Here is also interesting way how to do the same using functions compose:
val Regexp = """\{\{([^{}]+)\}\}""".r
val map = Map("VARIABLE1" -> "VALUE1", "VARIABLE2" -> "VALUE2", "VARIABLE3" -> "VALUE3")
val incomingData = "I'm {{VARIABLE1}}. I'm {{VARIABLE2}}. And I'm {{VARIABLE3}}. And also {{VARIABLE1}}"
def replace(incoming: String) = {
def replace(what: String, `with`: String)(where: String) = where.replace(what, `with`)
val composedReplace = Regexp.findAllMatchIn(incoming).map { m => replace(m.matched, map(m.group(1)))(_) }.reduceLeftOption((lf, rf) => lf compose rf).getOrElse(identity[String](_))
composedReplace(incomingData)
}
println(replace(incomingData))
//OUTPUT: I'm VALUE1. I'm VALUE2. And I'm VALUE3. And also VALUE1