I'm scraping data from a webpage with Scala regexes, but I ran into a problem turning the results into instances of my case classes.
In the following snippet I managed to scrape all the data, but I have no clue how to pull exactly 3 elements out of an iterator. I thought about something like:
val a :: b :: c :: _ = result.group(0).iDontKnowWha
Any ideas what I can do?
import model.FuneralSchedule
import play.api.libs.json.Json
import scala.io.Source
// Date value appended to the query string of the URL below.
var date = "2015-05-05"
// Downloads the page for `date` and keeps it as one string.
val source = Source.fromURL("http://zck.krakow.pl/?pageId=16&date=" + date).mkString
// (?s) lets '.' span newlines; group(1) captures the text starting at "Cmentarz" up to the next '<' inside each <table>.
val regex = "(?s)<table>.+?(Cmentarz.+?)<.+?</table>".r
// `thing` is prepended to while the tables are walked; `jsonFeed` is never modified in the visible snippet.
var thing: List[FuneralSchedule] = List()
var jsonFeed: List[Funeral] = List()
// NOTE(review): not referenced anywhere in the visible snippet.
val regMatcher = "("
/** One funeral entry: the hour it takes place, the person's name and their age, all kept as raw strings. */
case class Funeral(hour: String, who: String, age: String) {
  /** Always renders as the fixed placeholder text, regardless of the field values. */
  override def toString: String = "Cos"
}
//implicit val format = Json.format[Funeral]
// For every <table> match: record a FuneralSchedule, then print each <td> cell of each <tr> row.
// `foreach` returns Unit, so `out` carries no data.
val out = regex.findAllIn(source).matchData foreach { table =>
// Prepend a schedule built from the captured heading (group 1) and the cleaned table markup (group 0).
thing ::= FuneralSchedule(table.group(1), clearStrings(table.group(0)))
"""<tr\s?>.+?</\s?tr>""".r.findAllIn(clearStrings(table.group(0))).matchData foreach { tr =>
//TODO: fix this, it kills performance
// NOTE(review): the <td> regex is rebuilt for every row and clearStrings runs twice per table.
// `temp` is Unit as well: the cells are only printed, never collected.
val temp = """<td\s?>.+?</\s?td>""".r.findAllIn(tr.group(0)).matchData.foreach {
elem => println(elem)
}
//println(Json.toJson(thingy))
}
println("Koniec tabeli")
}
thing
//Json.toJson(jsonFeed)
println(removeMarkers("<td > <td> Marian Debil </ td>"))
/**
 * Strips every opening or closing `td` tag from `s`, leaving the text between the tags
 * (including its surrounding whitespace) untouched.
 */
def removeMarkers(s: String) = {
  // Opening or closing td tag, with at most one whitespace character on either side of "td".
  val tdTag = """(</?\s?td\s?>)"""
  s.replaceAll(tdTag, "")
}
/**
 * Removes every `class="…"`, `id="…"` and `style="…"` attribute as well as every newline
 * character from `s`. Whitespace that surrounded a removed attribute is left in place.
 */
def clearStrings(s: String) = {
  val noise = "((class=\".+?\")|(id=\".+?\")|(style=\".+?\")|(\\n))"
  s.replaceAll(noise, "")
}
One way of doing it would be converting it to a Stream and matching it using stream's operators like this:
// Binds the first three regex matches ("a", "b", "c" for this input) through the Stream extractor #::; the remaining tail is discarded.
// NOTE(review): presumably fails with a MatchError when fewer than three matches exist — confirm before relying on it.
val a #:: b #:: c #:: _ = """([a-z]){1}""".r.findAllIn("a b c").toStream
Then a, b and c hold the three values you are looking for.
Related
I added an Int attribute in my class named isAlsoActor but it doesn't get saved in DynamoDB. Here is the class I'm trying to save in Dynamo:
package me.brunosantana.dto
import com.fasterxml.jackson.annotation.JsonInclude
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbAttribute
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbBean
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbIgnore
import software.amazon.awssdk.services.dynamodb.model.AttributeValue
/** Value stored in the "Type" attribute of every artist item. */
private const val TYPE = "ARTIST"

/**
 * DynamoDB bean for an artist item.
 *
 * The key attributes passed to [DynamoBaseModel] are derived from [name]: it is lower-cased
 * and its spaces are replaced by underscores.
 *
 * NOTE(review): `isAlsoActor` is an `Int` whose generated JVM accessors are `isAlsoActor()` /
 * `setAlsoActor(...)`. Bean introspection presumably only treats an `is…` method as a getter
 * when it returns a boolean, which would explain the attribute being silently skipped on save
 * — confirm against the DynamoDB Enhanced Client bean-mapping rules.
 */
@DynamoDbBean
//@JsonInclude(JsonInclude.Include.NON_EMPTY)
data class Artist(
    @get:DynamoDbAttribute("ArtistName")
    var name: String,
    @get:DynamoDbAttribute("Nationality")
    var nationality: String,
    @get:DynamoDbAttribute(value = "IsAwardWinner")
    var isAwardWinner: Boolean,
    @get:DynamoDbAttribute(value = "IsAlsoActor")
    var isAlsoActor: Int,
    @get:DynamoDbIgnore
    val songs: MutableList<Song> = mutableListOf()
): DynamoBaseModel(
    pkType = "artist",
    pkId = name.lowercase().replace(" ", "_"),
    sk = "artist#${name.lowercase().replace(" ", "_")}",
    gsi1pk = "type#artist",
    gsi1sk = "type#artist",
) {
    /** Creates an artist with an empty song list. */
    constructor(
        name: String,
        nationality: String,
        isAwardWinner: Boolean,
        isAlsoActor: Int
    ) :
        this(
            name = name,
            nationality = nationality,
            isAwardWinner = isAwardWinner,
            isAlsoActor = isAlsoActor,
            songs = mutableListOf()
        )

    /** No-argument constructor with blank/zero values. */
    @Deprecated(message = "Intended to be used only by AWS SDK")
    constructor() :
        this(
            name = "",
            nationality = "",
            isAwardWinner = false,
            isAlsoActor = 0,
            songs = mutableListOf()
        )

    /** Always returns the constant item type `"ARTIST"`. */
    @DynamoDbAttribute("Type")
    fun getType(): String {
        return TYPE
    }

    /** Ignores [type]; the value returned by [getType] is fixed. */
    fun setType(type: String) {
        // Do nothing, this setter is just to make the AWS SDK 2.x happy
    }

    /** Appends all of [songs] to this artist's song list. */
    fun addAllSongs(songs: MutableList<Song>){
        this.songs.addAll(songs)
    }

    companion object {
        /**
         * Builds an [Artist] from a raw attribute map.
         *
         * "ArtistName" and "Nationality" must be present (a missing key throws a
         * NullPointerException); "IsAwardWinner" defaults to `false` and "IsAlsoActor" to `0`
         * when absent. "VersionTimestamp" is copied onto the result as-is and may be null.
         */
        fun attributeMapToArtist(attributeMap: Map<String, AttributeValue>): Artist {
            val name = attributeMap["ArtistName"]!!.s()
            val nationality = attributeMap["Nationality"]!!.s()
            val isAwardWinner = attributeMap["IsAwardWinner"]?.bool() ?: false
            val isAlsoActor = attributeMap["IsAlsoActor"]?.n()?.toInt() ?: 0
            val versionTimestamp = attributeMap["VersionTimestamp"]?.s()
            val artist = Artist(
                name = name,
                nationality = nationality,
                isAwardWinner = isAwardWinner,
                isAlsoActor = isAlsoActor)
            artist.versionTimestamp = versionTimestamp
            return artist
        }
    }
}
Here is the code responsible for saving it in Dynamo (it's using putItem):
/**
 * Saves [model] with the version check enabled.
 *
 * The currently stored item is looked up by its composed partition key (`pkType#pkId`) and
 * sort key, and its version timestamp is handed to `save` together with the incoming one.
 * Only [Artist] and [Song] models are written; any other subtype is ignored.
 * A [DynamoDbException] raised while saving is printed, not rethrown.
 *
 * @throws NullPointerException if [model] has no version timestamp.
 */
fun saveModel(model: DynamoBaseModel) {
    val partitionKey = "${model.pkType}#${model.pkId}"
    val storedVersion = findByPkAndSk(partitionKey, model.sk)?.versionTimestamp
    val newVersion = model.versionTimestamp!!

    try {
        // Two branches are needed even though they look alike: `save` is reified on the smart-cast type.
        when (model) {
            is Artist -> save(model, newVersion, storedVersion, true)
            is Song -> save(model, newVersion, storedVersion, true)
        }
    } catch (e: DynamoDbException) {
        e.printStackTrace()
    }
}
/**
 * Queries the "music" table for the item with the given partition and sort key and converts
 * the first hit into its model class, chosen by the item's "Type" attribute.
 *
 * @param pk partition key value, matched against the `pk` attribute.
 * @param sk sort key value, matched against the `sk` attribute.
 * @return the converted [Artist] or [Song]; `null` when the query returns no items or fails
 *   with a [DynamoDbException] (the message is written to stderr).
 * @throws NullPointerException if the item has no "Type" attribute.
 * @throws Exception if the "Type" value is neither "ARTIST" nor "SONG".
 */
fun findByPkAndSk(pk: String, sk: String): DynamoBaseModel? {
    val tableName = "music"
    val pkAttribute = AttributeValue.builder().s(pk).build()
    val skAttribute = AttributeValue.builder().s(sk).build()
    val queryReq = QueryRequest.builder()
        .tableName(tableName)
        .consistentRead(false)
        .keyConditionExpression("pk = :pk and sk = :sk")
        .expressionAttributeValues(mapOf(":pk" to pkAttribute, ":sk" to skAttribute))
        .build()
    try {
        val queryResponse: QueryResponse = client.query(queryReq)
        // Only the first item matters; take it directly instead of returning from inside a predicate.
        val item = queryResponse.items().firstOrNull() ?: return null
        return when (item["Type"]!!.s()) {
            "ARTIST" -> Artist.attributeMapToArtist(item)
            "SONG" -> Song.attributeMapToSong(item)
            else -> throw Exception("Not found")
        }
    } catch (e: DynamoDbException) {
        System.err.println(e.message)
    }
    return null
}
/**
 * Writes [model] to the "music" table through the enhanced client, optionally guarded by a
 * version comparison. Every decision is reported on stdout.
 *
 * @param model the bean to store; its class selects the table schema.
 * @param incomingVersion version timestamp of [model].
 * @param existingVersion version timestamp of the stored item, or `null` when none was found.
 * @param versioningCheck when `false` the item is written unconditionally; when `true` it is
 *   written only if nothing is stored yet or the incoming version is newer than the stored one.
 */
private final inline fun <reified T> save(model: T, incomingVersion: String, existingVersion: String?, versioningCheck: Boolean) {
    println("incoming: $incomingVersion existing: $existingVersion")
    val table: DynamoDbTable<T> =
        enhancedClient.table("music", TableSchema.fromBean(T::class.java))

    when {
        !versioningCheck -> {
            println("check disabled")
            table.putItem(model)
        }
        existingVersion == null -> {
            println("no existing version")
            table.putItem(model)
        }
        else -> {
            val incomingDate = DateUtils.convertStringToZonedDateTime(incomingVersion)
            val existingDate = DateUtils.convertStringToZonedDateTime(existingVersion)
            if (DateUtils.isIncomingDateNewer(incomingDate, existingDate)) {
                println("override")
                table.putItem(model) //check how to override properly
            } else {
                println("Skip. $incomingVersion is older than $existingVersion")
            }
        }
    }
}
Does anyone have any idea why it's not saving the Int attribute? It saves the item in the table, but the Int attribute does not appear there.
Thank you.
I need to return all list items. Inside forEach every item is processed correctly, but outside the loop the result contains only the last item.
// Asks for a file path on stdin, reads the file and rewrites each ":61:" record so that the
// text after "?27" also replaces the text after "?20" (when the "?27" field is not empty).
// Returns header + records + footer as one string.
fun scanAndConvertFile(): String {
val scanner = Scanner(System.`in`)
print("Enter path to file to convert: ")
val fileName: String = scanner.nextLine()
val bufferedReader: BufferedReader = File(fileName).bufferedReader()
var result = bufferedReader.use { it.readText() }
// header: everything before the first ":61:"; footer: everything from the last ":61:220131C6" on.
val header = result.substring(0, result.indexOf(":61:"))
val body = result.substring(result.indexOf(":61:"), result.lastIndexOf(":61:220131C6"))
val footer = result.substring(result.lastIndexOf(":61:220131C6"), result.length)
// Split the body into records and restore the ":61:" prefix that split() removed.
var list = body.split(":61:")
list = list.filter { it.isNotEmpty() }
list = list.map {
":61:$it"
}
list.forEach() {
val part1 = it.substring(0, it.indexOf("?20"))
var part2ToBePasted = it.substring(it.indexOf("?20"), it.indexOf("?00"))
// drop(3) removes the 3-character field tag ("?20" / "?27") and keeps only the field text.
part2ToBePasted = part2ToBePasted.drop(3)
val part3 = it.substring(it.indexOf("?00"), it.indexOf("?27"))
var part4ToPast = it.substring(it.indexOf("?27"), it.indexOf("?28"))
part4ToPast = part4ToPast.drop(3)
val part5 = it.substring(it.indexOf("?28"), it.length)
// NOTE(review): every iteration replaces `list` with a fresh one-element list, so once the
// loop ends only the last record is left — this is the behaviour described in the question.
list = if(part4ToPast.equals("")) {
listOf(part1.plus("?20").plus(part2ToBePasted).plus(part3).plus("?27").plus(part4ToPast).plus(part5))
} else {
listOf(part1.plus("?20").plus(part4ToPast).plus(part3).plus("?27").plus(part4ToPast).plus(part5))
}
// println(list) - works good
}
// NOTE(review): String.plus(list) appends the list's toString() form, i.e. with surrounding brackets.
val converted = header.plus(list).plus(footer)
// println(converted) - print only last element of list
return converted
}
I tried to clean up your code a little (with no guarantee of course since I do not have any test data):
/**
 * Prompts for a file path on stdin, reads that file and returns its converted content.
 *
 * The text is split into three parts: the header (everything before the first `:61:`), the
 * records (each starting with `:61:`) and the footer (everything from the last
 * `:61:220131C6` onwards). Every record is rewritten by [rewriteRecord]; header and footer
 * are copied unchanged. The rewritten records are concatenated directly, without any
 * separator or list brackets, so the result keeps the layout of the input file.
 *
 * @return header + rewritten records + footer.
 * @throws StringIndexOutOfBoundsException if one of the expected markers is missing.
 */
fun scanAndConvertFile(): String {
    print("Enter path to file to convert: ")
    val fileName: String = Scanner(System.`in`).nextLine()
    val result = File(fileName).bufferedReader().use { it.readText() }

    val recordMarker = ":61:"
    val bodyStart = result.indexOf(recordMarker)
    val footerStart = result.lastIndexOf(":61:220131C6")

    val header = result.substring(0, bodyStart)
    val footer = result.substring(footerStart)
    val records = result
        .substring(bodyStart, footerStart)
        .split(recordMarker)
        .filter { it.isNotEmpty() }
        // split() swallowed the marker; put it back before rewriting the record.
        .map { rewriteRecord("$recordMarker$it") }

    // joinToString("") rather than String.plus(list): the latter would append "[a, b, …]".
    return header + records.joinToString(separator = "") + footer
}

/**
 * Rewrites one record: the text of the `?27` field (between `?27` and `?28`) replaces the
 * text of the `?20` field (between `?20` and `?00`). When the `?27` field is empty the
 * record is reassembled with its original `?20` text.
 */
private fun rewriteRecord(record: String): String {
    val indexOf20 = record.indexOf("?20")
    val indexOf00 = record.indexOf("?00")
    val indexOf27 = record.indexOf("?27")
    val indexOf28 = record.indexOf("?28")

    // drop(3) strips the 3-character field tag and keeps only the field text.
    val field27 = record.substring(indexOf27, indexOf28).drop(3)
    val field20 = field27.ifEmpty { record.substring(indexOf20, indexOf00).drop(3) }

    return record.substring(0, indexOf20) +
        "?20" + field20 +
        record.substring(indexOf00, indexOf27) +
        "?27" + field27 +
        record.substring(indexOf28)
}
Basically you need to use map instead of forEach to return a list. map is used to transform each element of a list, while with forEach you do something to or with each element, but no list is returned.
This is my java code,
// Appends the entries of the response body to `courses`, then continues with the next page
// for as long as pagination(...) returns a non-null page number.
public void test( List<Map> courses)
{
....
// Unchecked cast: the response body is treated as a raw list of maps.
List<Map> data = (List<Map>) response.getBody();
courses.addAll(data);
pageNo = pagination(response.getHeaders());
if(pageNo!=null)
{
params.put("pageNo", pageNo);
// NOTE(review): the follow-up call goes to pAccountCourses, not test — presumably the method's real name; confirm.
pAccountCourses(params, courses);
}
}
How can I convert it to a Scala List[AccountCourses], so that I can add courseList to accountCourseslist?
// Scala attempt at the same paging method: parses the response body into a list of
// AccountCourses, merges it with the list passed in and recurses while a page number is set.
def test(courseList: java.util.ArrayList[AccountCourses]) {
......
//getting json data
// NOTE(review): pageNo is never assigned in the visible code, so the recursion below cannot
// run as shown — presumably the elided part sets it; confirm.
var pageNo: String = null
val body = response.body
val json = parse(body)
var accountCourseslist = json.extract[java.util.ArrayList[AccountCourses]]
// The incoming list is added to the freshly parsed one (the Java version adds in the other direction).
accountCourseslist.addAll(courseList)
if (pageNo != null) {
params.put("pageNo", pageNo);
test(accountCourseslist);
}
}
// Element type extracted from the JSON; carries only the course id.
case class AccountCourses(id: Int) //case class
how to perform addAll operation of list in scala?
How to convert java.util.ArrayList[AccountCourses] to scala list?
use scala.collection.JavaConverters._ :
import scala.collection.JavaConverters._
// A plain Java list to convert.
val javaList = new java.util.ArrayList[Int]()
// asScala comes from the JavaConverters import; it exposes the Java list as a Scala collection.
val scalaList = javaList.asScala
val scalaImmutableList = scalaList.toList // will return immutable copy
How to perform addAll operation of list in scala?
Use ++= if it is mutable collection or ++ on immutable list:
// In-place append on the mutable view versus building a new immutable list.
scalaList ++= List(1,2,3,4,5) // will also update javaList
val result = scalaImmutableList ++ List(1,2,3,4,5) // will return new copy
Pattern (def.+?}) matches the first Scala method :
// Worksheet that extracts the first "def … }" fragment from a one-line source string.
// The "//>" and "//|" comments are the worksheet's recorded output.
object defvaltest {
println("Welcome to the Scala worksheet") //> Welcome to the Scala worksheet
val str = "object t extends App { def one = { } def two = { } //Examples one two }"
//> str : String = object t extends App { def one = { } def two = { } //Example
//| s one two }
// Lazy ".+?" stops at the first closing brace after "def".
val Pattern = "(def.+?})".r //> Pattern : scala.util.matching.Regex = (def.+?})
// findFirstIn yields only the first match; .get unwraps the Option.
Pattern.findFirstIn(str).get //> res0: String = def one = { }
}
How to match all Scala methods as List[String]
So instead of
res0: String = def one = { }
return
res0: List[String] = List("def one = { }" , "def two = { }")
You are looking for just one match with findFirstIn. To look for multiple matches, you need findAllIn:
// Source text to search, kept on a single line.
val str = "object t extends App { def one = { } def two = { } //Examples one two }"
// Lazy match from "def" up to the first closing brace.
val Pattern = "(def.+?})".r
// findAllIn returns an iterator over every non-overlapping match, in order of appearance.
val res = Pattern.findAllIn(str)
for (m <- res) println(m)
Output of the demo:
def one = { }
def two = { }
First off, I'm new to Scala.
I'm trying to make a template parser in Scala (similar to Smarty (PHP)). It needs to search through the document, replacing anything inside "{{ }}" tags, with anything provided in the HashMap.
I'm currently stuck here:
import scala.collection.mutable.HashMap
import scala.io.Source
// First attempt at a template parser: reads the file and prepares a regex for "{{ … }}" tags,
// but performs no replacement yet.
class Template(filename: String, vars: HashMap[Symbol, Any]) {
// Returns Unit: the body ends with a definition, so println(t.parse) has nothing useful to print.
def parse() = {
var contents = Source.fromFile(filename, "ASCII").mkString
// Captures whatever sits between "{{" and "}}"; ".*" is greedy.
var rule = """\{\{(.*)\}\}""".r
//for(rule(v) <- rule findAllIn contents) {
// yield v
//}
//rule.replaceAllIn(contents, )
}
}
// `vars` is passed an empty map here and is not read by the class.
var t = new Template("FILENAME", new HashMap[Symbol, Any])
println(t.parse)
The parts that I've commented out are things that I've thought about doing.
Thanks
I've come a little further...
import scala.collection.mutable.HashMap
import scala.io.Source
import java.util.regex.Pattern
import java.util.regex.Matcher
/**
 * Template renderer that rewrites every `{{ … }}` tag of a file through a callback.
 *
 * @param filename path of the template file, read as ASCII.
 * @param vars     substitution values; not consulted yet by the methods below.
 */
class Template(filename: String, vars: HashMap[Symbol, Any]) {

  /**
   * Replaces every match of `m` with `callback(group 1)` and returns the rewritten text.
   *
   * The callback result is inserted literally: it is passed through
   * `Matcher.quoteReplacement`, because `appendReplacement` would otherwise interpret `$`
   * as a group reference and `\` as an escape and fail on ordinary text such as "$5".
   *
   * @param m        matcher whose pattern has exactly the wanted text in group 1.
   * @param callback maps the captured text to its replacement.
   */
  def findAndReplace(m: Matcher)(callback: String => String): String = {
    val sb = new StringBuffer
    while (m.find) {
      m.appendReplacement(sb, Matcher.quoteReplacement(callback(m.group(1))))
    }
    m.appendTail(sb)
    sb.toString
  }

  /**
   * Reads the template file and replaces each tag by its own inner text
   * (identity callback). The file handle is closed once the content is read.
   */
  def parse() = {
    val source = Source.fromFile(filename, "ASCII")
    val contents = try source.mkString finally source.close()
    // Lazy ".*?" so that two tags on one line are matched separately instead of as one span.
    val m = Pattern.compile("""\{\{(.*?)\}\}""").matcher(contents)
    findAndReplace(m) { x => x }
  }
}
// Renders "FILENAME.html" with an empty variable map and prints the result.
var t = new Template("FILENAME.html", new HashMap[Symbol, Any])
println(t.parse)
At the moment it just puts whatever was inside the tag back into the document. I'm wondering if there is an easier way of doing a find-and-replace style regexp in Scala?
I'd do it like this (String as key instead of Symbol):
// Sketch: keep substituting the first remaining match until none is left.
// `input` and `vars` are not defined in this snippet — presumably the text to process and a String-keyed map.
var s : String = input // line, whatever
val regexp = """pattern""".r
// NOTE(review): infix precedence may parse this condition as `regexp findFirstIn (s != None)`; confirm, and parenthesise if so.
while(regexp findFirstIn s != None) {
// The regex is evaluated again here to fetch the matched text, which is then used as the lookup key.
s = regexp replaceFirstIn (s, vars(regexp.findFirstIn(s).get))
}
If you prefer not using var, go recursive instead of using while. And, of course, a stringbuilder would be more efficient. In that case, I might do the following:
// Sketch: walk the text as (unchanged prefix, optional tag) pairs and append both to a builder.
// `s`, `sb` and `vars` are not defined in this snippet.
// NOTE(review): the literal "{{" / "}}" are not escaped in this pattern, unlike the other examples — confirm it compiles.
val regexp = """^(.*?)(?:{{(pattern)}})?""".r
for(subs <- regexp findAllIn s)
subs match {
// NOTE(review): `var` is a reserved word in Scala, so this binding needs another name (or backticks) to compile.
case regexp(prefix, var) => sb.append(prefix); if (var != null) sb.append("{{"+vars(var)+"}}")
case _ => error("Shouldn't happen")
}
That way you keep appending the non-changing part, followed by the next part to be replaced.
There is a flavor of replaceAllIn in util.matching.Regex that accepts a replacer callback. A short example:
import util.matching.Regex
/**
 * Builds a text transformer that replaces every match of `r` with the value `getVar`
 * returns for the match's single capture group.
 *
 * @param r      pattern with exactly one capture group (checked for each match).
 * @param getVar maps the captured name to the text to insert.
 * @return a function from input text to text with all matches substituted.
 */
def replaceVars(r: Regex)(getVar: String => String) = {
  import java.util.regex.Matcher

  val substitute: Regex.Match => String = { found =>
    require(found.groupCount == 1)
    // Quote the value so that '$' and '\' in it are inserted literally.
    Matcher.quoteReplacement(getVar(found.group(1)))
  }

  (s: String) => r.replaceAllIn(s, substitute)
}
This is how we would use it:
// Tag pattern: "{{", one or more characters other than braces (captured), "}}".
val r = """\{\{([^{}]+)\}\}""".r
val m = Map("FILENAME" -> "aaa.txt",
"ENCODING" -> "UTF-8")
// Keys missing from the map are rendered as "UNKNOWN".
val template = replaceVars(r)( m.withDefaultValue("UNKNOWN") )
println( template("""whatever input contains {{FILENAME}} and
unknown key {{NOVAL}} and {{FILENAME}} again,
and {{ENCODING}}""") )
Note Matcher.quoteReplacement escapes $ characters in the replacement string. Otherwise you may get java.lang.IllegalArgumentException: Illegal group reference, replaceAll and dollar signs. See the blog post on why this may happen.
Here is another interesting way to do the same thing, using function composition:
// Tag pattern: "{{", one or more characters other than braces (captured), "}}".
val Regexp = """\{\{([^{}]+)\}\}""".r
val map = Map("VARIABLE1" -> "VALUE1", "VARIABLE2" -> "VALUE2", "VARIABLE3" -> "VALUE3")
val incomingData = "I'm {{VARIABLE1}}. I'm {{VARIABLE2}}. And I'm {{VARIABLE3}}. And also {{VARIABLE1}}"

/**
 * Replaces every `{{NAME}}` tag in `incoming` with `map(NAME)`.
 *
 * One plain-text replacement function is built per tag found and all of them are composed
 * into a single function, which is then applied to `incoming` itself. Text without tags is
 * returned unchanged. A tag whose name is missing from `map` fails with the map's
 * key-not-found exception.
 */
def replace(incoming: String) = {
  // Curried so that fixing (what, with) yields a String => String step.
  def substitute(what: String, `with`: String)(where: String) = where.replace(what, `with`)
  val composedReplace = Regexp
    .findAllMatchIn(incoming)
    .map { m => substitute(m.matched, map(m.group(1)))(_) }
    .reduceLeftOption((lf, rf) => lf compose rf)
    .getOrElse(identity[String](_))
  composedReplace(incoming)
}
println(replace(incomingData))
//OUTPUT: I'm VALUE1. I'm VALUE2. And I'm VALUE3. And also VALUE1