I am using Amazon's Textract service for extracting tables, Forms from pdf documnets.
The example provided at Github here is working for single page document only. But as per demo provided by AWS they are able to extract multi page pdf docs as well.
As per documentation we have to call same service for multi pages as well. But it is not working for me.
All the examples provided by them are either in python or java.
I am doing it in dotnet core.
Any help?
Here is my code.
public IActionResult FileExtract(string filename)
{
try
{
string lineText = "";
string wordText = "";
string fieldsText = "";
string fieldsText2 = "";
string tableText = "";
// Extracting file in below code.
var textractAnalysisClient = BuildTextractClient();
var document = PrepareDocument(textractAnalysisClient, "FORMS", filename);
document.Pages.ForEach(page =>
{
page.Lines.ForEach(line =>
{
lineText += "<button class='rawlabel'>" + line.Text + "</button>";
line.Words.ForEach(word =>
{
wordText += word.Text;
});
});
page.Form.Fields.ForEach(f =>
{
fieldsText += "<div><h5>" + f.Key + "</h5><p style='background-color:lightgray;width: 200px;padding: 6px;'>"
+ f.Value + "</p></div>";
});
var key = "Phone Number:";
var field = page.Form.GetFieldByKey(key);
if (field != null)
{
fieldsText2 += "Key: " + field.Key + " | Value: " + field.Value;
}
});
tableText = "<table id='customers'>";
document = PrepareDocument(textractAnalysisClient, "TABLES", filename);
document.Pages.ForEach(page =>
{
page.Tables.ForEach(table =>
{
var r = 0;
table.Rows.ForEach(row =>
{
r++;
tableText += "<tr>";
var c = 0;
row.Cells.ForEach(cell =>
{
c++;
tableText += "<td>";
tableText += cell.Text + "</td>";
});
tableText += "</tr>";
});
});
});
tableText += "</table>";
objJsonResponse.fieldsText = fieldsText;
objJsonResponse.fieldsText2 = fieldsText2;
objJsonResponse.lineText = lineText;
objJsonResponse.tableText = tableText;
objJsonResponse.wordText = wordText;
objJsonResponse.responsecode = 1;
return Json(objJsonResponse);
}
catch (Exception ex)
{
this.objJsonResponse.responsecode = -1;
this.objJsonResponse.error = "failed";
return Json(this.objJsonResponse);
}
}
static TextractTextAnalysisService BuildTextractClient()
{
var builder = new ConfigurationBuilder()
.SetBasePath(Environment.CurrentDirectory)
.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
.AddEnvironmentVariables()
.Build();
var awsOptions = builder.GetAWSOptions();
return new TextractTextAnalysisService(awsOptions.CreateServiceClient<IAmazonTextract>());
}
static TextractDocument PrepareDocument(TextractTextAnalysisService textractAnalysisClient, string type, string FormFile)
{
var task = textractAnalysisClient.StartDocumentAnalysis(BucketName, FormFile, type);
var jobId = task.Result;
textractAnalysisClient.WaitForJobCompletion(jobId);
var results = textractAnalysisClient.GetJobResults(jobId);
return new TextractDocument(results);
}
Following this Streaming CloudWatch Logs Data to Amazon Elasticsearch Service, it's working fine to stream cloud watch log to ELK having one log group and one Lambda function.
But now I want to change target lambda function for my other logs group, but I am not able to do that as there is no option in AWS console.
Any Help will be appreciated.
Thanks
I was streaming to ELK using the AWS console option which is Start Streaming to Amazon Elasticsearch Service, But I failed to change or choose different lambda function as there is only lambda function can be selected for any log group using this option.
So, I create new lambda function and set stream target to AWS lambda function,
Here is the code that all you need, Node version for lambda function is 4.* as it was some issue with the new version but the pulse point is it does not require any extra NPM packages.
// v1.1.2
var https = require('https');
var zlib = require('zlib');
var crypto = require('crypto');
var endpoint = 'search-my-test.us-west-2.es.amazonaws.com';
exports.handler = function(input, context) {
// decode input from base64
var zippedInput = new Buffer(input.awslogs.data, 'base64');
// decompress the input
zlib.gunzip(zippedInput, function(error, buffer) {
if (error) { context.fail(error); return; }
// parse the input from JSON
var awslogsData = JSON.parse(buffer.toString('utf8'));
// transform the input to Elasticsearch documents
var elasticsearchBulkData = transform(awslogsData);
// skip control messages
if (!elasticsearchBulkData) {
console.log('Received a control message');
context.succeed('Control message handled successfully');
return;
}
// post documents to the Amazon Elasticsearch Service
post(elasticsearchBulkData, function(error, success, statusCode, failedItems) {
console.log('Response: ' + JSON.stringify({
"statusCode": statusCode
}));
if (error) {
console.log('Error: ' + JSON.stringify(error, null, 2));
if (failedItems && failedItems.length > 0) {
console.log("Failed Items: " +
JSON.stringify(failedItems, null, 2));
}
context.fail(JSON.stringify(error));
} else {
console.log('Success: ' + JSON.stringify(success));
context.succeed('Success');
}
});
});
};
function transform(payload) {
if (payload.messageType === 'CONTROL_MESSAGE') {
return null;
}
var bulkRequestBody = '';
payload.logEvents.forEach(function(logEvent) {
var timestamp = new Date(1 * logEvent.timestamp);
// index name format: cwl-YYYY.MM.DD
var indexName = [
'prod-background-wo-' + timestamp.getUTCFullYear(), // year
('0' + (timestamp.getUTCMonth() + 1)).slice(-2), // month
('0' + timestamp.getUTCDate()).slice(-2) // day
].join('.');
var source = buildSource(logEvent.message, logEvent.extractedFields);
source['response_time'] = source["end"] - source["start"];
source['#id'] = logEvent.id;
source['#timestamp'] = new Date(1 * logEvent.timestamp).toISOString();
source['#message'] = logEvent.message;
source['#owner'] = payload.owner;
source['#log_group'] = payload.logGroup;
source['#log_stream'] = payload.logStream;
var action = { "index": {} };
action.index._index = indexName;
action.index._type = payload.logGroup;
action.index._id = logEvent.id;
bulkRequestBody += [
JSON.stringify(action),
JSON.stringify(source),
].join('\n') + '\n';
});
return bulkRequestBody;
}
function buildSource(message, extractedFields) {
if (extractedFields) {
var source = {};
for (var key in extractedFields) {
if (extractedFields.hasOwnProperty(key) && extractedFields[key]) {
var value = extractedFields[key];
if (isNumeric(value)) {
source[key] = 1 * value;
continue;
}
jsonSubString = extractJson(value);
if (jsonSubString !== null) {
source['$' + key] = JSON.parse(jsonSubString);
}
source[key] = value;
}
}
return source;
}
jsonSubString = extractJson(message);
if (jsonSubString !== null) {
return JSON.parse(jsonSubString);
}
return {};
}
function extractJson(message) {
var jsonStart = message.indexOf('{');
if (jsonStart < 0) return null;
var jsonSubString = message.substring(jsonStart);
return isValidJson(jsonSubString) ? jsonSubString : null;
}
function isValidJson(message) {
try {
JSON.parse(message);
} catch (e) { return false; }
return true;
}
function isNumeric(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
function post(body, callback) {
var requestParams = buildRequest(endpoint, body);
var request = https.request(requestParams, function(response) {
var responseBody = '';
response.on('data', function(chunk) {
responseBody += chunk;
});
response.on('end', function() {
var info = JSON.parse(responseBody);
var failedItems;
var success;
if (response.statusCode >= 200 && response.statusCode < 299) {
failedItems = info.items.filter(function(x) {
return x.index.status >= 300;
});
success = {
"attemptedItems": info.items.length,
"successfulItems": info.items.length - failedItems.length,
"failedItems": failedItems.length
};
}
var error = response.statusCode !== 200 || info.errors === true ? {
"statusCode": response.statusCode,
"responseBody": responseBody
} : null;
callback(error, success, response.statusCode, failedItems);
});
}).on('error', function(e) {
callback(e);
});
request.end(requestParams.body);
}
function buildRequest(endpoint, body) {
var endpointParts = endpoint.match(/^([^\.]+)\.?([^\.]*)\.?([^\.]*)\.amazonaws\.com$/);
var region = endpointParts[2];
var service = endpointParts[3];
var datetime = (new Date()).toISOString().replace(/[:\-]|\.\d{3}/g, '');
var date = datetime.substr(0, 8);
var kDate = hmac('AWS4' + process.env.AWS_SECRET_ACCESS_KEY, date);
var kRegion = hmac(kDate, region);
var kService = hmac(kRegion, service);
var kSigning = hmac(kService, 'aws4_request');
var request = {
host: endpoint,
method: 'POST',
path: '/_bulk',
body: body,
headers: {
'Content-Type': 'application/json',
'Host': endpoint,
'Content-Length': Buffer.byteLength(body),
'X-Amz-Security-Token': process.env.AWS_SESSION_TOKEN,
'X-Amz-Date': datetime
}
};
var canonicalHeaders = Object.keys(request.headers)
.sort(function(a, b) { return a.toLowerCase() < b.toLowerCase() ? -1 : 1; })
.map(function(k) { return k.toLowerCase() + ':' + request.headers[k]; })
.join('\n');
var signedHeaders = Object.keys(request.headers)
.map(function(k) { return k.toLowerCase(); })
.sort()
.join(';');
var canonicalString = [
request.method,
request.path, '',
canonicalHeaders, '',
signedHeaders,
hash(request.body, 'hex'),
].join('\n');
var credentialString = [ date, region, service, 'aws4_request' ].join('/');
var stringToSign = [
'AWS4-HMAC-SHA256',
datetime,
credentialString,
hash(canonicalString, 'hex')
] .join('\n');
request.headers.Authorization = [
'AWS4-HMAC-SHA256 Credential=' + process.env.AWS_ACCESS_KEY_ID + '/' + credentialString,
'SignedHeaders=' + signedHeaders,
'Signature=' + hmac(kSigning, stringToSign, 'hex')
].join(', ');
return request;
}
function hmac(key, str, encoding) {
return crypto.createHmac('sha256', key).update(str, 'utf8').digest(encoding);
}
function hash(str, encoding) {
return crypto.createHash('sha256').update(str, 'utf8').digest(encoding);
}
I am able to list all the files that i have for my one drive account but need to know how I could download them to my Android or Windows mobile phone.
I tried using the weburl that I receive from the item but looks like it will need the token.
Current JavaScript code :
var AuthenticationContext = new O365Auth.Context();
var discoveryContext = new O365Discovery.Context(); // new DiscoveryServices.Context(new AuthenticationContext(authUrl), appId, redirectUrl);
discoveryContext.services(AuthenticationContext.getAccessTokenFn('Microsoft.SharePoint')).then(function (capabilities) {
capabilities.forEach(function (v) {
if (v.capability === 'MyFiles') {
var msg = "";
var sharePointnew = new Microsoft.CoreServices.SharePointClient(v.resourceId + '/_api/v1.0/me/',
AuthenticationContext.getAccessTokenFn(v.resourceId));
Microsoft.FileServices.FileFetcher
var elementInfo = document.getElementById('popup_body');
document.getElementById('popup_header').innerHTML = "My One Drive Files";
/* This should open a popup */
document.getElementById('popup_div').style.display = "block";
document.getElementById('popup_overlay').style.display = "block";
elementInfo.innerHTML += 'One Drive Files:';
var fileName = 'demo.txt';
var store = cordova.file.dataDirectory;
var fileTransfer = new FileTransfer();
console.log("About to start transfer");
sharePointnew.files.getItems().fetch().then(function (result) {
msg = '';
result.currentPage.forEach(function (item) {
elementInfo.innerHTML += "<br />" + item.name + "<br />";
fileTransfer.download(item.webUrl, store + fileName,
function (entry) {
console.log("Success!");
},
function (err) {
console.log("Error");
console.dir(err);
});
msg += item._odataType + ' "' + item.name + '"\n';
var s = "";
});
console.log('All file system items: \n' + msg);
}, function (error) {
console.error(error);
});
}
});
I've been receiving an error from Amazon web service - InvalidParameterValue
Either Action or Operation query parameter must be present.
I believe it is most likely due to the signature being incorrect as the XML document and Header matches that of a test I did in their scratchpad.
Does anything stand out as being incorrect?
Thanks,
Clare
private static string ConstructCanonicalQueryString(SortedDictionary<string, string> sortedParameters)
{
var builder = new StringBuilder();
if (sortedParameters.Count == 0)
{
builder.Append(string.Empty);
return builder.ToString();
}
foreach (var kvp in sortedParameters)
{
builder.Append(PercentEncodeRfc3986(kvp.Key));
builder.Append("=");
builder.Append(PercentEncodeRfc3986(kvp.Value));
builder.Append("&");
}
var canonicalString = builder.ToString();
return canonicalString.Substring(0, canonicalString.Length - 1);
}
private static string PercentEncodeRfc3986(string value)
{
value = HttpUtility.UrlEncode(string.IsNullOrEmpty(value) ? string.Empty : value, Encoding.UTF8);
if (string.IsNullOrEmpty(value))
{
return string.Empty;
}
value = value.Replace("'", "%27")
.Replace("(", "%28")
.Replace(")", "%29")
.Replace("*", "%2A")
.Replace("!", "%21")
.Replace("%7e", "~")
.Replace("+", "%20")
.Replace(":", "%3A");
var sbuilder = new StringBuilder(value);
for (var i = 0; i < sbuilder.Length; i++)
{
if (sbuilder[i] != '%')
{
continue;
}
if (!char.IsLetter(sbuilder[i + 1]) && !char.IsLetter(sbuilder[i + 2]))
{
continue;
}
sbuilder[i + 1] = char.ToUpper(sbuilder[i + 1]);
sbuilder[i + 2] = char.ToUpper(sbuilder[i + 2]);
}
return sbuilder.ToString();
}
public string SignRequest(Dictionary<string, string> parametersUrl, Dictionary<string, string>
parametersSignture)
{
var secret = Encoding.UTF8.GetBytes(parametersSignture["Secret"]);
var signer = new HMACSHA256(secret);
var pc = new ParamComparer();
var sortedParameters = new SortedDictionary<string, string>(parametersUrl, pc);
var orderedParameters = ConstructCanonicalQueryString(sortedParameters);
var builder = new StringBuilder();
builder.Append(parametersSignture["RequestMethod"])
.Append(" \n")
.Append(parametersSignture["EndPoint"])
.Append("\n")
.Append("/\n")
.Append(orderedParameters);
var stringToSign = builder.ToString();
var toSign = Encoding.UTF8.GetBytes(stringToSign);
var sigBytes = signer.ComputeHash(toSign);
var signature = Convert.ToBase64String(sigBytes);
return signature.Replace("=", "%3D").Replace("/", "%2F").Replace("+", "%2B");
}
public class ParamComparer : IComparer<string>
{
public int Compare(string p1, string p2)
{
return string.CompareOrdinal(p1, p2);
}
}
The issue was that the Action wasn't included correctly into the Request
I am trying to create an SharePoint hosted app that contains an app part, this app part should show what the user is following. I have used working code as an example in the app, but I don't get it to work inside of the App Part.
I get this within the app part: "Uncaught ReferenceError: _spPageContextInfo is not defined".
If anyone has a real example how to make this work inside an app part, and using social.following, please help!
This is the code in App.js:
var headline = new Array("People", "Documents", "Sites");
var ActorImg = new Array("/_layouts/15/images/person.gif", "/_layouts/15/images/lg_ICGEN.gif", "/_layouts/15/images/siteicon_16x16.png");
function doJSON(RESTuri, success, fail) {
var restUrl = _spPageContextInfo.webAbsoluteUrl + RESTuri;
var executor = new SP.RequestExecutor(_spPageContextInfo.webAbsoluteUrl);
executor.executeAsync(
{
url: restUrl,
method: "GET",
headers: { "Accept": "application/json; odata=verbose" },
success: success,
error: fail
}
);
}
function renderSuccess(data) {
var jsonObject = JSON.parse(data.body);
var n = document.getElementById('jsonout');
n.innerHTML = data.body;
var results = jsonObject.d.Followed.results;
var str = '';
var old = -1;
var img = null;
for (j = 0; j < 5; j++) {
str += "<div class=\"container container" + j + "\">";
for (i = 0; i < results.length; i++) {
if (j != results[i].ActorType) continue;
if (old != results[i].ActorType) str += "<h1 class=\"headline" + results[i].ActorType + "\">" + headline[results[i].ActorType] + "</h1>";
img = results[i].ImageUri;
if (img == null) img = ActorImg[results[i].ActorType];
switch (results[i].ActorType) {
case 0:
// Use case: depending on ActorType if you want to use indiviual markup for every item-type
str += "<a title=\"" + results[i].Name + "\" class=\"link" + results[i].ActorType + "\" style=\"background-image:url(" + img + ")\" href=\"" + results[i].Uri + "\">" + results[i].Name + "</a>";
break;
default:
str += "<a title=\"" + results[i].Name + "\" class=\"link" + results[i].ActorType + "\" style=\"background-image:url(" + img + ")\" href=\"" + results[i].Uri + "\">" + results[i].Name + "</a>";
break;
}
old = results[i].ActorType;
}
str += "</div>";
}
n = document.getElementById('htmlout');
n.innerHTML = str + "<h1></h1>";
}
function renderFail(data, errorCode, errorMessage) {
n = document.getElementById('htmlout');
n.innerHTML = errorMessage;
}
$(document).ready(function () {
doJSON("/_api/social.following/my/followed%28types=15%29", renderSuccess, renderFail);
});
This is the div that's being used in both the app page(default.aspx) and the app part page(AppPart.aspx):
<div class="classic">
<pre id="jsonout" style="display: none;"></pre>
<div id="htmlout"></div>
</div>
Sorry guys. I solved it by just adding a reference to the sp.requestexecutor.js file, and parsed the host- and app-url from the querystring