Great-Expectations: How to connect to data stored in S3 - great-expectations

s3fs==2022.8.2
great-expectations==0.15.26
It was not easy to find clear documentation and concrete examples for Great-Expectations. After several tries I managed to connect to the S3 bucket:
import great_expectations as ge
from great_expectations.core.batch import BatchRequest
# Load the project's data context from the local ./great_expectations directory.
context = ge.data_context.DataContext(context_root_dir="./great_expectations")
# List the data asset names that each data connector of 's3_datasource' exposes.
context.get_available_data_asset_names(datasource_names='s3_datasource')
* * * * * **
** output **
* * * * * **
{
"s3_datasource":{
"default_runtime_data_connector_name":[],
"default_inferred_data_connector_name":[
"data/yellow_tripdata_sample_2019-01",
"data/yellow_tripdata_sample_2019-02"]
}
}
# Build a BatchRequest that names one data asset discovered by the inferred
# S3 data connector.
batch_request_parameters = {
'datasource_name': 's3_datasource',
'data_connector_name': 'default_inferred_data_connector_name',
'data_asset_name': 'data/yellow_tripdata_sample_2019-01',
# NOTE(review): presumably caps the batch at 1000 rows - confirm against the BatchRequest docs.
'limit': 1000
}
batch_request=BatchRequest(**batch_request_parameters)
# Create the 'taxi_demo' expectation suite, replacing any existing suite of
# that name (overwrite_existing=True).
context.create_expectation_suite(
expectation_suite_name='taxi_demo', overwrite_existing=True
)
* * * * * *
# output **
* * * * * *
{
"data_asset_type": null,
"meta": {
"great_expectations_version": "0.15.26"
},
"expectations": [],
"ge_cloud_id": null,
"expectation_suite_name": "taxi_demo"
}
# Ask the context for a validator bound to the batch request and the
# 'taxi_demo' suite; per the output shown below, this is the call that raises
# NoCredentialsError.
validator = context.get_validator(
batch_request=batch_request, expectation_suite_name='taxi_demo')
* * * * * **
** output **
* * * * * **
# NoCredentialsError: Unable to locate credentials
So far everything works; the problem appears when I call get_validator, which raises NoCredentialsError: Unable to locate credentials.
great_expectations.yaml
# Datasource section of great_expectations.yaml, with the nesting restored.
datasources:
  s3_datasource:
    module_name: great_expectations.datasource
    # NOTE(review): no boto3_options are configured on the execution engine.
    # The options further down belong to the inferred data connector only, so
    # the engine presumably falls back to the default AWS credential lookup
    # when it reads the file - confirm against the PandasExecutionEngine docs.
    execution_engine:
      class_name: PandasExecutionEngine
      module_name: great_expectations.execution_engine
    class_name: Datasource
    data_connectors:
      default_runtime_data_connector_name:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        batch_identifiers:
          - default_identifier_name
      default_inferred_data_connector_name:
        prefix: data/
        module_name: great_expectations.datasource.data_connector
        # Every *.csv key under the prefix becomes a data asset; the captured
        # group is used as the data asset name.
        default_regex:
          pattern: (.*)\.csv
          group_names:
            - data_asset_name
        # Endpoint and credentials of the local S3-compatible store.
        boto3_options:
          endpoint_url: http://localhost:9000
          aws_access_key_id: minio
          aws_secret_access_key: minio
        bucket: ge-bucket
        class_name: InferredAssetS3DataConnector
Note
When I run great_expectations suite new from the command line, I get roughly the same problem:
EndpointConnectionError: Could not connect to the endpoint URL: "https://ge-bucket.s3.us-west-4.amazonaws.com/data/yellow_tripdata_sample_2019-01.csv"
I don't understand where GE gets the S3 credentials from.
After a lot of debugging, I noticed that GE looks for the S3 credentials in .aws/config. I really don't understand why GE reads them from .aws/config instead of from the great_expectations.yaml configuration file shown above.

Related

Index migration failed

I'm working with Docker and version 1.1.0 of Open Distro for Elasticsearch. I noticed that if I put the following in kibana.yml
`opendistro_security.multitenancy.enabled: true`
i get this error:
index migration failed for opendistro 7.1.1
i checked migrate_tenants.js:
/*
* Copyright 2015-2018 _floragunn_ GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/*
* Portions Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
import _ from 'lodash';
import Boom from 'boom';
import elasticsearch from 'elasticsearch';
import wrapElasticsearchError from './../backend/errors/wrap_elasticsearch_error';
import { KibanaMigrator} from "../../../../src/legacy/server/saved_objects/migrations/kibana";
/**
 * Runs the saved-objects migration for every tenant index reported by the
 * security backend.
 *
 * Tenant indices are processed one at a time, in the key order of the tenant
 * info object. A failure is logged with the ['error', 'migration'] tags and
 * then rethrown to the caller.
 *
 * @param {object} server - Server object exposing `plugins.opendistro_security` and `log`.
 * @returns {Promise<void>} Resolves once every tenant index has been migrated.
 * @throws Whatever the backend lookup or an individual index migration throws.
 */
async function migrateTenants (server) {
  const securityBackend = server.plugins.opendistro_security.getSecurityBackend();
  try {
    const tenants = await securityBackend.getTenantInfoWithInternalUser();
    if (!tenants) {
      // Nothing reported by the backend: there is nothing to migrate.
      return;
    }
    for (const tenantIndexName of Object.keys(tenants)) {
      // Deliberately sequential: each migration finishes before the next starts.
      await migrateTenantIndex(tenantIndexName, server);
    }
  } catch (err) {
    server.log(['error', 'migration'], err);
    throw err;
  }
}
/**
 * Migrates a single tenant index by running a KibanaMigrator against a mocked
 * kbnServer whose `kibana.index` setting is the tenant index name.
 *
 * @param {string} tenantIndexName - Name of the tenant index to migrate.
 * @param {object} server - Server object; `server.kibanaMigrator.kbnServer` is used as the template.
 * @returns {Promise<void>} Resolves when the migrator reports completion.
 */
async function migrateTenantIndex(tenantIndexName, server) {
  const mocked = mockKbnServer(server.kibanaMigrator.kbnServer, server, tenantIndexName);
  const tenantMigrator = new KibanaMigrator({ kbnServer: mocked.kbnServer });
  await tenantMigrator.awaitMigration();
}
/**
 * Migrates one tenant index on request and reports the outcome as a response
 * payload instead of throwing.
 *
 * @param {string} tenantIndexName - Name of the tenant index to migrate.
 * @param {*} force - When loosely equal to `true`, migrate even if the index is
 *   not listed in the backend's tenant info.
 * @param {object} server - Server object exposing `plugins.opendistro_security` and `log`.
 * @returns {Promise<object>} `{statusCode: 200, message}` on success; a Boom
 *   bad-request error when the index is unknown and not forced; a Boom
 *   bad-implementation error when no tenant info is available; otherwise the
 *   caught error passed through `wrapElasticsearchError`.
 */
async function migrateTenant(tenantIndexName, force, server) {
  const backend = server.plugins.opendistro_security.getSecurityBackend();
  try {
    const tenantInfo = await backend.getTenantInfoWithInternalUser();
    if (!tenantInfo) {
      return Boom.badImplementation("Could not fetch tenant info.");
    }

    // The loose comparison is kept on purpose: existing callers may pass
    // values such as 1 or "1", which `== true` accepts and `=== true` would not.
    const isForced = (force == true);
    if (!tenantInfo[tenantIndexName] && !isForced) {
      return Boom.badRequest(`Index ${tenantIndexName} not found or not a tenant index. Force migration: ${force}`);
    }

    await migrateTenantIndex(tenantIndexName, server);
    return { statusCode: 200, message: `${tenantIndexName} migrated.` };
  } catch (error) {
    // Failures are logged and converted into a response rather than rethrown.
    server.log(['error', 'migration'], error);
    return wrapElasticsearchError(error);
  }
}
/**
 * Builds a minimal stand-in for kbnServer that reports `indexname` as the
 * `kibana.index` setting, so a migrator can be pointed at that index.
 *
 * Only the configuration keys listed below are answered; any other key makes
 * `get` throw. `version`, `ready`, `uiExports` and `server.plugins` are copied
 * from the original kbnServer, and `server.log` forwards to `server.log` of the
 * given server.
 *
 * @param {object} originalKbnServer - The real kbnServer used as the template.
 * @param {object} server - Server object whose `log` receives forwarded log calls.
 * @param {string} indexname - Index name to return for `kibana.index`.
 * @returns {{kbnServer: object}} The mocked kbnServer wrapped in an object.
 */
function mockKbnServer(originalKbnServer, server, indexname) {
  // Settings answered by delegating to the original server's configuration.
  const delegatedKeys = [
    'migrations.batchSize',
    'migrations.pollInterval',
    'migrations.scrollDuration',
  ];

  const readSetting = (name) => {
    if (name === 'kibana.index') {
      return indexname;
    }
    if (delegatedKeys.includes(name)) {
      // Looked up lazily on every call, never cached.
      return originalKbnServer.server.config().get(name);
    }
    throw new Error(`Unexpected config ${name}`);
  };

  const mockedServer = {
    config: () => ({ get: readSetting }),
    log: function (tags, payload, time, internalFlag) {
      server.log(tags, payload, time, internalFlag);
    },
    plugins: originalKbnServer.server.plugins,
  };

  const kbnServer = {
    version: originalKbnServer.version,
    ready: originalKbnServer.ready,
    uiExports: originalKbnServer.uiExports,
    server: mockedServer,
  };

  return { kbnServer };
}
// Public API of this module: the migration of all tenant indices and the
// migration of a single named tenant index.
module.exports.migrateTenants=migrateTenants;
module.exports.migrateTenant=migrateTenant;
kibana returns this error in logs:
kibana | {"type":"log","@timestamp":"2019-08-06T09:36:33Z","tags":["status","plugin:opendistro_security@7.1.1","info"],"pid":1,"state":"yellow","message":"Status changed from yellow to yellow - Tenant indices migration failed","prevState":"yellow","prevMsg":"Setting up index template."}
kibana | {"type":"log","@timestamp":"2019-08-06T09:36:34Z","tags":["info","migrations"],"pid":1,"message":"Creating index .kibana_1."}
kibana | {"type":"log","@timestamp":"2019-08-06T09:36:34Z","tags":["info","migrations"],"pid":1,"message":"Pointing alias .kibana to .kibana_1."}
If I disable multitenancy by setting opendistro_security.multitenancy.enabled: false, then when I try to log in with users that are not admin I get this error:
`elasticsearch | [2019-08-06T09:24:30,239][WARN ][c.a.o.s.c.PrivilegesInterceptorImpl] [a5790f362956] Tenant global_tenant is not allowed for user cn=user,ou=people,dc=example,dc=com`
How can i fix this?

Query many to many relation using slim framework and doctrine

I am using Slim Framework with Doctrine. I have three Tables
id | username | password | name
--------------------------------
1 | Lorel | ******** | Lorel
id | permission | description
-------------------------------
2 | READ_ACCESS | Lorel Ipsum
id | user_id | permission_id
-----------------------------
X | 1 | 2
Is there any way, using Doctrine, to find out whether, say, user '1' has permission '2'?
I'm assuming you're looking to do Authorization. I've got a setup which does that, in Zend Framework 3 with Doctrine 2. The relations are the same, just not sure how to translate it to Slim Framework. But here goes nothing ;-)
User Entity has a relation to Roles:
/**
* #var Collection|ArrayCollection|Role[]
* #ORM\ManyToMany(targetEntity="User\Entity\Role", inversedBy="users", fetch="LAZY")
* #ORM\JoinTable(
* name="user_user_roles",
* joinColumns={#ORM\JoinColumn(name="user_id", referencedColumnName="id")},
* inverseJoinColumns={#ORM\JoinColumn(name="role_id", referencedColumnName="id")}
* )
*
*/
protected $roles;
Role Entity has Routes and the inverse side to User
/**
* #var Collection|ArrayCollection|Route[]
* #ORM\ManyToMany(targetEntity="User\Entity\Route", inversedBy="roles", fetch="EAGER")
* #ORM\JoinTable(
* name="user_role_routes",
* joinColumns={#ORM\JoinColumn(name="role_id", referencedColumnName="id")},
* inverseJoinColumns={#ORM\JoinColumn(name="route_id", referencedColumnName="id")}
* )
*/
protected $routes;
/**
* #var Collection|ArrayCollection|User[]
* #ORM\ManyToMany(targetEntity="User\Entity\User", mappedBy="roles", fetch="LAZY")
*/
protected $users;
Route Entity just has the inverse to Role
/**
* #var Collection|ArrayCollection|Role[]
* #ORM\ManyToMany(targetEntity="User\Entity\Role", mappedBy="routes", fetch="LAZY")
*/
protected $roles;
Notice that it concerns 2 relationships:
User <-> Role
Role <-> Route
Make sure to initialize each Collection in the __construct, like so:
/**
 * Initializes the entity's collection-valued association so it is a usable,
 * empty collection from the moment the entity is created.
 */
// Initialize only those within the Entity
public function __construct()
{
$this->users = new ArrayCollection();
}
Generate your getter method (setter not required!). Create Adder/Remover methods instead of a setter, like so (this is within Route Entity):
/**
 * Links each given Role to this Route and keeps both sides of the
 * association in sync. Roles that are already linked are left alone.
 *
 * @param Collection|ArrayCollection|Role[] $roles Roles to attach.
 *
 * @return Route This Route, for chaining.
 */
public function addRoles(Collection $roles) : Route
{
    foreach ($roles as $candidate) {
        // This side: the Route's own list of roles.
        $ownRoles = $this->getRoles();
        if ( ! $ownRoles->contains($candidate)) {
            $ownRoles->add($candidate);
        }

        // Other side: the Role's list of routes.
        $routesOfRole = $candidate->getRoutes();
        if ( ! $routesOfRole->contains($this)) {
            $routesOfRole->add($this);
        }
    }

    return $this;
}
/**
 * Unlinks each given Role from this Route, on both sides of the association.
 * Roles that are not linked are ignored.
 *
 * @param Collection|ArrayCollection|Role[] $roles Roles to detach.
 *
 * @return Route This Route, for chaining.
 */
public function removeRoles(Collection $roles) : Route
{
    foreach ($roles as $role) {
        // Collection::remove() takes a key/index, not the element itself, so
        // passing the entity to it does not unlink it. removeElement() is the
        // call that removes by value.
        if ($this->getRoles()->contains($role)) {
            $this->getRoles()->removeElement($role);
        }
        if ($role->getRoutes()->contains($this)) {
            $role->getRoutes()->removeElement($this);
        }
    }

    return $this;
}
So, there you go, that's the setup. I would advise you to include the Gedmo Doctrine extensions and apply @Gedmo\Tree(type="nested") to your Role Entity. It makes managing (nested/inherited) roles easy. See "Managing Hierarchical Data in MySQL" (and the Gedmo Tree docs).
Next, to check whether a User has access to a certain Route, you need some form of AuthenticationService. Because I don't know Slim, make sure you fill this in with something from that framework; the logic is the same though. I use a service, invoked on route access, that checks whether the User is known (authenticated) and, if not, assigns a Guest Role; it then checks whether the Route being accessed is known to any of the assigned roles.
/**
 * Decides whether the current visitor may access the given route.
 *
 * An authenticated user is checked against their own roles; anyone else is
 * checked against the single role named by Role::NO_ACCOUNT_ROLE. Access is
 * granted as soon as one role passes checkRoutes().
 *
 * @param string $route Route to check.
 *
 * @return bool True when at least one applicable role allows the route.
 * @throws Exception
 */
public function isGranted(string $route) : bool
{
    $authentication = $this->getAuthenticationService();

    if ($authentication->hasIdentity()) {
        /** @var User $user */
        $user = $authentication->getIdentity();
        /** @var Collection|Role[] $roles */
        $roles = $user->getRoles();
    } else {
        // No identity: fall back to the role reserved for visitors without an account.
        $guestRole = $this->getObjectManager()
            ->getRepository(Role::class)
            ->findOneBy(['name' => Role::NO_ACCOUNT_ROLE]);
        $roles = new ArrayCollection([$guestRole]);
    }

    foreach ($roles as $role) {
        if ($this->checkRoutes($role, $route)) {
            return true;
        }
    }

    return false;
}
So, all of the above should get you more than going I'd say.
GL & HF

Unlink role - BjyAuthorize Doctrine

BjyAuthorize modifies the User entity and provides an addRole() method. This accepts a role object and populates the user_role_linker_table
How is it possible to remove a role once it is added to a user?
The associations are set in User:
/**
* #var \Doctrine\Common\Collections\Collection
* #ORM\ManyToMany(targetEntity="Application\Entity\Role")
* #ORM\JoinTable(name="user_role_linker",
* joinColumns={#ORM\JoinColumn(name="user_id", referencedColumnName="id")},
* inverseJoinColumns={#ORM\JoinColumn(name="role_id", referencedColumnName="id")}
* )
*/
protected $roles;
After hours of struggle I came up with the following solution:
// Resolve the User through the UserDetails record identified in the request data.
$userDetails = $em->getRepository('Application\Entity\UserDetails')->findOneBy(['id' => $data['user-details-id']]);
$user = $userDetails->getUser();
// Load the Role entity that should be unlinked from the user.
$roleRepo = $em->getRepository('Application\Entity\Role');
$roleResult = $roleRepo->findOneBy(['id' => $id]); //$id is the role to delete
$user->removeRole($roleResult);
// NOTE(review): $user was obtained via this EntityManager, so it is presumably
// already managed and merge() may be unnecessary - confirm before relying on it.
$em->merge($user);
// Write the change to the database.
$em->flush();
In the User entity I added the method:
/**
 * Removes the given role from this user's role collection.
 *
 * @param mixed $role Role to unlink.
 *
 * @return mixed Whatever the collection's removeElement() call returns.
 */
public function removeRole($role)
{
return $this->roles->removeElement($role);
}
Not sure if this is the approach that the authors of BjyAuthorize intended but it works for me...
Looks good to me. Just want to add that you should first check if the roles contains that role you want to delete.
Such as this:
/**
 * Removes the given role from this user's role collection, doing nothing when
 * the role is not part of the collection.
 *
 * @param mixed $role Role to unlink.
 *
 * @return void
 */
public function removeRole($role)
{
    if ($this->roles->contains($role)) {
        $this->roles->removeElement($role);
    }
}

Error Creating Many to Many Relationship Using Doctrine 2

I'm trying to generate the schema for my database using Doctrine 2's ZF2 module but with the following definition:
/**
* #ORM\ManyToMany(targetEntity="Tag")
* #ORM\JoinTable(name="Manytomany_Issuetag",
* #ORM\joinColumns={#ORM\JoinColumn(name="IssueId", referencedColumnName="id")},
* #ORM\inverseJoinColumns={#ORM\JoinColumn(name="TagId", referencedColumnName="id")}
* )
*/
protected $tags;
When I run vendor/bin/doctrine-module orm:schema-tool:update --dump-sql I receive the following error:
Annotation @ORM\joinColumns is not allowed to be declared on property Application\Entity\Issue::$tags. You may only use this annotation on these code elements: PROPERTY
Edit: As requested here is the working annotation
/**
* #ORM\ManyToMany(targetEntity="Tag")
* #ORM\JoinTable(name="Manytomany_Issuetag",
* joinColumns={#ORM\JoinColumn(name="IssueId", referencedColumnName="id")},
* inverseJoinColumns={#ORM\JoinColumn(name="TagId", referencedColumnName="id")}
* )
*/
protected $tags;
I think you need to drop a couple of the @ORM\ declarations; it should look like this (obviously without my comments):
/**
* #ORM\ManyToMany(targetEntity="Tag")
* #ORM\JoinTable(name="Manytomany_Issuetag",
* joinColumns={#ORM\JoinColumn(name="IssueId", referencedColumnName="id")},
* ^ drop the #ORM\
* inverseJoinColumns={#ORM\JoinColumn(name="TagId", referencedColumnName="id")}
* ^ drop the #ORM\
* )
*/
protected $tags;

Doctrine2 Self Reference Query -- Doesn't work

I am trying to create the following scenario with the Doctrine 2 query builder:
SELECT
p . *
FROM
_tree p
LEFT JOIN
_tree c ON p.id = c.parent_id
AND (c.lft >= p.lft AND c.rgt <= p.rgt)
WHERE
p.id = 3
I have set following relationship self generated by Doctrine2
// Self-referencing entity: each Tree row points at its parent Tree row through
// the parent_id column. Only the parent side of the relation is mapped here.
class Tree {
/**
* #var \Tree
*
* #ORM\ManyToOne(targetEntity="Tree")
* #ORM\JoinColumns({
* #ORM\JoinColumn(name="parent_id", referencedColumnName="id")
* })
*/
private $parent;
// other code
}
here is my repo class
_em->createQueryBuilder();
$qb->select('p')
->from('Entity\Tree', 'p')
->leftJoin('p.Entity\Tree','c', 'ON','p.id = c.parent_id');
return $qb->getQuery()->getResult();
}
}
but I couldn't get it done. It throws following errors
[Tue Oct 01 22:30:11 2013] [error] [client 127.0.0.1] PHP Fatal error:
Uncaught exception 'Doctrine\ORM\Query\QueryException' with
message 'SELECT p FROM Entity\Tree p LEFT JOIN p.Entity\Tree c ON
p.id = c.parent_id' in
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/QueryException.php:39\nStack
trace:\n#0
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/Parser.php(429):
Doctrine\ORM\Query\QueryException::dqlError('SELECT p FROM
E...')\n#1
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/Parser.php(925):
Doctrine\ORM\Query\Parser->semanticalError('Class
Entity\Ed...')\n#2
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/Parser.php(1561):
Doctrine\ORM\Query\Parser->JoinAssociationPathExpression()\n#3
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/Parser.php(1506):
Doctrine\ORM\Query\Parser->JoinAssociationDeclaration()\n#4
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/Parser.php(1435):
Doctrine\ORM\Query\Parser->Join()\n#5
/var/www/pcb_frame_work/System/Librari in
/var/www/pcb_frame_work/System/Libraries/Vendors/Doctrine/ORM/Query/QueryException.php
on line 49, referer:
I don't know if I understand you completely, but I think you have to change your ManyToOne relation to:
/**
* #var \Tree
*
* #ORM\ManyToOne(targetEntity="Tree", inversedBy="children")
* #ORM\JoinColumns({
* #ORM\JoinColumn(name="parent_id", referencedColumnName="id")
* })
*/
private $parent;
/**
* #ORM\OneToMany(targetEntity="Tree", mappedBy="parent")
*/
private $children;
That way you can access the children of a class with $Tree->children and its parent with $Tree->parent.
More information about self referencing associations can be found here: http://docs.doctrine-project.org/en/latest/reference/association-mapping.html#one-to-many-self-referencing