I'm using the script from this post to download files from S3.
Everything works fine except that the downloaded files are all 0B.
My script is basically the same as the one above.
I use ~/.aws/credentials to set my keys and set the region in the script.
Here is my script:
package main

import (
    "fmt"
    "os"
    "path/filepath"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/service/s3"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
    Bucket         = "logs"                               // Download from this bucket
    Prefix         = "local-dir/my_log/20150611/20150611" // Using this key prefix
    LocalDirectory = "s3logs"                             // Into this directory
)

func main() {
    manager := s3manager.NewDownloader(nil)
    d := downloader{bucket: Bucket, dir: LocalDirectory, Downloader: manager}
    client := s3.New(&aws.Config{Region: "ap-northeast-1"})
    params := &s3.ListObjectsInput{Bucket: &Bucket, Prefix: &Prefix}
    client.ListObjectsPages(params, d.eachPage)
}

type downloader struct {
    *s3manager.Downloader
    bucket, dir string
}

func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
    for _, obj := range page.Contents {
        d.downloadToFile(*obj.Key)
    }
    return true
}

func (d *downloader) downloadToFile(key string) {
    // Create the directories in the path
    file := filepath.Join(d.dir, key)
    if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
        panic(err)
    }
    fmt.Printf("Downloading " + key)

    // Setup the local file
    fd, err := os.Create(file)
    if err != nil {
        panic(err)
    }
    defer fd.Close()

    // Download the file using the AWS SDK
    fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
    params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
    d.Download(fd, params)
}
The script lists the objects in the bucket just fine, but it does not download any files to my local file system, and it never raises an error. Any idea why?
Thanks!
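One likely culprit, offered as a hedged guess rather than a confirmed diagnosis: the two return values of d.Download are discarded, so any failure is silently swallowed, and s3manager.NewDownloader(nil) likely builds its own client from the default config, which never sees the region passed to s3.New. A minimal sketch of downloadToFile with the error surfaced, reusing the types and imports above:

func (d *downloader) downloadToFile(key string) {
    file := filepath.Join(d.dir, key)
    if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
        panic(err)
    }
    fd, err := os.Create(file)
    if err != nil {
        panic(err)
    }
    defer fd.Close()

    // Capture both return values: n is the byte count written, and err
    // surfaces region/credential problems that otherwise leave a 0B file.
    n, err := d.Download(fd, &s3.GetObjectInput{Bucket: &d.bucket, Key: &key})
    if err != nil {
        fmt.Printf("failed to download s3://%s/%s: %v\n", d.bucket, key, err)
        return
    }
    fmt.Printf("Downloaded s3://%s/%s (%d bytes) to %s\n", d.bucket, key, n, file)
}

If the surfaced error turns out to be a missing-region failure, building the downloader from the already-configured client with s3manager.NewDownloaderWithClient(client), as the last example on this page does, should fix it.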
Related
Thanks in advance :). I'm using the following code to get metadata from an S3 object after listing all the objects in a bucket, but I don't know why it gives the error undefined: s3.HeadObject when running go run listObjects.go -bucket xxxx -prefix xxxx.
I tried two solutions: passing the client created from the config, and passing the one from the context as it appears in this link [1]. But both gave the same error. Can you give me any clue?
package main

import (
    "context"
    "flag"
    "fmt"
    "log"

    "github.com/aws/aws-sdk-go-v2/config"
    "github.com/aws/aws-sdk-go-v2/service/s3"
)

var (
    bucketName      string
    objectPrefix    string
    objectDelimiter string
    maxKeys         int
)

func init() {
    flag.StringVar(&bucketName, "bucket", "", "The `name` of the S3 bucket to list objects from.")
    flag.StringVar(&objectPrefix, "prefix", "", "The optional `object prefix` of the S3 Object keys to list.")
    flag.StringVar(&objectDelimiter, "delimiter", "",
        "The optional `object key delimiter` used by S3 List objects to group object keys.")
    flag.IntVar(&maxKeys, "max-keys", 0,
        "The maximum number of `keys per page` to retrieve at once.")
}

// Lists all objects in a bucket using pagination
func main() {
    flag.Parse()
    if len(bucketName) == 0 {
        flag.PrintDefaults()
        log.Fatalf("invalid parameters, bucket name required")
    }

    // Load the SDK's configuration from environment and shared config, and
    // create the client with this.
    cfg, err := config.LoadDefaultConfig(context.TODO())
    if err != nil {
        log.Fatalf("failed to load SDK configuration, %v", err)
    }
    client := s3.NewFromConfig(cfg)

    // Set the parameters based on the CLI flag inputs.
    params := &s3.ListObjectsV2Input{
        Bucket: &bucketName,
    }
    if len(objectPrefix) != 0 {
        params.Prefix = &objectPrefix
    }
    if len(objectDelimiter) != 0 {
        params.Delimiter = &objectDelimiter
    }

    // Create the Paginator for the ListObjectsV2 operation.
    p := s3.NewListObjectsV2Paginator(client, params, func(o *s3.ListObjectsV2PaginatorOptions) {
        if v := int32(maxKeys); v != 0 {
            o.Limit = v
        }
    })

    // Iterate through the S3 object pages, printing each object returned.
    var i int
    log.Println("Objects:")
    for p.HasMorePages() {
        i++
        // NextPage takes a new context for each page retrieval. This is where
        // you could add timeouts or deadlines.
        page, err := p.NextPage(context.TODO())
        if err != nil {
            log.Fatalf("failed to get page %v, %v", i, err)
        }
        // Log the objects found; HeadObject is called for each one.
        for _, obj := range page.Contents {
            input := &s3.HeadObjectInput{
                Bucket: &bucketName,
                Key:    obj.Key,
            }
            result, err := &s3.HeadObject(client, input) // this is the line that fails to compile
            if err != nil {
                panic(err)
            }
            fmt.Println("Object:", *obj.Key)
        }
    }
}
./listObjects.go:86:20: undefined: s3.HeadObject
Doing the HeadObject call in an auxiliary method works. In aws-sdk-go-v2, HeadObject is a method on *s3.Client rather than a package-level function, which is why s3.HeadObject is undefined:
package main

import (
    "context"
    "flag"
    "fmt"
    "log"

    "github.com/aws/aws-sdk-go-v2/config"
    "github.com/aws/aws-sdk-go-v2/service/s3"
)

var (
    bucketName      string
    objectPrefix    string
    objectDelimiter string
    maxKeys         int
)

func init() {
    flag.StringVar(&bucketName, "bucket", "", "The `name` of the S3 bucket to list objects from.")
    flag.StringVar(&objectPrefix, "prefix", "", "The optional `object prefix` of the S3 Object keys to list.")
    flag.StringVar(&objectDelimiter, "delimiter", "",
        "The optional `object key delimiter` used by S3 List objects to group object keys.")
    flag.IntVar(&maxKeys, "max-keys", 0,
        "The maximum number of `keys per page` to retrieve at once.")
}

// Lists all objects in a bucket using pagination
func main() {
    flag.Parse()
    if len(bucketName) == 0 {
        flag.PrintDefaults()
        log.Fatalf("invalid parameters, bucket name required")
    }

    // Load the SDK's configuration from environment and shared config, and
    // create the client with this.
    cfg, err := config.LoadDefaultConfig(context.TODO())
    if err != nil {
        log.Fatalf("failed to load SDK configuration, %v", err)
    }
    client := s3.NewFromConfig(cfg)

    // Set the parameters based on the CLI flag inputs.
    params := &s3.ListObjectsV2Input{
        Bucket: &bucketName,
    }
    if len(objectPrefix) != 0 {
        params.Prefix = &objectPrefix
    }
    if len(objectDelimiter) != 0 {
        params.Delimiter = &objectDelimiter
    }

    // Create the Paginator for the ListObjectsV2 operation.
    p := s3.NewListObjectsV2Paginator(client, params, func(o *s3.ListObjectsV2PaginatorOptions) {
        if v := int32(maxKeys); v != 0 {
            o.Limit = v
        }
    })

    // Iterate through the S3 object pages, printing each object returned.
    var i int
    log.Println("Objects:")
    for p.HasMorePages() {
        i++
        // NextPage takes a new context for each page retrieval. This is where
        // you could add timeouts or deadlines.
        page, err := p.NextPage(context.TODO())
        if err != nil {
            log.Fatalf("failed to get page %v, %v", i, err)
        }
        // Log the objects found, calling HeadObject on each one.
        for _, obj := range page.Contents {
            fmt.Println("Object:", *obj.Key)
            OpHeadObject(client, bucketName, *obj.Key)
        }
    }
}

func OpHeadObject(sess *s3.Client, bucketName, objectName string) {
    input := &s3.HeadObjectInput{
        Bucket: &bucketName,
        Key:    &objectName,
    }
    resp, err := sess.HeadObject(context.TODO(), input)
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.StorageClass) // or whichever field you want
}
I have the following struct:
type ProcessedRecords struct {
    CustIndividualID string `json:"individual id"`
    Household        string `json:"Household"`
}
I have a slice of many of these structs, and I'm trying to submit them using the PutRecordBatch operation from the AWS SDK:
package main

import (
    "encoding/json"
    "fmt"
    "log"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/firehose"
)

type ProcessedRecords struct {
    CustIndividualID string `json:"individual id"`
    Household        string `json:"Household"`
}

func main() {
    submitToFirehose(recordList) // recordList is built elsewhere
}

func submitToFirehose(records []ProcessedRecords) {
    streamName := "processed-stream"
    sess := session.Must(session.NewSession())

    // Create a Firehose client with additional configuration
    firehoseService := firehose.New(sess, aws.NewConfig().WithRegion("us-east-1"))
    recordsBatchInput := &firehose.PutRecordBatchInput{}
    recordsBatchInput = recordsBatchInput.SetDeliveryStreamName(streamName)

    recordsInput := []*firehose.Record{}
    for i := 0; i < len(records); i++ {
        if len(recordsInput) == 500 {
            recordsBatchInput = recordsBatchInput.SetRecords(recordsInput)
            resp, err := firehoseService.PutRecordBatch(recordsBatchInput)
            if err != nil {
                fmt.Printf("PutRecordBatch err: %v\n", err)
            } else {
                fmt.Printf("FailedPuts: %v\n", *resp.FailedPutCount)
            }
            recordsInput = []*firehose.Record{}
        }

        b, err := json.Marshal(records[i])
        if err != nil {
            log.Printf("Error: %v", err)
        }
        record := &firehose.Record{Data: b}
        recordsInput = append(recordsInput, record)
    }
}
This seems to work, and my Glue backend appears to be set up correctly; however, CustIndividualID is not being written to S3. I suspect Glue is reading the json:"individual id" tag as the column name rather than CustIndividualID.
This is a problem because Glue table columns can't contain spaces. What am I doing wrong?
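If that suspicion is right, the fix is likely just renaming the JSON tags so they contain no spaces and match the Glue columns exactly; a minimal sketch, assuming hypothetical column names individual_id and household:

type ProcessedRecords struct {
    // The JSON tag is what Firehose/Glue sees as the column name, so it must
    // match the Glue column exactly; "individual_id" is a hypothetical name.
    CustIndividualID string `json:"individual_id"`
    Household        string `json:"household"`
}

Separately, note that the loop above only flushes when len(recordsInput) reaches exactly 500, so any records left over after the last full batch are never sent; a final PutRecordBatch call after the loop is needed.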
Goal: to empty an existing S3 bucket using the AWS SDK for Go.
The AWS SDK now has a BatchDeleteIterator that can do the job. An example is provided in the Amazon docs.
package main

import (
    "fmt"
    "os"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

// go run s3_delete_objects BUCKET
func main() {
    if len(os.Args) != 2 {
        exitErrorf("Bucket name required\nUsage: %s BUCKET", os.Args[0])
    }
    bucket := os.Args[1]

    // Initialize a session in us-west-2 that the SDK will use to load
    // credentials from the shared credentials file ~/.aws/credentials.
    sess, _ := session.NewSession(&aws.Config{
        Region: aws.String("us-west-2"),
    })

    // Create S3 service client
    svc := s3.New(sess)

    // Setup BatchDeleteIterator to iterate through a list of objects.
    iter := s3manager.NewDeleteListIterator(svc, &s3.ListObjectsInput{
        Bucket: aws.String(bucket),
    })

    // Traverse iterator deleting each object
    if err := s3manager.NewBatchDeleteWithClient(svc).Delete(aws.BackgroundContext(), iter); err != nil {
        exitErrorf("Unable to delete objects from bucket %q, %v", bucket, err)
    }

    fmt.Printf("Deleted object(s) from bucket: %s", bucket)
}

func exitErrorf(msg string, args ...interface{}) {
    fmt.Fprintf(os.Stderr, msg+"\n", args...)
    os.Exit(1)
}
NOTE: these are code snippets that might require changes on your side to make them run.
You will need to implement the method below:
// EmptyBucket empties the Amazon S3 bucket.
func (s awsS3) EmptyBucket(bucket string) error {
    log.Info("removing objects from S3 bucket : ", bucket)
    params := &s3.ListObjectsInput{
        Bucket: aws.String(bucket),
    }
    for {
        // Request a batch of objects from the S3 bucket
        objects, err := s.Client.ListObjects(params)
        if err != nil {
            return err
        }
        // Check whether the bucket is already empty
        if len(objects.Contents) == 0 {
            log.Info("Bucket is already empty")
            return nil
        }
        log.Info("First object in batch | ", *(objects.Contents[0].Key))

        // Build a slice of pointers to ObjectIdentifier
        objectsToDelete := make([]*s3.ObjectIdentifier, 0, 1000)
        for _, object := range objects.Contents {
            obj := s3.ObjectIdentifier{
                Key: object.Key,
            }
            objectsToDelete = append(objectsToDelete, &obj)
        }

        // Create the payload for the bulk delete
        deleteArray := s3.Delete{Objects: objectsToDelete}
        deleteParams := &s3.DeleteObjectsInput{
            Bucket: aws.String(bucket),
            Delete: &deleteArray,
        }

        // Run the bulk delete job (limit 1000)
        _, err = s.Client.DeleteObjects(deleteParams)
        if err != nil {
            return err
        }

        if *objects.IsTruncated { // if there are more objects in the bucket, IsTruncated = true
            params.Marker = deleteParams.Delete.Objects[len(deleteParams.Delete.Objects)-1].Key
            log.Info("Requesting next batch | ", *(params.Marker))
        } else { // all objects in the bucket have been cleaned up
            break
        }
    }
    log.Info("Emptied S3 bucket : ", bucket)
    return nil
}
UPDATE: The latest version of the AWS SDK for Go has resolved the prior issue I had.
The AWS SDK for Go has an Amazon S3 batching abstraction. Take a look here.
Don't forget that by default ListObjects only returns up to 1000 bucket items. If you might have more than 1000, check the IsTruncated property on the return value. If it is true, use the NextMarker property from the return value to get the next 1000 items; a sketch follows below.
See my example in the Go dev guide: http://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/s3-example-basic-bucket-operations.html#s3-examples-bucket-ops-delete-all-bucket-items
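A minimal pagination sketch along those lines, using the same aws-sdk-go v1 imports as the examples above; one caveat: ListObjects only populates NextMarker when a Delimiter is supplied, so this falls back to the last key of the page otherwise.

// listAllKeys is a hypothetical helper that follows ListObjects
// pagination manually via IsTruncated and Marker.
func listAllKeys(svc *s3.S3, bucket string) ([]string, error) {
    var keys []string
    params := &s3.ListObjectsInput{Bucket: aws.String(bucket)}
    for {
        out, err := svc.ListObjects(params)
        if err != nil {
            return nil, err
        }
        for _, obj := range out.Contents {
            keys = append(keys, *obj.Key)
        }
        if out.IsTruncated == nil || !*out.IsTruncated {
            return keys, nil
        }
        if out.NextMarker != nil {
            params.Marker = out.NextMarker // only set when a Delimiter was used
        } else {
            // Otherwise the last key returned serves as the next Marker.
            params.Marker = out.Contents[len(out.Contents)-1].Key
        }
    }
}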
I'm trying to download objects from S3; the following is my code:
func listFile(bucket, prefix string) error {
    svc := s3.New(sess)
    params := &s3.ListObjectsInput{
        Bucket: aws.String(bucket), // Required
        Prefix: aws.String(prefix),
    }
    return svc.ListObjectsPages(params, func(p *s3.ListObjectsOutput, lastPage bool) bool {
        for _, o := range p.Contents {
            log.Println(*o.Key)
            download(bucket, *o.Key)
            return true
        }
        return lastPage
    })
}

func download(bucket, key string) {
    logDir := conf.Cfg.Section("share").Key("LOG_DIR").MustString(".")
    tmpLogPath := filepath.Join(logDir, bucket, key)

    s3Svc := s3.New(sess)
    downloader := s3manager.NewDownloaderWithClient(s3Svc, func(d *s3manager.Downloader) {
        d.PartSize = 2 * 1024 * 1024 // 2MB per part
    })

    f, err := os.OpenFile(tmpLogPath, os.O_CREATE|os.O_WRONLY, 0644)
    if _, err = downloader.Download(f, &s3.GetObjectInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(key),
    }); err != nil {
        log.Fatal(err)
    }
    f.Close()
}

func main() {
    bucket := "mybucket"
    key := "myprefix"
    listFile(bucket, key)
}
I can get the object list in listFile(), but a 404 is returned when download() is called. Why?
I had the same problem with recent versions of the library. Sometimes the object key will be prefixed with a "./" that the SDK removes by default, making the download fail.
Try adding this to your aws.Config and see if it helps:
config := aws.Config{
    ...
    DisableRestProtocolURICleaning: aws.Bool(true),
}
I submitted an issue.
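For context, a minimal sketch of wiring that flag into a session like the one used in the question (the region here is just a placeholder; use your own):

sess := session.Must(session.NewSession(&aws.Config{
    Region:                         aws.String("us-west-1"), // placeholder region
    DisableRestProtocolURICleaning: aws.Bool(true),
}))
client := s3.New(sess)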
I'm going to start by showing the code and then explain what I'm trying to do. Code:
package main

import (
    "fmt"
    "os"
    "path/filepath"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
    // empty strings for security reasons
    Bucket         = ""       // Download from this bucket
    Prefix         = ""       // Using this key prefix
    LocalDirectory = "s3logs" // Into this directory
)

func main() {
    sess := session.New()
    client := s3.New(sess, &aws.Config{Region: aws.String("us-west-1")})
    params := &s3.ListObjectsInput{Bucket: &Bucket, Prefix: &Prefix}
    manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
        d.PartSize = 64 * 1024 * 1024 // 64MB per part
        d.Concurrency = 8
    }) // works
    //manager := s3manager.NewDownloaderWithClient(client) // works
    d := downloader{bucket: Bucket, dir: LocalDirectory, Downloader: manager}
    client.ListObjectsPages(params, d.eachPage)
}

type downloader struct {
    *s3manager.Downloader
    bucket, dir string
}

func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
    for _, obj := range page.Contents {
        d.downloadToFile(*obj.Key)
    }
    return true
}

func (d *downloader) downloadToFile(key string) {
    // Create the directories in the path
    file := filepath.Join(d.dir, key)
    if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
        panic(err)
    }
    fmt.Printf("Downloading " + key)

    // Setup the local file
    fd, err := os.Create(file)
    if err != nil {
        panic(err)
    }
    defer fd.Close()

    // Download the file using the AWS SDK
    fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
    params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
    _, e := d.Download(fd, params)
    if e != nil {
        panic(e)
    }
}
}
I'm trying to download the log files from a particular bucket and eventually many buckets. I need the download to be as fast as possible because. There is lots of data. My question is what is the most effective way to download huge amounts of data quickly? The whole process is nil if those logs can't be downloaded at a reasonable speed. Is there a faster way, it's already concurrent according to amazons doc? Any ideas? Also, i've noticed a curious thing. It doesn't matter if I set the Concurrency to 1, 4, or 20. Everything is still downloading at ~.70 - ~/.80 gb / min