summary refs log blame commit diff stats
path: root/feed/feed.go
blob: 3872af4d7d28a560c8d61915c3b03bae6bdb6a47 (plain) (tree)
1
2
3
4
5
6
7
8
9
10





                                                   
                      
             

                  

              


                                      





                                      
                      
                      
                           

 





                                                               

























                                                                             


                                 








                                                            

























                                                     
 





















                                                                 

















                                                                                      
















                                                                                    
// SPDX-License-Identifier: ISC
// Copyright © 2021 siddharth <s@ricketyspace.net>

package feed

import (
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"os"
	"path"
	"time"

	"ricketyspace.net/fern/schema"
)

// Feed describes a single feed together with the state attached to it
// while it is processed.
//
// Id, Source and Schema carry JSON tags ("id", "source", "schema").
// Validate requires Id and Source to be non-empty and Schema to be
// either "npr" or "youtube". Source is the URL passed to http.Get
// when the feed is fetched.
//
// YDLPath and DumpDir are filled in by Validate: YDLPath is the
// ydlPath argument as given, and DumpDir is the feed's own directory
// (named after Id) under the base dump directory.
//
// Object is set by unmarshal and holds a schema.NPRFeed or a
// schema.YouTubeFeed, depending on Schema.
type Feed struct {
	Id      string `json:"id"`
	Source  string `json:"source"`
	Schema  string `json:"schema"`
	YDLPath string
	DumpDir string
	Object  interface{}
}

// Validate checks the feed's settings and prepares the feed for use.
//
// ydlPath and baseDumpDir must both exist on disk; the error from
// os.Stat is returned unchanged when either is missing. The feed must
// have a non-empty Id and Source, and its Schema must be "npr" or
// "youtube".
//
// When every check passes, YDLPath is set to ydlPath, DumpDir is set
// to the directory named after Id under baseDumpDir, and that
// directory is created (mode 0755) if it does not exist yet. Any
// error from creating the directory is returned.
func (feed *Feed) Validate(ydlPath, baseDumpDir string) error {
	// Both filesystem prerequisites are checked before the feed's
	// own fields, ydlPath first.
	for _, required := range []string{ydlPath, baseDumpDir} {
		if _, statErr := os.Stat(required); statErr != nil {
			return statErr
		}
	}

	if feed.Id == "" {
		return fmt.Errorf("'id' not set in a feed")
	}
	if feed.Source == "" {
		return fmt.Errorf("'source' not set in a feed '%s'", feed.Id)
	}

	switch feed.Schema {
	case "npr", "youtube":
		// Known schema; nothing to do.
	default:
		return fmt.Errorf("schema '%s' for feed '%s' is not valid",
			feed.Schema, feed.Id)
	}

	// The feed is only modified once all checks have passed.
	feed.YDLPath = ydlPath
	feed.DumpDir = path.Join(baseDumpDir, feed.Id)
	return os.MkdirAll(feed.DumpDir, 0755)
}

// get fetches the raw feed from feed.Source with an HTTP GET and
// returns the complete response body.
//
// An error is returned when the request fails, when the server
// answers with a non-2xx status, or when reading the body fails. The
// returned byte slice is nil whenever the error is non-nil.
func (feed *Feed) get() ([]byte, error) {
	resp, err := http.Get(feed.Source)
	if err != nil {
		return nil, err
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked.
	defer resp.Body.Close()

	// Reject error responses here rather than handing an error page
	// to the feed decoder.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("fetching feed '%s': unexpected status %s",
			feed.Id, resp.Status)
	}

	// io.ReadAll reports any read error other than io.EOF, so a
	// truncated body is never returned as if it were complete.
	bs, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading feed '%s': %w", feed.Id, err)
	}
	return bs, nil
}


// unmarshal decodes the raw feed bytes in bs with the decoder that
// matches feed.Schema and stores the result in feed.Object.
//
// feed.Object is assigned whatever the decoder returned even when the
// decoder also reports an error; that error is returned unchanged. A
// Schema other than "npr" or "youtube" yields an error and leaves
// feed.Object untouched.
func (feed *Feed) unmarshal(bs []byte) error {
	switch feed.Schema {
	case "npr":
		decoded, decodeErr := nprUnmarshal(bs)
		feed.Object = decoded
		return decodeErr
	case "youtube":
		decoded, decodeErr := youtubeUnmarshal(bs)
		feed.Object = decoded
		return decodeErr
	default:
		return fmt.Errorf("schema of feed '%s' unknown", feed.Id)
	}
}

// nprUnmarshal decodes the XML document in bs into a schema.NPRFeed
// and then fills in each entry's PubTime by parsing the entry's Pub
// string with the time.RFC1123Z layout.
//
// Processing stops at the first error. The feed as populated up to
// that point is returned together with the error.
func nprUnmarshal(bs []byte) (schema.NPRFeed, error) {
	var parsed schema.NPRFeed
	if err := xml.Unmarshal(bs, &parsed); err != nil {
		return parsed, err
	}

	// Assign through the index so the parsed time lands on the entry
	// stored in the feed.
	for i := range parsed.Entries {
		var err error
		parsed.Entries[i].PubTime, err = time.Parse(time.RFC1123Z, parsed.Entries[i].Pub)
		if err != nil {
			return parsed, err
		}
	}
	return parsed, nil
}

// youtubeUnmarshal decodes the XML document in bs into a
// schema.YouTubeFeed and then fills in each entry's PubTime by
// parsing the entry's Pub string with the time.RFC3339 layout.
//
// Processing stops at the first error. The feed as populated up to
// that point is returned together with the error.
func youtubeUnmarshal(bs []byte) (schema.YouTubeFeed, error) {
	var parsed schema.YouTubeFeed
	if err := xml.Unmarshal(bs, &parsed); err != nil {
		return parsed, err
	}

	// Assign through the index so the parsed time lands on the entry
	// stored in the feed.
	for i := range parsed.Entries {
		var err error
		parsed.Entries[i].PubTime, err = time.Parse(time.RFC3339, parsed.Entries[i].Pub)
		if err != nil {
			return parsed, err
		}
	}
	return parsed, nil
}