// SPDX-License-Identifier: ISC // Copyright © 2021 siddharth package feed import ( "encoding/xml" "fmt" "io" "net/http" "os" "path" "time" "ricketyspace.net/fern/schema" ) type Feed struct { Id string `json:"id"` Source string `json:"source"` Schema string `json:"schema"` DumpDir string Object interface{} } func (feed *Feed) Validate(baseDumpDir string) error { _, err := os.Stat(baseDumpDir) if err != nil { return err } // Check 'id' if len(feed.Id) == 0 { return fmt.Errorf("'id' not set in a feed") } // Check 'source' if len(feed.Source) == 0 { return fmt.Errorf("'source' not set in a feed '%s'", feed.Id) } // Check 'schema' schemaOK := false for _, schema := range []string{"npr", "youtube"} { if feed.Schema == schema { schemaOK = true } } if !schemaOK { return fmt.Errorf("schema '%s' for feed '%s' is not valid", feed.Schema, feed.Id) } // Set dump directory for feed and ensure it exists. feed.DumpDir = path.Join(baseDumpDir, feed.Id) err = os.MkdirAll(feed.DumpDir, 0755) if err != nil { return err } return nil } // Get the feed. func (feed *Feed) get() ([]byte, error) { // Init byte container to store feed content. bs := make([]byte, 0) resp, err := http.Get(feed.Source) if err != nil { return bs, err } // Slurp body. chunk := make([]byte, 100) for { c, err := resp.Body.Read(chunk) if c < 1 { break } if err != nil && err != io.EOF { return bs, err } bs = append(bs, chunk[0:c]...) } return bs, nil } // Unmarshal raw feed into an object. func (feed *Feed) unmarshal(bs []byte) error { var err error // Unmarshal based on feed's schema type. switch { case feed.Schema == "npr": feed.Object, err = nprUnmarshal(bs) if err != nil { return err } return nil case feed.Schema == "youtube": feed.Object, err = youtubeUnmarshal(bs) if err != nil { return err } return nil } return fmt.Errorf("schema of feed '%s' unknown", feed.Id) } // Unmarshal a NPR feed. func nprUnmarshal(bs []byte) (schema.NPRFeed, error) { nprFeed := new(schema.NPRFeed) err := xml.Unmarshal(bs, nprFeed) if err != nil { return *nprFeed, err } // Parse time for all entries. for i, entry := range nprFeed.Entries { nprFeed.Entries[i].PubTime, err = time.Parse(time.RFC1123Z, entry.Pub) if err != nil { return *nprFeed, err } } return *nprFeed, nil } // Unmarshal a YouTube feed. func youtubeUnmarshal(bs []byte) (schema.YouTubeFeed, error) { ytFeed := new(schema.YouTubeFeed) err := xml.Unmarshal(bs, ytFeed) if err != nil { return *ytFeed, err } // Parse time for all entries. for i, entry := range ytFeed.Entries { ytFeed.Entries[i].PubTime, err = time.Parse(time.RFC3339, entry.Pub) if err != nil { return *ytFeed, err } } return *ytFeed, nil }