1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
// SPDX-License-Identifier: ISC
// Copyright © 2021 siddharth <s@ricketyspace.net>
package feed
import (
"encoding/xml"
"fmt"
"io"
"net/http"
"os"
"path"
"time"
"ricketyspace.net/fern/schema"
)
// Feed describes a single feed: its identifier, the location it is
// fetched from, the schema it is published in and the directory its
// data is dumped into.
type Feed struct {
// Id identifies the feed. Validate rejects an empty Id and uses it
// as the name of the feed's dump directory.
Id string `json:"id"`
// Source is the location the feed is fetched from; it is handed to
// http.Get as-is. Validate rejects an empty Source.
Source string `json:"source"`
// Schema names the format of the feed. Validate accepts "npr" and
// "youtube" only.
Schema string `json:"schema"`
// DumpDir is the feed's own directory under the base dump
// directory. It is filled in by Validate, not read from JSON tags.
DumpDir string
// NOTE(review): presumably holds the unmarshalled feed
// (schema.NPRFeed or schema.YouTubeFeed); nothing in this part of
// the file assigns it -- confirm against the rest of the package.
Object interface{}
}
// Validate checks that the feed is well formed and prepares its dump
// directory.
//
// baseDumpDir must already exist. The feed needs a non-empty Id and
// Source, and a Schema of either "npr" or "youtube". When every check
// passes, DumpDir is set to baseDumpDir joined with Id and that
// directory is created with mode 0755, along with any missing parents.
//
// The first failed check, or the first file-system error, is returned.
func (feed *Feed) Validate(baseDumpDir string) error {
	// The base directory is never created here; it has to be there.
	if _, statErr := os.Stat(baseDumpDir); statErr != nil {
		return statErr
	}

	// Mandatory fields, checked in order: 'id' first, then 'source'.
	switch {
	case feed.Id == "":
		return fmt.Errorf("'id' not set in a feed")
	case feed.Source == "":
		return fmt.Errorf("'source' not set in a feed '%s'", feed.Id)
	}

	// Only the known schemas are accepted.
	switch feed.Schema {
	case "npr", "youtube":
		// Supported schema; carry on.
	default:
		return fmt.Errorf("schema '%s' for feed '%s' is not valid",
			feed.Schema, feed.Id)
	}

	// Record the feed's dump directory and make sure it exists.
	feed.DumpDir = path.Join(baseDumpDir, feed.Id)
	return os.MkdirAll(feed.DumpDir, 0755)
}
// get fetches the feed from feed.Source over HTTP and returns the raw
// response body.
//
// An error is returned when the request cannot be made, when the
// server answers with a status outside the 2xx range, or when reading
// the body fails. No bytes are returned alongside an error.
func (feed *Feed) get() ([]byte, error) {
	resp, err := http.Get(feed.Source)
	if err != nil {
		return nil, err
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked.
	defer resp.Body.Close()

	// An error page is not a feed; report it instead of handing its
	// body to the XML decoder.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("fetching feed '%s': unexpected status %s",
			feed.Id, resp.Status)
	}

	// io.ReadAll treats io.EOF as the normal end of the stream and
	// reports every other read error, including one that arrives
	// together with zero bytes.
	bs, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return bs, nil
}
// nprUnmarshal decodes the XML document in bs into a schema.NPRFeed
// and fills in the PubTime of every entry by parsing its Pub field
// with the time.RFC1123Z layout.
//
// On failure the feed as decoded so far is returned together with the
// error.
func nprUnmarshal(bs []byte) (schema.NPRFeed, error) {
	var decoded schema.NPRFeed
	if err := xml.Unmarshal(bs, &decoded); err != nil {
		return decoded, err
	}

	// Turn each entry's textual publication date into a time value;
	// stop at the first entry that does not parse.
	for idx := range decoded.Entries {
		stamp, err := time.Parse(time.RFC1123Z, decoded.Entries[idx].Pub)
		decoded.Entries[idx].PubTime = stamp
		if err != nil {
			return decoded, err
		}
	}
	return decoded, nil
}
// youtubeUnmarshal decodes the XML document in bs into a
// schema.YouTubeFeed and fills in the PubTime of every entry by
// parsing its Pub field with the time.RFC3339 layout.
//
// On failure the feed as decoded so far is returned together with the
// error.
func youtubeUnmarshal(bs []byte) (schema.YouTubeFeed, error) {
	var decoded schema.YouTubeFeed
	if err := xml.Unmarshal(bs, &decoded); err != nil {
		return decoded, err
	}

	// Turn each entry's textual publication date into a time value;
	// stop at the first entry that does not parse.
	for idx := range decoded.Entries {
		stamp, err := time.Parse(time.RFC3339, decoded.Entries[idx].Pub)
		decoded.Entries[idx].PubTime = stamp
		if err != nil {
			return decoded, err
		}
	}
	return decoded, nil
}
|