package main

import (
	"fmt"
	"sync"
)

// Fetcher retrieves a single page on behalf of the crawler.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// SafeCounter is the state shared by every Crawl goroutine: the set of URLs
// that have already been claimed and a WaitGroup tracking outstanding calls.
// It contains a mutex and a WaitGroup, so it must be used through a pointer
// and never copied.
type SafeCounter struct {
	mu      sync.Mutex          // guards visited
	wg      sync.WaitGroup      // one count per Crawl call that has not returned yet
	visited map[string]struct{} // URLs some Crawl call has already claimed
}

// claim records url as visited and reports whether the caller is the first
// to do so. Only the map access happens under the lock, so callers are free
// to fetch concurrently once they own a URL.
func (c *SafeCounter) claim(url string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.visited == nil {
		// Keep a zero-value SafeCounter usable instead of panicking on the
		// first write to a nil map.
		c.visited = make(map[string]struct{})
	}
	if _, seen := c.visited[url]; seen {
		return false
	}
	c.visited[url] = struct{}{}
	return true
}

var (
	// safeCounter is the crawl state used by Crawl. It must be non-nil
	// before Crawl is called; main initializes it.
	safeCounter *SafeCounter
)

// Crawl uses fetcher to recursively crawl pages starting with url, to a
// maximum of depth. Linked pages are fetched in parallel, one goroutine per
// link, and a non-empty URL is fetched at most once per safeCounter.
//
// The caller must call safeCounter.wg.Add(1) before invoking Crawl; Crawl
// calls Done exactly once when it returns. Fetch errors are printed and end
// the crawl of that branch only.
func Crawl(url string, depth int, fetcher Fetcher) {
	// Exactly one Done per call, on every return path. Children are added
	// to the WaitGroup before this runs, so the counter cannot reach zero
	// while work is still being scheduled.
	defer safeCounter.wg.Done()

	// Check the depth before claiming the URL: a page that is not going to
	// be fetched must stay available to a path that reaches it with depth
	// to spare.
	if depth <= 0 {
		return
	}

	// The empty URL is never recorded, so it is never deduplicated.
	if url != "" && !safeCounter.claim(url) {
		return
	}

	// No lock is held here, so sibling goroutines fetch concurrently.
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("found: %s %q\n", url, body)

	for _, u := range urls {
		safeCounter.wg.Add(1)
		go Crawl(u, depth-1, fetcher)
	}
}

func main() {
	safeCounter = &SafeCounter{
		visited: make(map[string]struct{}),
	}

	safeCounter.wg.Add(1)
	go Crawl("https://golang.org/", 4, fetcher)
	safeCounter.wg.Wait()
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned body and outgoing links for one URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned result stored for url, or a "not found" error
// when the URL is not in the table.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": &fakeResult{
		"The Go Programming Language",
		[]string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": &fakeResult{
		"Packages",
		[]string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": &fakeResult{
		"Package fmt",
		[]string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": &fakeResult{
		"Package os",
		[]string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}