exercise: complete the exercise web crawler with sync.Mutex and sync.WaitGroup
This commit is contained in:
		
							parent
							
								
									9a390fa84b
								
							
						
					
					
						commit
						1703797170
					
				
							
								
								
									
										120
									
								
								exercise/web-crawler/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								exercise/web-crawler/main.go
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,120 @@
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"sync"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Fetcher retrieves a single page on behalf of Crawl.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
 | 
			
		||||
 | 
			
		||||
// SafeCounter is the state shared by all concurrent Crawl calls.
// It contains a sync.Mutex and a sync.WaitGroup, so it must be used through
// a pointer and never copied.
type SafeCounter struct {
	// mu guards visited.
	mu sync.Mutex
	// wg counts Crawl calls that have been announced with Add but have not
	// yet returned.
	wg sync.WaitGroup
	// visited is the set of URLs that a Crawl call has already claimed.
	visited map[string]struct{}
}
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	safeCounter *SafeCounter
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Crawl uses fetcher to recursively crawl
 | 
			
		||||
// pages starting with url, to a maximum of depth.
 | 
			
		||||
func Crawl(url string, depth int, fetcher Fetcher) {
 | 
			
		||||
	// TODO: Fetch URLs in parallel.
 | 
			
		||||
	// TODO: Don't fetch the same URL twice.
 | 
			
		||||
	// This implementation doesn't do either:
 | 
			
		||||
	defer safeCounter.wg.Done()
 | 
			
		||||
 | 
			
		||||
	safeCounter.mu.Lock()
 | 
			
		||||
	defer safeCounter.mu.Unlock()
 | 
			
		||||
	if url != "" {
 | 
			
		||||
		_, ok := safeCounter.visited[url]
 | 
			
		||||
		if ok {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		safeCounter.visited[url] = struct{}{}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if depth <= 0 {
 | 
			
		||||
		safeCounter.wg.Done()
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	body, urls, err := fetcher.Fetch(url)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		fmt.Println(err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Printf("found: %s %q\n", url, body)
 | 
			
		||||
 | 
			
		||||
	for _, u := range urls {
 | 
			
		||||
		safeCounter.wg.Add(1)
 | 
			
		||||
		go Crawl(u, depth-1, fetcher)
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func main() {
 | 
			
		||||
	safeCounter = &SafeCounter{
 | 
			
		||||
		visited: make(map[string]struct{}),
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	safeCounter.wg.Add(1)
 | 
			
		||||
	go Crawl("https://golang.org/", 4, fetcher)
 | 
			
		||||
	safeCounter.wg.Wait()
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// fakeFetcher is Fetcher that returns canned results.
 | 
			
		||||
type fakeFetcher map[string]*fakeResult
 | 
			
		||||
 | 
			
		||||
// fakeResult is the canned outcome of fetching one URL.
type fakeResult struct {
	// body is the page body reported for the URL.
	body string
	// urls lists the links reported as found on the page.
	urls []string
}
 | 
			
		||||
 | 
			
		||||
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
 | 
			
		||||
	if res, ok := f[url]; ok {
 | 
			
		||||
		return res.body, res.urls, nil
 | 
			
		||||
	}
 | 
			
		||||
	return "", nil, fmt.Errorf("not found: %s", url)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// fetcher is a populated fakeFetcher.
 | 
			
		||||
var fetcher = fakeFetcher{
 | 
			
		||||
	"https://golang.org/": &fakeResult{
 | 
			
		||||
		"The Go Programming Language",
 | 
			
		||||
		[]string{
 | 
			
		||||
			"https://golang.org/pkg/",
 | 
			
		||||
			"https://golang.org/cmd/",
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	"https://golang.org/pkg/": &fakeResult{
 | 
			
		||||
		"Packages",
 | 
			
		||||
		[]string{
 | 
			
		||||
			"https://golang.org/",
 | 
			
		||||
			"https://golang.org/cmd/",
 | 
			
		||||
			"https://golang.org/pkg/fmt/",
 | 
			
		||||
			"https://golang.org/pkg/os/",
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	"https://golang.org/pkg/fmt/": &fakeResult{
 | 
			
		||||
		"Package fmt",
 | 
			
		||||
		[]string{
 | 
			
		||||
			"https://golang.org/",
 | 
			
		||||
			"https://golang.org/pkg/",
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	"https://golang.org/pkg/os/": &fakeResult{
 | 
			
		||||
		"Package os",
 | 
			
		||||
		[]string{
 | 
			
		||||
			"https://golang.org/",
 | 
			
		||||
			"https://golang.org/pkg/",
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user