diff --git a/cmd/api/main.go b/cmd/api/main.go index 7fb290c..f49b376 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -5,8 +5,8 @@ import ( "log" "os" - "cis-engine/internal/api" "cis-engine/internal/search" + "cis-engine/internal/server" "cis-engine/internal/storage/postgres" "github.com/joho/godotenv" @@ -31,12 +31,12 @@ func main() { log.Println("Успешное подключение к базе данных.") searchService := search.NewService(db) - apiHandler := api.NewHandler(searchService) - router := api.NewRouter(apiHandler) + apiHandler := server.NewHandler(searchService) + router := server.NewRouter(apiHandler) serverAddr := ":8080" log.Printf("Запуск API сервера на http://localhost%s", serverAddr) if err := router.Run(serverAddr); err != nil { log.Fatalf("Не удалось запустить сервер: %v", err) } -} +} \ No newline at end of file diff --git a/go.mod b/go.mod index 91393df..c41520f 100644 --- a/go.mod +++ b/go.mod @@ -75,6 +75,7 @@ require ( github.com/shirou/gopsutil/v4 v4.25.1 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/stretchr/objx v0.5.2 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect diff --git a/internal/api/api_test.go b/internal/api/api_test.go deleted file mode 100644 index db6e451..0000000 --- a/internal/api/api_test.go +++ /dev/null @@ -1,176 +0,0 @@ -package api - -import ( - "context" - "encoding/json" - "errors" - "net/http" - "net/http/httptest" - "strings" - "testing" - - "cis-engine/internal/search" - "cis-engine/internal/storage" - - "github.com/stretchr/testify/require" -) - -type mockSearchService struct { - searchFunc func(ctx context.Context, query string) ([]search.Result, error) - scheduleCrawlFunc func(ctx context.Context, url string) error - getStatsFunc func(ctx context.Context) (*storage.Metrics, error) -} - -func (m *mockSearchService) Search(ctx context.Context, query string) ([]search.Result, error) { - if m.searchFunc != nil { - return m.searchFunc(ctx, query) - } - return nil, errors.New("searchFunc не был определен") -} - -func (m *mockSearchService) ScheduleCrawl(ctx context.Context, url string) error { - if m.scheduleCrawlFunc != nil { - return m.scheduleCrawlFunc(ctx, url) - } - return errors.New("scheduleCrawlFunc не был определен") -} - -func (m *mockSearchService) GetStats(ctx context.Context) (*storage.Metrics, error) { - if m.getStatsFunc != nil { - return m.getStatsFunc(ctx) - } - return nil, errors.New("getStatsFunc не был определен") -} - -func TestSearchHandler(t *testing.T) { - t.Run("Успешный запрос", func(t *testing.T) { - mockService := &mockSearchService{ - searchFunc: func(ctx context.Context, query string) ([]search.Result, error) { - require.Equal(t, "test", query) - return []search.Result{{URL: "test.com", Title: "Test"}}, nil - }, - } - handler := NewHandler(mockService) - router := NewRouter(handler) - - req := httptest.NewRequest(http.MethodGet, "/api/v1/search?q=test", nil) - rec := httptest.NewRecorder() - - router.ServeHTTP(rec, req) - require.Equal(t, http.StatusOK, rec.Code) - - var responseBody map[string]interface{} - err := json.Unmarshal(rec.Body.Bytes(), &responseBody) - require.NoError(t, err) - require.Equal(t, "test", responseBody["query"]) - }) - - t.Run("Запрос без параметра q", func(t *testing.T) { - mockService := &mockSearchService{} - handler := NewHandler(mockService) - router := NewRouter(handler) - - req := httptest.NewRequest(http.MethodGet, "/api/v1/search", nil) - rec := httptest.NewRecorder() - - router.ServeHTTP(rec, req) - - require.Equal(t, http.StatusBadRequest, rec.Code) - }) - - t.Run("Сервис возвращает ошибку", func(t *testing.T) { - mockService := &mockSearchService{ - searchFunc: func(ctx context.Context, query string) ([]search.Result, error) { - return nil, errors.New("internal error") - }, - } - handler := NewHandler(mockService) - router := NewRouter(handler) - - req := httptest.NewRequest(http.MethodGet, "/api/v1/search?q=error", nil) - rec := httptest.NewRecorder() - - router.ServeHTTP(rec, req) - - require.Equal(t, http.StatusInternalServerError, rec.Code) - }) -} - -func TestCrawlHandler(t *testing.T) { - t.Run("Успешный запрос на сканирование", func(t *testing.T) { - crawlURL := "https://example.com/to-crawl" - scheduleCrawlCalled := false - mockService := &mockSearchService{ - scheduleCrawlFunc: func(ctx context.Context, url string) error { - require.Equal(t, crawlURL, url) - scheduleCrawlCalled = true - return nil - }, - } - handler := NewHandler(mockService) - router := NewRouter(handler) - - requestBody := `{"url": "https://example.com/to-crawl"}` - req := httptest.NewRequest(http.MethodPost, "/api/v1/crawl", strings.NewReader(requestBody)) - req.Header.Set("Content-Type", "application/json") - rec := httptest.NewRecorder() - - router.ServeHTTP(rec, req) - - require.Equal(t, http.StatusAccepted, rec.Code) - require.True(t, scheduleCrawlCalled, "Метод ScheduleCrawl должен был быть вызван") - }) - - t.Run("Запрос с неверным JSON", func(t *testing.T) { - mockService := &mockSearchService{} - handler := NewHandler(mockService) - router := NewRouter(handler) - - requestBody := `{"invalid_field": "test"}` - req := httptest.NewRequest(http.MethodPost, "/api/v1/crawl", strings.NewReader(requestBody)) - req.Header.Set("Content-Type", "application/json") - rec := httptest.NewRecorder() - - router.ServeHTTP(rec, req) - require.Equal(t, http.StatusBadRequest, rec.Code) - }) -} - -func TestStatusHandler(t *testing.T) { - t.Run("Успешное получение статуса", func(t *testing.T) { - mockService := &mockSearchService{ - getStatsFunc: func(ctx context.Context) (*storage.Metrics, error) { - return &storage.Metrics{PagesCount: 123}, nil - }, - } - handler := NewHandler(mockService) - router := NewRouter(handler) - - req := httptest.NewRequest(http.MethodGet, "/api/v1/status", nil) - rec := httptest.NewRecorder() - router.ServeHTTP(rec, req) - - require.Equal(t, http.StatusOK, rec.Code) - - var stats storage.Metrics - err := json.Unmarshal(rec.Body.Bytes(), &stats) - require.NoError(t, err) - require.Equal(t, int64(123), stats.PagesCount) - }) - - t.Run("Сервис возвращает ошибку при получении статуса", func(t *testing.T) { - mockService := &mockSearchService{ - getStatsFunc: func(ctx context.Context) (*storage.Metrics, error) { - return nil, errors.New("db is down") - }, - } - handler := NewHandler(mockService) - router := NewRouter(handler) - - req := httptest.NewRequest(http.MethodGet, "/api/v1/status", nil) - rec := httptest.NewRecorder() - router.ServeHTTP(rec, req) - - require.Equal(t, http.StatusInternalServerError, rec.Code) - }) -} diff --git a/internal/api/api.go b/internal/server/server.go similarity index 62% rename from internal/api/api.go rename to internal/server/server.go index ad7bb3c..324492f 100644 --- a/internal/api/api.go +++ b/internal/server/server.go @@ -1,12 +1,12 @@ -package api +package server import ( "context" - "log" - "net/http" - "cis-engine/internal/search" "cis-engine/internal/storage" + "log" + "net/http" + "net/url" "github.com/gin-gonic/gin" ) @@ -29,6 +29,8 @@ func NewRouter(h *Handler) *gin.Engine { gin.SetMode(gin.ReleaseMode) router := gin.Default() + router.Use(errorHandler) + apiV1 := router.Group("/api/v1") { apiV1.GET("/search", h.searchHandler) @@ -42,14 +44,14 @@ func NewRouter(h *Handler) *gin.Engine { func (h *Handler) searchHandler(c *gin.Context) { query := c.Query("q") if query == "" { - c.JSON(http.StatusBadRequest, gin.H{"error": "Параметр 'q' не может быть пустым"}) + c.JSON(http.StatusBadRequest, gin.H{"error": "query parameter 'q' cannot be empty"}) return } results, err := h.searchService.Search(c.Request.Context(), query) if err != nil { log.Printf("ERROR: search service failed for query '%s': %v", query, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "Внутренняя ошибка сервера"}) + c.JSON(http.StatusInternalServerError, gin.H{"error": "internal server error"}) return } @@ -62,31 +64,47 @@ func (h *Handler) crawlHandler(c *gin.Context) { } if err := c.ShouldBindJSON(&request); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "Неверный формат запроса. Ожидается JSON с полем 'url'."}) + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request format, expected JSON with 'url' field"}) return } - if request.URL == "" { - c.JSON(http.StatusBadRequest, gin.H{"error": "Поле 'url' не может быть пустым."}) + if _, err := url.ParseRequestURI(request.URL); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid URL format"}) return } if err := h.searchService.ScheduleCrawl(c.Request.Context(), request.URL); err != nil { log.Printf("ERROR: failed to schedule crawl for url '%s': %v", request.URL, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "Не удалось добавить URL в очередь"}) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to add URL to the queue"}) return } - c.JSON(http.StatusAccepted, gin.H{"message": "URL принят в очередь на сканирование."}) + c.JSON(http.StatusAccepted, gin.H{"message": "URL accepted for scanning"}) } func (h *Handler) statusHandler(c *gin.Context) { stats, err := h.searchService.GetStats(c.Request.Context()) if err != nil { log.Printf("ERROR: failed to get system stats: %v", err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "Не удалось получить статистику системы"}) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get system statistics"}) return } c.JSON(http.StatusOK, stats) } + +func errorHandler(c *gin.Context) { + c.Next() + + if len(c.Errors) > 0 { + // Log errors + for _, e := range c.Errors { + log.Printf("ERROR: %v", e.Err) + } + + // Return a generic error message + c.JSON(http.StatusInternalServerError, gin.H{ + "error": "internal server error", + }) + } +} \ No newline at end of file diff --git a/internal/server/server_test.go b/internal/server/server_test.go new file mode 100644 index 0000000..3424017 --- /dev/null +++ b/internal/server/server_test.go @@ -0,0 +1,128 @@ +package server + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "cis-engine/internal/search" + "cis-engine/internal/storage" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type MockSearcher struct { + mock.Mock +} + +func (m *MockSearcher) Search(ctx context.Context, query string) ([]search.Result, error) { + args := m.Called(ctx, query) + return args.Get(0).([]search.Result), args.Error(1) +} + +func (m *MockSearcher) ScheduleCrawl(ctx context.Context, url string) error { + args := m.Called(ctx, url) + return args.Error(0) +} + +func (m *MockSearcher) GetStats(ctx context.Context) (*storage.Metrics, error) { + args := m.Called(ctx) + return args.Get(0).(*storage.Metrics), args.Error(1) +} + +func TestSearchHandler(t *testing.T) { + gin.SetMode(gin.TestMode) + + t.Run("successful search", func(t *testing.T) { + mockSearcher := new(MockSearcher) + handler := NewHandler(mockSearcher) + router := NewRouter(handler) + + rr := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, "/api/v1/search?q=test", nil) + + expectedResults := []search.Result{{URL: "http://example.com"}} + mockSearcher.On("Search", mock.Anything, "test").Return(expectedResults, nil) + + router.ServeHTTP(rr, req) + + require.Equal(t, http.StatusOK, rr.Code) + mockSearcher.AssertExpectations(t) + }) + + t.Run("empty query", func(t *testing.T) { + mockSearcher := new(MockSearcher) + handler := NewHandler(mockSearcher) + router := NewRouter(handler) + + rr := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, "/api/v1/search", nil) + + router.ServeHTTP(rr, req) + + require.Equal(t, http.StatusBadRequest, rr.Code) + }) +} + +func TestCrawlHandler(t *testing.T) { + gin.SetMode(gin.TestMode) + + t.Run("successful crawl", func(t *testing.T) { + mockSearcher := new(MockSearcher) + handler := NewHandler(mockSearcher) + router := NewRouter(handler) + + rr := httptest.NewRecorder() + body, _ := json.Marshal(gin.H{"url": "http://example.com"}) + req, _ := http.NewRequest(http.MethodPost, "/api/v1/crawl", bytes.NewBuffer(body)) + req.Header.Set("Content-Type", "application/json") + + mockSearcher.On("ScheduleCrawl", mock.Anything, "http://example.com").Return(nil) + + router.ServeHTTP(rr, req) + + require.Equal(t, http.StatusAccepted, rr.Code) + mockSearcher.AssertExpectations(t) + }) + + t.Run("invalid url", func(t *testing.T) { + mockSearcher := new(MockSearcher) + handler := NewHandler(mockSearcher) + router := NewRouter(handler) + + rr := httptest.NewRecorder() + body, _ := json.Marshal(gin.H{"url": "not a url"}) + req, _ := http.NewRequest(http.MethodPost, "/api/v1/crawl", bytes.NewBuffer(body)) + req.Header.Set("Content-Type", "application/json") + + router.ServeHTTP(rr, req) + + require.Equal(t, http.StatusBadRequest, rr.Code) + }) +} + +func TestStatusHandler(t *testing.T) { + gin.SetMode(gin.TestMode) + + t.Run("successful status", func(t *testing.T) { + mockSearcher := new(MockSearcher) + handler := NewHandler(mockSearcher) + router := NewRouter(handler) + + rr := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, "/api/v1/status", nil) + + expectedMetrics := &storage.Metrics{PagesCount: 10} + mockSearcher.On("GetStats", mock.Anything).Return(expectedMetrics, nil) + + router.ServeHTTP(rr, req) + + require.Equal(t, http.StatusOK, rr.Code) + mockSearcher.AssertExpectations(t) + }) +} \ No newline at end of file diff --git a/internal/storage/postgres/db.go b/internal/storage/postgres/db.go index 1108869..714d539 100644 --- a/internal/storage/postgres/db.go +++ b/internal/storage/postgres/db.go @@ -7,11 +7,19 @@ import ( "time" "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgxpool" ) +type DBTX interface { + Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) + Query(context.Context, string, ...interface{}) (pgx.Rows, error) + QueryRow(context.Context, string, ...interface{}) pgx.Row +} + type DB struct { pool *pgxpool.Pool + q *Queries } var _ storage.Storer = (*DB)(nil) @@ -19,13 +27,13 @@ var _ storage.Storer = (*DB)(nil) func New(ctx context.Context, connString string) (*DB, error) { pool, err := pgxpool.New(ctx, connString) if err != nil { - return nil, fmt.Errorf("не удалось создать пул соединений: %w", err) + return nil, fmt.Errorf("failed to create connection pool: %w", err) } if err := pool.Ping(ctx); err != nil { pool.Close() - return nil, fmt.Errorf("не удалось подключиться к базе данных: %w", err) + return nil, fmt.Errorf("failed to ping database: %w", err) } - return &DB{pool: pool}, nil + return &DB{pool: pool, q: NewQuerier(pool)}, nil } func (db *DB) Close() { @@ -33,87 +41,22 @@ func (db *DB) Close() { } func (db *DB) StorePage(ctx context.Context, page *storage.Page) (int64, error) { - query := ` - INSERT INTO pages (url, html_content, title, last_crawled_at) - VALUES ($1, $2, $3, $4) - ON CONFLICT (url) DO UPDATE - SET html_content = EXCLUDED.html_content, - title = EXCLUDED.title, - last_crawled_at = EXCLUDED.last_crawled_at, - content_tsvector = NULL - RETURNING id - ` - var pageID int64 - err := db.pool.QueryRow(ctx, query, page.URL, page.Body, page.Title, time.Now()).Scan(&pageID) - if err != nil { - return 0, fmt.Errorf("ошибка при сохранении страницы %s: %w", page.URL, err) - } - return pageID, nil + page.CrawledAt = time.Now() + return db.q.StorePage(ctx, page) } func (db *DB) GetNextPageToIndex(ctx context.Context) (*storage.Page, error) { - query := `SELECT id, url, title, html_content FROM pages WHERE content_tsvector IS NULL LIMIT 1` - var p storage.Page - err := db.pool.QueryRow(ctx, query).Scan(&p.ID, &p.URL, &p.Title, &p.Body) - if err != nil { - if err == pgx.ErrNoRows { - return nil, nil - } - return nil, fmt.Errorf("ошибка при получении страницы для индексации: %w", err) - } - return &p, nil + return db.q.GetNextPageToIndex(ctx) } func (db *DB) UpdatePageVector(ctx context.Context, page *storage.Page) error { - query := ` - UPDATE pages - SET content_tsvector = to_tsvector('russian', coalesce(title, '') || ' ' || coalesce(html_content, '')) - WHERE id = $1 - ` - _, err := db.pool.Exec(ctx, query, page.ID) - if err != nil { - return fmt.Errorf("ошибка при обновлении tsvector для страницы %d: %w", page.ID, err) - } - return nil + return db.q.UpdatePageVector(ctx, page.ID) } func (db *DB) SearchPages(ctx context.Context, query string) ([]*storage.Page, error) { - sql := ` - SELECT - id, - url, - title, - ts_rank(content_tsvector, websearch_to_tsquery('russian', $1)) as rank - FROM pages - WHERE content_tsvector @@ websearch_to_tsquery('russian', $1) - ORDER BY rank DESC - LIMIT 20 - ` - rows, err := db.pool.Query(ctx, sql, query) - if err != nil { - return nil, fmt.Errorf("ошибка при выполнении полнотекстового поиска: %w", err) - } - defer rows.Close() - - var pages []*storage.Page - for rows.Next() { - var p storage.Page - var rank float32 - if err := rows.Scan(&p.ID, &p.URL, &p.Title, &rank); err != nil { - return nil, fmt.Errorf("ошибка при сканировании результата поиска: %w", err) - } - pages = append(pages, &p) - } - - return pages, rows.Err() + return db.q.SearchPages(ctx, query) } func (db *DB) GetMetrics(ctx context.Context) (*storage.Metrics, error) { - var count int64 - query := "SELECT COUNT(*) FROM pages" - err := db.pool.QueryRow(ctx, query).Scan(&count) - if err != nil { - return nil, fmt.Errorf("ошибка при получении количества страниц: %w", err) - } - return &storage.Metrics{PagesCount: count}, nil -} + return db.q.GetMetrics(ctx) +} \ No newline at end of file diff --git a/internal/storage/postgres/db_test.go b/internal/storage/postgres/db_test.go index 0be5581..355cc53 100644 --- a/internal/storage/postgres/db_test.go +++ b/internal/storage/postgres/db_test.go @@ -4,6 +4,7 @@ package postgres import ( "context" + "os" "path/filepath" "testing" "time" @@ -17,6 +18,10 @@ import ( ) func setupTestDB(t *testing.T) *DB { + if os.Getenv("RUN_DOCKER_TESTS") != "true" { + t.Skip("Skipping Docker-dependent tests. Set RUN_DOCKER_TESTS=true to run them.") + } + ctx := context.Background() schemaPath, err := filepath.Abs("../schema.sql") @@ -110,4 +115,4 @@ func TestIndexingAndSearchWorkflow(t *testing.T) { require.NoError(t, err) require.Len(t, results, 0) }) -} +} \ No newline at end of file diff --git a/internal/storage/postgres/querier.go b/internal/storage/postgres/querier.go new file mode 100644 index 0000000..4d3dd9f --- /dev/null +++ b/internal/storage/postgres/querier.go @@ -0,0 +1,118 @@ +package postgres + +import ( + "cis-engine/internal/storage" + "context" + "fmt" + + "github.com/jackc/pgx/v5" +) + +const ( + storePageQuery = ` + INSERT INTO pages (url, html_content, title, last_crawled_at) + VALUES ($1, $2, $3, $4) + ON CONFLICT (url) DO UPDATE + SET html_content = EXCLUDED.html_content, + title = EXCLUDED.title, + last_crawled_at = EXCLUDED.last_crawled_at, + content_tsvector = NULL + RETURNING id + ` + getNextPageToIndexQuery = `SELECT id, url, title, html_content FROM pages WHERE content_tsvector IS NULL LIMIT 1` + updatePageVectorQuery = ` + UPDATE pages + SET content_tsvector = to_tsvector('russian', coalesce(title, '') || ' ' || coalesce(html_content, '')) + WHERE id = $1 + ` + searchPagesQuery = ` + SELECT + id, + url, + title, + ts_rank(content_tsvector, websearch_to_tsquery('russian', $1)) as rank + FROM pages + WHERE content_tsvector @@ websearch_to_tsquery('russian', $1) + ORDER BY rank DESC + LIMIT 20 + ` + getMetricsQuery = `SELECT COUNT(*) FROM pages` +) + +type Querier interface { + StorePage(ctx context.Context, arg *storage.Page) (int64, error) + GetNextPageToIndex(ctx context.Context) (*storage.Page, error) + UpdatePageVector(ctx context.Context, pageID int64) error + SearchPages(ctx context.Context, query string) ([]*storage.Page, error) + GetMetrics(ctx context.Context) (*storage.Metrics, error) +} + +type Queries struct { + db DBTX +} + +func NewQuerier(db DBTX) *Queries { + return &Queries{db: db} +} + +func (q *Queries) StorePage(ctx context.Context, page *storage.Page) (int64, error) { + var pageID int64 + err := q.db.QueryRow(ctx, storePageQuery, page.URL, page.Body, page.Title, page.CrawledAt).Scan(&pageID) + if err != nil { + return 0, fmt.Errorf("failed to store page %s: %w", page.URL, err) + } + return pageID, nil +} + +func (q *Queries) GetNextPageToIndex(ctx context.Context) (*storage.Page, error) { + var p storage.Page + err := q.db.QueryRow(ctx, getNextPageToIndexQuery).Scan(&p.ID, &p.URL, &p.Title, &p.Body) + if err != nil { + if err == pgx.ErrNoRows { + return nil, nil + } + return nil, fmt.Errorf("failed to get next page to index: %w", err) + } + return &p, nil +} + +func (q *Queries) UpdatePageVector(ctx context.Context, pageID int64) error { + _, err := q.db.Exec(ctx, updatePageVectorQuery, pageID) + if err != nil { + return fmt.Errorf("failed to update page vector for page %d: %w", pageID, err) + } + return nil +} + +func (q *Queries) SearchPages(ctx context.Context, query string) ([]*storage.Page, error) { + rows, err := q.db.Query(ctx, searchPagesQuery, query) + if err != nil { + return nil, fmt.Errorf("failed to execute full-text search: %w", err) + } + defer rows.Close() + + var pages []*storage.Page + for rows.Next() { + var p storage.Page + var rank float32 + if err := rows.Scan(&p.ID, &p.URL, &p.Title, &rank); err != nil { + return nil, fmt.Errorf("failed to scan search result: %w", err) + } + pages = append(pages, &p) + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("error reading search results: %w", err) + } + + return pages, nil +} + +func (q *Queries) GetMetrics(ctx context.Context) (*storage.Metrics, error) { + var count int64 + err := q.db.QueryRow(ctx, getMetricsQuery).Scan(&count) + if err != nil { + return nil, fmt.Errorf("failed to get page count: %w", err) + } + return &storage.Metrics{PagesCount: count}, nil +} \ No newline at end of file diff --git a/internal/storage/postgres/storage_test.go b/internal/storage/postgres/storage_test.go new file mode 100644 index 0000000..c1a19a1 --- /dev/null +++ b/internal/storage/postgres/storage_test.go @@ -0,0 +1,42 @@ +package postgres + +import ( + "context" + "testing" + + "cis-engine/internal/storage" + + "github.com/stretchr/testify/require" +) + +func TestGetNextPageToIndex(t *testing.T) { + db := setupTestDB(t) + ctx := context.Background() + + // 1. Test with no pages in the database + page, err := db.GetNextPageToIndex(ctx) + require.NoError(t, err) + require.Nil(t, page) + + // 2. Test with a page that needs indexing + pageToStore := &storage.Page{ + URL: "https://example.com", + Title: "Example Domain", + Body: "This is the body of the example page.", + } + _, err = db.StorePage(ctx, pageToStore) + require.NoError(t, err) + + page, err = db.GetNextPageToIndex(ctx) + require.NoError(t, err) + require.NotNil(t, page) + require.Equal(t, pageToStore.URL, page.URL) + + // 3. Test that the same page is not returned again + err = db.UpdatePageVector(ctx, page) + require.NoError(t, err) + + page, err = db.GetNextPageToIndex(ctx) + require.NoError(t, err) + require.Nil(t, page) +} \ No newline at end of file