Implement initial version of web scraping API.
This commit is contained in:
commit
e8f06e46f0
25
.dockerignore
Normal file
25
.dockerignore
Normal file
@ -0,0 +1,25 @@
|
||||
**/.dockerignore
|
||||
**/.env
|
||||
**/.git
|
||||
**/.gitignore
|
||||
**/.project
|
||||
**/.settings
|
||||
**/.toolstarget
|
||||
**/.vs
|
||||
**/.vscode
|
||||
**/.idea
|
||||
**/*.*proj.user
|
||||
**/*.dbmdl
|
||||
**/*.jfm
|
||||
**/azds.yaml
|
||||
**/bin
|
||||
**/charts
|
||||
**/docker-compose*
|
||||
**/Dockerfile*
|
||||
**/node_modules
|
||||
**/npm-debug.log
|
||||
**/obj
|
||||
**/secrets.dev.yaml
|
||||
**/values.dev.yaml
|
||||
LICENSE
|
||||
README.md
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
bin/
|
||||
obj/
|
||||
/packages/
|
||||
riderModule.iml
|
||||
/_ReSharper.Caches/
|
||||
37
ScrapperAPI/Bus/SignalRScrapeEventBus.cs
Normal file
37
ScrapperAPI/Bus/SignalRScrapeEventBus.cs
Normal file
@ -0,0 +1,37 @@
|
||||
using Microsoft.AspNetCore.SignalR;
using ScrapperAPI.Enums;
using ScrapperAPI.Hub;
using ScrapperAPI.Interfaces;
using ScrapperAPI.Records;

namespace ScrapperAPI.Bus;

/// <summary>
/// Publishes scrape events to SignalR groups: per-item detail events go only to
/// the session's group, while state/progress events fan out to both the overview
/// group and the session group.
/// </summary>
public sealed class SignalRScrapeEventBus : IScrapeEventBus
{
    private readonly IHubContext<ScrapeHub> _hub;

    public SignalRScrapeEventBus(IHubContext<ScrapeHub> hub) => _hub = hub;

    public Task PublishAsync(ScrapeEvent ev, CancellationToken ct = default)
    {
        var sessionGroup = _hub.Clients.Group(ScrapeHub.GroupName(ev.SessionId));

        // Item-level details are only interesting to clients watching this session.
        var isItemEvent = ev.Type is ScrapeEventType.ItemStarted
            or ScrapeEventType.ItemSucceeded
            or ScrapeEventType.ItemFailed;

        if (isItemEvent)
            return sessionGroup.SendAsync("scrapeEvent", ev, ct);

        // State/progress events: the overview screen receives them, and the session
        // screen also gets a copy so it can update without depending on the overview feed.
        return Task.WhenAll(
            _hub.Clients.Group(ScrapeHub.OverviewGroup).SendAsync("scrapeEvent", ev, ct),
            sessionGroup.SendAsync("scrapeEvent", ev, ct));
    }
}
|
||||
66
ScrapperAPI/Controllers/ContentController.cs
Normal file
66
ScrapperAPI/Controllers/ContentController.cs
Normal file
@ -0,0 +1,66 @@
|
||||
using Microsoft.AspNetCore.Mvc;
using ScrapperAPI.Interfaces;
using ScrapperAPI.Utils;

namespace ScrapperAPI.Controllers;

/// <summary>
/// Serves scraped page content (stored gzip-compressed) back to clients in
/// decompressed form.
/// </summary>
[ApiController]
public sealed class ContentController : ControllerBase
{
    private readonly IContentRepository _content;

    public ContentController(IContentRepository content)
    {
        _content = content;
    }

    // Returns the DECOMPRESSED HTML for the newest content row of a queue item.
    // GET /queue/{queueId}/content
    [HttpGet("queue/{queueId:int}/content")]
    public async Task<IActionResult> GetDecompressedHtml(int queueId, CancellationToken ct)
    {
        var row = await _content.GetCompressedByQueueIdAsync(queueId, ct);
        if (row is null || row.ContentBytes is null || row.ContentBytes.Length == 0)
            return NotFound(new { message = "Content not found for this queueId." });

        if (!string.Equals(row.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
            return StatusCode(415, new { message = $"Unsupported encoding: {row.ContentEncoding}" });

        string html;
        try
        {
            html = CompressionUtils.GzipDecompressUtf8(row.ContentBytes);
        }
        catch (Exception ex)
        {
            // Payload is corrupted or not actually gzip.
            return StatusCode(500, new { message = "Failed to decompress content.", error = ex.Message });
        }

        // Debug-friendly headers describing the stored row.
        Response.Headers["X-Content-Id"] = row.Id.ToString();
        Response.Headers["X-Queue-Id"] = row.QueueId.ToString();
        Response.Headers["X-Content-Encoding"] = row.ContentEncoding;
        if (row.OriginalLength is not null) Response.Headers["X-Original-Length"] = row.OriginalLength.Value.ToString();
        if (row.CompressedLength is not null) Response.Headers["X-Compressed-Length"] = row.CompressedLength.Value.ToString();

        // Serve as HTML so a browser / front-end can render it directly.
        return Content(html, "text/html; charset=utf-8");
    }

    // (Optional) debug endpoint: same payload decompressed, served as plain text.
    // GET /queue/{queueId}/content/raw
    [HttpGet("queue/{queueId:int}/content/raw")]
    public async Task<IActionResult> GetDecompressedRaw(int queueId, CancellationToken ct)
    {
        var row = await _content.GetCompressedByQueueIdAsync(queueId, ct);
        if (row is null || row.ContentBytes is null || row.ContentBytes.Length == 0)
            return NotFound(new { message = "Content not found for this queueId." });

        if (!string.Equals(row.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
            return StatusCode(415, new { message = $"Unsupported encoding: {row.ContentEncoding}" });

        string text;
        try
        {
            text = CompressionUtils.GzipDecompressUtf8(row.ContentBytes);
        }
        catch (Exception ex)
        {
            // FIX: previously this endpoint let decompression exceptions escape as an
            // unhandled 500; now it reports the failure the same way the HTML endpoint does.
            return StatusCode(500, new { message = "Failed to decompress content.", error = ex.Message });
        }

        return Content(text, "text/plain; charset=utf-8");
    }
}
|
||||
41
ScrapperAPI/Controllers/ScrapeController.cs
Normal file
41
ScrapperAPI/Controllers/ScrapeController.cs
Normal file
@ -0,0 +1,41 @@
|
||||
using Microsoft.AspNetCore.Mvc;
using ScrapperAPI.Interfaces;

namespace ScrapperAPI.Controllers;

/// <summary>
/// Controls the scraping lifecycle of a single session: start, stop, status.
/// </summary>
[ApiController]
[Route("sessions/{sessionId:int}/scrap")]
public sealed class ScrapeController(IScrapeCoordinator coord, IQueueRepository queue) : ControllerBase
{
    private readonly IScrapeCoordinator _coord = coord;
    private readonly IQueueRepository _queue = queue;

    /// <summary>Asks the coordinator to begin processing the session's queue.</summary>
    [HttpPost("start")]
    public async Task<IActionResult> Start(int sessionId, CancellationToken ct)
    {
        await _coord.StartAsync(sessionId, ct);
        return Accepted();
    }

    /// <summary>Requests a stop of the session's scraping loop.</summary>
    [HttpPost("stop")]
    public async Task<IActionResult> Stop(int sessionId)
    {
        await _coord.StopAsync(sessionId);
        return Accepted();
    }

    /// <summary>Reports in-memory runtime state together with queue counters from the database.</summary>
    [HttpGet("status")]
    public async Task<IActionResult> Status(int sessionId, CancellationToken ct)
    {
        var runtime = _coord.GetRuntimeStatus(sessionId);
        var counts = await _queue.GetCountsAsync(sessionId, ct);

        return Ok(new { runtime, counts });
    }
}
|
||||
54
ScrapperAPI/Controllers/ScrapeMonitoringController.cs
Normal file
54
ScrapperAPI/Controllers/ScrapeMonitoringController.cs
Normal file
@ -0,0 +1,54 @@
|
||||
using Microsoft.AspNetCore.Mvc;
using ScrapperAPI.Interfaces;

namespace ScrapperAPI.Controllers;

/// <summary>
/// Read-only monitoring endpoints spanning all sessions.
/// </summary>
[ApiController]
[Route("scrap")]
public sealed class ScrapeMonitoringController : ControllerBase
{
    private readonly IScrapeCoordinator _coordinator;
    private readonly IQueueRepository _queueRepository;

    public ScrapeMonitoringController(
        IScrapeCoordinator coordinator,
        IQueueRepository queueRepository)
    {
        _coordinator = coordinator;
        _queueRepository = queueRepository;
    }

    /// <summary>
    /// Lists sessions the coordinator currently considers running, each enriched
    /// with queue progress counters read from the database.
    /// </summary>
    [HttpGet("running-sessions")]
    public async Task<IActionResult> ListRunningSessions(CancellationToken ct)
    {
        var payload = new List<object>();

        foreach (var status in _coordinator.ListRunningSessions())
        {
            // Enrich each runtime snapshot with persisted progress.
            var counts = await _queueRepository.GetCountsAsync(status.SessionId, ct);

            payload.Add(new
            {
                status.SessionId,
                status.IsRunning,
                status.StopRequested,
                status.CurrentQueueId,
                status.CurrentUrl,
                status.CurrentStartedAt,
                Progress = new
                {
                    counts.Total,
                    counts.Pending,
                    counts.Processing,
                    counts.Done,
                    counts.Failed
                }
            });
        }

        return Ok(payload);
    }
}
|
||||
83
ScrapperAPI/Controllers/SessionsController.cs
Normal file
83
ScrapperAPI/Controllers/SessionsController.cs
Normal file
@ -0,0 +1,83 @@
|
||||
using System.ComponentModel.DataAnnotations;
using Microsoft.AspNetCore.Mvc;
using ScrapperAPI.Dtos;
using ScrapperAPI.Interfaces;

namespace ScrapperAPI.Controllers;

/// <summary>Request body for enqueueing a URL into a session's queue.</summary>
public sealed record AddUrlRequest(
    [Required, Url] string Url
);

/// <summary>
/// Session management: enqueue/remove queue items and list all sessions with
/// their runtime state and queue counters.
/// </summary>
[ApiController]
[Route("sessions")]
public sealed class SessionsController : ControllerBase
{
    private readonly ISessionRepository _sessions;
    private readonly IQueueRepository _queue;
    private readonly IScrapeCoordinator _coord;

    public SessionsController(
        ISessionRepository sessions,
        IQueueRepository queue,
        IScrapeCoordinator coord)
    {
        _sessions = sessions;
        _queue = queue;
        _coord = coord;
    }

    // Add a URL to a session's queue.
    // POST /sessions/{sessionId}/queue
    // FIX: the template previously started with "/", which makes an attribute route
    // absolute and ignore the controller's "sessions" prefix (the actual route was
    // /{sessionId}/queue). Dropping the slash makes it combine as documented.
    [HttpPost("{sessionId:int}/queue")]
    public async Task<IActionResult> AddUrl(int sessionId, [FromBody] AddUrlRequest req, CancellationToken ct)
    {
        // Validate that the session exists before enqueueing. The previous code
        // fetched all sessions, re-queried by name, and then did nothing with the
        // result (empty if-block); this performs the check for real.
        // NOTE(review): ISessionRepository has no GetByIdAsync yet, so we scan the
        // full list; replace with a dedicated lookup when the repo grows one.
        var session = (await _sessions.GetAllAsync(ct)).FirstOrDefault(s => s.Id == sessionId);
        if (session is null)
            return NotFound(new { message = "Session not found." });

        var id = await _queue.EnqueueAsync(sessionId, req.Url, ct);
        return Created($"/sessions/{sessionId}/queue/{id}", new { id, sessionId, req.Url });
    }

    // Remove a queue item by ID (safe: only pending items are deletable).
    // DELETE /sessions/{sessionId}/queue/{queueId}
    // FIX: same leading-slash routing issue as AddUrl.
    [HttpDelete("{sessionId:int}/queue/{queueId:int}")]
    public async Task<IActionResult> RemoveById(int sessionId, int queueId, CancellationToken ct)
    {
        var removed = await _queue.RemovePendingByIdAsync(sessionId, queueId, ct);
        return removed ? NoContent() : NotFound(new { message = "Queue item not found (or not pending)." });
    }

    /// <summary>Lists every session with its runtime flags and queue counters.</summary>
    [HttpGet]
    public async Task<IActionResult> GetAllSessions(CancellationToken ct)
    {
        var allSessions = await _sessions.GetAllAsync(ct);
        var result = new List<SessionOverviewDto>();

        foreach (var s in allSessions)
        {
            var counts = await _queue.GetCountsAsync(s.Id, ct);
            var runtime = _coord.GetRuntimeStatus(s.Id);

            result.Add(new SessionOverviewDto(
                SessionId: s.Id,
                Name: s.Name,
                IsRunning: runtime.IsRunning,
                StopRequested: runtime.StopRequested,
                Queue: counts
            ));
        }

        return Ok(result);
    }
}
|
||||
23
ScrapperAPI/Dockerfile
Normal file
23
ScrapperAPI/Dockerfile
Normal file
@ -0,0 +1,23 @@
|
||||
# Runtime image: ASP.NET Core 10, running as the non-root $APP_UID user.
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
USER $APP_UID
WORKDIR /app
EXPOSE 8080
EXPOSE 8081

# Build stage: copy the project file and restore first so the restore layer caches.
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
ARG BUILD_CONFIGURATION=Release
WORKDIR /src
COPY ["ScrapperAPI/ScrapperAPI.csproj", "ScrapperAPI/"]
RUN dotnet restore "ScrapperAPI/ScrapperAPI.csproj"
COPY . .
WORKDIR "/src/ScrapperAPI"
RUN dotnet build "./ScrapperAPI.csproj" -c $BUILD_CONFIGURATION -o /app/build

# Publish stage: framework-dependent output (no native apphost binary).
FROM build AS publish
ARG BUILD_CONFIGURATION=Release
RUN dotnet publish "./ScrapperAPI.csproj" -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false

# Final image: published output layered onto the runtime base.
FROM base AS final
WORKDIR /app
COPY --from=publish /app/publish .
ENTRYPOINT ["dotnet", "ScrapperAPI.dll"]
|
||||
8
ScrapperAPI/Dtos/ContentRow.cs
Normal file
8
ScrapperAPI/Dtos/ContentRow.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace ScrapperAPI.Dtos;

/// <summary>
/// Content row as selected by IContentRepository.GetByQueueIdAsync.
/// NOTE(review): SaveAsync stores compressed bytes in a content_bytes column,
/// while this DTO maps a `content` column — confirm the schema has both.
/// </summary>
public sealed record ContentRow(
    int Id,
    int QueueId,
    string Content,
    DateTime CreatedDate
);
|
||||
9
ScrapperAPI/Dtos/QueueCounts.cs
Normal file
9
ScrapperAPI/Dtos/QueueCounts.cs
Normal file
@ -0,0 +1,9 @@
|
||||
namespace ScrapperAPI.Dtos;

/// <summary>
/// Per-status counters for one session's queue, produced by a single aggregate
/// query (long because PostgreSQL count(*) returns bigint).
/// </summary>
public sealed record QueueCounts(
    long Total,
    long Pending,
    long Processing,
    long Done,
    long Failed
);
|
||||
13
ScrapperAPI/Dtos/QueueItem.cs
Normal file
13
ScrapperAPI/Dtos/QueueItem.cs
Normal file
@ -0,0 +1,13 @@
|
||||
namespace ScrapperAPI.Dtos;

/// <summary>
/// A row of the scrape queue. Status codes (see QueueRepository SQL):
/// 0 = pending, 1 = processing, 2 = done, 3 = failed.
/// </summary>
public sealed record QueueItem(
    int Id,
    int SessionId,
    string Url,
    short Status,
    DateTime CreatedDate,
    DateTime? StartedDate,    // stamped when the item is dequeued
    DateTime? FinishedDate,   // stamped when marked done or failed
    int Attempts,             // incremented on every dequeue
    string? LastError         // last failure message; cleared on success
);
|
||||
9
ScrapperAPI/Dtos/SessionOverviewDto.cs
Normal file
9
ScrapperAPI/Dtos/SessionOverviewDto.cs
Normal file
@ -0,0 +1,9 @@
|
||||
namespace ScrapperAPI.Dtos;

/// <summary>
/// Per-session summary combining in-memory runtime flags with queue counters.
/// </summary>
public sealed record SessionOverviewDto(
    int SessionId,
    string Name,
    bool IsRunning,
    bool StopRequested,
    QueueCounts Queue
);
|
||||
6
ScrapperAPI/Dtos/SessionRow.cs
Normal file
6
ScrapperAPI/Dtos/SessionRow.cs
Normal file
@ -0,0 +1,6 @@
|
||||
namespace ScrapperAPI.Dtos;

/// <summary>A scrape session as stored in the database.</summary>
public sealed record SessionRow(
    int Id,
    string Name
);
|
||||
12
ScrapperAPI/Enums/ScrapeEventType.cs
Normal file
12
ScrapperAPI/Enums/ScrapeEventType.cs
Normal file
@ -0,0 +1,12 @@
|
||||
namespace ScrapperAPI.Enums;

/// <summary>
/// Kinds of events published on the scrape event bus. Item* events are routed
/// only to the session's SignalR group (see SignalRScrapeEventBus); the others
/// fan out to the overview group as well.
/// </summary>
public enum ScrapeEventType
{
    SessionStarted,
    SessionStopRequested,
    SessionStopped,
    ItemStarted,
    ItemSucceeded,
    ItemFailed,
    Progress
}
|
||||
20
ScrapperAPI/Factories/NpgsqlConnectionFactory.cs
Normal file
20
ScrapperAPI/Factories/NpgsqlConnectionFactory.cs
Normal file
@ -0,0 +1,20 @@
|
||||
using System.Data;
using Npgsql;
using ScrapperAPI.Interfaces;

namespace ScrapperAPI.Factories;

/// <summary>
/// Creates opened Npgsql connections from the "Default" connection string.
/// </summary>
public sealed class NpgsqlConnectionFactory : IDbConnectionFactory
{
    private readonly string _cs;

    // FIX: previously used the null-forgiving "!", which deferred a missing
    // connection string to a confusing failure on the first query. Fail fast at
    // construction with a clear message instead.
    public NpgsqlConnectionFactory(IConfiguration cfg)
        => _cs = cfg.GetConnectionString("Default")
            ?? throw new InvalidOperationException("Connection string 'Default' is not configured.");

    /// <summary>Opens and returns a new connection; the caller owns and disposes it.</summary>
    public async Task<IDbConnection> CreateOpenConnectionAsync(CancellationToken ct)
    {
        var conn = new NpgsqlConnection(_cs);
        await conn.OpenAsync(ct);
        return conn;
    }
}
|
||||
19
ScrapperAPI/Hub/ScrapeHub.cs
Normal file
19
ScrapperAPI/Hub/ScrapeHub.cs
Normal file
@ -0,0 +1,19 @@
|
||||
namespace ScrapperAPI.Hub;

/// <summary>
/// SignalR hub clients use to join/leave the per-session and overview broadcast
/// groups that scrape events are published to.
/// </summary>
public sealed class ScrapeHub : Microsoft.AspNetCore.SignalR.Hub
{
    /// <summary>Group receiving cross-session state/progress events.</summary>
    public const string OverviewGroup = "overview";

    /// <summary>Builds the group name for a single session's event stream.</summary>
    public static string GroupName(int sessionId) => $"session:{sessionId}";

    public Task Subscribe(int sessionId)
    {
        return Groups.AddToGroupAsync(Context.ConnectionId, GroupName(sessionId));
    }

    public Task Unsubscribe(int sessionId)
    {
        return Groups.RemoveFromGroupAsync(Context.ConnectionId, GroupName(sessionId));
    }

    public Task SubscribeOverview()
    {
        return Groups.AddToGroupAsync(Context.ConnectionId, OverviewGroup);
    }

    public Task UnsubscribeOverview()
    {
        return Groups.RemoveFromGroupAsync(Context.ConnectionId, OverviewGroup);
    }
}
|
||||
12
ScrapperAPI/Interfaces/IContentRepository.cs
Normal file
12
ScrapperAPI/Interfaces/IContentRepository.cs
Normal file
@ -0,0 +1,12 @@
|
||||
using ScrapperAPI.Dtos;
using ScrapperAPI.Records;

namespace ScrapperAPI.Interfaces;

/// <summary>Persistence for scraped page content (stored gzip-compressed).</summary>
public interface IContentRepository
{
    /// <summary>Compresses and stores <paramref name="content"/> for a queue item; returns the new row id.</summary>
    Task<int> SaveAsync(int queueId, string content, CancellationToken ct);

    /// <summary>Latest stored row for a queue item mapped to a DTO, or null when none exists.</summary>
    Task<ContentRow?> GetByQueueIdAsync(int queueId, CancellationToken ct);

    /// <summary>Latest row with its raw compressed bytes and size metadata, or null.</summary>
    Task<CompressedContent?> GetCompressedByQueueIdAsync(int queueId, CancellationToken ct);

}
|
||||
8
ScrapperAPI/Interfaces/IDbConnectionFactory.cs
Normal file
8
ScrapperAPI/Interfaces/IDbConnectionFactory.cs
Normal file
@ -0,0 +1,8 @@
|
||||
using System.Data;

namespace ScrapperAPI.Interfaces;

/// <summary>Creates already-opened database connections; the caller owns and disposes them.</summary>
public interface IDbConnectionFactory
{
    Task<IDbConnection> CreateOpenConnectionAsync(CancellationToken ct);
}
|
||||
6
ScrapperAPI/Interfaces/IDomainRateLimiter.cs
Normal file
6
ScrapperAPI/Interfaces/IDomainRateLimiter.cs
Normal file
@ -0,0 +1,6 @@
|
||||
namespace ScrapperAPI.Interfaces;

/// <summary>Throttles outgoing requests per target host.</summary>
public interface IDomainRateLimiter
{
    /// <summary>Completes once the next request to <paramref name="host"/> may be sent.</summary>
    Task WaitAsync(string host, CancellationToken ct);
}
|
||||
24
ScrapperAPI/Interfaces/IQueueRepository.cs
Normal file
24
ScrapperAPI/Interfaces/IQueueRepository.cs
Normal file
@ -0,0 +1,24 @@
|
||||
using ScrapperAPI.Dtos;

namespace ScrapperAPI.Interfaces;

/// <summary>Persistence operations for the per-session scrape queue.</summary>
public interface IQueueRepository
{
    /// <summary>Adds a URL to a session's queue; returns the new queue item id.</summary>
    Task<int> EnqueueAsync(int sessionId, string url, CancellationToken ct);

    /// <summary>Returns total/pending/processing/done/failed counters for a session.</summary>
    Task<QueueCounts> GetCountsAsync(int sessionId, CancellationToken ct);

    /// <summary>
    /// Takes one pending item and atomically/safely switches it to Processing.
    /// Returns null when there are no pending items.
    /// </summary>
    Task<QueueItem?> TryDequeueAsync(int sessionId, CancellationToken ct);

    /// <summary>Marks an item done and clears its last error.</summary>
    Task MarkDoneAsync(int queueId, CancellationToken ct);

    /// <summary>Marks an item failed, recording the error message.</summary>
    Task MarkFailedAsync(int queueId, string error, CancellationToken ct);

    /// <summary>
    /// Optional maintenance: requeues items stuck in Processing longer than
    /// <paramref name="olderThan"/> (e.g. after a worker died); returns the count.
    /// </summary>
    Task<int> RequeueStuckProcessingAsync(int sessionId, TimeSpan olderThan, CancellationToken ct);

    /// <summary>Deletes one pending item by id; false when not found or not pending.</summary>
    Task<bool> RemovePendingByIdAsync(int sessionId, int queueId, CancellationToken ct);

    /// <summary>Deletes all pending items with the given URL; returns the deleted count.</summary>
    Task<int> RemovePendingByUrlAsync(int sessionId, string url, CancellationToken ct);
}
|
||||
11
ScrapperAPI/Interfaces/IScrapeCoordinator.cs
Normal file
11
ScrapperAPI/Interfaces/IScrapeCoordinator.cs
Normal file
@ -0,0 +1,11 @@
|
||||
using ScrapperAPI.Records;

namespace ScrapperAPI.Interfaces;

/// <summary>Orchestrates scraping runs per session and exposes their in-memory state.</summary>
public interface IScrapeCoordinator
{
    /// <summary>Starts scraping for a session.</summary>
    Task StartAsync(int sessionId, CancellationToken ct = default);

    /// <summary>Requests a stop of the session's scraping loop.</summary>
    Task StopAsync(int sessionId);

    /// <summary>In-memory status snapshot for one session.</summary>
    ScrapeRuntimeStatus GetRuntimeStatus(int sessionId);

    /// <summary>Status snapshots of all currently running sessions.</summary>
    IReadOnlyCollection<ScrapeRuntimeStatus> ListRunningSessions();
}
|
||||
8
ScrapperAPI/Interfaces/IScrapeEventBus.cs
Normal file
8
ScrapperAPI/Interfaces/IScrapeEventBus.cs
Normal file
@ -0,0 +1,8 @@
|
||||
using ScrapperAPI.Records;

namespace ScrapperAPI.Interfaces;

/// <summary>
/// Publishes scrape events to listeners (SignalR groups in the default
/// implementation, SignalRScrapeEventBus).
/// </summary>
public interface IScrapeEventBus
{
    Task PublishAsync(ScrapeEvent ev, CancellationToken ct = default);
}
|
||||
6
ScrapperAPI/Interfaces/IScraperHttpClient.cs
Normal file
6
ScrapperAPI/Interfaces/IScraperHttpClient.cs
Normal file
@ -0,0 +1,6 @@
|
||||
namespace ScrapperAPI.Interfaces;

/// <summary>
/// HTTP fetcher used by the scraper. Presumably honors ScraperOptions.Retry —
/// implementation not visible here; confirm in ScraperHttpClient.
/// </summary>
public interface IScraperHttpClient
{
    /// <summary>Fetches the response body of <paramref name="url"/> as a string, retrying on failure.</summary>
    Task<string> GetStringWithRetryAsync(string url, CancellationToken ct);
}
|
||||
11
ScrapperAPI/Interfaces/ISessionRepository.cs
Normal file
11
ScrapperAPI/Interfaces/ISessionRepository.cs
Normal file
@ -0,0 +1,11 @@
|
||||
using ScrapperAPI.Dtos;

namespace ScrapperAPI.Interfaces;

/// <summary>Persistence for scrape sessions.</summary>
public interface ISessionRepository
{
    /// <summary>Creates a session and returns its id.</summary>
    Task<int> CreateAsync(string name, CancellationToken ct);

    /// <summary>Finds a session by name, or null when absent.</summary>
    Task<SessionRow?> FindByNameAsync(string name, CancellationToken ct);

    /// <summary>Returns the id of the session with the given name, or null.</summary>
    Task<int?> GetIdByNameAsync(string name, CancellationToken ct);

    /// <summary>Returns all sessions.</summary>
    Task<IReadOnlyList<SessionRow>> GetAllAsync(CancellationToken ct);
}
|
||||
22
ScrapperAPI/Options/ScraperOptions.cs
Normal file
22
ScrapperAPI/Options/ScraperOptions.cs
Normal file
@ -0,0 +1,22 @@
|
||||
namespace ScrapperAPI.Options;

/// <summary>Scraper tuning, bound from the "Scraper" configuration section in Program.cs.</summary>
public class ScraperOptions
{
    // Delay window in milliseconds — presumably a randomized pause between
    // requests; TODO confirm usage in the scrape worker.
    public int DelayMinMs { get; init; } = 100;
    public int DelayMaxMs { get; init; } = 3000;

    public RateLimitOptions RateLimit { get; init; } = new();
    public RetryOptions Retry { get; init; } = new();
}

/// <summary>Per-domain rate limiting (consumed by DomainRateLimiter at startup).</summary>
public sealed class RateLimitOptions
{
    // Minimum delay between two requests to the same domain, in milliseconds.
    public int PerDomainMinDelayMs { get; init; } = 500;
}

/// <summary>Retry policy for failed fetches.</summary>
public sealed class RetryOptions
{
    public int MaxAttempts { get; init; } = 5;
    // Base/max delays in milliseconds — presumably a capped backoff; confirm in
    // the IScraperHttpClient implementation.
    public int BaseDelayMs { get; init; } = 250;
    public int MaxDelayMs { get; init; } = 8000;
}
|
||||
63
ScrapperAPI/Program.cs
Normal file
63
ScrapperAPI/Program.cs
Normal file
@ -0,0 +1,63 @@
|
||||
using ScrapperAPI.Bus;
using ScrapperAPI.Factories;
using ScrapperAPI.Hub;
using ScrapperAPI.Interfaces;
using ScrapperAPI.Options;
using ScrapperAPI.Repositories;
using ScrapperAPI.Services;
using ScrapperAPI.Utils;
using ScrapperAPI.Workers;

var builder = WebApplication.CreateBuilder(args);

builder.Services.AddOpenApi();
builder.Services.AddSignalR();
builder.Services.AddControllers();

// Bind scraper tuning (delays, rate limit, retry) from the "Scraper" config section.
builder.Services.Configure<ScraperOptions>(builder.Configuration.GetSection("Scraper"));

// Singleton rate limiter configured from options.
// NOTE(review): options are read once when the singleton is created, so later
// configuration reloads will not affect the limiter.
builder.Services.AddSingleton<IDomainRateLimiter>(sp =>
{
    var opts = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<ScraperOptions>>().Value;
    return new DomainRateLimiter(opts.RateLimit.PerDomainMinDelayMs);
});
builder.Services.AddSingleton<IScrapeEventBus, SignalRScrapeEventBus>();
builder.Services.AddSingleton<IScraperHttpClient, ScraperHttpClient>();
builder.Services.AddSingleton<IDbConnectionFactory, NpgsqlConnectionFactory>();

// Repositories: one instance per request scope.
builder.Services.AddScoped<ISessionRepository, SessionRepository>();
builder.Services.AddScoped<IQueueRepository, QueueRepository>();
builder.Services.AddScoped<IContentRepository, ContentRepository>();

// Named HttpClient for scraping, with a 30-second overall timeout.
builder.Services.AddHttpClient("scraper", c => c.Timeout = TimeSpan.FromSeconds(30));

// The coordinator is registered once and exposed both as IScrapeCoordinator and
// as a hosted background service, so both views resolve to the same singleton.
builder.Services.AddSingleton<IScrapeCoordinator, ScrapeCoordinator>();
builder.Services.AddHostedService(sp => (ScrapeCoordinator)sp.GetRequiredService<IScrapeCoordinator>());

// CORS policy for the local React front-end (http://localhost:3000).
builder.Services.AddCors(options =>
{
    options.AddPolicy("AllowReact",
        policy =>
        {
            policy.WithOrigins("http://localhost:3000")
                .AllowAnyHeader()
                .AllowAnyMethod()
                .AllowCredentials();
        });
});

var app = builder.Build();

app.UseCors("AllowReact");

// OpenAPI endpoint only in development.
if (app.Environment.IsDevelopment())
{
    app.MapOpenApi();
}

app.MapControllers();
// SignalR hub for scrape events (clients join groups via ScrapeHub methods).
app.MapHub<ScrapeHub>("/ws/scrape");

// app.UseHttpsRedirection();

app.Run();
|
||||
24
ScrapperAPI/Properties/launchSettings.json
Normal file
24
ScrapperAPI/Properties/launchSettings.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/launchsettings.json",
|
||||
"profiles": {
|
||||
"http": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": false,
|
||||
"applicationUrl": "http://localhost:5123",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
},
|
||||
"https": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": false,
|
||||
"applicationUrl": "https://localhost:7285;http://localhost:5123",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development",
|
||||
"ConnectionStrings__Default": "Host=localhost;Port=5432;Database=webscrapper_dev;Username=postgres;Password=devpassword;"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
11
ScrapperAPI/Records/CompressedContent.cs
Normal file
11
ScrapperAPI/Records/CompressedContent.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace ScrapperAPI.Records;

/// <summary>A stored content row with its raw compressed payload and size metadata.</summary>
public sealed record CompressedContent(
    int Id,
    int QueueId,
    string ContentEncoding,   // e.g. "gzip" — the only encoding SaveAsync writes
    byte[] ContentBytes,      // compressed payload
    int? OriginalLength,      // uncompressed size; see ContentRepository.SaveAsync for the unit
    int? CompressedLength,    // size of ContentBytes
    DateTime CreatedDate
);
|
||||
19
ScrapperAPI/Records/ScrapeEvent.cs
Normal file
19
ScrapperAPI/Records/ScrapeEvent.cs
Normal file
@ -0,0 +1,19 @@
|
||||
using ScrapperAPI.Enums;

namespace ScrapperAPI.Records;

/// <summary>
/// Event published on the scrape event bus. All fields beyond the first three
/// are optional; which ones are populated depends on the event type.
/// </summary>
public sealed record ScrapeEvent(
    ScrapeEventType Type,
    int SessionId,
    DateTimeOffset At,
    int? QueueId = null,       // item-level events
    string? Url = null,
    int? StatusCode = null,    // presumably the HTTP status of the fetch — confirm in the worker
    string? Error = null,
    long? Total = null,        // queue counters for progress reporting
    long? Done = null,
    long? Pending = null,
    long? Processing = null,
    long? Failed = null,
    double? Percent = null
);
|
||||
10
ScrapperAPI/Records/ScrapeRuntimeStatus.cs
Normal file
10
ScrapperAPI/Records/ScrapeRuntimeStatus.cs
Normal file
@ -0,0 +1,10 @@
|
||||
namespace ScrapperAPI.Records;

/// <summary>In-memory snapshot of a session's scraping state, provided by the coordinator.</summary>
public sealed record ScrapeRuntimeStatus(
    int SessionId,
    bool IsRunning,
    bool StopRequested,
    int? CurrentQueueId,              // item currently being processed, if any
    string? CurrentUrl,
    DateTimeOffset? CurrentStartedAt
);
|
||||
81
ScrapperAPI/Repositories/ContentRepository.cs
Normal file
81
ScrapperAPI/Repositories/ContentRepository.cs
Normal file
@ -0,0 +1,81 @@
|
||||
using System.IO.Compression;
using System.Text;
using Dapper;
using ScrapperAPI.Dtos;
using ScrapperAPI.Interfaces;
using ScrapperAPI.Records;
using ScrapperAPI.Utils;

namespace ScrapperAPI.Repositories;

/// <summary>
/// Stores scraped page content gzip-compressed and reads it back, either mapped
/// to a DTO or as raw compressed bytes with metadata.
/// </summary>
public sealed class ContentRepository : IContentRepository
{
    private readonly IDbConnectionFactory _db;

    public ContentRepository(IDbConnectionFactory db) => _db = db;

    /// <summary>
    /// Compresses <paramref name="content"/> (gzip, fastest level) and inserts a
    /// new row; returns the new row id.
    /// </summary>
    public async Task<int> SaveAsync(int queueId, string content, CancellationToken ct)
    {
        var compressed = CompressionUtils.GzipCompressUtf8(content, CompressionLevel.Fastest);

        const string sql = """
        insert into content(queue_id, content_encoding, content_bytes, original_length, compressed_length)
        values (@queueId, 'gzip', @bytes, @origLen, @compLen)
        returning id;
        """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.ExecuteScalarAsync<int>(new CommandDefinition(sql, new
        {
            queueId,
            bytes = compressed,
            // FIX: both lengths are now byte counts. Previously original_length was
            // content.Length (a *char* count), which disagrees with compressed_length's
            // unit for any non-ASCII text; the UTF-8 byte count matches what the
            // decompressed payload actually occupies.
            origLen = Encoding.UTF8.GetByteCount(content),
            compLen = compressed.Length
        }, cancellationToken: ct));
    }

    /// <summary>
    /// Latest content row for a queue item mapped to <see cref="ContentRow"/>, or null.
    /// NOTE(review): this selects a `content` column while SaveAsync writes
    /// content_bytes — confirm the schema has both columns.
    /// </summary>
    public async Task<ContentRow?> GetByQueueIdAsync(int queueId, CancellationToken ct)
    {
        const string sql = """
        select id, queue_id as QueueId, content, created_date as CreatedDate
        from content
        where queue_id = @queueId
        order by id desc
        limit 1;
        """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.QuerySingleOrDefaultAsync<ContentRow>(
            new CommandDefinition(sql, new { queueId }, cancellationToken: ct));
    }

    /// <summary>Latest row with its raw compressed bytes and size metadata, or null.</summary>
    public async Task<CompressedContent?> GetCompressedByQueueIdAsync(
        int queueId,
        CancellationToken ct
    )
    {
        const string sql = """
        select
            id,
            queue_id as QueueId,
            content_encoding as ContentEncoding,
            content_bytes as ContentBytes,
            original_length as OriginalLength,
            compressed_length as CompressedLength,
            created_date as CreatedDate
        from content
        where queue_id = @queueId
        order by id desc
        limit 1;
        """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);

        return await conn.QuerySingleOrDefaultAsync<CompressedContent>(
            new CommandDefinition(
                sql,
                new { queueId },
                cancellationToken: ct
            )
        );
    }
}
|
||||
158
ScrapperAPI/Repositories/QueueRepository.cs
Normal file
158
ScrapperAPI/Repositories/QueueRepository.cs
Normal file
@ -0,0 +1,158 @@
|
||||
using Dapper;
|
||||
using ScrapperAPI.Dtos;
|
||||
using ScrapperAPI.Interfaces;
|
||||
|
||||
namespace ScrapperAPI.Repositories;
|
||||
|
||||
public sealed class QueueRepository : IQueueRepository
|
||||
{
|
||||
private readonly IDbConnectionFactory _db;
|
||||
|
||||
public QueueRepository(IDbConnectionFactory db) => _db = db;
|
||||
|
||||
public async Task<int> EnqueueAsync(int sessionId, string url, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
insert into queue(session_id, url)
|
||||
values (@sessionId, @url)
|
||||
returning id;
|
||||
""";
|
||||
|
||||
using var conn = await _db.CreateOpenConnectionAsync(ct);
|
||||
return await conn.ExecuteScalarAsync<int>(
|
||||
new CommandDefinition(sql, new { sessionId, url }, cancellationToken: ct));
|
||||
}
|
||||
|
||||
public async Task<QueueCounts> GetCountsAsync(int sessionId, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
select
|
||||
count(*) as total,
|
||||
count(*) filter (where status = 0) as pending,
|
||||
count(*) filter (where status = 1) as processing,
|
||||
count(*) filter (where status = 2) as done,
|
||||
count(*) filter (where status = 3) as failed
|
||||
from queue
|
||||
where session_id = @sessionId;
|
||||
""";
|
||||
|
||||
using var conn = await _db.CreateOpenConnectionAsync(ct);
|
||||
return await conn.QuerySingleAsync<QueueCounts>(
|
||||
new CommandDefinition(sql, new { sessionId }, cancellationToken: ct));
|
||||
}
|
||||
|
||||
public async Task<QueueItem?> TryDequeueAsync(int sessionId, CancellationToken ct)
|
||||
{
|
||||
// Importante: 1 transação + SKIP LOCKED (permite multi-worker no futuro)
|
||||
using var conn = await _db.CreateOpenConnectionAsync(ct);
|
||||
using var tx = conn.BeginTransaction();
|
||||
|
||||
const string sql = """
|
||||
with next as (
|
||||
select id
|
||||
from queue
|
||||
where session_id = @sessionId
|
||||
and status = 0
|
||||
order by id
|
||||
for update skip locked
|
||||
limit 1
|
||||
)
|
||||
update queue q
|
||||
set status = 1,
|
||||
started_date = now(),
|
||||
attempts = attempts + 1
|
||||
from next
|
||||
where q.id = next.id
|
||||
returning
|
||||
q.id as Id,
|
||||
q.session_id as SessionId,
|
||||
q.url as Url,
|
||||
q.status as Status,
|
||||
q.created_date as CreatedDate,
|
||||
q.started_date as StartedDate,
|
||||
q.finished_date as FinishedDate,
|
||||
q.attempts as Attempts,
|
||||
q.last_error as LastError;
|
||||
""";
|
||||
|
||||
var item = await conn.QuerySingleOrDefaultAsync<QueueItem>(
|
||||
new CommandDefinition(sql, new { sessionId }, transaction: tx, cancellationToken: ct));
|
||||
|
||||
tx.Commit();
|
||||
return item;
|
||||
}
|
||||
|
||||
public async Task MarkDoneAsync(int queueId, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
update queue
|
||||
set status = 2,
|
||||
finished_date = now(),
|
||||
last_error = null
|
||||
where id = @queueId;
|
||||
""";
|
||||
|
||||
using var conn = await _db.CreateOpenConnectionAsync(ct);
|
||||
await conn.ExecuteAsync(new CommandDefinition(sql, new { queueId }, cancellationToken: ct));
|
||||
}
|
||||
|
||||
public async Task MarkFailedAsync(int queueId, string error, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
update queue
|
||||
set status = 3,
|
||||
finished_date = now(),
|
||||
last_error = @error
|
||||
where id = @queueId;
|
||||
""";
|
||||
|
||||
using var conn = await _db.CreateOpenConnectionAsync(ct);
|
||||
await conn.ExecuteAsync(new CommandDefinition(sql, new { queueId, error }, cancellationToken: ct));
|
||||
}
|
||||
|
||||
    /// <summary>
    /// Moves items stuck in "processing" (status = 1) back to "pending" (status = 0)
    /// when they started more than <paramref name="olderThan"/> ago. The attempts
    /// counter is deliberately not reset, so earlier tries remain counted.
    /// </summary>
    /// <param name="sessionId">Session whose stuck items should be requeued.</param>
    /// <param name="olderThan">Age threshold; only sub-second precision is dropped (truncated to whole seconds).</param>
    /// <param name="ct">Token used to cancel the database call.</param>
    /// <returns>The number of rows moved back to pending.</returns>
    public async Task<int> RequeueStuckProcessingAsync(int sessionId, TimeSpan olderThan, CancellationToken ct)
    {
        // Example: a worker died and left items in "processing" forever.
        const string sql = """
            update queue
            set status = 0,
                started_date = null
            where session_id = @sessionId
            and status = 1
            and started_date < now() - (@olderThanSeconds * interval '1 second');
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.ExecuteAsync(new CommandDefinition(sql,
            new { sessionId, olderThanSeconds = (int)olderThan.TotalSeconds },
            cancellationToken: ct));
    }
|
||||
|
||||
    /// <summary>
    /// Deletes a single queue row by id, but only while it is still pending
    /// (status = 0). The session id in the predicate guards against deleting
    /// a row that belongs to a different session.
    /// </summary>
    /// <returns>
    /// True when a row was deleted; false when the row was missing, belonged to
    /// another session, or had already left the pending state.
    /// </returns>
    public async Task<bool> RemovePendingByIdAsync(int sessionId, int queueId, CancellationToken ct)
    {
        const string sql = """
            delete from queue
            where id = @queueId
            and session_id = @sessionId
            and status = 0;
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        var rows = await conn.ExecuteAsync(new CommandDefinition(sql, new { sessionId, queueId }, cancellationToken: ct));
        return rows > 0;
    }
|
||||
|
||||
    /// <summary>
    /// Deletes all still-pending (status = 0) queue rows in a session that match the
    /// given URL exactly. Rows already processing/done/failed are left untouched.
    /// </summary>
    /// <returns>The number of rows deleted.</returns>
    public async Task<int> RemovePendingByUrlAsync(int sessionId, string url, CancellationToken ct)
    {
        const string sql = """
            delete from queue
            where session_id = @sessionId
            and url = @url
            and status = 0;
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.ExecuteAsync(new CommandDefinition(sql, new { sessionId, url }, cancellationToken: ct));
    }
|
||||
|
||||
}
|
||||
54
ScrapperAPI/Repositories/SessionRepository.cs
Normal file
54
ScrapperAPI/Repositories/SessionRepository.cs
Normal file
@ -0,0 +1,54 @@
|
||||
using Dapper;
|
||||
using ScrapperAPI.Dtos;
|
||||
using ScrapperAPI.Interfaces;
|
||||
|
||||
namespace ScrapperAPI.Repositories;
|
||||
|
||||
/// <summary>
/// Dapper-based data access for the "session" table (id + name).
/// Connections are created per call via <see cref="IDbConnectionFactory"/>.
/// </summary>
public sealed class SessionRepository : ISessionRepository
{
    private readonly IDbConnectionFactory _db;

    public SessionRepository(IDbConnectionFactory db) => _db = db;

    /// <summary>Inserts a new session and returns its database-generated id.</summary>
    public async Task<int> CreateAsync(string name, CancellationToken ct)
    {
        const string sql = """
            insert into session(name) values (@name)
            returning id;
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.ExecuteScalarAsync<int>(new CommandDefinition(sql, new { name }, cancellationToken: ct));
    }

    /// <summary>
    /// Looks up a session by exact name. Returns null when no session matches.
    /// Uses "limit 1"; names are not guaranteed unique by the schema.
    /// </summary>
    public async Task<SessionRow?> FindByNameAsync(string name, CancellationToken ct)
    {
        const string sql = """
            select id, name
            from session
            where name = @name
            limit 1;
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return await conn.QuerySingleOrDefaultAsync<SessionRow>(
            new CommandDefinition(sql, new { name }, cancellationToken: ct));
    }

    /// <summary>Convenience wrapper over <see cref="FindByNameAsync"/> that returns only the id.</summary>
    public async Task<int?> GetIdByNameAsync(string name, CancellationToken ct)
        => (await FindByNameAsync(name, ct))?.Id;

    /// <summary>Returns all sessions ordered by id.</summary>
    public async Task<IReadOnlyList<SessionRow>> GetAllAsync(CancellationToken ct)
    {
        const string sql = """
            select id, name
            from session
            order by id;
            """;

        using var conn = await _db.CreateOpenConnectionAsync(ct);
        return (await conn.QueryAsync<SessionRow>(
            new CommandDefinition(sql, cancellationToken: ct)
        )).ToList();
    }
}
|
||||
23
ScrapperAPI/ScrapperAPI.csproj
Normal file
23
ScrapperAPI/ScrapperAPI.csproj
Normal file
@ -0,0 +1,23 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Web">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.1" />
    <PackageReference Include="Dapper" Version="2.1.66" />
    <PackageReference Include="Npgsql" Version="10.0.0" />
    <!-- BUGFIX: removed "Microsoft.AspNet.SignalR" 2.4.3 — that is the legacy classic-ASP.NET
         (.NET Framework) SignalR server and does not belong in an ASP.NET Core app.
         The ASP.NET Core SignalR server (Microsoft.AspNetCore.SignalR, used by the hubs
         in this project) ships in the shared framework and needs no package reference. -->
  </ItemGroup>

  <ItemGroup>
    <Content Include="..\.dockerignore">
      <Link>.dockerignore</Link>
    </Content>
  </ItemGroup>

</Project>
|
||||
70
ScrapperAPI/ScrapperAPI.http
Normal file
70
ScrapperAPI/ScrapperAPI.http
Normal file
@ -0,0 +1,70 @@
|
||||
@baseUrl = http://localhost:5123
|
||||
@sessionId = 1
|
||||
|
||||
###
|
||||
# ================================
|
||||
# START SCRAP FOR A SESSION
|
||||
# ================================
|
||||
POST {{baseUrl}}/sessions/{{sessionId}}/scrap/start
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
|
||||
# ================================
|
||||
# STOP SCRAP (GRACEFUL)
|
||||
# Finishes the current URL, then stops
|
||||
# ================================
|
||||
POST {{baseUrl}}/sessions/{{sessionId}}/scrap/stop
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
|
||||
# ================================
|
||||
# GET STATUS FOR ONE SESSION
|
||||
# Runtime + DB progress
|
||||
# ================================
|
||||
GET {{baseUrl}}/sessions/{{sessionId}}/scrap/status
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
|
||||
# ================================
|
||||
# LIST ALL RUNNING SESSIONS
|
||||
# (runtime state)
|
||||
# ================================
|
||||
GET {{baseUrl}}/scrap/running-sessions
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
# ================================
|
||||
# LIST ALL SESSIONS
|
||||
# ================================
|
||||
GET {{baseUrl}}/sessions
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
# ================================
|
||||
# ADD URL TO SESSION
|
||||
# ================================
|
||||
POST {{baseUrl}}/sessions/{{sessionId}}/queue
|
||||
Content-Type: application/json
|
||||
Accept: application/json
|
||||
|
||||
{
|
||||
"url": "https://example.com/page-1"
|
||||
}
|
||||
|
||||
###
|
||||
# ================================
|
||||
# REMOVE URL BY QUEUE ID (only if pending)
|
||||
# ================================
|
||||
DELETE {{baseUrl}}/sessions/{{sessionId}}/queue/2
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
# Decompressed HTML
|
||||
GET {{baseUrl}}/queue/22/content
|
||||
|
||||
###
|
||||
# Decompressed text (debug)
|
||||
GET {{baseUrl}}/queue/22/content/raw
|
||||
40
ScrapperAPI/Scripts/database.sql
Normal file
40
ScrapperAPI/Scripts/database.sql
Normal file
@ -0,0 +1,40 @@
|
||||
-- Dev bootstrap script for the scraper database.
-- NOTE: run "create database" while connected to a maintenance DB; the remaining
-- statements must run inside webscrapper_dev.
create database webscrapper_dev;

-- BUGFIX: drop in dependency order WITH "if exists" — the original plain "drop table"
-- statements fail on a fresh database, making the script non-rerunnable.
drop table if exists content;
drop table if exists queue;
drop table if exists session;

create table session(
    id serial primary key,
    name varchar(255)
);

-- status: 0 = pending, 1 = processing, 2 = done, 3 = failed
create table queue(
    id serial primary key,
    session_id int references session(id),
    url varchar(255),
    status smallint not null default 0,
    started_date timestamp null,
    finished_date timestamp null,
    attempts int not null default 0,
    last_error text null,
    created_date timestamp default now()
);

-- Supports the dequeue query: filter by session + pending status.
create index idx_queue_session_status on queue(session_id, status);

create table content(
    id serial primary key,
    queue_id int references queue(id),
    content text,
    created_date timestamp default now()
);

-- Prevent the same URL from being enqueued twice within one session.
create unique index if not exists ux_queue_session_url
    on queue(session_id, url);

-- BUGFIX: "if not exists" so the script stays idempotent when re-run against an
-- existing database (the original alter failed on the second run).
alter table content
    add column if not exists content_encoding varchar(20) not null default 'gzip',
    add column if not exists content_bytes bytea null,
    add column if not exists original_length int null,
    add column if not exists compressed_length int null;
|
||||
146
ScrapperAPI/Services/ScraperHttpClient.cs
Normal file
146
ScrapperAPI/Services/ScraperHttpClient.cs
Normal file
@ -0,0 +1,146 @@
|
||||
using System.Net;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScrapperAPI.Interfaces;
|
||||
using ScrapperAPI.Options;
|
||||
|
||||
namespace ScrapperAPI.Services;
|
||||
|
||||
/// <summary>
/// HTTP fetcher used by the scrape workers. Applies a per-host rate limit before
/// every request and retries transient failures (408/429/5xx, DNS/socket errors,
/// client timeouts) with exponential backoff + jitter, per ScraperOptions.Retry.
/// </summary>
public sealed class ScraperHttpClient : IScraperHttpClient
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IDomainRateLimiter _rateLimiter;
    private readonly ILogger<ScraperHttpClient> _logger;
    private readonly ScraperOptions _opts;

    public ScraperHttpClient(
        IHttpClientFactory httpClientFactory,
        IDomainRateLimiter rateLimiter,
        ILogger<ScraperHttpClient> logger,
        IOptions<ScraperOptions> options)
    {
        _httpClientFactory = httpClientFactory;
        _rateLimiter = rateLimiter;
        _logger = logger;
        _opts = options.Value;
    }

    /// <summary>
    /// Downloads <paramref name="url"/> as a string, retrying transient errors up to
    /// Retry.MaxAttempts with exponential backoff and jitter.
    /// </summary>
    /// <exception cref="ArgumentException">The URL is not a valid absolute URI.</exception>
    /// <exception cref="HttpRequestException">All attempts failed with an HTTP-level error.</exception>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    public async Task<string> GetStringWithRetryAsync(string url, CancellationToken ct)
    {
        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri))
            throw new ArgumentException("Invalid URL", nameof(url));

        var host = uri.Host;
        var http = _httpClientFactory.CreateClient("scraper");

        // Clamp options defensively: at least one attempt, non-negative delays,
        // max delay never below base delay.
        var maxAttempts = Math.Max(1, _opts.Retry.MaxAttempts);
        var baseDelay = Math.Max(0, _opts.Retry.BaseDelayMs);
        var maxDelay = Math.Max(baseDelay, _opts.Retry.MaxDelayMs);

        Exception? lastEx = null;

        for (var attempt = 1; attempt <= maxAttempts; attempt++)
        {
            ct.ThrowIfCancellationRequested();

            // Per-host politeness: rate-limit before starting the request.
            var before = DateTimeOffset.UtcNow;
            await _rateLimiter.WaitAsync(host, ct);
            var waitedMs = (int)(DateTimeOffset.UtcNow - before).TotalMilliseconds;

            if (waitedMs > 0)
                _logger.LogDebug("RateLimit applied: waited {WaitedMs}ms for host {Host}", waitedMs, host);

            try
            {
                using var req = new HttpRequestMessage(HttpMethod.Get, uri);
                req.Headers.UserAgent.ParseAdd("webscrapper/1.0");
                req.Headers.Accept.ParseAdd("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");

                using var resp = await http.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);

                if (IsTransientStatus(resp.StatusCode))
                {
                    lastEx = new HttpRequestException($"Transient status code {(int)resp.StatusCode} ({resp.StatusCode})");

                    // BUGFIX: previously this branch delayed even on the final attempt
                    // and only then fell out of the loop — a pointless backoff sleep
                    // before giving up. Bail out immediately instead (the exception
                    // branch below already did this correctly).
                    if (attempt >= maxAttempts)
                        break;

                    await LogAndDelayRetryAsync(url, host, attempt, maxAttempts, lastEx, baseDelay, maxDelay, ct, resp.StatusCode);
                    continue;
                }

                resp.EnsureSuccessStatusCode();
                return await resp.Content.ReadAsStringAsync(ct);
            }
            catch (Exception ex) when (IsTransientException(ex, ct))
            {
                lastEx = ex;

                if (attempt >= maxAttempts)
                    break;

                await LogAndDelayRetryAsync(url, host, attempt, maxAttempts, ex, baseDelay, maxDelay, ct, statusCode: null);
            }
        }

        throw lastEx ?? new Exception("Request failed");
    }

    /// <summary>Returns true for HTTP statuses worth retrying: 408, 429 and all 5xx.</summary>
    private static bool IsTransientStatus(HttpStatusCode statusCode)
    {
        // Typical transient statuses:
        //   408 Request Timeout
        //   429 Too Many Requests
        //   5xx Server errors
        var code = (int)statusCode;
        return code == 408 || code == 429 || (code >= 500 && code <= 599);
    }

    /// <summary>
    /// Returns true for exceptions worth retrying. Cancellation requested by the
    /// caller is never retried; a TaskCanceledException without caller cancellation
    /// is treated as an HttpClient timeout and retried.
    /// </summary>
    private static bool IsTransientException(Exception ex, CancellationToken ct)
    {
        // If the caller's token fired, this is a genuine cancellation, not a timeout.
        if (ex is OperationCanceledException && ct.IsCancellationRequested)
            return false;

        return ex is HttpRequestException        // DNS, socket, protocol errors
            || ex is TaskCanceledException;      // HttpClient timeouts surface here
    }

    /// <summary>Logs the retry (with status code when available) and sleeps the backoff delay.</summary>
    private async Task LogAndDelayRetryAsync(
        string url,
        string host,
        int attempt,
        int maxAttempts,
        Exception ex,
        int baseDelayMs,
        int maxDelayMs,
        CancellationToken ct,
        HttpStatusCode? statusCode)
    {
        var delayMs = ComputeBackoffWithJitterMs(attempt, baseDelayMs, maxDelayMs);

        if (statusCode is not null)
        {
            _logger.LogWarning(
                ex,
                "Retrying ({Attempt}/{MaxAttempts}) in {DelayMs}ms due to status {StatusCode} for host {Host}. Url={Url}",
                attempt, maxAttempts, delayMs, (int)statusCode.Value, host, url);
        }
        else
        {
            _logger.LogWarning(
                ex,
                "Retrying ({Attempt}/{MaxAttempts}) in {DelayMs}ms due to transient error for host {Host}. Url={Url}",
                attempt, maxAttempts, delayMs, host, url);
        }

        await Task.Delay(delayMs, ct);
    }

    /// <summary>
    /// Exponential backoff: base * 2^(attempt-1), plus jitter in [0, base), clamped
    /// to maxDelayMs. The shift amount is clamped to 30 to avoid int overflow.
    /// </summary>
    private static int ComputeBackoffWithJitterMs(int attempt, int baseDelayMs, int maxDelayMs)
    {
        var exp = baseDelayMs * (1 << Math.Clamp(attempt - 1, 0, 30));
        var clamped = Math.Min(exp, maxDelayMs);
        var jitter = Random.Shared.Next(0, Math.Max(1, baseDelayMs));
        return Math.Min(clamped + jitter, maxDelayMs);
    }
}
|
||||
29
ScrapperAPI/Utils/CompressionUtils.cs
Normal file
29
ScrapperAPI/Utils/CompressionUtils.cs
Normal file
@ -0,0 +1,29 @@
|
||||
using System.IO.Compression;
|
||||
using System.Text;
|
||||
|
||||
namespace ScrapperAPI.Utils;
|
||||
|
||||
/// <summary>
/// Gzip helpers used to store scraped page content compactly:
/// UTF-8 text to gzip bytes and back.
/// </summary>
public static class CompressionUtils
{
    /// <summary>
    /// Encodes <paramref name="text"/> as UTF-8 and gzips it.
    /// </summary>
    /// <param name="text">Text to compress.</param>
    /// <param name="level">Compression level; defaults to Fastest.</param>
    /// <returns>The gzip-compressed bytes.</returns>
    public static byte[] GzipCompressUtf8(string text, CompressionLevel level = CompressionLevel.Fastest)
    {
        var payload = Encoding.UTF8.GetBytes(text);

        using var sink = new MemoryStream();
        // leaveOpen so the MemoryStream survives the GZipStream's dispose;
        // disposing the GZipStream flushes the gzip trailer into the sink.
        using (var compressor = new GZipStream(sink, level, leaveOpen: true))
        {
            compressor.Write(payload, 0, payload.Length);
        }

        return sink.ToArray();
    }

    /// <summary>
    /// Reverses <see cref="GzipCompressUtf8"/>: gunzips the bytes and decodes them as UTF-8.
    /// </summary>
    /// <param name="gzBytes">Gzip-compressed input.</param>
    /// <returns>The original UTF-8 text.</returns>
    public static string GzipDecompressUtf8(byte[] gzBytes)
    {
        using var source = new MemoryStream(gzBytes);
        using var decompressor = new GZipStream(source, CompressionMode.Decompress);
        using var reader = new StreamReader(decompressor, Encoding.UTF8);

        return reader.ReadToEnd();
    }
}
|
||||
48
ScrapperAPI/Utils/DomainRateLimiter.cs
Normal file
48
ScrapperAPI/Utils/DomainRateLimiter.cs
Normal file
@ -0,0 +1,48 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ScrapperAPI.Interfaces;
|
||||
|
||||
namespace ScrapperAPI.Utils;
|
||||
|
||||
/// <summary>
/// Enforces a minimum delay between consecutive requests to the same host.
/// Each host gets a single-entry SemaphoreSlim gate plus a "next allowed" timestamp:
/// callers queue on the gate, sleep until the timestamp if necessary, then push the
/// timestamp forward by the configured delay.
/// </summary>
public sealed class DomainRateLimiter : IDomainRateLimiter
{
    // Per-host state, created lazily on first use. Entries are never removed,
    // so the map grows with the number of distinct hosts seen.
    private readonly ConcurrentDictionary<string, HostLimiter> _hosts = new();
    private readonly int _minDelayMs;

    /// <param name="minDelayMs">
    /// Minimum gap in milliseconds between requests to one host; negative values
    /// are clamped to 0 (0 disables rate limiting entirely).
    /// </param>
    public DomainRateLimiter(int minDelayMs)
    {
        _minDelayMs = Math.Max(0, minDelayMs);
    }

    /// <summary>
    /// Waits until a request to <paramref name="host"/> is permitted, then reserves
    /// the next slot. Returns immediately when rate limiting is disabled.
    /// Note: the delay happens while holding the host's gate, so concurrent callers
    /// for the same host queue up behind the sleeping one — this serializes requests
    /// per host, which is the intent.
    /// </summary>
    public async Task WaitAsync(string host, CancellationToken ct)
    {
        if (_minDelayMs == 0) return;

        var limiter = _hosts.GetOrAdd(host, _ => new HostLimiter());

        // The gate serializes access so NextAllowedUtc is read/written without races.
        await limiter.Gate.WaitAsync(ct);
        try
        {
            var now = DateTimeOffset.UtcNow;
            var next = limiter.NextAllowedUtc;

            if (next > now)
            {
                var delay = next - now;
                await Task.Delay(delay, ct);
                // Re-read the clock: the actual wake-up time anchors the next slot.
                now = DateTimeOffset.UtcNow;
            }

            limiter.NextAllowedUtc = now.AddMilliseconds(_minDelayMs);
        }
        finally
        {
            limiter.Gate.Release();
        }
    }

    // Gate + "next allowed" timestamp bundle for a single host.
    private sealed class HostLimiter
    {
        public SemaphoreSlim Gate { get; } = new(1, 1);
        public DateTimeOffset NextAllowedUtc { get; set; } = DateTimeOffset.MinValue;
    }
}
|
||||
261
ScrapperAPI/Workers/ScrapeCoordinator.cs
Normal file
261
ScrapperAPI/Workers/ScrapeCoordinator.cs
Normal file
@ -0,0 +1,261 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading.Channels;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScrapperAPI.Enums;
|
||||
using ScrapperAPI.Interfaces;
|
||||
using ScrapperAPI.Options;
|
||||
using ScrapperAPI.Records;
|
||||
|
||||
namespace ScrapperAPI.Workers;
|
||||
|
||||
/// <summary>
/// Hosted service coordinating one scrape loop per session. Start requests arrive
/// through an unbounded channel (ExecuteAsync is the single reader); each session
/// loop dequeues pending URLs, fetches them via <see cref="IScraperHttpClient"/>,
/// persists the content, updates queue status and publishes events on the bus.
/// </summary>
public sealed class ScrapeCoordinator : BackgroundService, IScrapeCoordinator
{
    private readonly IServiceScopeFactory _scopeFactory;
    // Currently unused (fetching goes through IScraperHttpClient), retained because
    // the DI constructor signature is public surface consumed by registration code.
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ILogger<ScrapeCoordinator> _logger;
    private readonly IScraperHttpClient _scraperHttp;
    private readonly IScrapeEventBus _events;
    private readonly ScraperOptions _opts;

    // SingleReader: only ExecuteAsync reads; any caller thread may write.
    private readonly Channel<int> _startRequests = Channel.CreateUnbounded<int>(
        new UnboundedChannelOptions { SingleReader = true, SingleWriter = false });

    // In-memory runtime state, one Runner per session id.
    private readonly ConcurrentDictionary<int, Runner> _runners = new();

    public ScrapeCoordinator(
        IServiceScopeFactory scopeFactory,
        IHttpClientFactory httpClientFactory,
        ILogger<ScrapeCoordinator> logger,
        IOptions<ScraperOptions> options,
        IScraperHttpClient scraperHttp,
        IScrapeEventBus events)
    {
        _scopeFactory = scopeFactory;
        _httpClientFactory = httpClientFactory;
        _logger = logger;
        _opts = options.Value;
        _scraperHttp = scraperHttp;
        _events = events;
    }

    /// <summary>
    /// Requests a scrape loop start for the session (clears any pending stop flag),
    /// announces SessionStarted on the bus and enqueues the start request.
    /// </summary>
    public async Task StartAsync(int sessionId, CancellationToken ct = default)
    {
        var runner = _runners.GetOrAdd(sessionId, id => new Runner(id));
        runner.RequestStart();

        await _events.PublishAsync(new ScrapeEvent(
            ScrapeEventType.SessionStarted, sessionId, DateTimeOffset.UtcNow
        ), ct);

        await _startRequests.Writer.WriteAsync(sessionId, ct);
    }

    /// <summary>
    /// Requests a graceful stop: the loop finishes the URL it is currently processing
    /// and exits before dequeuing the next one. No-op for unknown sessions.
    /// </summary>
    public Task StopAsync(int sessionId)
    {
        if (_runners.TryGetValue(sessionId, out var runner))
        {
            runner.RequestStop();
            // Fire-and-forget notification; the stop itself is already effective.
            _ = _events.PublishAsync(new ScrapeEvent(
                ScrapeEventType.SessionStopRequested, sessionId, DateTimeOffset.UtcNow
            ));
        }
        return Task.CompletedTask;
    }

    /// <summary>In-memory runtime snapshot for one session (not DB progress).</summary>
    public ScrapeRuntimeStatus GetRuntimeStatus(int sessionId)
    {
        if (!_runners.TryGetValue(sessionId, out var r))
            return new(sessionId, false, false, null, null, null);

        return new(sessionId, r.IsRunning, r.StopRequested, r.CurrentQueueId, r.CurrentUrl, r.CurrentStartedAt);
    }

    /// <summary>Runtime snapshots for every session whose loop is currently running.</summary>
    public IReadOnlyCollection<ScrapeRuntimeStatus> ListRunningSessions()
        => _runners.Values
            .Where(r => r.IsRunning)
            .Select(r => new ScrapeRuntimeStatus(r.SessionId, r.IsRunning, r.StopRequested, r.CurrentQueueId, r.CurrentUrl, r.CurrentStartedAt))
            .ToList();

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("ScrapeCoordinator started.");

        while (!stoppingToken.IsCancellationRequested)
        {
            int sessionId;
            try
            {
                sessionId = await _startRequests.Reader.ReadAsync(stoppingToken);
            }
            catch (OperationCanceledException) { break; }

            var runner = _runners.GetOrAdd(sessionId, id => new Runner(id));
            runner.RequestStart();

            // Fire-and-forget: RunSessionLoopAsync observes its own exceptions (see below).
            _ = RunSessionLoopAsync(runner, stoppingToken);
        }
    }

    // Random pause between items so requests are not perfectly periodic.
    private Task PoliteDelayAsync(CancellationToken ct)
    {
        var min = _opts.DelayMinMs;
        var max = _opts.DelayMaxMs;

        if (min < 0) min = 0;
        if (max < min) max = min;

        var delayMs = Random.Shared.Next(min, max + 1);
        return Task.Delay(delayMs, ct);
    }

    private async Task RunSessionLoopAsync(Runner runner, CancellationToken hostToken)
    {
        // At most one loop per session (Interlocked guard inside the Runner).
        if (!runner.TryEnterLoop())
            return;

        runner.MarkRunning(true);

        try
        {
            while (!hostToken.IsCancellationRequested)
            {
                // Graceful stop: do not dequeue the next URL.
                if (runner.StopRequested)
                    break;

                // Fresh DI scope per item: scoped repositories live here.
                using var scope = _scopeFactory.CreateScope();
                var queue = scope.ServiceProvider.GetRequiredService<IQueueRepository>();
                var content = scope.ServiceProvider.GetRequiredService<IContentRepository>();

                var item = await queue.TryDequeueAsync(runner.SessionId, hostToken);
                if (item is null)
                    break;

                runner.SetCurrent(item.Id, item.Url);

                await _events.PublishAsync(new ScrapeEvent(
                    ScrapeEventType.ItemStarted,
                    runner.SessionId,
                    DateTimeOffset.UtcNow,
                    QueueId: item.Id,
                    Url: item.Url
                ), hostToken);

                try
                {
                    var html = await _scraperHttp.GetStringWithRetryAsync(item.Url, hostToken);

                    await content.SaveAsync(item.Id, html, hostToken);
                    await queue.MarkDoneAsync(item.Id, hostToken);

                    await _events.PublishAsync(new ScrapeEvent(
                        ScrapeEventType.ItemSucceeded,
                        runner.SessionId,
                        DateTimeOffset.UtcNow,
                        QueueId: item.Id,
                        Url: item.Url
                    ), hostToken);
                }
                catch (Exception ex)
                {
                    await queue.MarkFailedAsync(item.Id, Truncate(ex.ToString(), 8000), hostToken);

                    await _events.PublishAsync(new ScrapeEvent(
                        ScrapeEventType.ItemFailed,
                        runner.SessionId,
                        DateTimeOffset.UtcNow,
                        QueueId: item.Id,
                        Url: item.Url,
                        Error: ex.Message
                    ), hostToken);
                }
                finally
                {
                    // FIX: clear the "current item" before the awaits below, which can
                    // throw on cancellation — runtime status must never show a stale item.
                    runner.ClearCurrent();

                    // Publish a DB-backed progress snapshot with percent done.
                    var counts = await queue.GetCountsAsync(runner.SessionId, hostToken);
                    var percent = counts.Total == 0 ? 0 : (double)counts.Done * 100.0 / (double)counts.Total;

                    await _events.PublishAsync(new ScrapeEvent(
                        ScrapeEventType.Progress,
                        runner.SessionId,
                        DateTimeOffset.UtcNow,
                        Total: counts.Total,
                        Done: counts.Done,
                        Pending: counts.Pending,
                        Processing: counts.Processing,
                        Failed: counts.Failed,
                        Percent: percent
                    ), hostToken);

                    if (!runner.StopRequested && !hostToken.IsCancellationRequested)
                        await PoliteDelayAsync(hostToken);
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Host shutdown / token cancellation: exit quietly.
        }
        catch (Exception ex)
        {
            // BUGFIX: this task is fire-and-forget; without this catch an exception
            // escaping the loop would go unobserved.
            _logger.LogError(ex, "Scrape loop for session {SessionId} crashed.", runner.SessionId);
        }
        finally
        {
            runner.MarkRunning(false);
            try
            {
                // BUGFIX: publish with CancellationToken.None — hostToken may already be
                // cancelled here, and a throw from this publish previously skipped
                // ExitLoop below, wedging the session so it could never restart.
                await _events.PublishAsync(new ScrapeEvent(
                    ScrapeEventType.SessionStopped,
                    runner.SessionId,
                    DateTimeOffset.UtcNow
                ), CancellationToken.None);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to publish SessionStopped for session {SessionId}.", runner.SessionId);
            }
            finally
            {
                // Must ALWAYS run, or TryEnterLoop blocks this session forever.
                runner.ExitLoop();
            }
        }
    }

    // NOTE: removed dead code from the original: an unused ThreadLocal<Random> field,
    // an unused "scraper" HttpClient created inside the loop, and the never-called
    // FetchHtmlAsync helper (superseded by IScraperHttpClient).

    private static string Truncate(string s, int max) => s.Length <= max ? s : s[..max];

    // Per-session runtime state; mutated by the single loop plus start/stop requests.
    private sealed class Runner
    {
        private int _loopEntered;

        public int SessionId { get; }
        public bool IsRunning { get; private set; }
        public bool StopRequested { get; private set; }

        public int? CurrentQueueId { get; private set; }
        public string? CurrentUrl { get; private set; }
        public DateTimeOffset? CurrentStartedAt { get; private set; }

        public Runner(int sessionId) => SessionId = sessionId;

        public void RequestStart() => StopRequested = false;
        public void RequestStop() => StopRequested = true;

        // Interlocked guard: only the first caller flips 0 -> 1 and may run the loop.
        public bool TryEnterLoop() => Interlocked.CompareExchange(ref _loopEntered, 1, 0) == 0;
        public void ExitLoop() => Interlocked.Exchange(ref _loopEntered, 0);
        public void MarkRunning(bool running) => IsRunning = running;

        public void SetCurrent(int queueId, string url)
        {
            CurrentQueueId = queueId;
            CurrentUrl = url;
            CurrentStartedAt = DateTimeOffset.UtcNow;
        }

        public void ClearCurrent()
        {
            CurrentQueueId = null;
            CurrentUrl = null;
            CurrentStartedAt = null;
        }
    }
}
|
||||
8
ScrapperAPI/appsettings.Development.json
Normal file
8
ScrapperAPI/appsettings.Development.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
}
|
||||
}
|
||||
24
ScrapperAPI/appsettings.json
Normal file
24
ScrapperAPI/appsettings.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
},
|
||||
"ConnectionStrings": {
|
||||
"Default": "Host=localhost;Port=5432;Database=webscrapper_dev;Username=postgres;Password=devpassword;"
|
||||
},
|
||||
"Scraper": {
|
||||
"DelayMinMs": 100,
|
||||
"DelayMaxMs": 3000,
|
||||
"RateLimit": {
|
||||
"PerDomainMinDelayMs": 500
|
||||
},
|
||||
"Retry": {
|
||||
"MaxAttempts": 5,
|
||||
"BaseDelayMs": 250,
|
||||
"MaxDelayMs": 8000
|
||||
}
|
||||
},
|
||||
"AllowedHosts": "*"
|
||||
}
|
||||
16
WebScrapperPro.sln
Normal file
16
WebScrapperPro.sln
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ScrapperAPI", "ScrapperAPI\ScrapperAPI.csproj", "{206F88EA-2109-4DC0-B1E1-45AA8D3D092F}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{206F88EA-2109-4DC0-B1E1-45AA8D3D092F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{206F88EA-2109-4DC0-B1E1-45AA8D3D092F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{206F88EA-2109-4DC0-B1E1-45AA8D3D092F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{206F88EA-2109-4DC0-B1E1-45AA8D3D092F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
Loading…
Reference in New Issue
Block a user