╔══════════════════════════════════════════════════════════════════════════════╗
║ SECCIÓN 12: IMPLEMENTACIÓN DEL EXTRACTOR DE APIs ║
╚══════════════════════════════════════════════════════════════════════════════╝
// ═══════════════════════════════════════════════════════════════════════════
// ARCHIVO: Services/RegexApiExtractor.cs
// ═══════════════════════════════════════════════════════════════════════════
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace APIHunter.Services
{
/// <summary>
/// Extractor de APIs basado en regex con:
/// - Patrones corregidos para eliminar falsos positivos
/// - Soporte para múltiples frameworks JS (fetch, axios, jQuery, Angular)
/// - Detección de GraphQL endpoints
/// - Normalización robusta de URLs
/// - Timeout en regex para prevenir ReDoS
/// </summary>
public sealed class RegexApiExtractor : IApiExtractor
{
// Logger used for regex-timeout warnings, per-pattern errors and extraction statistics.
private readonly ILogger<RegexApiExtractor> _logger;
// Match timeout handed to every Regex this class constructs.
private readonly TimeSpan _regexTimeout;
// ═══════════════════════════════════════════════════════════════════
// PATRONES CORREGIDOS - Eliminan falsos positivos del código original
// ═══════════════════════════════════════════════════════════════════
// Ordered table of detection patterns evaluated by ExtractApis.
// Every pattern is run with RegexOptions.IgnoreCase | RegexOptions.Multiline.
// A CaptureGroup of 0 (the default) means the whole match is the URL.
private static readonly ApiPattern[] ApiPatterns = {
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 1: URLs with "api" as a complete path segment.
    // The negative lookahead rejects "/apidocs", "/apiary", "/api-docs", ...
    // so "api" is never accepted as a mere prefix of a longer segment.
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "API_PATH_SEGMENT",
        Pattern: @"https?://[^""'\s<>]+/api(?![\w-])(?:/[^""'\s<>]*)?",
        Confidence: ApiConfidence.High,
        Description: "URL with /api/ path segment"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 2: api.* subdomain
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "API_SUBDOMAIN",
        Pattern: @"https?://api\.[a-zA-Z0-9][-a-zA-Z0-9]*\.[a-zA-Z]{2,}[^""'\s<>]*",
        Confidence: ApiConfidence.VeryHigh,
        Description: "API subdomain (api.example.com)"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 3: REST versioned endpoints /v1/, /v2/, etc.
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "REST_VERSIONED",
        Pattern: @"https?://[^""'\s<>]+/v[1-9]\d*/[^""'\s<>]*",
        Confidence: ApiConfidence.High,
        Description: "REST versioned endpoint"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 4: GraphQL endpoints
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "GRAPHQL",
        Pattern: @"https?://[^""'\s<>]+/graphql[^""'\s<>]*",
        Confidence: ApiConfidence.VeryHigh,
        Description: "GraphQL endpoint"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 5: fetch() with a literal URL
    // Accepts single quotes, double quotes and backticks.
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "FETCH_LITERAL",
        Pattern: @"fetch\s*\(\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "fetch() with literal URL",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 6: fetch() with a template literal containing ${...}
    // Captures the full template text, e.g. `${BASE}/api/users`.
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "FETCH_TEMPLATE",
        Pattern: @"fetch\s*\(\s*`([^`]*\$\{[^}]+\}[^`]*)`",
        Confidence: ApiConfidence.Medium,
        Description: "fetch() with template literal",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 7: axios.<method>() for all HTTP verbs plus request
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "AXIOS_METHOD",
        Pattern: @"axios\.(?:get|post|put|patch|delete|head|options|request)\s*\(\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "axios HTTP method call",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 8: axios with a configuration object
    // axios({ url: '/api/data', method: 'GET' })
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "AXIOS_CONFIG_URL",
        Pattern: @"axios\s*\(\s*\{[^}]*url\s*:\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "axios config object with url",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 9: axios.create() baseURL
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "AXIOS_CREATE_BASEURL",
        Pattern: @"axios\.create\s*\(\s*\{[^}]*baseURL\s*:\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.VeryHigh,
        Description: "axios.create() baseURL configuration",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 10: jQuery AJAX
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "JQUERY_AJAX",
        Pattern: @"\$\.(?:ajax|get|post|getJSON)\s*\(\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "jQuery AJAX call",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 11: Angular HttpClient
    // An optional generic argument list is skipped before the opening
    // parenthesis so that http.get<User[]>('/api/users') is matched too.
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "ANGULAR_HTTP",
        Pattern: @"(?:this\.)?http\.(?:get|post|put|patch|delete)\s*(?:<[^()""'`\n]*>)?\s*\(\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "Angular HttpClient call",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 12: API configuration inside JS objects / assignments
    // const config = { apiUrl: 'https://...', endpoint: '...' }
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "CONFIG_API_URL",
        Pattern: @"(?:api[_-]?(?:url|endpoint|base|host)|endpoint|baseUrl)\s*[=:]\s*[`""'](https?://[^`""'\n]+)[`""']",
        Confidence: ApiConfidence.VeryHigh,
        Description: "API URL in configuration",
        CaptureGroup: 1
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 13: JSON endpoints (.json)
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "JSON_ENDPOINT",
        Pattern: @"https?://[^""'\s<>]+\.json(?:\?[^""'\s<>]*)?",
        Confidence: ApiConfidence.High,
        Description: "JSON file endpoint"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 14: WebSocket endpoints (wss:// or ws://)
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "WEBSOCKET",
        Pattern: @"wss?://[^""'\s<>]+",
        Confidence: ApiConfidence.High,
        Description: "WebSocket endpoint"
    ),
    // ─────────────────────────────────────────────────────────────────
    // PATTERN 15: XMLHttpRequest.open(method, url)
    // ─────────────────────────────────────────────────────────────────
    new ApiPattern(
        Name: "XHR_OPEN",
        Pattern: @"\.open\s*\(\s*[`""'](?:GET|POST|PUT|DELETE|PATCH)[`""']\s*,\s*[`""']([^`""'\n]+)[`""']",
        Confidence: ApiConfidence.High,
        Description: "XMLHttpRequest.open()",
        CaptureGroup: 1
    ),
};
// Path fragments that mark a generic URL as API-like; IsLikelyApi scans this set in order.
private static readonly HashSet<string> ApiKeywords =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "/api/",
        "/rest/",
        "/graphql",
        "/v1/",
        "/v2/",
        "/v3/",
        "/v4/",
        "/json/",
        "/data/",
        "/query/",
        "/mutation/",
        "/subscription/",
        "/oauth/",
        "/auth/",
        "/token/",
        "/webhook/",
        "/callback/",
        "/users/",
        "/posts/",
        "/items/",
        "/resources/",
    };
// File extensions of static assets; IsLikelyApi rejects URLs that end with one of these
// or that contain one immediately followed by '?'.
private static readonly HashSet<string> ExcludedExtensions =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        ".js",
        ".css",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".svg",
        ".ico",
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
        ".map",
        ".html",
        ".htm",
        ".pdf",
        ".zip",
        ".tar",
        ".gz",
        ".mp4",
        ".webm",
        ".mp3",
    };
/// <summary>
/// Creates an extractor.
/// </summary>
/// <param name="logger">Logger for diagnostics; must not be null.</param>
/// <param name="regexTimeout">Per-regex match timeout; defaults to 2 seconds when omitted.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public RegexApiExtractor(ILogger<RegexApiExtractor> logger, TimeSpan? regexTimeout = null)
{
    ArgumentNullException.ThrowIfNull(logger);

    _logger = logger;
    _regexTimeout = regexTimeout.HasValue
        ? regexTimeout.Value
        : TimeSpan.FromSeconds(2);
}
// Building a RegexOptions.Compiled regex is expensive, and ExtractApis runs once per
// scanned document. Compiled instances are therefore cached per (pattern, timeout)
// and shared across calls and instances; Regex matching is thread-safe.
private static readonly System.Collections.Concurrent.ConcurrentDictionary<(string Pattern, TimeSpan Timeout), Regex>
    CompiledApiRegexCache = new();

/// <summary>
/// Runs every entry of <c>ApiPatterns</c> over <paramref name="content"/> and returns the
/// distinct, normalized URLs that pass <c>IsLikelyApi</c>.
/// </summary>
/// <param name="content">Text to scan (HTML or JavaScript). Null or empty yields an empty result.</param>
/// <param name="sourceUrl">URL the content came from; used as the base for relative URLs and recorded as the source.</param>
/// <returns>
/// One entry per normalized URL (compared case-insensitively). When several patterns hit
/// the same URL, the entry from the highest-confidence pattern is kept.
/// </returns>
/// <remarks>
/// A regex timeout or any other failure in one pattern is logged and does not stop the
/// remaining patterns; matches collected before the failure are kept.
/// </remarks>
public IReadOnlyCollection<ExtractedApi> ExtractApis(string content, string sourceUrl)
{
    if (string.IsNullOrEmpty(content))
        return Array.Empty<ExtractedApi>();

    var extractedApis = new Dictionary<string, ExtractedApi>(StringComparer.OrdinalIgnoreCase);

    foreach (var pattern in ApiPatterns)
    {
        try
        {
            // Reuse the compiled regex instead of recompiling it on every call.
            var regex = CompiledApiRegexCache.GetOrAdd(
                (pattern.Pattern, _regexTimeout),
                static key => new Regex(
                    key.Pattern,
                    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline,
                    key.Timeout));

            // Matches() is evaluated lazily, so a timeout surfaces inside this loop.
            foreach (Match match in regex.Matches(content))
            {
                // Take the URL from the configured capture group, or the whole match.
                var rawUrl = pattern.CaptureGroup > 0 && match.Groups.Count > pattern.CaptureGroup
                    ? match.Groups[pattern.CaptureGroup].Value
                    : match.Value;

                var normalizedUrl = NormalizeUrl(rawUrl, sourceUrl);
                if (string.IsNullOrEmpty(normalizedUrl))
                    continue;

                if (!IsLikelyApi(normalizedUrl, pattern.Confidence))
                    continue;

                // De-duplicate, keeping the entry with the highest confidence.
                if (extractedApis.TryGetValue(normalizedUrl, out var existing) &&
                    !(pattern.Confidence > existing.Confidence))
                {
                    continue;
                }

                extractedApis[normalizedUrl] = CreateExtractedApi(normalizedUrl, sourceUrl, pattern);
            }
        }
        catch (RegexMatchTimeoutException)
        {
            _logger.LogWarning("Regex timeout for pattern {Pattern} in {Source}",
                pattern.Name, sourceUrl);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error processing pattern {Pattern}", pattern.Name);
        }
    }

    _logger.LogInformation("Extracted {Count} APIs from {Source}",
        extractedApis.Count, sourceUrl);

    return extractedApis.Values.ToList().AsReadOnly();
}
/// <summary>
/// Collects JavaScript file URLs referenced by <paramref name="html"/>: the <c>src</c> of
/// <c>&lt;script&gt;</c> tags and the targets of ES6 <c>import ... from '....js'</c> statements.
/// </summary>
/// <param name="html">Markup to scan. Null or empty yields an empty result.</param>
/// <param name="baseUrl">Base URL used to resolve relative references.</param>
/// <returns>Distinct normalized URLs (case-insensitive). Script-tag URLs matching the third-party exclusion list are omitted.</returns>
/// <remarks>A regex timeout is logged as a warning; URLs found before it are still returned.</remarks>
public IReadOnlyCollection<string> ExtractJavaScriptUrls(string html, string baseUrl)
{
    if (string.IsNullOrEmpty(html))
    {
        return Array.Empty<string>();
    }

    const RegexOptions scanOptions = RegexOptions.Compiled | RegexOptions.IgnoreCase;
    var discovered = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        // <script ... src="..."> with attributes in any order, single or double quotes.
        var scriptTagRegex = new Regex(
            @"<script[^>]*\ssrc\s*=\s*[""']([^""']+)[""'][^>]*>",
            scanOptions,
            _regexTimeout);

        foreach (Match tag in scriptTagRegex.Matches(html))
        {
            var resolved = NormalizeUrl(tag.Groups[1].Value, baseUrl);
            if (string.IsNullOrEmpty(resolved))
            {
                continue;
            }

            // Only keep URLs that end in .js or carry a query string after .js.
            var looksLikeScript =
                resolved.EndsWith(".js", StringComparison.OrdinalIgnoreCase) ||
                resolved.Contains(".js?", StringComparison.OrdinalIgnoreCase);

            // Skip well-known analytics/tracking hosts.
            if (!looksLikeScript || IsExcludedThirdPartyScript(resolved))
            {
                continue;
            }

            discovered.Add(resolved);
        }

        // ES6 import statements that point at a .js file.
        var importRegex = new Regex(
            @"import\s+.*?\s+from\s+[""']([^""']+\.js)[""']",
            scanOptions,
            _regexTimeout);

        foreach (Match import in importRegex.Matches(html))
        {
            var resolved = NormalizeUrl(import.Groups[1].Value, baseUrl);
            if (string.IsNullOrEmpty(resolved))
            {
                continue;
            }

            discovered.Add(resolved);
        }
    }
    catch (RegexMatchTimeoutException)
    {
        _logger.LogWarning("Regex timeout extracting JS URLs from {Source}", baseUrl);
    }

    _logger.LogDebug("Found {Count} JavaScript files in {Source}", discovered.Count, baseUrl);

    var snapshot = discovered.ToList();
    return snapshot.AsReadOnly();
}
// ═══════════════════════════════════════════════════════════════════
// MÉTODOS PRIVADOS
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Resolves <paramref name="url"/> against <paramref name="baseUrl"/> and returns it in the
/// canonical form <c>scheme://host[:port]/path[?query]</c> (fragment and user-info removed).
/// </summary>
/// <param name="url">Raw URL as found in the scanned content; may be absolute, protocol-relative, root-relative or relative.</param>
/// <param name="baseUrl">Absolute URL of the document the reference was found in.</param>
/// <returns>
/// The normalized URL, or <c>null</c> when the input is blank, uses an ignored scheme
/// (<c>data:</c>, <c>javascript:</c>, <c>mailto:</c>), is a pure fragment, or cannot be parsed.
/// </returns>
private string? NormalizeUrl(string url, string baseUrl)
{
    if (string.IsNullOrWhiteSpace(url))
        return null;

    url = url.Trim();

    // Ignore data URIs, javascript:, mailto: and in-page fragments.
    if (url.StartsWith("data:", StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("#", StringComparison.Ordinal))
    {
        return null;
    }

    try
    {
        Uri? resultUri;

        if (url.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative URL: borrow the scheme of the base document.
            var baseUri = new Uri(baseUrl);
            resultUri = new Uri($"{baseUri.Scheme}:{url}");
        }
        else if (!url.StartsWith("http", StringComparison.OrdinalIgnoreCase))
        {
            // Root-relative ("/x") and document-relative ("x", "../x") references.
            // Resolving through Uri keeps the base authority intact, including a
            // non-default port (e.g. http://localhost:8080 + /api -> http://localhost:8080/api).
            resultUri = new Uri(new Uri(baseUrl), url);
        }
        else if (!Uri.TryCreate(url, UriKind.Absolute, out resultUri))
        {
            // Looked absolute but did not parse.
            return null;
        }

        // Rebuild without fragment or user-info.
        var cleanUrl = $"{resultUri.Scheme}://{resultUri.Host}";
        if (!resultUri.IsDefaultPort)
            cleanUrl += $":{resultUri.Port}";

        cleanUrl += resultUri.AbsolutePath;

        // Keep the query string when present.
        if (!string.IsNullOrEmpty(resultUri.Query))
            cleanUrl += resultUri.Query;

        return cleanUrl;
    }
    catch (Exception ex)
    {
        // Best effort: an unparsable reference is skipped, not fatal.
        _logger.LogDebug("Failed to normalize URL '{Url}': {Error}", url, ex.Message);
        return null;
    }
}
/// <summary>
/// Decides whether a normalized URL should be reported as an API endpoint.
/// </summary>
/// <param name="url">Normalized absolute URL.</param>
/// <param name="patternConfidence">Confidence of the pattern that produced the URL.</param>
/// <returns>
/// <c>true</c> when the pattern confidence is High or above; otherwise <c>true</c> only if
/// the URL is not a static asset and contains an API keyword or has a host starting with <c>api.</c>.
/// </returns>
private bool IsLikelyApi(string url, ApiConfidence patternConfidence)
{
    // High-confidence patterns are trusted without further inspection.
    if (patternConfidence >= ApiConfidence.High)
    {
        return true;
    }

    var lowered = url.ToLowerInvariant();

    // Static assets are never APIs.
    var isStaticAsset = ExcludedExtensions.Any(extension =>
        lowered.EndsWith(extension, StringComparison.Ordinal) ||
        lowered.Contains(extension + "?", StringComparison.Ordinal));
    if (isStaticAsset)
    {
        return false;
    }

    // Any known API path fragment is enough.
    var hasApiKeyword = ApiKeywords.Any(keyword =>
        lowered.Contains(keyword, StringComparison.Ordinal));
    if (hasApiKeyword)
    {
        return true;
    }

    // Last resort: an api.* host.
    return Uri.TryCreate(url, UriKind.Absolute, out var parsed)
        && parsed.Host.StartsWith("api.", StringComparison.OrdinalIgnoreCase);
}
// Registrable domains of analytics/tracking providers; a host matches when it equals
// the entry or is a subdomain of it.
private static readonly string[] ExcludedScriptDomains =
{
    "google-analytics.com", "googletagmanager.com", "googlesyndication.com",
    "doubleclick.net",
    "facebook.net", "fbcdn.net", "twitter.com", "platform.twitter.com",
    "connect.facebook.net",
    "hotjar.com", "mixpanel.com", "segment.com", "newrelic.com"
};

// Host labels that mark ad/tracking infrastructure (ads.example.com, cdn.tracking.example.com).
private static readonly string[] ExcludedScriptHostLabels = { "ads", "tracking", "analytics" };

/// <summary>
/// Tells whether a script URL is served from a known analytics/tracking host and can be skipped.
/// </summary>
/// <param name="url">Absolute script URL.</param>
/// <returns>
/// <c>true</c> when the URL's host is (a subdomain of) an excluded domain or contains an
/// excluded label; <c>false</c> otherwise, including when the URL has no parsable host.
/// </returns>
/// <remarks>
/// Only the host is inspected. Matching against the whole URL string would wrongly exclude
/// first-party files such as <c>/static/downloads.js</c> (contains "ads.") or any URL that
/// merely mentions an excluded domain in its path or query.
/// </remarks>
private static bool IsExcludedThirdPartyScript(string url)
{
    if (!Uri.TryCreate(url, UriKind.Absolute, out var parsed) || string.IsNullOrEmpty(parsed.Host))
    {
        return false;
    }

    var host = parsed.Host;

    foreach (var domain in ExcludedScriptDomains)
    {
        if (host.Equals(domain, StringComparison.OrdinalIgnoreCase) ||
            host.EndsWith("." + domain, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    // Compare whole labels so "uploads" or "downloads" never match "ads".
    return host
        .Split('.')
        .Any(label => ExcludedScriptHostLabels.Contains(label, StringComparer.OrdinalIgnoreCase));
}
/// <summary>
/// Builds the result record for a URL found by <paramref name="pattern"/>.
/// </summary>
/// <param name="url">Normalized endpoint URL.</param>
/// <param name="source">URL of the document in which the endpoint was found.</param>
/// <param name="pattern">Pattern that matched; its name and confidence are copied to the result.</param>
private static ExtractedApi CreateExtractedApi(string url, string source, ApiPattern pattern) =>
    new ExtractedApi
    {
        Url = url,
        Source = source,
        Pattern = pattern.Name,
        Confidence = pattern.Confidence
    };
/// <summary>
/// One detection rule used by <c>ExtractApis</c>.
/// </summary>
/// <param name="Name">Identifier copied into results and log messages.</param>
/// <param name="Pattern">Regular expression source.</param>
/// <param name="Confidence">Confidence assigned to URLs found by this rule.</param>
/// <param name="Description">Human-readable description of the rule.</param>
/// <param name="CaptureGroup">Index of the group holding the URL; 0 (default) means the whole match.</param>
private record ApiPattern(
string Name,
string Pattern,
ApiConfidence Confidence,
string Description,
int CaptureGroup = 0);
}
}
================================================================================
SECCIÓN 13: MOTOR PRINCIPAL - API HUNTER ENGINE
================================================================================
// ═══════════════════════════════════════════════════════════════════════════
// ARCHIVO: Core/ApiHunterEngine.cs
// ═══════════════════════════════════════════════════════════════════════════
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace APIHunter.Core
{
/// <summary>
/// Motor principal de API-HUNTER que orquesta todo el proceso de escaneo.
/// Implementa el patrón Pipeline con Channels para procesamiento eficiente.
///
/// Flujo:
/// 1. Fetch página principal
/// 2. Extraer APIs del HTML
/// 3. Extraer URLs de JavaScript
/// 4. Fetch y analizar archivos JS en paralelo
/// 5. Detectar tokens en todo el contenido
/// 6. Probar endpoints descubiertos
/// 7. Generar resultado final
/// </summary>
public sealed class ApiHunterEngine : IAsyncDisposable
{
// Fetches pages/scripts and probes endpoints; disposed by DisposeAsync when it is disposable.
private readonly IHttpFetcher _httpFetcher;
// Extracts API URLs and JavaScript file URLs from fetched content.
private readonly IApiExtractor _apiExtractor;
// Detects tokens/secrets in fetched content.
private readonly ITokenDetector _tokenDetector;
private readonly ILogger<ApiHunterEngine> _logger;
// Optional sink for progress notifications; null disables reporting.
private readonly IProgress<ScanProgress>? _progressReporter;
// Set by DisposeAsync; ScanAsync throws ObjectDisposedException once this is true.
private bool _disposed;
/// <summary>
/// Creates the engine from its collaborators.
/// </summary>
/// <param name="httpFetcher">HTTP access used for fetching and endpoint probing.</param>
/// <param name="apiExtractor">Extractor for API and JavaScript URLs.</param>
/// <param name="tokenDetector">Detector for tokens/secrets.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <param name="progressReporter">Optional progress sink; may be null.</param>
/// <exception cref="ArgumentNullException">Any required dependency is null.</exception>
public ApiHunterEngine(
    IHttpFetcher httpFetcher,
    IApiExtractor apiExtractor,
    ITokenDetector tokenDetector,
    ILogger<ApiHunterEngine> logger,
    IProgress<ScanProgress>? progressReporter = null)
{
    // Validate in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(httpFetcher);
    ArgumentNullException.ThrowIfNull(apiExtractor);
    ArgumentNullException.ThrowIfNull(tokenDetector);
    ArgumentNullException.ThrowIfNull(logger);

    _httpFetcher = httpFetcher;
    _apiExtractor = apiExtractor;
    _tokenDetector = tokenDetector;
    _logger = logger;
    _progressReporter = progressReporter;
}
/// <summary>
/// Runs a complete scan of the target site: fetches the main page, extracts APIs and
/// tokens from its HTML, analyzes the referenced JavaScript files, probes the discovered
/// endpoints and returns the aggregated result.
/// </summary>
/// <param name="targetUrl">URL of the page to scan.</param>
/// <param name="config">Optional scan configuration, passed to the scan context.</param>
/// <param name="cancellationToken">Token that cancels the scan.</param>
/// <returns>
/// The scan result. Failures are reported through <c>ErrorMessage</c> and caller-requested
/// cancellation through <c>WasCancelled</c>; in both cases the result contains whatever
/// was collected so far.
/// </returns>
/// <exception cref="ObjectDisposedException">The engine has been disposed.</exception>
public async Task<ScanResult> ScanAsync(string targetUrl, ScanConfig? config = null,
    CancellationToken cancellationToken = default)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    using var context = new ScanContext(targetUrl, config);
    _logger.LogInformation("Starting scan of {Url}", targetUrl);
    ReportProgress(ScanPhase.Starting, "Initializing scan...");

    try
    {
        // ── PHASE 1: fetch the main page ─────────────────────────────
        ReportProgress(ScanPhase.FetchingMain, "Fetching main page...");
        var mainPageResult = await _httpFetcher.FetchAsync(targetUrl, cancellationToken);
        if (!mainPageResult.Success)
        {
            _logger.LogError("Failed to fetch main page: {Error}", mainPageResult.ErrorMessage);
            ReportProgress(ScanPhase.Error, "Failed to fetch main page");
            return CreateErrorResult(context, $"Failed to fetch main page: {mainPageResult.ErrorMessage}");
        }

        context.IncrementPagesScanned();
        var htmlContent = mainPageResult.Content!;
        _logger.LogInformation("Main page fetched: {Size} bytes in {Time}ms",
            htmlContent.Length, mainPageResult.ResponseTime.TotalMilliseconds);

        // ── PHASE 2: extract APIs and tokens from the HTML ───────────
        ReportProgress(ScanPhase.AnalyzingHtml, "Analyzing HTML content...");
        var htmlApis = _apiExtractor.ExtractApis(htmlContent, targetUrl);
        foreach (var api in htmlApis)
        {
            context.TryAddApi(api.Url);
        }

        var htmlTokens = _tokenDetector.DetectTokens(htmlContent, targetUrl);
        foreach (var token in htmlTokens)
        {
            context.AddToken(token);
        }

        _logger.LogInformation("Found {ApiCount} APIs and {TokenCount} tokens in HTML",
            htmlApis.Count, htmlTokens.Count);

        // ── PHASE 3: locate and analyze JavaScript files ─────────────
        ReportProgress(ScanPhase.AnalyzingJavaScript, "Finding JavaScript files...");
        var jsUrls = _apiExtractor.ExtractJavaScriptUrls(htmlContent, targetUrl);
        var jsUrlsToProcess = jsUrls.Take(context.Config.MaxJavaScriptFiles).ToList();
        _logger.LogInformation("Found {Total} JS files, processing {Count}",
            jsUrls.Count, jsUrlsToProcess.Count);

        await ProcessJavaScriptFilesAsync(context, jsUrlsToProcess, cancellationToken);

        // ── PHASE 4: probe the discovered endpoints ──────────────────
        ReportProgress(ScanPhase.TestingEndpoints, "Testing discovered endpoints...");
        var apisToTest = context.GetDiscoveredApis()
            .Take(context.Config.MaxApisToTest)
            .ToList();
        await TestEndpointsAsync(context, apisToTest, cancellationToken);

        // ── PHASE 5: build the result ────────────────────────────────
        ReportProgress(ScanPhase.Completed, "Scan completed");
        var result = context.ToResult();
        _logger.LogInformation(
            "Scan completed: {TotalApis} APIs discovered, {Working} working, {Tokens} tokens found",
            result.TotalApisDiscovered, result.WorkingEndpointsCount, result.TokensFoundCount);
        return result;
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Only a cancellation requested through the caller's token counts as "cancelled".
        // Other OperationCanceledExceptions (e.g. an internal timeout) fall through to the
        // generic handler and are reported as errors.
        _logger.LogWarning("Scan cancelled by user");
        ReportProgress(ScanPhase.Cancelled, "Scan cancelled");
        var result = context.ToResult();
        return result with { WasCancelled = true };
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Unexpected error during scan");
        ReportProgress(ScanPhase.Error, "Scan failed");
        return CreateErrorResult(context, $"Unexpected error: {ex.Message}");
    }
}
// ═══════════════════════════════════════════════════════════════════
// PROCESAMIENTO DE JAVASCRIPT CON CHANNELS
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Fetches and analyzes the given JavaScript files with bounded concurrency and merges
/// the APIs and tokens they contain into <paramref name="context"/>.
/// </summary>
/// <param name="context">Scan state that receives the findings.</param>
/// <param name="jsUrls">JavaScript file URLs to analyze.</param>
/// <param name="cancellationToken">Token that cancels fetching and processing.</param>
/// <remarks>
/// Producers write analysis results into a channel; a single consumer (this method)
/// applies them to the context. The channel is always completed, even when a producer
/// fails, so the consumer can never wait indefinitely; the producer failure is rethrown
/// after the results already written have been consumed.
/// </remarks>
private async Task ProcessJavaScriptFilesAsync(
    ScanContext context,
    IReadOnlyList<string> jsUrls,
    CancellationToken cancellationToken)
{
    if (jsUrls.Count == 0) return;

    // Capacity equals the number of files, so writers never block on a full channel.
    var resultsChannel = Channel.CreateBounded<JsAnalysisResult>(
        new BoundedChannelOptions(jsUrls.Count)
        {
            FullMode = BoundedChannelFullMode.Wait,
            SingleReader = true,
            SingleWriter = false
        });

    // Producer: fetch and analyze the files in parallel, throttled by the semaphore.
    var producerTask = Task.Run(async () =>
    {
        // Disposed only after Task.WhenAll has observed every worker, so no worker
        // can touch the semaphore after disposal.
        using var semaphore = new SemaphoreSlim(context.Config.MaxConcurrentRequests);
        try
        {
            var tasks = jsUrls.Select(async jsUrl =>
            {
                await semaphore.WaitAsync(cancellationToken);
                try
                {
                    var result = await AnalyzeJavaScriptFile(jsUrl, cancellationToken);
                    await resultsChannel.Writer.WriteAsync(result, cancellationToken);
                }
                finally
                {
                    semaphore.Release();
                }
            });

            await Task.WhenAll(tasks);
        }
        finally
        {
            // Complete on success AND on failure; otherwise a faulted worker would leave
            // the reader below waiting forever.
            resultsChannel.Writer.TryComplete();
        }
    }, cancellationToken);

    // Consumer: apply results as they arrive.
    await foreach (var result in resultsChannel.Reader.ReadAllAsync(cancellationToken))
    {
        if (!result.Success) continue;

        context.IncrementJsFiles();

        foreach (var api in result.DiscoveredApis)
        {
            context.TryAddApi(api.Url);
        }

        foreach (var token in result.FoundTokens)
        {
            context.AddToken(token);
        }

        ReportProgress(ScanPhase.AnalyzingJavaScript,
            $"Analyzed {context.JsFilesAnalyzed}/{jsUrls.Count} JS files");
    }

    // Surfaces any producer exception (including cancellation).
    await producerTask;
}
/// <summary>
/// Downloads one JavaScript file and runs API extraction and token detection on it.
/// </summary>
/// <param name="jsUrl">URL of the JavaScript file.</param>
/// <param name="cancellationToken">Token forwarded to the fetch.</param>
/// <returns>A successful result with the findings, or a failed result when the fetch did not succeed.</returns>
private async Task<JsAnalysisResult> AnalyzeJavaScriptFile(
    string jsUrl,
    CancellationToken cancellationToken)
{
    _logger.LogDebug("Analyzing JavaScript: {Url}", jsUrl);

    var download = await _httpFetcher.FetchAsync(jsUrl, cancellationToken);
    if (download.Success)
    {
        var script = download.Content!;
        var foundApis = _apiExtractor.ExtractApis(script, jsUrl);
        var foundTokens = _tokenDetector.DetectTokens(script, jsUrl);
        return new JsAnalysisResult(jsUrl, true, foundApis, foundTokens);
    }

    _logger.LogDebug("Failed to fetch JS {Url}: {Error}", jsUrl, download.ErrorMessage);
    return JsAnalysisResult.Failed(jsUrl);
}
// ═══════════════════════════════════════════════════════════════════
// TESTING DE ENDPOINTS CON PARALELISMO CONTROLADO
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Probes each discovered endpoint, recording the working ones in <paramref name="context"/>
/// and reporting progress after every probe.
/// </summary>
/// <param name="context">Scan state; also supplies the concurrency limit.</param>
/// <param name="apisToTest">Endpoint URLs to probe.</param>
/// <param name="cancellationToken">Token that cancels the probing.</param>
private async Task TestEndpointsAsync(
    ScanContext context,
    IReadOnlyList<string> apisToTest,
    CancellationToken cancellationToken)
{
    if (!apisToTest.Any())
    {
        return;
    }

    _logger.LogInformation("Testing {Count} endpoints", apisToTest.Count);

    // Counters are shared by concurrent probes and updated with Interlocked.
    int probed = 0;
    int alive = 0;

    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = context.Config.MaxConcurrentRequests,
        CancellationToken = cancellationToken
    };

    async ValueTask ProbeAsync(string apiUrl, CancellationToken ct)
    {
        var probe = await _httpFetcher.TestEndpointAsync(apiUrl, ct);
        Interlocked.Increment(ref probed);

        if (!probe.IsWorking)
        {
            _logger.LogDebug("✗ Not working: {Url} - {Reason}",
                apiUrl, probe.ErrorReason ?? $"HTTP {probe.StatusCode}");
        }
        else
        {
            context.TryAddWorkingEndpoint(apiUrl);
            Interlocked.Increment(ref alive);
            _logger.LogInformation("✓ Working endpoint: {Url} ({StatusCode})",
                apiUrl, probe.StatusCode);
        }

        ReportProgress(ScanPhase.TestingEndpoints,
            $"Tested {probed}/{apisToTest.Count} endpoints ({alive} working)");
    }

    await Parallel.ForEachAsync(apisToTest, parallelOptions, ProbeAsync);
}
// ═══════════════════════════════════════════════════════════════════
// HELPERS
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Sends a progress notification to the optional reporter; does nothing when none was supplied.
/// </summary>
private void ReportProgress(ScanPhase phase, string message)
{
    if (_progressReporter is null)
    {
        return;
    }

    _progressReporter.Report(new ScanProgress(phase, message));
}
/// <summary>
/// Snapshots the context into a result and attaches <paramref name="errorMessage"/> to it.
/// </summary>
private static ScanResult CreateErrorResult(ScanContext context, string errorMessage) =>
    context.ToResult() with { ErrorMessage = errorMessage };
/// <summary>
/// Marks the engine as disposed and disposes the HTTP fetcher when it is disposable.
/// Asynchronous disposal is preferred over synchronous disposal. Repeated calls are no-ops.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    switch (_httpFetcher)
    {
        case IAsyncDisposable asyncFetcher:
            await asyncFetcher.DisposeAsync();
            break;
        case IDisposable syncFetcher:
            syncFetcher.Dispose();
            break;
    }
}
// ═══════════════════════════════════════════════════════════════════
// TIPOS AUXILIARES
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Outcome of analyzing a single JavaScript file.
/// </summary>
/// <param name="JsUrl">URL of the analyzed file.</param>
/// <param name="Success">False when the file could not be fetched.</param>
/// <param name="DiscoveredApis">APIs extracted from the file; empty on failure.</param>
/// <param name="FoundTokens">Tokens detected in the file; empty on failure.</param>
private record JsAnalysisResult(
string JsUrl,
bool Success,
IReadOnlyCollection<ExtractedApi> DiscoveredApis,
IReadOnlyCollection<DetectedToken> FoundTokens)
{
/// <summary>Creates a failed result with empty findings for <paramref name="jsUrl"/>.</summary>
public static JsAnalysisResult Failed(string jsUrl) =>
new(jsUrl, false, Array.Empty<ExtractedApi>(), Array.Empty<DetectedToken>());
}
}
/// <summary>
/// Phases reported through <see cref="ScanProgress"/> while a scan runs.
/// </summary>
public enum ScanPhase
{
// Reported once when the scan is initialized.
Starting,
// The main page is being fetched.
FetchingMain,
// The main page's HTML is being analyzed.
AnalyzingHtml,
// JavaScript files are being located and analyzed.
AnalyzingJavaScript,
// Discovered endpoints are being probed.
TestingEndpoints,
// The scan finished normally.
Completed,
// The scan was cancelled.
Cancelled,
// NOTE(review): presumably the failure state — confirm where it is reported.
Error
}
/// <summary>
/// Progress notification: the current phase plus a human-readable message.
/// </summary>
public record ScanProgress(ScanPhase Phase, string Message);
}
================================================================================
SECCIÓN 14: EXPORTADOR DE RESULTADOS
================================================================================
// ═══════════════════════════════════════════════════════════════════════════
// ARCHIVO: Services/FileResultExporter.cs
// ═══════════════════════════════════════════════════════════════════════════
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace APIHunter.Services
{
/// <summary>
/// Exportador de resultados con múltiples formatos y opciones de seguridad.
/// Soporta: TXT, JSON, CSV, HTML
/// Características de seguridad:
/// - Enmascaramiento opcional de tokens
/// - Backup automático de archivos existentes
/// - Validación de path traversal
/// </summary>
public sealed class FileResultExporter : IResultExporter
{
// Logger for export success and failure messages.
private readonly ILogger<FileResultExporter> _logger;

/// <summary>
/// Creates the exporter.
/// </summary>
/// <param name="logger">Logger for diagnostics; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public FileResultExporter(ILogger<FileResultExporter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <summary>
/// Writes <paramref name="result"/> to the file named in <paramref name="options"/> using the requested format.
/// </summary>
/// <param name="result">Scan result to export.</param>
/// <param name="options">Output path, format, backup and token-masking settings.</param>
/// <returns>
/// A successful result carrying the full path of the written file, or a failed result with
/// an error message. Failures during export are logged and reported, not thrown.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> or <paramref name="options"/> is null.</exception>
public async Task<ExportResult> ExportAsync(ScanResult result, ExportOptions options)
{
    ArgumentNullException.ThrowIfNull(result);
    ArgumentNullException.ThrowIfNull(options);

    try
    {
        // Validate and sanitize the destination path first.
        var targetPath = GetSafeFilePath(options.OutputPath);
        if (targetPath is null)
        {
            return new ExportResult
            {
                Success = false,
                ErrorMessage = "Invalid output path"
            };
        }

        // Preserve an existing file before it is overwritten.
        var mustBackUp = options.CreateBackup && File.Exists(targetPath);
        if (mustBackUp)
        {
            await CreateBackupAsync(targetPath);
        }

        // Render the report in the requested format.
        string rendered;
        switch (options.Format)
        {
            case ExportFormat.Text:
                rendered = GenerateTextReport(result, options);
                break;
            case ExportFormat.Json:
                rendered = GenerateJsonReport(result, options);
                break;
            case ExportFormat.Csv:
                rendered = GenerateCsvReport(result, options);
                break;
            case ExportFormat.Html:
                rendered = GenerateHtmlReport(result, options);
                break;
            default:
                throw new ArgumentException($"Unsupported format: {options.Format}");
        }

        // Make sure the destination directory exists.
        var parentDirectory = Path.GetDirectoryName(targetPath);
        if (!string.IsNullOrEmpty(parentDirectory) && !Directory.Exists(parentDirectory))
        {
            Directory.CreateDirectory(parentDirectory);
        }

        await File.WriteAllTextAsync(targetPath, rendered, Encoding.UTF8);

        _logger.LogInformation("Results exported to {Path} ({Format})",
            targetPath, options.Format);

        return new ExportResult
        {
            Success = true,
            FilePath = Path.GetFullPath(targetPath)
        };
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to export results");
        return new ExportResult
        {
            Success = false,
            ErrorMessage = ex.Message
        };
    }
}
// ═══════════════════════════════════════════════════════════════════
// GENERADORES DE FORMATO
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Renders the scan result as a plain-text report: scan information, statistics, working
/// endpoints, detected tokens and the full list of discovered APIs.
/// </summary>
/// <param name="result">Scan result to render.</param>
/// <param name="options">Controls whether token values are shown in full or masked.</param>
/// <returns>The report text.</returns>
private string GenerateTextReport(ScanResult result, ExportOptions options)
{
    var sb = new StringBuilder();
    sb.AppendLine("╔══════════════════════════════════════════════════════════════════╗");
    sb.AppendLine("║ API-HUNTER v2.0 SCAN REPORT ║");
    sb.AppendLine("╚══════════════════════════════════════════════════════════════════╝");
    sb.AppendLine();

    // Metadata
    sb.AppendLine("═══ SCAN INFORMATION ═══");
    sb.AppendLine($" Target URL: {result.TargetUrl}");
    sb.AppendLine($" Scan Start: {result.StartTime:yyyy-MM-dd HH:mm:ss} UTC");
    sb.AppendLine($" Scan End: {result.EndTime:yyyy-MM-dd HH:mm:ss} UTC");
    sb.AppendLine($" Duration: {result.Duration.TotalSeconds:F1} seconds");
    sb.AppendLine($" Scanner Version: {result.ScannerVersion}");
    if (result.WasCancelled)
        sb.AppendLine($" Status: CANCELLED");
    if (!string.IsNullOrEmpty(result.ErrorMessage))
        sb.AppendLine($" Error: {result.ErrorMessage}");
    sb.AppendLine();

    // Statistics
    sb.AppendLine("═══ STATISTICS ═══");
    sb.AppendLine($" Total APIs Discovered: {result.TotalApisDiscovered}");
    sb.AppendLine($" Working Endpoints: {result.WorkingEndpointsCount}");
    sb.AppendLine($" Tokens/Secrets Found: {result.TokensFoundCount}");
    sb.AppendLine($" Pages Scanned: {result.PagesScanned}");
    sb.AppendLine($" JavaScript Files Analyzed: {result.JsFilesAnalyzed}");
    sb.AppendLine();

    // Working endpoints
    if (result.WorkingEndpoints.Any())
    {
        sb.AppendLine("═══ WORKING ENDPOINTS ═══");
        foreach (var endpoint in result.WorkingEndpoints)
        {
            sb.AppendLine($" ✓ {endpoint}");
        }
        sb.AppendLine();
    }

    // Tokens (optionally masked)
    if (result.FoundTokens.Any())
    {
        sb.AppendLine("═══ DETECTED TOKENS/SECRETS ═══");
        // The literal must stay on one line: a regular string literal cannot span lines.
        sb.AppendLine(" WARNING: Review these findings carefully");
        sb.AppendLine();
        foreach (var token in result.FoundTokens)
        {
            sb.AppendLine($" [{token.Classification.Type}] (Severity: {token.Classification.Severity})");

            // Show the full or the masked value depending on configuration.
            var displayValue = options.IncludeTokenValues
                ? token.Value
                : token.MaskedValue;
            sb.AppendLine($" Value: {displayValue}");
            sb.AppendLine($" Source: {token.Source}:{token.LineNumber}");
            sb.AppendLine($" Info: {token.Classification.Description}");
            sb.AppendLine();
        }
    }

    // All discovered APIs, with a check mark on the working ones
    sb.AppendLine("═══ ALL DISCOVERED APIs ═══");
    var apiCount = 1;
    foreach (var api in result.DiscoveredApis)
    {
        var status = result.WorkingEndpoints.Contains(api) ? "✓" : " ";
        sb.AppendLine($" {apiCount,3}. [{status}] {api}");
        apiCount++;
    }
    sb.AppendLine();

    sb.AppendLine("═══════════════════════════════════════════════════════════════════");
    sb.AppendLine($" Report generated: {DateTime.UtcNow:yyyy-MM-dd HH:mm:ss} UTC");
    sb.AppendLine("═══════════════════════════════════════════════════════════════════");
    return sb.ToString();
}
/// <summary>
/// Renders the scan result as indented JSON with <c>metadata</c>, <c>statistics</c>,
/// <c>workingEndpoints</c>, <c>discoveredApis</c> and <c>tokens</c> sections.
/// </summary>
/// <param name="result">Scan result to render.</param>
/// <param name="options">Controls whether token values are emitted in full or masked.</param>
/// <returns>The JSON document.</returns>
private string GenerateJsonReport(ScanResult result, ExportOptions options)
{
    var serializerOptions = new JsonSerializerOptions
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    // Anonymous projections pin exactly which fields are serialized, and in which order.
    var metadataSection = new
    {
        scannerVersion = result.ScannerVersion,
        targetUrl = result.TargetUrl,
        startTime = result.StartTime,
        endTime = result.EndTime,
        durationSeconds = result.Duration.TotalSeconds,
        wasCancelled = result.WasCancelled,
        errorMessage = result.ErrorMessage
    };

    var statisticsSection = new
    {
        totalApisDiscovered = result.TotalApisDiscovered,
        workingEndpoints = result.WorkingEndpointsCount,
        tokensFound = result.TokensFoundCount,
        pagesScanned = result.PagesScanned,
        jsFilesAnalyzed = result.JsFilesAnalyzed
    };

    var tokenSection = result.FoundTokens.Select(t => new
    {
        type = t.Classification.Type,
        severity = t.Classification.Severity.ToString(),
        confidence = t.Classification.Confidence.ToString(),
        value = options.IncludeTokenValues ? t.Value : t.MaskedValue,
        source = t.Source,
        lineNumber = t.LineNumber,
        description = t.Classification.Description
    });

    var document = new
    {
        metadata = metadataSection,
        statistics = statisticsSection,
        workingEndpoints = result.WorkingEndpoints,
        discoveredApis = result.DiscoveredApis,
        tokens = tokenSection
    };

    return JsonSerializer.Serialize(document, serializerOptions);
}
/// <summary>
/// Builds a two-section CSV export: the discovered APIs (with a flag telling
/// whether each one is listed in <c>WorkingEndpoints</c>) followed by the
/// detected tokens. Each section starts with a <c>#</c> title line and a header row.
/// </summary>
/// <param name="result">Scan result to export.</param>
/// <param name="options">
/// Export settings; <c>IncludeTokenValues</c> selects between the raw and the
/// masked token value.
/// </param>
/// <returns>The CSV text of the report.</returns>
private string GenerateCsvReport(ScanResult result, ExportOptions options)
{
    var sb = new StringBuilder();

    // Section 1: discovered APIs.
    sb.AppendLine("# DISCOVERED APIs");
    sb.AppendLine("URL,IsWorking");
    foreach (var api in result.DiscoveredApis)
    {
        var isWorking = result.WorkingEndpoints.Contains(api);
        sb.AppendLine($"\"{EscapeCsv(api)}\",{isWorking}");
    }
    sb.AppendLine();

    // Section 2: detected tokens.
    sb.AppendLine("# DETECTED TOKENS");
    sb.AppendLine("Type,Severity,Value,Source,LineNumber");
    foreach (var token in result.FoundTokens)
    {
        var displayValue = options.IncludeTokenValues ? token.Value : token.MaskedValue;

        // The type label is written inside quotes like the other text columns,
        // so it needs the same quote-doubling to keep the row well-formed.
        var typeLabel = EscapeCsv($"{token.Classification.Type}");

        sb.AppendLine($"\"{typeLabel}\",\"{token.Classification.Severity}\",\"{EscapeCsv(displayValue)}\",\"{EscapeCsv(token.Source)}\",{token.LineNumber}");
    }

    return sb.ToString();
}
/// <summary>
/// Renders the scan result as a standalone HTML page with an inline stylesheet:
/// a header with target and timing, a statistics card, one highlighted box per
/// detected token (only when tokens exist) and a table of discovered APIs.
/// </summary>
/// <param name="result">Scan result to export.</param>
/// <param name="options">
/// Export settings; <c>IncludeTokenValues</c> selects between the raw and the
/// masked token value.
/// </param>
/// <returns>The HTML text of the report.</returns>
private string GenerateHtmlReport(ScanResult result, ExportOptions options)
{
    var sb = new StringBuilder();

    // Document head and inline stylesheet. Every CSS rule is kept on a single
    // line because regular string literals cannot span physical lines.
    sb.AppendLine("<!DOCTYPE html>");
    sb.AppendLine("<html lang=\"en\">");
    sb.AppendLine("<head>");
    sb.AppendLine(" <meta charset=\"UTF-8\">");
    sb.AppendLine(" <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">");
    sb.AppendLine(" <title>API-HUNTER Scan Report</title>");
    sb.AppendLine(" <style>");
    sb.AppendLine(" body { font-family: 'Segoe UI', Arial, sans-serif; margin: 40px; background: #1e1e1e; color: #d4d4d4; }");
    sb.AppendLine(" h1, h2 { color: #569cd6; }");
    sb.AppendLine(" .stat-card { background: #2d2d2d; padding: 20px; margin: 10px 0; border-radius: 8px; }");
    sb.AppendLine(" .working { color: #4ec9b0; }");
    sb.AppendLine(" .token-critical { background: #5a1d1d; border-left: 4px solid #f44747; padding: 10px; margin: 10px 0; }");
    sb.AppendLine(" .token-high { background: #5a4a1d; border-left: 4px solid #dcdcaa; padding: 10px; margin: 10px 0; }");
    sb.AppendLine(" .token-medium { background: #1d3a5a; border-left: 4px solid #569cd6; padding: 10px; margin: 10px 0; }");
    sb.AppendLine(" table { width: 100%; border-collapse: collapse; }");
    sb.AppendLine(" th, td { padding: 10px; text-align: left; border-bottom: 1px solid #404040; }");
    sb.AppendLine(" th { background: #2d2d2d; }");
    sb.AppendLine(" code { background: #1e1e1e; padding: 2px 6px; border-radius: 4px; }");
    sb.AppendLine(" </style>");
    sb.AppendLine("</head>");
    sb.AppendLine("<body>");

    // Page header.
    sb.AppendLine("<h1>API-HUNTER Scan Report</h1>");
    sb.AppendLine($"<p>Target: <code>{System.Net.WebUtility.HtmlEncode(result.TargetUrl)}</code></p>");
    sb.AppendLine($"<p>Scan Time: {result.StartTime:yyyy-MM-dd HH:mm:ss} UTC ({result.Duration.TotalSeconds:F1}s)</p>");

    // Statistics card.
    sb.AppendLine("<div class=\"stat-card\">");
    sb.AppendLine($" <strong>APIs Discovered:</strong> {result.TotalApisDiscovered} | ");
    sb.AppendLine($" <strong class=\"working\">Working:</strong> {result.WorkingEndpointsCount} | ");
    sb.AppendLine($" <strong>Tokens Found:</strong> {result.TokensFoundCount}");
    sb.AppendLine("</div>");

    // Detected tokens; the whole section is omitted when nothing was found.
    if (result.FoundTokens.Any())
    {
        sb.AppendLine("<h2>Detected Tokens/Secrets</h2>");
        foreach (var token in result.FoundTokens)
        {
            // Anything that is not Critical or High uses the medium styling.
            var cssClass = token.Classification.Severity switch
            {
                TokenSeverity.Critical => "token-critical",
                TokenSeverity.High => "token-high",
                _ => "token-medium"
            };
            var displayValue = options.IncludeTokenValues ? token.Value : token.MaskedValue;

            // Encode the type label as well, so every dynamic text fragment in
            // this box goes through the same HTML escaping.
            var typeLabel = System.Net.WebUtility.HtmlEncode($"{token.Classification.Type}");

            sb.AppendLine($"<div class=\"{cssClass}\">");
            sb.AppendLine($" <strong>[{typeLabel}]</strong> - Severity: {token.Classification.Severity}<br>");
            sb.AppendLine($" <code>{System.Net.WebUtility.HtmlEncode(displayValue)}</code><br>");
            sb.AppendLine($" <small>Source: {System.Net.WebUtility.HtmlEncode(token.Source)}:{token.LineNumber}</small>");
            sb.AppendLine("</div>");
        }
    }

    // Discovered APIs table.
    sb.AppendLine("<h2>Discovered APIs</h2>");
    sb.AppendLine("<table>");
    sb.AppendLine("<tr><th>#</th><th>Status</th><th>URL</th></tr>");
    var count = 1;
    foreach (var api in result.DiscoveredApis)
    {
        var status = result.WorkingEndpoints.Contains(api)
            ? "<span class=\"working\">✓ Working</span>"
            : "○ Not tested";
        sb.AppendLine($"<tr><td>{count++}</td><td>{status}</td><td><code>{System.Net.WebUtility.HtmlEncode(api)}</code></td></tr>");
    }
    sb.AppendLine("</table>");

    // Footer.
    sb.AppendLine($"<hr><p><small>Generated by API-HUNTER v{result.ScannerVersion} at {DateTime.UtcNow:yyyy-MM-dd HH:mm:ss} UTC</small></p>");
    sb.AppendLine("</body></html>");
    return sb.ToString();
}
// ═══════════════════════════════════════════════════════════════════
// HELPERS DE SEGURIDAD
// ═══════════════════════════════════════════════════════════════════
/// <summary>
/// Sanitizes the file-name portion of <paramref name="path"/>: characters that
/// are invalid in file names are replaced with <c>_</c> and a leading dot is
/// prefixed with <c>_</c> so the file is not hidden on Unix.
/// </summary>
/// <param name="path">Requested output path (absolute or relative).</param>
/// <returns>
/// The original directory (or the current directory when the path has none)
/// combined with the sanitized file name; <c>null</c> when the path is blank or
/// contains no file name.
/// </returns>
/// <remarks>
/// Only the file name is sanitized. The directory portion is passed through
/// unchanged, so relative segments such as <c>..</c> are preserved.
/// </remarks>
private string? GetSafeFilePath(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    var fileName = Path.GetFileName(path);

    // A path ending in a directory separator has no file name. Combining the
    // directory with an empty name would hand back the directory itself.
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    // Replace characters the platform does not allow in file names.
    foreach (var invalidChar in Path.GetInvalidFileNameChars())
    {
        fileName = fileName.Replace(invalidChar, '_');
    }

    // Prevent hidden files on Unix.
    if (fileName.StartsWith('.'))
    {
        fileName = "_" + fileName;
    }

    // Rebuild the path, defaulting to the current directory when none was given.
    var directory = Path.GetDirectoryName(path);
    if (string.IsNullOrEmpty(directory))
    {
        directory = Directory.GetCurrentDirectory();
    }

    return Path.Combine(directory, fileName);
}
/// <summary>
/// Copies <paramref name="filePath"/> to a sibling file named
/// <c>{filePath}.{yyyyMMdd_HHmmss}.bak</c> (local time) and logs the backup path.
/// </summary>
/// <param name="filePath">Path of the existing file to back up.</param>
/// <returns>A task that completes once the copy has been written and logged.</returns>
/// <exception cref="IOException">
/// Thrown when the source cannot be read or a backup with the same timestamp
/// already exists.
/// </exception>
/// <remarks>
/// The content is copied through streams, so file attributes and timestamps of
/// the source are not carried over to the backup.
/// </remarks>
private async Task CreateBackupAsync(string filePath)
{
    const int BufferSize = 81920;
    var backupPath = $"{filePath}.{DateTime.Now:yyyyMMdd_HHmmss}.bak";

    // Streams opened for asynchronous I/O let the copy be awaited instead of
    // blocking the caller. FileMode.CreateNew refuses to overwrite an existing
    // backup.
    await using (var source = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, BufferSize, useAsync: true))
    await using (var destination = new FileStream(backupPath, FileMode.CreateNew, FileAccess.Write, FileShare.None, BufferSize, useAsync: true))
    {
        await source.CopyToAsync(destination);
    }

    _logger.LogInformation("Created backup: {BackupPath}", backupPath);
}
/// <summary>
/// Prepares a value for use inside a double-quoted CSV field by doubling every
/// embedded double quote. The surrounding quotes are added by the caller.
/// </summary>
/// <param name="value">Raw field text; null or empty yields an empty string.</param>
/// <returns>The text with each <c>"</c> replaced by <c>""</c>.</returns>
private static string EscapeCsv(string value) =>
    string.IsNullOrEmpty(value)
        ? ""
        : value.Replace("\"", "\"\"");
}
}