| | """ |
| | Evidence Verifier |
| | |
| | Verifies that claims are supported by document evidence. |
| | Cross-references extracted information with source documents. |
| | """ |
| |
|
| | from typing import List, Optional, Dict, Any, Tuple |
| | from enum import Enum |
| | from pydantic import BaseModel, Field |
| | from loguru import logger |
| | import re |
| |
|
| |
|
class EvidenceStrength(str, Enum):
    """Ordered labels for how strongly evidence supports a claim."""

    STRONG = "strong"      # near-exact support found
    MODERATE = "moderate"  # good partial support
    WEAK = "weak"          # marginal support
    NONE = "none"          # no usable support
| |
|
| |
|
class VerifierConfig(BaseModel):
    """Configuration for evidence verifier."""

    # --- Matching behaviour ---
    fuzzy_match: bool = Field(default=True, description="Enable fuzzy matching")
    case_sensitive: bool = Field(default=False, description="Case-sensitive matching")
    min_match_ratio: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Minimum match ratio for fuzzy matching"
    )

    # --- Strength thresholds ---
    # A combined match score is bucketed into EvidenceStrength by comparing
    # against these cutoffs (strong >= 0.9, moderate >= 0.7, weak >= 0.5).
    strong_threshold: float = Field(default=0.9, ge=0.0, le=1.0)
    moderate_threshold: float = Field(default=0.7, ge=0.0, le=1.0)
    weak_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # --- Output shaping ---
    # max_evidence_per_claim caps how many matches a VerificationResult keeps;
    # context_window is the number of characters captured around each match.
    max_evidence_per_claim: int = Field(default=5, ge=1)
    context_window: int = Field(default=100, description="Characters around match")
| |
|
| |
|
class EvidenceMatch(BaseModel):
    """A match between claim and evidence."""

    # Core match result: the matched span, its score in [0, 1], and the
    # strength bucket derived from the verifier's thresholds.
    evidence_text: str
    match_score: float
    strength: EvidenceStrength

    # Source location, when the originating chunk provides it.
    chunk_id: Optional[str] = None
    page: Optional[int] = None
    position: Optional[int] = None

    # Text immediately surrounding the match (for display/inspection).
    context_before: Optional[str] = None
    context_after: Optional[str] = None
| |
|
| |
|
class VerificationResult(BaseModel):
    """Result of evidence verification."""

    # The claim that was checked, whether it passed, and at what strength
    # and confidence (confidence mirrors the best match's score).
    claim: str
    verified: bool
    strength: EvidenceStrength
    confidence: float

    # All retained matches (capped by config) and the single best one.
    evidence_matches: List[EvidenceMatch]
    best_match: Optional[EvidenceMatch] = None

    # Fraction of claim terms covered by evidence, whether negated
    # language was found near claim terms, and free-form reviewer notes.
    coverage_score: float
    contradiction_found: bool = False
    notes: Optional[str] = None
| |
|
| |
|
class EvidenceVerifier:
    """
    Verifies claims against document evidence.

    Features:
    - Text matching (exact and fuzzy)
    - Evidence strength scoring
    - Contradiction detection
    - Context extraction
    """

    # Common English function words skipped during key-term extraction.
    # Hoisted to the class so the set is built once instead of per call.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "must", "shall",
        "can", "need", "dare", "ought", "used", "to", "of", "in",
        "for", "on", "with", "at", "by", "from", "as", "into", "through",
        "during", "before", "after", "above", "below", "between",
        "and", "but", "if", "or", "because", "until", "while",
    })

    # Pre-compiled negation markers used by contradiction detection.
    _NEGATION_PATTERNS = [
        re.compile(p)
        for p in (
            r'\bnot\b', r'\bno\b', r'\bnever\b', r'\bnone\b',
            r'\bwithout\b', r'\bfailed\b', r'\bdenied\b',
        )
    ]

    # Maximum character distance between a negation word and a claim-term
    # occurrence for the pair to count as a potential contradiction.
    _NEGATION_WINDOW = 30

    def __init__(self, config: Optional[VerifierConfig] = None):
        """Initialize evidence verifier.

        Args:
            config: Optional configuration; defaults to VerifierConfig().
        """
        self.config = config or VerifierConfig()

    def verify_claim(
        self,
        claim: str,
        evidence_chunks: List[Dict[str, Any]],
    ) -> VerificationResult:
        """
        Verify a claim against evidence.

        Args:
            claim: The claim to verify
            evidence_chunks: List of evidence chunks with text

        Returns:
            VerificationResult
        """
        if not claim or not evidence_chunks:
            # Guard: claim may be empty or None; normalize to "" so the
            # pydantic str field validates instead of raising.
            return VerificationResult(
                claim=claim or "",
                verified=False,
                strength=EvidenceStrength.NONE,
                confidence=0.0,
                evidence_matches=[],
                coverage_score=0.0,
            )

        # Collect matches from every chunk that actually carries text.
        matches: List[EvidenceMatch] = []
        for chunk in evidence_chunks:
            chunk_text = chunk.get("text", "")
            if not chunk_text:
                continue
            matches.extend(self._find_matches(claim, chunk_text, chunk))

        # Keep only the highest-scoring matches, capped by config.
        matches.sort(key=lambda m: m.match_score, reverse=True)
        top_matches = matches[:self.config.max_evidence_per_claim]

        if top_matches:
            best_match = top_matches[0]
            overall_strength = best_match.strength
            confidence = best_match.match_score
            coverage_score = self._calculate_coverage(claim, top_matches)
        else:
            best_match = None
            overall_strength = EvidenceStrength.NONE
            confidence = 0.0
            coverage_score = 0.0

        # A claim is verified when evidence is at least moderate AND the
        # confidence clears the moderate threshold.
        verified = (
            overall_strength in (EvidenceStrength.STRONG, EvidenceStrength.MODERATE)
            and confidence >= self.config.moderate_threshold
        )

        contradiction_found = self._check_contradictions(claim, evidence_chunks)

        return VerificationResult(
            claim=claim,
            verified=verified and not contradiction_found,
            strength=overall_strength,
            confidence=confidence,
            evidence_matches=top_matches,
            best_match=best_match,
            coverage_score=coverage_score,
            contradiction_found=contradiction_found,
        )

    def verify_multiple(
        self,
        claims: List[str],
        evidence_chunks: List[Dict[str, Any]],
    ) -> List[VerificationResult]:
        """
        Verify multiple claims against evidence.

        Args:
            claims: List of claims to verify
            evidence_chunks: Evidence chunks

        Returns:
            List of VerificationResult
        """
        return [self.verify_claim(claim, evidence_chunks) for claim in claims]

    def verify_extraction(
        self,
        extraction: Dict[str, Any],
        evidence_chunks: List[Dict[str, Any]],
    ) -> Dict[str, VerificationResult]:
        """
        Verify extracted fields as claims.

        Args:
            extraction: Dictionary of field -> value
            evidence_chunks: Evidence chunks

        Returns:
            Dictionary of field -> VerificationResult
        """
        results: Dict[str, VerificationResult] = {}

        for field, value in extraction.items():
            # None means "not extracted"; nothing to verify.
            if value is None:
                continue

            # Render the field/value pair as a textual claim.
            claim = f"{field}: {value}"
            results[field] = self.verify_claim(claim, evidence_chunks)

        return results

    def _find_matches(
        self,
        claim: str,
        text: str,
        chunk: Dict[str, Any],
    ) -> List[EvidenceMatch]:
        """Find matches for claim in a single chunk's text.

        Produces at most two matches per chunk: an exact-substring match
        (always STRONG, score 1.0), plus a term-level match whose score
        blends average per-term score (70%) with term coverage (30%).
        """
        matches: List[EvidenceMatch] = []

        # Normalize case unless configured to match case-sensitively.
        claim_normalized = claim if self.config.case_sensitive else claim.lower()
        text_normalized = text if self.config.case_sensitive else text.lower()

        terms = self._extract_terms(claim_normalized)

        # Exact substring match: strongest possible evidence.
        if claim_normalized in text_normalized:
            pos = text_normalized.find(claim_normalized)
            matches.append(self._create_match(
                text, pos, len(claim), chunk,
                score=1.0, strength=EvidenceStrength.STRONG,
            ))

        # Term-level matching: score each key term individually.
        # NOTE(review): `term in text` matches inside longer words
        # ("cat" in "category"); kept as-is to preserve existing scoring.
        term_scores: List[Tuple[str, int, float]] = []
        for term in terms:
            if term in text_normalized:
                term_scores.append((term, text_normalized.find(term), 1.0))
            elif self.config.fuzzy_match:
                fuzzy_score, fuzzy_pos = self._fuzzy_find(term, text_normalized)
                if fuzzy_score >= self.config.min_match_ratio:
                    term_scores.append((term, fuzzy_pos, fuzzy_score))

        if term_scores:
            # Average is deliberately over len(terms), so missing terms
            # drag the score down; coverage rewards breadth separately.
            avg_score = sum(s[2] for s in term_scores) / len(terms) if terms else 0
            coverage = len(term_scores) / len(terms) if terms else 0
            combined_score = (avg_score * 0.7) + (coverage * 0.3)

            # Bucket the combined score into a strength level.
            if combined_score >= self.config.strong_threshold:
                strength = EvidenceStrength.STRONG
            elif combined_score >= self.config.moderate_threshold:
                strength = EvidenceStrength.MODERATE
            elif combined_score >= self.config.weak_threshold:
                strength = EvidenceStrength.WEAK
            else:
                strength = EvidenceStrength.NONE

            if strength != EvidenceStrength.NONE:
                # Anchor the match at the best-scoring term's position.
                best_term = max(term_scores, key=lambda t: t[2])
                matches.append(self._create_match(
                    text, best_term[1], len(best_term[0]), chunk,
                    score=combined_score, strength=strength,
                ))

        return matches

    def _create_match(
        self,
        text: str,
        position: int,
        length: int,
        chunk: Dict[str, Any],
        score: float,
        strength: EvidenceStrength,
    ) -> EvidenceMatch:
        """Create an evidence match, capturing surrounding context.

        Context is first windowed by config.context_window, then trimmed
        to at most 50 characters on each side for compact storage.
        """
        window = self.config.context_window
        start = max(0, position - window)
        end = min(len(text), position + length + window)

        context_before = text[start:position] if position > 0 else ""
        evidence_text = text[position:position + length]
        context_after = text[position + length:end] if position + length < len(text) else ""

        return EvidenceMatch(
            evidence_text=evidence_text,
            match_score=score,
            strength=strength,
            chunk_id=chunk.get("chunk_id"),
            page=chunk.get("page"),
            position=position,
            context_before=context_before[-50:] if context_before else None,
            context_after=context_after[:50] if context_after else None,
        )

    def _extract_terms(self, text: str) -> List[str]:
        """Extract key terms from text.

        Returns lowercase words longer than 2 characters that are not
        stop words, in order of appearance (duplicates preserved).
        """
        words = re.findall(r'\b\w+\b', text.lower())
        return [w for w in words if w not in self._STOP_WORDS and len(w) > 2]

    def _fuzzy_find(self, term: str, text: str) -> Tuple[float, int]:
        """Find term in text with fuzzy matching.

        Slides a term-length window over text and scores each window by
        the fraction of aligned characters that match exactly.

        Returns:
            (best_score, best_position); (0.0, 0) when term is empty or
            text is shorter than the term.
        """
        term_len = len(term)
        if term_len == 0:
            # Guard against division by zero on a degenerate term.
            return 0.0, 0

        best_score = 0.0
        best_pos = 0
        for i in range(len(text) - term_len + 1):
            window = text[i:i + term_len]
            score = sum(1 for a, b in zip(term, window) if a == b) / term_len
            if score > best_score:
                best_score = score
                best_pos = i

        return best_score, best_pos

    def _calculate_coverage(
        self,
        claim: str,
        matches: List[EvidenceMatch],
    ) -> float:
        """Calculate how much of the claim is covered by evidence.

        Returns the fraction of distinct claim terms that appear in at
        least one match's evidence text (0.0 when the claim has no terms).
        """
        claim_terms = set(self._extract_terms(claim.lower()))
        if not claim_terms:
            return 0.0

        covered_terms: set = set()
        for match in matches:
            match_terms = set(self._extract_terms(match.evidence_text.lower()))
            covered_terms.update(match_terms.intersection(claim_terms))

        return len(covered_terms) / len(claim_terms)

    def _check_contradictions(
        self,
        claim: str,
        evidence_chunks: List[Dict[str, Any]],
    ) -> bool:
        """Check if evidence contains contradictions to the claim.

        Heuristic: a negation word within _NEGATION_WINDOW characters of
        ANY occurrence of a claim term counts as a potential contradiction.
        (Fixes the earlier version, which only compared against the first
        occurrence of each term and so missed later ones.)
        """
        claim_terms = set(self._extract_terms(claim.lower()))
        if not claim_terms:
            return False

        for chunk in evidence_chunks:
            text = chunk.get("text", "").lower()
            if not text:
                continue

            # Positions of every negation word in this chunk.
            negation_positions = [
                m.start()
                for pattern in self._NEGATION_PATTERNS
                for m in pattern.finditer(text)
            ]
            if not negation_positions:
                continue

            for term in claim_terms:
                # Compare every occurrence of the term, not just the first.
                for occurrence in re.finditer(re.escape(term), text):
                    term_pos = occurrence.start()
                    if any(
                        abs(neg_pos - term_pos) < self._NEGATION_WINDOW
                        for neg_pos in negation_positions
                    ):
                        return True

        return False
| |
|
| |
|
| | |
# Module-level singleton; created lazily by get_evidence_verifier() and
# cleared by reset_evidence_verifier().
_evidence_verifier: Optional[EvidenceVerifier] = None
| |
|
| |
|
def get_evidence_verifier(
    config: Optional[VerifierConfig] = None,
) -> EvidenceVerifier:
    """Get or create singleton evidence verifier.

    Note: `config` is only applied on first creation; later calls return
    the existing instance unchanged.
    """
    global _evidence_verifier
    if _evidence_verifier is not None:
        return _evidence_verifier
    _evidence_verifier = EvidenceVerifier(config)
    return _evidence_verifier
| |
|
| |
|
def reset_evidence_verifier() -> None:
    """Reset the global verifier instance so the next get_evidence_verifier()
    call builds a fresh one."""
    global _evidence_verifier
    _evidence_verifier = None
| |
|