<?php
namespace App\Service;
/**
 * Scrapes a Google Scholar profile page and extracts the researcher's name,
 * photo, citation statistics, citations per year and publication list.
 *
 * The extraction is purely regex-based and tied to the CSS class names / ids
 * used in the patterns below; if the page markup changes, the affected fields
 * silently fall back to their defaults.
 */
class ScholarService
{
    /**
     * Turns an HTML fragment into plain text: tags are stripped, surrounding
     * whitespace is trimmed and HTML entities are decoded to UTF-8.
     */
    private function cleanString(string $text): string
    {
        return html_entity_decode(trim(strip_tags($text)), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Resolves an href/src attribute value against https://scholar.google.com.
     *
     * Entities such as "&amp;" are decoded first. Values that are already
     * absolute (http:// or https://) are returned unchanged, protocol-relative
     * values get an "https:" scheme, everything else is treated as a path on
     * the Scholar host.
     */
    private function absoluteUrl(string $href): string
    {
        $url = trim(html_entity_decode($href, ENT_QUOTES | ENT_HTML5, 'UTF-8'));

        if (preg_match('#^https?://#i', $url) === 1) {
            return $url;
        }
        if (strncmp($url, '//', 2) === 0) {
            return 'https:' . $url;
        }

        $separator = strncmp($url, '/', 1) === 0 ? '' : '/';

        return 'https://scholar.google.com' . $separator . $url;
    }

    /**
     * Downloads the profile page of the given Scholar user id and parses it.
     *
     * @param string $user Value of the "user" query parameter of the profile URL.
     *
     * @return array Empty array when $user is blank or the page could not be
     *               downloaded; otherwise an array with the keys "name",
     *               "photo_url", "total_citations", "h_index", "i10_index",
     *               "citations_per_year" (year => count) and "publications"
     *               (list of arrays with "titre", "link", "authors", "venue",
     *               "year", "citations").
     */
    public function fetchScholarData(string $user): array
    {
        $user = trim($user);
        if ($user === '') {
            // Nothing to look up: avoid a pointless network round-trip.
            return [];
        }

        $url = 'https://scholar.google.com/citations?user=' . urlencode($user)
            . '&hl=fr' . '&pagesize=100&view_op=list_works&sortby=pubdate';

        // Best-effort fetch: warnings are suppressed on purpose and any failure
        // is reported to the caller as an empty result.
        $html = @file_get_contents($url);
        if ($html === false || $html === '') {
            return [];
        }

        // Replace invalid byte sequences so every extracted string is valid UTF-8.
        // (The former HTML-ENTITIES conversion is deprecated since PHP 8.2 and is
        // not needed: the text is entity-decoded after extraction anyway.)
        if (function_exists('mb_scrub')) {
            $html = mb_scrub($html, 'UTF-8');
        }

        return $this->parseProfile($html);
    }

    /**
     * Extracts all profile data from the raw HTML of a profile page.
     */
    private function parseProfile(string $html): array
    {
        // Name and photo.
        preg_match('/<div id="gsc_prf_in">(.*?)<\/div>/s', $html, $nameMatch);
        preg_match('/<img id="gsc_prf_pup-img" src="(.*?)"/s', $html, $photoMatch);

        // Statistics cells, in document order.
        preg_match_all('/<td class="gsc_rsb_std">(\d+)<\/td>/s', $html, $statMatches);
        $stats = $statMatches[1] ?? [];

        $name = isset($nameMatch[1]) ? $this->cleanString($nameMatch[1]) : '';
        $photo = isset($photoMatch[1]) ? trim($photoMatch[1]) : '';

        return [
            'name' => $name !== '' ? $name : 'Nom du Chercheur',
            'photo_url' => $photo !== ''
                ? $this->absoluteUrl($photo)
                : 'https://cdn-icons-png.flaticon.com/512/847/847969.png',
            // Only cells 0, 2 and 4 are used; presumably the odd cells hold a
            // second ("recent years") column — verify against the live page.
            'total_citations' => (int) ($stats[0] ?? 0),
            'h_index' => (int) ($stats[2] ?? 0),
            'i10_index' => (int) ($stats[4] ?? 0),
            'citations_per_year' => $this->parseCitationsPerYear($html),
            'publications' => $this->parsePublications($html),
        ];
    }

    /**
     * Builds the year => citation count map from the chart markup.
     *
     * Year labels and bar values are matched independently and paired by
     * position, so the map is only built when both lists have the same length;
     * otherwise (e.g. a year without a value element) an empty array is returned.
     */
    private function parseCitationsPerYear(string $html): array
    {
        preg_match_all('/<span class="gsc_g_t"[^>]*>(\d+)<\/span>/', $html, $yearMatches);
        preg_match_all('/<span class="gsc_g_al">(\d+)<\/span>/', $html, $countMatches);

        $years = $yearMatches[1] ?? [];
        $counts = $countMatches[1] ?? [];

        if ($years === [] || count($years) !== count($counts)) {
            return [];
        }

        return array_map('intval', array_combine($years, $counts));
    }

    /**
     * Extracts one entry per publication table row.
     */
    private function parsePublications(string $html): array
    {
        preg_match_all('/<tr class="gsc_a_tr">(.*?)<\/tr>/s', $html, $rowMatches);

        $publications = [];
        foreach ($rowMatches[1] ?? [] as $row) {
            preg_match('/<a href="([^"]+)" class="gsc_a_at">(.*?)<\/a>/si', $row, $title);
            // First gray line holds the authors, the second one the venue.
            preg_match_all('/<div class="gs_gray">(.*?)<\/div>/s', $row, $grayLines);
            preg_match('/<span class="gsc_a_h gsc_a_hc gs_ibl">(.*?)<\/span>/s', $row, $year);
            preg_match('/<a[^>]*class="gsc_a_ac[^"]*"[^>]*>(.*?)<\/a>/s', $row, $citation);

            $publications[] = [
                'titre' => isset($title[2]) ? $this->cleanString($title[2]) : '',
                'link' => isset($title[1]) ? $this->absoluteUrl($title[1]) : '',
                'authors' => isset($grayLines[1][0]) ? $this->cleanString($grayLines[1][0]) : '',
                'venue' => isset($grayLines[1][1]) ? $this->cleanString($grayLines[1][1]) : '',
                'year' => isset($year[1]) && is_numeric($year[1]) ? (int) $year[1] : 0,
                'citations' => isset($citation[1]) ? (int) $citation[1] : 0,
            ];
        }

        return $publications;
    }
}
?>