src/Service/ScholarService.php line 19

Open in your IDE?
  1. <?php
  2. namespace App\Service;
  /**
   * Scrapes a public Google Scholar profile page and extracts the researcher's
   * name, photo, citation metrics, per-year citation counts and publication list.
   *
   * Extraction is purely regex-based: element ids and CSS classes are matched
   * literally, so any markup change on the remote page silently yields default
   * values rather than an error.
   */
  class ScholarService
  {
      /** Host prepended to the site-relative URLs found in the profile markup. */
      private const BASE_URL = 'https://scholar.google.com';

      /** Name returned when the page exposes no profile name. */
      private const DEFAULT_NAME = 'Nom du Chercheur';

      /** Avatar returned when the page exposes no profile photo. */
      private const DEFAULT_PHOTO_URL = 'https://cdn-icons-png.flaticon.com/512/847/847969.png';

      /**
       * Turns an HTML fragment into plain text.
       *
       * Tags are stripped first, then surrounding whitespace is trimmed, and
       * finally HTML entities (named and numeric, including quotes) are decoded
       * to UTF-8 characters.
       *
       * @param string $text HTML fragment captured from the page
       *
       * @return string Plain-text UTF-8 value
       */
      private function cleanString(string $text): string
      {
          return html_entity_decode(trim(strip_tags($text)), ENT_QUOTES | ENT_HTML5, 'UTF-8');
      }

      /**
       * Downloads and parses the Scholar profile of the given user.
       *
       * @param string $user Scholar user identifier (the `user` query parameter)
       *
       * @return array Empty array when the identifier is blank or the page could
       *               not be downloaded; otherwise an array with the keys `name`,
       *               `photo_url`, `total_citations`, `h_index`, `i10_index`,
       *               `citations_per_year` (year => count) and `publications`
       *               (list of arrays with `titre`, `link`, `authors`, `venue`,
       *               `year`, `citations`)
       */
      public function fetchScholarData(string $user): array
      {
          // Nothing to look up: skip the HTTP round-trip and behave like a failed fetch.
          if (trim($user) === '') {
              return [];
          }

          $url = self::BASE_URL . '/citations?user=' . urlencode($user)
              . '&hl=fr&pagesize=100&view_op=list_works&sortby=pubdate';

          // Deliberately best-effort: network/HTTP failures are reported to the
          // caller as an empty result, so the warning is suppressed here.
          $html = @file_get_contents($url);
          if ($html === false || $html === '') {
              return [];
          }

          // Re-express every non-ASCII character as a numeric entity so the
          // patterns below only ever see ASCII; cleanString() decodes them back.
          // This is the replacement for mb_convert_encoding(..., 'HTML-ENTITIES'),
          // which is deprecated since PHP 8.2.
          $html = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

          return $this->parseProfile($html);
      }

      /**
       * Extracts every profile field from the (entity-encoded) page markup.
       *
       * @param string $html Full profile page
       *
       * @return array Profile data, with defaults for anything not found
       */
      private function parseProfile(string $html): array
      {
          // Name and photo.
          preg_match('/<div id="gsc_prf_in">(.*?)<\/div>/s', $html, $name);
          preg_match('/<img id="gsc_prf_pup-img" src="(.*?)"/s', $html, $photo);

          // Metric cells, in document order.
          preg_match_all('/<td class="gsc_rsb_std">(\d+)<\/td>/s', $html, $stats);
          $metrics = $stats[1] ?? [];

          return [
              'name' => isset($name[1]) ? $this->cleanString($name[1]) : self::DEFAULT_NAME,
              'photo_url' => isset($photo[1]) ? $this->absoluteUrl($photo[1]) : self::DEFAULT_PHOTO_URL,
              // Only cells 0, 2 and 4 are read. NOTE(review): presumably the odd
              // cells hold a second column of the same metrics - confirm against
              // the live markup.
              'total_citations' => (int) ($metrics[0] ?? 0),
              'h_index' => (int) ($metrics[2] ?? 0),
              'i10_index' => (int) ($metrics[4] ?? 0),
              'citations_per_year' => $this->extractCitationsPerYear($html),
              'publications' => $this->extractPublications($html),
          ];
      }

      /**
       * Builds the year => citation count map from the page markup.
       *
       * Year labels and count values are collected separately and paired by
       * position. When the two lists differ in length the pairing cannot be
       * trusted, so an empty map is returned instead of misaligned data.
       *
       * @param string $html Full profile page
       *
       * @return array Map of year to citation count (int), possibly empty
       */
      private function extractCitationsPerYear(string $html): array
      {
          preg_match_all('/<span class="gsc_g_t"[^>]*>(\d+)<\/span>/', $html, $yearMatches);
          preg_match_all('/<span class="gsc_g_al">(\d+)<\/span>/', $html, $countMatches);

          $years = $yearMatches[1] ?? [];
          $counts = $countMatches[1] ?? [];

          if ($years === [] || count($years) !== count($counts)) {
              return [];
          }

          return array_combine($years, array_map('intval', $counts));
      }

      /**
       * Extracts the publication rows (title, link, authors, venue, year, citations).
       *
       * @param string $html Full profile page
       *
       * @return array List of publications; missing text fields are '' and
       *               missing numeric fields are 0
       */
      private function extractPublications(string $html): array
      {
          preg_match_all('/<tr class="gsc_a_tr">(.*?)<\/tr>/s', $html, $rows);

          $publications = [];
          foreach ($rows[1] ?? [] as $row) {
              preg_match('/<a href="([^"]+)" class="gsc_a_at">(.*?)<\/a>/si', $row, $title);
              // First gray line is used as the authors, second as the venue.
              preg_match_all('/<div class="gs_gray">(.*?)<\/div>/s', $row, $authorsVenue);
              preg_match('/<span class="gsc_a_h gsc_a_hc gs_ibl">(.*?)<\/span>/s', $row, $year);
              preg_match('/<a[^>]*class="gsc_a_ac[^"]*"[^>]*>(.*?)<\/a>/s', $row, $citation);

              $publications[] = [
                  'titre' => isset($title[2]) ? $this->cleanString($title[2]) : '',
                  'link' => isset($title[1]) ? $this->absoluteUrl($title[1]) : '',
                  'authors' => isset($authorsVenue[1][0]) ? $this->cleanString($authorsVenue[1][0]) : '',
                  'venue' => isset($authorsVenue[1][1]) ? $this->cleanString($authorsVenue[1][1]) : '',
                  'year' => isset($year[1]) && is_numeric($year[1]) ? (int) $year[1] : 0,
                  // A non-numeric or empty citation cell casts to 0.
                  'citations' => isset($citation[1]) ? (int) $citation[1] : 0,
              ];
          }

          return $publications;
      }

      /**
       * Resolves a URL taken from an HTML attribute against the Scholar host.
       *
       * Entities in the attribute value (e.g. `&amp;`) are decoded. URLs that
       * already carry a scheme or are protocol-relative are returned as they
       * are; anything else is treated as a path on the Scholar host.
       *
       * @param string $rawUrl Attribute value as captured from the markup
       *
       * @return string Absolute URL
       */
      private function absoluteUrl(string $rawUrl): string
      {
          $url = html_entity_decode($rawUrl, ENT_QUOTES | ENT_HTML5, 'UTF-8');

          if (preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $url) === 1) {
              return $url;
          }

          return self::BASE_URL . '/' . ltrim($url, '/');
      }
  }
  65. ?>