mirror of
https://github.com/ow-mods/ow-mod-db.git
synced 2025-12-11 20:15:24 +01:00
get thumbnails from html images (#1100)
This commit is contained in:
parent
53f7617eae
commit
a8392abd0f
2
.github/workflows/pr-from-issue.yml
vendored
2
.github/workflows/pr-from-issue.yml
vendored
@ -28,7 +28,7 @@ jobs:
|
||||
|
||||
- uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: 8.6.10
|
||||
version: 9.4.0
|
||||
run_install: true
|
||||
|
||||
- name: Build
|
||||
|
||||
2
.github/workflows/update-releases.yml
vendored
2
.github/workflows/update-releases.yml
vendored
@ -44,7 +44,7 @@ jobs:
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 8.6.10
|
||||
version: 9.4.0
|
||||
run_install: true
|
||||
|
||||
- name: Build
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
"lint": "eslint src"
|
||||
},
|
||||
"dependencies": {
|
||||
"@actions/core": "^1.10.1",
|
||||
"@actions/core": "^1.11.1",
|
||||
"@google-analytics/data": "^3.3.0",
|
||||
"@octokit/action": "^4.0.10",
|
||||
"@octokit/core": "^4.2.4",
|
||||
@ -21,15 +21,16 @@
|
||||
"@octokit/plugin-throttling": "^4.3.2",
|
||||
"@octokit/request-error": "^3.0.3",
|
||||
"@octokit/types": "^9.3.2",
|
||||
"@types/commonmark": "^0.27.9",
|
||||
"@types/node": "^18.19.34",
|
||||
"@types/commonmark": "^0.27.10",
|
||||
"@types/node": "^18.19.120",
|
||||
"@types/sharp": "^0.31.1",
|
||||
"@typescript-eslint/eslint-plugin": "^5.62.0",
|
||||
"@typescript-eslint/parser": "^5.62.0",
|
||||
"commonmark": "^0.30.0",
|
||||
"eslint": "^8.57.0",
|
||||
"eslint": "^8.57.1",
|
||||
"htmlparser2": "^10.0.0",
|
||||
"node-fetch": "^3.3.2",
|
||||
"sharp": "^0.33.4",
|
||||
"sharp": "^0.33.5",
|
||||
"typescript": "^4.9.5"
|
||||
},
|
||||
"packageManager": "pnpm@9.4.0+sha512.f549b8a52c9d2b8536762f99c0722205efc5af913e77835dbccc3b0b0b2ca9e7dc8022b78062c17291c48e88749c70ce88eb5a74f1fa8c4bf5e18bb46c8bd83a"
|
||||
|
||||
3258
scripts/pnpm-lock.yaml
generated
3258
scripts/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,7 @@ import path from "path";
|
||||
import fetch from "node-fetch";
|
||||
import { getReadmeMarkdown } from "./readmes.js";
|
||||
import { GITHUB_RAW_CONTENT_URL } from "../constants.js";
|
||||
import { Parser as HtmlParser } from "htmlparser2";
|
||||
|
||||
export const thumbnailSize = {
|
||||
width: 450,
|
||||
@ -111,6 +112,32 @@ function tryGetUrl(url: string): URL | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Finds the `src` of the first `<img>` tag in an HTML fragment that actually
 * has a non-empty `src` attribute.
 *
 * @param html - Raw HTML text (e.g. the literal of an inline/block HTML node).
 * @returns The `src` value of the first usable `<img>` tag, or `null` when no
 *   such tag exists or the parser throws.
 */
function extractImageUrlFromHtml(html: string): string | null {
  try {
    let imageUrl: string | null = null;

    const parser = new HtmlParser(
      {
        onopentag(name, attribs) {
          // Once a URL has been captured, ignore any further tags.
          if (imageUrl !== null || name !== "img") return;

          // An <img> without a usable src (missing or empty) carries no URL;
          // keep scanning so a later <img> in the same fragment can match.
          const src = attribs.src;
          if (!src) return;

          imageUrl = src;
          // Nothing else is needed from this fragment, so stop emitting events.
          parser.pause();
        },
      },
      { decodeEntities: true }
    );

    parser.write(html);
    parser.end();

    return imageUrl;
  } catch (error) {
    console.error("Failed to parse HTML with htmlparser2:", error);
    return null;
  }
}
|
||||
|
||||
export async function getFirstImageUrl(
|
||||
readmeUrl: string | undefined
|
||||
): Promise<string | null> {
|
||||
@ -126,22 +153,34 @@ export async function getFirstImageUrl(
|
||||
let event;
|
||||
while ((event = walker.next())) {
|
||||
const node = event.node;
|
||||
if (node.type !== "image" || !node.destination) continue;
|
||||
|
||||
const imageUrl = tryGetUrl(node.destination);
|
||||
let imageUrl: string | null = null;
|
||||
|
||||
if (node.type === "image" && node.destination) {
|
||||
imageUrl = node.destination;
|
||||
} else if (
|
||||
(node.type === "html_inline" || node.type === "html_block") &&
|
||||
node.literal
|
||||
) {
|
||||
imageUrl = extractImageUrlFromHtml(node.literal);
|
||||
}
|
||||
|
||||
if (!imageUrl) continue;
|
||||
|
||||
const parsedImageUrl = tryGetUrl(imageUrl);
|
||||
|
||||
if (
|
||||
!imageUrl?.pathname.endsWith(".svg") &&
|
||||
imageUrl?.host !== "img.shields.io"
|
||||
!parsedImageUrl?.pathname.endsWith(".svg") &&
|
||||
parsedImageUrl?.host !== "img.shields.io"
|
||||
) {
|
||||
const fullUrl = imageUrl
|
||||
const fullUrl = parsedImageUrl
|
||||
? // GitHub allows embedding images that actually point to webpages on github.com, so we have to replace the URLs here
|
||||
node.destination.replace(
|
||||
imageUrl.replace(
|
||||
/^https?:\/\/github.com\/(.+)\/(.+)\/blob\/(.+)\//gm,
|
||||
`${GITHUB_RAW_CONTENT_URL}/$1/$2/$3/`
|
||||
)
|
||||
: // For relative URLs we also have to resolve them
|
||||
`${baseUrl}/${node.destination}`;
|
||||
`${baseUrl}/${imageUrl}`;
|
||||
|
||||
return fullUrl;
|
||||
}
|
||||
@ -176,7 +215,7 @@ async function downloadImage(
|
||||
const fullImagePath = getPath(relativeImagePath);
|
||||
|
||||
const image = await response.arrayBuffer();
|
||||
await fsp.writeFile(fullImagePath, Buffer.from(image));
|
||||
await fsp.writeFile(fullImagePath, new Uint8Array(image));
|
||||
|
||||
console.log(`Downloaded image from ${imageUrl} to ${fullImagePath}`);
|
||||
return fullImagePath;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user