forked from LukeMeyer1/Model_Reuse_CLI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUrl_Parser.py
More file actions
81 lines (64 loc) · 2.01 KB
/
Url_Parser.py
File metadata and controls
81 lines (64 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import argparse
from urllib.parse import urlparse
from pathlib import Path
import json
import time
from typing import Any, Dict
def parse_huggingface_url(url: str) -> dict:
    """
    Parses a Hugging Face URL into components.
    Example: https://huggingface.co/datasets/user/dataset_name

    Returns a dictionary with:
        "scheme":     the URL scheme (e.g. "https")
        "domain":     the network location (e.g. "huggingface.co")
        "path_parts": list of the non-empty path segments, in order;
                      an empty list when the URL has no path
    """
    parsed = urlparse(url)
    # Filter out empty segments so that a missing path, a bare "/", a
    # trailing slash, or a doubled slash never produces "" entries.
    parts = [segment for segment in parsed.path.split("/") if segment]
    return {
        "scheme": parsed.scheme,
        "domain": parsed.netloc,
        "path_parts": parts,
    }
def categorize_url(url: str) -> str:
    """Return category for given URL (MODEL, DATASET, CODE).

    Classification is based on the URL's host name rather than on a
    substring search, so text such as "huggingface.co" appearing in a path
    or query string of an unrelated site does not affect the result.

    Args:
        url: URL to classify; the scheme may be omitted
            (e.g. "huggingface.co/user/model").

    Returns:
        "DATASET" for huggingface.co URLs whose first path segment is
        "datasets", "MODEL" for any other huggingface.co URL, "CODE" for
        github.com URLs, and "UNKNOWN" for everything else (including
        input that cannot be parsed as a URL).
    """
    candidate = url.strip()
    try:
        parsed = urlparse(candidate)
        if not parsed.netloc:
            # urlparse only recognises a host when it is introduced by "//";
            # re-parse scheme-less input as a network-path reference.
            parsed = urlparse("//" + candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return "UNKNOWN"

    segments = [segment for segment in parsed.path.split("/") if segment]

    if host == "huggingface.co" or host.endswith(".huggingface.co"):
        if segments and segments[0] == "datasets":
            return "DATASET"
        return "MODEL"
    if host == "github.com" or host.endswith(".github.com"):
        return "CODE"
    return "UNKNOWN"
def process_urls(file_path: Path) -> int:
    """Process URLs from file and output NDJSON results for models.

    Reads the file line by line (one URL per line, blank lines ignored).
    For every URL that categorize_url() labels "MODEL", the last path
    segment is used as the model name, score_model() is called with it,
    and the result is printed to stdout as one JSON document per line.
    URLs in any other category are skipped.

    Args:
        file_path: Path to a UTF-8 text file of URLs.

    Returns:
        0 once the whole file has been processed.
        NOTE(review): the signature promises an int but nothing in this
        file consumes it; 0 is used as a success status — confirm that
        callers do not expect a count instead.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with file_path.open("r", encoding="utf-8") as f:
        for line in f:
            url = line.strip()
            if not url:
                continue
            if categorize_url(url) != "MODEL":
                continue
            # Strip trailing slashes first so ".../bert/" yields "bert"
            # instead of an empty name.
            name = url.rstrip("/").split("/")[-1]
            print(json.dumps(score_model(name)))
    return 0
def score_model(name: str) -> Dict[str, Any]:
    """Generate placeholder scores for a model.

    The metric values are hard-coded stand-ins, not real measurements.

    Args:
        name: Model identifier to attach to the record.

    Returns:
        A JSON-serialisable dict holding "name", "category" (always
        "MODEL"), each placeholder metric, and "latency": the wall-clock
        time spent in this call, in whole milliseconds.
    """
    start = time.time()
    # Fake scoring values
    scores = {
        "ramp_up_time": 0.7,
        "bus_factor": 0.8,
        "performance_claims": 0.5,
        "license": 1.0,
        "dataset_and_code_score": 0.6,
        "dataset_quality": 0.7,
        "code_quality": 0.8,
    }
    result: Dict[str, Any] = {"name": name, "category": "MODEL"}
    result.update(scores)
    # Elapsed time since `start`, so the timestamp taken above is actually
    # reported to the caller.
    result["latency"] = int((time.time() - start) * 1000)
    return result
def main():
    """Command-line entry point.

    Reads a single positional ``url`` argument from the command line,
    splits it with parse_huggingface_url(), and prints the resulting
    dictionary to stdout.
    """
    cli = argparse.ArgumentParser(
        description="Evaluate metrics for a Hugging Face model/dataset."
    )
    cli.add_argument("url", help="Hugging Face API URL")
    namespace = cli.parse_args()
    print(f"Parsed URL info: {parse_huggingface_url(namespace.url)}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()