diff --git a/docs/setupcfg.md b/docs/setupcfg.md new file mode 100644 index 00000000..4d1b26bd --- /dev/null +++ b/docs/setupcfg.md @@ -0,0 +1,52 @@ +The following metadata fields can be extracted from a setup.cfg file. +These fields are defined in the [setuptools declarative configuration specification](https://setuptools.pypa.io/en/latest/userguide/declarative_config.html), and are mapped according to the [CodeMeta crosswalk for Python Distutils](https://github.com/codemeta/codemeta/blob/master/crosswalks/Python%20Distutils%20(PyPI).csv). + +| Software metadata category | SOMEF metadata JSON path | SETUP.CFG metadata file field | +|--------------------------------|-----------------------------|----------------------------------------| +| author - value | author[i].result.value | metadata.author | +| author - email | author[i].result.email | metadata.author_email | +| author - name | author[i].result.name | metadata.author | +| code_repository | code_repository[i].result.value | project_urls (source, repository, code) | +| description | description[i].result.value | metadata.description | +| documentation | documentation[i].result.value | project_urls (Documentation, docs) | +| license - value | license[i].result.value | metadata.license | +| license - name | license[i].result.name | metadata.license *(1)* | +| license - spdx id | license[i].result.spdx_id | metadata.license if "spdx.org/licenses/" *(1)* | +| has_package_file | has_package_file[i].result.value | URL of the setup.cfg file | +| homepage | homepage[i].result.value | metadata.url or project_urls (Homepage) | +| keywords | keywords[i].result.value | metadata.keywords | +| package_id | package_id[i].result.value | metadata.name | +| requirements - value | requirements[i].result.value | options.install_requires or options.setup_requires *(2)* | +| requirements - name | requirements[i].result.name | options.install_requires or options.setup_requires -> name *(2)* | +| 
requirements - version | requirements[i].result.version | options.install_requires or options.setup_requires -> version *(2)* | +| runtime_platform - value | runtime_platform[i].result.value | options.python_requires -> "Python" + version *(3)* | +| runtime_platform - name | runtime_platform[i].result.name | options.python_requires -> "Python" *(3)* | +| runtime_platform - version | runtime_platform[i].result.version | options.python_requires *(3)* | +| version - value | version[i].result.value | metadata.version | +| version - tag | version[i].result.tag | metadata.version | + +--- + +*(1)* +- Look for the name and spdx_id in a local dictionary with all licenses. + +*(2)* +- Examples of requirements +``` +[options] +install_requires = + astropy + ctapipe >= 0.12 + h5py ~= 3.1.0 + +setup_requires = + setuptools >= 40.6.0 + wheel + +``` + +*(3)* +- Example: +``` +python_requires = >= 3.10.0 +``` diff --git a/docs/supported_languages.md b/docs/supported_languages.md index 9804da9e..96e8110d 100644 --- a/docs/supported_languages.md +++ b/docs/supported_languages.md @@ -12,7 +12,7 @@ To know more about the extraction details for each type of file, click on it. 
| JavaScript | [`package.json`](./packagejson.md), [`bower.json`](./bower.md) | | Julia | [`Project.toml`](./julia.md) | | PHP | [`composer.json`](./composer.md) | -| Python | [`setup.py`](./setuppy.md), [`pyproject.toml`](./pyprojecttoml.md), [`requirements.txt`](./requirementstxt.md) | +| Python | [`setup.py`](./setuppy.md), [`setup.cfg`](./setupcfg.md), [`pyproject.toml`](./pyprojecttoml.md), [`requirements.txt`](./requirementstxt.md) | | R | [`DESCRIPTION`](./description.md) | | Ruby | [`*.gemspec`](./gemspec.md) | | Rust | [`Cargo.toml`](./cargo.md) | diff --git a/docs/supported_metadata_files.md b/docs/supported_metadata_files.md index 7bdb9566..403e5803 100644 --- a/docs/supported_metadata_files.md +++ b/docs/supported_metadata_files.md @@ -21,6 +21,7 @@ SOMEF can extract metadata from a wide range of files commonly found in software | `pyproject.toml` | Python | Modern Python project configuration file used by tools like Poetry and Flit | [🔍](./pyprojecttoml.md)| [📄](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/)| [PEP 621](https://peps.python.org/pep-0621/)| [Example](https://github.com/KnowledgeCaptureAndDiscovery/somef/blob/master/pyproject.toml) | | `requirements.txt` | Python | Lists Python package dependencies | [🔍](./requirementstxt.md)| [📄](https://pip.pypa.io/en/stable/reference/requirements-file-format/)| [Latest](https://pip.pypa.io/en/stable/reference/requirements-file-format/)| [Example](https://github.com/oeg-upm/FAIR-Research-Object/blob/main/requirements.txt) | | `setup.py` | Python | Package file format used in python projects | [🔍](./setuppy.md)| [📄](https://setuptools.pypa.io/en/latest/references/keywords.html)| [v75.0.0](https://github.com/pypa/setuptools)| [Example](https://github.com/oeg-upm/soca/blob/main/setup.py) | +| `setup.cfg` | Python | Configuration file for setuptools used to define package metadata and options in a declarative way | [🔍](./setupcfg.md)| 
# Directive prefixes setuptools resolves at build time; the text after them is
# a module attribute or a file name, never the literal field value.
_VERSION_DIRECTIVES = ("attr:", "file:")
_DESCRIPTION_DIRECTIVES = ("file:",)

# project_urls labels (lower-cased) mapped onto SOMEF categories.
_DOCUMENTATION_LABELS = frozenset({"documentation", "docs", "doc"})
_REPOSITORY_LABELS = frozenset({"repository", "source", "code"})
_HOMEPAGE_LABELS = frozenset({"homepage"})

# Version comparison operators recognised in a requirement string.
_SPECIFIER_OPERATOR = re.compile(r'(>=|<=|==|!=|>|<|~=)')
# An extras group such as "[security,socks]" following a package name.
_EXTRAS = re.compile(r'\[[^\]]*\]')
# Beginnings of environment-marker variable names (python_version, sys_platform, ...).
_MARKER_PREFIXES = (
    "python_version", "python_full_version", "os_name", "sys_platform",
    "platform_", "implementation_", "extra",
)


def _section_as_dict(config, section):
    """Return the options of ``section`` as a dict, or ``{}`` if it is missing.

    Values are interpolated the way ``ConfigParser`` does by default. If one
    value cannot be interpolated (for example a bare ``%``), only that value
    falls back to its raw text, so a single bad field does not discard the
    whole section.
    """
    if not config.has_section(section):
        return {}
    values = {}
    for key in config.options(section):
        try:
            values[key] = config.get(section, key)
        except configparser.InterpolationError:
            values[key] = config.get(section, key, raw=True)
    return values


def _split_requirements(raw_value):
    """Split a requirements option into individual requirement strings.

    Multi-line ("dangling") values hold one requirement per line; single-line
    values are semicolon-separated. A single line of the form
    ``name; marker`` is kept whole, because there the semicolon introduces an
    environment marker rather than a second requirement.
    """
    if "\n" in raw_value:
        chunks = raw_value.splitlines()
    else:
        chunks = raw_value.split(";")
        if len(chunks) == 2 and chunks[1].strip().startswith(_MARKER_PREFIXES):
            chunks = [raw_value]
    return [chunk.strip() for chunk in chunks if chunk.strip()]


def _add_requirements(add, raw_value, dependency_type):
    """Record every requirement found in ``raw_value`` with ``dependency_type``."""
    for req in _split_requirements(raw_value):
        name, version = parse_dependency(req)
        if not name:
            continue
        add(
            constants.CAT_REQUIREMENTS,
            {
                "value": req,
                "name": name,
                "version": version,
                "type": constants.SOFTWARE_DEPENDENCY,
                "dependency_type": dependency_type,
                "dependency_resolver": "python",
            },
        )


def parse_setup_cfg(file_path, metadata_result: "Result", source):
    """
    Parser for setup.cfg files. Very similar to the one for pyproject.toml, but using configparser instead of toml library.

    Reads the ``[metadata]`` and ``[options]`` sections and records package id,
    version, description, author, license, keywords, homepage, requirements,
    runtime platform, documentation and code repository on ``metadata_result``.

    Args:
        file_path: path of the setup.cfg file to read.
        metadata_result: object exposing ``add_result``; it is updated in place.
        source: URL or path stored as the source of every result, and as the
            value of the has_package_file entry.

    Returns:
        The same ``metadata_result`` object. Parsing is best effort: any error
        is logged and the results gathered so far are returned.
    """

    def add(category, result):
        metadata_result.add_result(
            category, result, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, source
        )

    try:
        add(constants.CAT_HAS_PACKAGE_FILE, {"value": source, "type": constants.URL})

        config = configparser.ConfigParser()
        # ConfigParser.read() skips files it cannot open and only reports that
        # through its return value, so check it instead of failing silently.
        if not config.read(file_path, encoding="utf-8"):
            logging.error(f"Error parsing setup.cfg file {file_path}: file could not be read")
            return metadata_result

        metadata = _section_as_dict(config, "metadata")
        options = _section_as_dict(config, "options")

        if metadata.get("name"):
            add(constants.CAT_PACKAGE_ID, {"value": metadata["name"], "type": constants.STRING})

        version_value = metadata.get("version")
        # "attr:" / "file:" point at where the version lives; they are not a version.
        if version_value and not version_value.startswith(_VERSION_DIRECTIVES):
            add(
                constants.CAT_VERSION,
                {"value": version_value, "type": constants.RELEASE, "tag": version_value},
            )

        description = metadata.get("description")
        if description and not description.startswith(_DESCRIPTION_DIRECTIVES):
            add(constants.CAT_DESCRIPTION, {"value": description, "type": constants.STRING})

        if metadata.get("author") or metadata.get("author_email"):
            add(
                constants.CAT_AUTHORS,
                {
                    "name": metadata.get("author"),
                    "email": metadata.get("author_email"),
                    "type": constants.AGENT,
                    "value": metadata.get("author"),
                },
            )

        license_value = metadata.get("license")
        if license_value:
            license_info_spdx = detect_spdx_from_declared(license_value)
            if not license_info_spdx:
                license_info_spdx = detect_license_spdx(license_value, 'JSON')
            if license_info_spdx:
                license_data = {
                    "value": license_value,
                    "spdx_id": license_info_spdx.get('spdx_id'),
                    "name": license_info_spdx.get('name'),
                    "type": constants.LICENSE,
                }
            else:
                license_data = {"value": license_value, "type": constants.LICENSE}
            add(constants.CAT_LICENSE, license_data)

        for kw in re.split(r'[,\n]', metadata.get("keywords", "")):
            kw = kw.strip()
            if kw:
                add(constants.CAT_KEYWORDS, {"value": kw, "type": constants.STRING})

        homepage = metadata.get("url")
        if homepage:
            add(constants.CAT_HOMEPAGE, {"value": homepage, "type": constants.URL})

        if options.get("install_requires"):
            _add_requirements(
                add, options["install_requires"], constants.DEPENDENCY_TYPE_RUNTIME
            )

        if options.get("setup_requires"):
            _add_requirements(
                add, options["setup_requires"], constants.DEPENDENCY_TYPE_DEVELOPMENT
            )

        if options.get("python_requires"):
            add(
                constants.CAT_RUNTIME_PLATFORM,
                {
                    "value": f"Python{options['python_requires']}",
                    "name": "Python",
                    "version": options["python_requires"],
                    "type": constants.STRING,
                },
            )

        for line in metadata.get("project_urls", "").split('\n'):
            if '=' not in line:
                continue
            # Split on the first "=" only: the URL itself may contain "=".
            label, url_val = [part.strip() for part in line.split('=', 1)]
            if not url_val:
                continue
            label_lower = label.lower()

            if label_lower in _DOCUMENTATION_LABELS:
                add(constants.CAT_DOCUMENTATION, {"value": url_val, "type": constants.URL})
            elif label_lower in _REPOSITORY_LABELS:
                add(constants.CAT_CODE_REPOSITORY, {"value": url_val, "type": constants.URL})
            elif label_lower in _HOMEPAGE_LABELS and url_val != homepage:
                # Skip when it repeats metadata.url, which was already recorded.
                add(constants.CAT_HOMEPAGE, {"value": url_val, "type": constants.URL})

    except Exception as e:
        # Best-effort boundary: a malformed setup.cfg must not abort the caller.
        logging.error(f"Error parsing setup.cfg file {file_path}: {str(e)}")

    return metadata_result


def parse_dependency(dependency_str):
    """Parse a dependency string to extract name and version.

    The environment marker (text after ``;``), a direct-reference URL (text
    after ``@``) and extras (``[...]`` after the name) are not part of the
    name or the version and are dropped.

    Args:
        dependency_str: one requirement, e.g. ``"somef >= 0.9.5"``.

    Returns:
        ``(name, version)``. ``version`` keeps its comparison operator(s), e.g.
        ``">= 0.9.5"`` or ``">=1.0,<2.0"``, and is ``""`` when no version is
        given. Returns ``(None, None)`` for an empty or ``None`` input.
    """
    if not dependency_str:
        return None, None

    requirement = dependency_str.split(';', 1)[0]
    requirement = requirement.split('@', 1)[0]

    # Split at the first operator only, so "foo>=1.0,<2.0" keeps both bounds.
    parts = _SPECIFIER_OPERATOR.split(requirement, 1)
    name = _EXTRAS.sub('', parts[0]).strip()
    version = ''.join(parts[1:]).strip()
    version = re.sub(r'[\[\]]', '', version)

    return name, version
chardet import detect @@ -277,7 +278,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner (filename.lower() == "environment.yml" or filename.lower() == "environment.yaml") or \ (filename.lower() == ".zenodo.json") or \ (filename.lower() == "cargo.toml" and repo_relative_path == ".") or (filename.lower() == "composer.json" and repo_relative_path == ".") or \ - (filename == "Project.toml" or (filename.lower()== "publiccode.yml" or filename.lower()== "publiccode.yaml") and repo_relative_path == "."): + (filename == "Project.toml" or (filename.lower()== "publiccode.yml" or filename.lower()== "publiccode.yaml") and repo_relative_path == ".") or \ + filename.lower() == "setup.cfg": if filename.lower() in parsed_build_files and repo_relative_path != ".": logging.info(f"Ignoring secondary {filename} in {dir_path}") continue @@ -318,6 +320,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner metadata_result = parse_publiccode_file(os.path.join(dir_path, filename), metadata_result, build_file_url) if filename.lower() == "environment.yml" or filename.lower() == "environment.yaml": metadata_result = parse_conda_environment_file(os.path.join(dir_path, filename), metadata_result, build_file_url) + if filename.lower() == "setup.cfg": + metadata_result = parse_setup_cfg(os.path.join(dir_path, filename), metadata_result, build_file_url) # if filename.lower() == ".zenodo": # metadata_result = parse_zenodo_file(os.path.join(dir_path, filename), metadata_result, build_file_url) parsed_build_files.add(filename.lower()) diff --git a/src/somef/test/test_data/repositories/soca/README.md b/src/somef/test/test_data/repositories/soca/README.md new file mode 100644 index 00000000..d9716b91 --- /dev/null +++ b/src/somef/test/test_data/repositories/soca/README.md @@ -0,0 +1,301 @@ + +# Software Catalog Creator (soca) + +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10316197.svg)](https://doi.org/10.5281/zenodo.10316197) 
[![PyPI](https://badge.fury.io/py/soca.svg)](https://badge.fury.io/py/soca) [![Project Status: Active: The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) + +logo + +A python package that given an organization/user name, it will create a software catalog for browsing all repositories or just a single repository in a minimalist card. + +## Sample result + +Click [here](https://oeg-upm.github.io/soca/example/oeg-upm/index.html) to see an interactive example generated by using the `oeg-upm` organization as input for SOCA. + +Click [here](https://oeg-upm.github.io/soca/example/KnowledgeCaptureAndDiscoveryANDmintproject/index.html) to see an interactive example generated by using the `KnowledgeCaptureAndDiscovery` and `mintproject` organization as input for SOCA. + +Click [here](https://oeg-upm.github.io/soca/example/LinkedEarth/index.html) to see an interactive example generated by using the `LinkedEarth` organization as input for SOCA. + +Command used: + +```bash +soca fetch -i oeg-upm --org -o oeg-upm_repos -na +soca extract -i oeg-upm_repos -o oeg-upm_metadata -i4p +soca portal -i oeg-upm_metadata -o oeg-upm_portal +``` + +This is an example of a single card using the command: + +```bash +soca card -i https://github.com/oeg-upm/soca --png +``` + +soca-card + +## Requirements + +* Git +* Python 3.10 + +## Install from GitHub + +```bash +git clone https://github.com/oeg-upm/soca +cd soca +pip install -e . +``` +Highly recommended steps: + +```bash +somef configure +``` +Alternatively you may run the _installer.sh_ file which will also configure SOMEF, just edit it to it for your needs. + +And you will be asked to provide the following: + +* A GitHub authentication token [**optional, leave blank if not used**], which SOMEF uses to retrieve metadata from GitHub. If you don't include an authentication token, you can still use SOMEF. 
However, you may be limited to a series of requests per hour. For more information, see [https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) + +* The path to the trained classifiers (pickle files). If you have your own classifiers, you can provide them here. Otherwise, you can leave it blank + +### InfluxDB setup +For SOCA-Dash to work you will need to have a working version of influx 2.+ as well as grafana on your machine. SOCA-Dash needs two datasources and requires tokens to be able to access the influxDB datasources. +For more information please visit: https://docs.influxdata.com/influxdb/cloud/reference/cli/influx/auth/create/ + +To generate a token: +``` +influx auth create -o [organistation name] --all-access +``` + +SOCA-Dash requires influxQL datasource connection within grafana. +To ensure that influx 2.+ allows influxQL queries execute the following: +``` +influx v1 dbrp create --db [Bucket Name] -rp 0 --bucket-id [Bucket-id] +``` +You also need to create a v1 authentication: +``` +influx v1 auth create \ + --read-bucket [Bucket-id] \ + --write-bucket [Bucket-id] \ + --username admin + ``` +Once the influx has been setup and token created please ensure that SOCA is using said token. Now is a good time to execute the SOCA configure command. Or edit the ./_installer.sh_ file to your needs and executing the script. + +## Install from DockerFile + +```bash +git clone https://github.com/oeg-upm/soca +cd soca +``` +SOCA comes with a _installer.sh_ file which will automatically run the SOCA and SOMEF configure commands. Please edit it in accordance to your needs. +The _installer.sh_ file is necessary for the docker installation process + +``` +docker compose up +``` +Docker compose up starts the grafana and the influxdb within their own container. 
It also creates its own network: "socaNet" +You may want to list the containers you have/running: +``` +docker ps -a +``` +If you wish to access the influx container to generate a token you will first need to enter the container: +``` +docker run exec -it [influx container id] /bin/bash +``` +This starts an bash shell for the container. Remember, the container must be running at the time of executing this command. + +Once within the container you will need to generate a influx token. The following command will generate a token, you may change the token flags to your needs. Once this command returns a token please copy this into the _installer.sh_ file "databaseToken" +For more information please visit: https://docs.influxdata.com/influxdb/cloud/reference/cli/influx/auth/create/ + +To generate a token: +``` +influx auth create -o [organistation name] --access-all +``` + +SOCA-Dash requires influxQL datasource connection within grafana. +To ensure that influx 2.+ allows influxQL queries execute the following: +``` +influx v1 dbrp create --db [Bucket Name] -rp 0 --bucket-id [Bucket-id] +``` +You also need to create a v1 authentication: +``` +influx v1 auth create \ + --read-bucket [Bucket-id] \ + --write-bucket [Bucket-id] \ + --username admin + ``` +Once the influx has been setup and token copied to _installer.sh_ you may feel free to exit the container. + +Now we need to build the SOCA container, please ensure you are within the github directory when executing this command: +Remember, container_run.sh will create a summary for the oeg-upm group, modify to your needs and desires. More information can be found within USAGE + +```bash +docker build -t [INSERT_NAME] . 
+``` +Once the container has been built you may execute the SOCA container by running the following: + +```bash +docker run -it --network [network influx is running on] [container name] +``` + +## SOCA-Dash +Once the grafana, influx and soca have been set up correctly you can create a grafana dashboard by importing SOCA-Dash.json. This will allow you to visualise the Summary being uploaded to the influxDB. + +You will require to have created 2 influxDB datasources, one for flux queries and another for influxQL. The following are two examples on how to do so. + +fluxDatasource +For the token use the one previously created. + +For the influxQL follow the example provided below. + +influxQL_Datasource + +Here you can see you must create custom headers. Key being "Authorization" and the key being the same token used for the flux datasource. + +influxQLDatasource_login + +For the login please use the login created during the influx v1 auth create. For the rest add your org_name and bucket name. If you have used the SOCA defaults you can just copy the image + +## Usage + +```text +Usage: soca [OPTIONS] COMMAND [ARGS]... + + SOCA (Software Catalog Creator) + + Automatically generates a searchable portal for every repository of an + organization/s or user/s, which is easy to host. + + Usage: + + =. (Configure) Create configuration file for database etc + 1. (fetch) Fetch all repos from the desired organization/s + 2. (extract) Extract all metadata for every repo + 3. (portal) Generate a searchable portal for all the retrieved data + 4. (summary) Create a summary from the portal information + +Options: + -h, --help Show this message and exit. 
+ +Commands: + card Create a stand-alone card ready to be embedded in a website + configure This creates a ~/.soca/configure.ini file + extract Fetch and save metadata from introduced repos + portal Build a portal with a minimalist design + fetch Retrieve all organization/s or user/s repositories + summary Create a summary of good practices from portal card data +``` + +In order to use SOCA you will need to follow the next steps: + +### 1 - Fetch + +First thing to do is gather all repositories pointers that we want to use. We'll use the `fetch` command to ease this task. + +```text + -i, --input Organization or user name [required] + -o, --output Repository list output file [default: repos] + --org Extracting from a organization [default: True] + --user Extracting from a user [default: False] + -na, --not_archived Fetch only repos that are not archived + [default: False] + -nf, --not_forked Fetch only repos that are not forked [default: + False] + -nd, --not_disabled Fetch only repos that are not disabled + [default: False] + -h, --help Show this message and exit. +``` + +Is important to determine if the name belongs to a user or a organization by using the --user or --org flag, additionally you can specify an output path with the flag -o. + +Example: + +`soca fetch -i dakixr --user` +`soca fetch -i oeg-upm --org -o oeg-upm_repos --not_archived` + +This command also accepts a file as input (names separated by a new-line) for ingesting multiple names at a time. + +Example: +`soca fetch -i multiple-users.csv --user -o multiple-users_repos` +`soca fetch -i multiple-orgs.csv --org -o multiple-orgs_repos --not_archived` + +The output of this command is a csv file with all the repos of the selected users/orgs. +At this moment is a good time to clean this file (remove all repos that you don't want to use). +Note: you can add manually any other repository. + +### 2 - Extract + +Then we use the `extract` command to extract all the metadata required from each repository. 
If you want a more in-depth analysis on Python repositories use the flag -i4p or --inspect4py. + +```text + -i, --input Pointers to the repositories in csv format + [required] + -o, --output Dir where repositories metadata will be saved + -i4p, --inspect4py Use inspect4py to extract additional metadata from + Python repositories + -h, --help Show this message and exit. +``` + +Example: +`soca extract -i oeg-upm_repos -o oeg-upm_metadata` + +### 3 - Portal + +This is the last step in the pipeline. For building the portal we need to use the command `portal`, it will take as input the directory created by the command extract. + +```text + -i, --input + Dir repositories metadata in json format + [required] + -o, --output Dir where Software Catalog Portal will be + saved [default: portal] + -t, --title Portal's title [default: Software Catalog] + -fi, --favicon <path-icon.ico> Portal's favicon [default: img/soca- + logo.ico] + -h, --help Show this message and exit. +``` + +Example: +`soca portal -i oeg-upm_metadata -o dir_portal --title '[Portal's title]'` + +If everything worked fine now a new dir should have been created with all the assets and code to deploy this portal. + +### Summary +SOCA now allows to produce a summary json of a given cards_data.json created by the previous portal step. 
+User must decide whether or not to upload (default = false), or to create JSON file for output summary +For building the summary we need to use the command `summary` +``` + -i, --input <dir-json-metadata> + Dir repositories metadata in json format + [required] + -o, --output <path> Dir where Software Catalog Portal will be + saved [default: summary] + -U, --upload Will upload file to influxdb +``` +Example +`soca summary -i cards_data.json -o test '` + +### Create a stand-alone card + +SOCA also gives the option to create a single card in one of two different formats: + +* HTML +* PNG + +```text + -i, --input <url> Repository URL [required] + -o, --output <path> Output file where the html will be saved [default: + card] + --html Save card as html [default: True] + --png Save card as a png [default: False] + -h, --help Show this message and exit. +``` + +As input you will need a github repository url and use one of the flags: --html or --png. +Note: if no flag is used the default is html. + +Example: +`soca card -i https://github.com/oeg-upm/soca --html` +`soca card -i https://github.com/oeg-upm/soca --png` + +### Styling the portal + +In case you want to change the default style of the portal, SOCA decouples the .css files from the code-base. So in the resulting portal directory there will be two .css files are available for further tinkering and styling to everyone needs. 
diff --git a/src/somef/test/test_data/repositories/soca/codemeta.json b/src/somef/test/test_data/repositories/soca/codemeta.json new file mode 100644 index 00000000..befbce13 --- /dev/null +++ b/src/somef/test/test_data/repositories/soca/codemeta.json @@ -0,0 +1,75 @@ +{ + "@context": "https://w3id.org/codemeta/3.0", + "@type": [ + "SoftwareSourceCode", + "SoftwareApplication" + ], + "license": { + "name": "Apache License 2.0", + "url": "https://raw.githubusercontent.com/oeg-upm/soca/main/LICENSE", + "identifier": "https://spdx.org/licenses/Apache-2.0" + }, + "codeRepository": "https://github.com/oeg-upm/soca", + "issueTracker": "https://github.com/oeg-upm/soca/issues", + "dateCreated": "2021-09-01", + "dateModified": "2025-10-06", + "downloadUrl": "https://github.com/oeg-upm/soca/releases", + "name": "soca", + "logo": "https://raw.githubusercontent.com/oeg-upm/soca/main/doc/images/soca-logo.png", + "keywords": [ + "software", + "software-engineering", + "software-metadata" + ], + "programmingLanguage": [ + "Python", + "HTML", + "JavaScript", + "CSS" + ], + "softwareRequirements": [ + { + "name": "setuptools", + "@type": "SoftwareApplication", + "version": ">=42" + }, + { + "name": "wheel", + "@type": "SoftwareApplication" + } + ], + "continuousIntegration": "https://raw.githubusercontent.com/oeg-upm/soca/main/.github/workflows/pypi-publish.yml", + "releaseNotes": "This release is a minor update to fix issues and the integration with the dasboards. 
See an example here: (https://software.oeg.fi.upm.es).\r\n\r\nThis release contains the version of the tool that has been sent to the MSR 2024 conference", + "softwareVersion": "0.0.3", + "datePublished": "2023-03-30", + "buildInstructions": [ + "https://raw.githubusercontent.com/oeg-upm/soca/main/README.md" + ], + "author": [ + { + "@type": "Person", + "@id": "https://github.com/dgarijo", + "name": "Daniel Garijo" + }, + { + "@type": "Person", + "name": "Daniel Rodríguez Mariblanca" + }, + { + "@type": "Person", + "name": "Miguel Arroyo Márquez" + }, + { + "@type": "Person", + "name": "Esteban Gonzalez" + } + ], + "developmentStatus": "active", + "identifier": [ + "https://doi.org/10.5281/zenodo.10316197" + ], + "readme": "https://raw.githubusercontent.com/oeg-upm/soca/main/README.md", + "description": [ + "Software Catalog Creator. A repository that given an organization URL, it will create a software catalog for browsing all repositories" + ] +} \ No newline at end of file diff --git a/src/somef/test/test_data/repositories/soca/pyproject.toml b/src/somef/test/test_data/repositories/soca/pyproject.toml new file mode 100644 index 00000000..b5a3c468 --- /dev/null +++ b/src/somef/test/test_data/repositories/soca/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/src/somef/test/test_data/repositories/soca/setup.cfg b/src/somef/test/test_data/repositories/soca/setup.cfg new file mode 100644 index 00000000..fed9aba0 --- /dev/null +++ b/src/somef/test/test_data/repositories/soca/setup.cfg @@ -0,0 +1,45 @@ +[metadata] +name = soca +version = attr: soca.__version__ +author = Daniel Rodríguez Mariblanca, Miguel Arroyo Márquez, Esteban Gonzalez, Daniel Garijo +author_email = daniel.garijo@upm.es +description = A python package that given an organization/user name, it will create a software catalog for browsing all repositories or just a single 
class TestSetupCfgParser(unittest.TestCase):

    def test_issue_988(self):
        """
        Checks that setup.cfg files are parsed correctly.

        Uses the soca fixture, whose version is declared as
        ``attr: soca.__version__`` and must therefore not be reported.
        """
        # test_data_repositories already ends with a separator; os.path.join
        # avoids doubling it.
        setupcfg_file_path = os.path.join(test_data_repositories, "soca", "setup.cfg")

        metadata_result = parse_setup_cfg(setupcfg_file_path, Result(), setupcfg_file_path)
        results = metadata_result.results

        package_results = results.get(constants.CAT_HAS_PACKAGE_FILE, [])
        self.assertTrue(package_results, "No package file info found")
        self.assertEqual(package_results[0]["result"]["value"], setupcfg_file_path)
        self.assertEqual(package_results[0]["result"]["type"], constants.URL)

        id_results = results.get(constants.CAT_PACKAGE_ID, [])
        self.assertTrue(id_results, "No package id found")
        self.assertEqual(id_results[0]["result"]["value"], "soca")
        self.assertEqual(id_results[0]["result"]["type"], constants.STRING)

        # An "attr:" directive names where the version lives; it is not a version.
        self.assertEqual(results.get(constants.CAT_VERSION, []), [], "attr: directive reported as version")

        description_results = results.get(constants.CAT_DESCRIPTION, [])
        self.assertTrue(description_results, "No description found")
        self.assertTrue(description_results[0]["result"]["value"].startswith("A python package"))

        author_results = results.get(constants.CAT_AUTHORS, [])
        self.assertTrue(author_results, "No author found")
        self.assertEqual(author_results[0]["result"]["email"], "daniel.garijo@upm.es")
        self.assertEqual(author_results[0]["result"]["type"], constants.AGENT)

        homepage_results = results.get(constants.CAT_HOMEPAGE, [])
        self.assertTrue(homepage_results, "No homepage found")
        self.assertEqual(homepage_results[0]["result"]["value"], "https://github.com/oeg-upm/soca")
        self.assertEqual(homepage_results[0]["result"]["type"], constants.URL)

        requirements_results = results.get(constants.CAT_REQUIREMENTS, [])
        self.assertTrue(requirements_results, "No dependencies found")
        found_dep = any(
            req_result["result"].get("name") == "somef"
            and req_result["result"].get("dependency_type") == constants.DEPENDENCY_TYPE_RUNTIME
            for req_result in requirements_results
        )
        self.assertTrue(found_dep, "Dependency 'somef' not found")

        runtime_results = results.get(constants.CAT_RUNTIME_PLATFORM, [])
        self.assertTrue(runtime_results, "No runtime platform found")
        self.assertEqual(runtime_results[0]["result"]["name"], "Python")
        self.assertEqual(runtime_results[0]["result"]["version"], ">= 3.10.0")